pyexcel 簡單用法

安裝

pip install pyexcel

支持不同的格式(xls、xlsx等),需要安裝不同的插件。pyexcel不考慮字體、樣式、公式和圖表,當你更新並保存一個有格式的excel時,將會失去所有格式。插件參考;需要寫入帶格式的文件時,可參考xlsxwriter、openpyxl。

演示數據

原有數據表:tables.xlsx
sheet1
在這裏插入圖片描述
sheet2

在這裏插入圖片描述

操作sheet

讀取單元格的兩種方式:直接使用sheet[row, column],或者使用sheet['A1'](不區分大小寫,也可以寫成sheet['a1'])。

# The alias `p` is used throughout this snippet, so it must be imported
# here for the snippet to run on its own.
import pyexcel as p

# Load a sheet from the workbook.
sheet = p.get_sheet(file_name='tables.xlsx')
print(sheet["A1"])  # Product
# Overwrite the value of a single cell.
sheet["A1"] = "products"
# The same cell can be addressed by (row, column) index or by a
# case-insensitive cell name.
print(sheet[0, 0])  # products
print(sheet["a1"])  # products
# Print the whole sheet.
print(sheet.content)

+----------+-----------+-----------+-----------+-----------+-------+
| products | Quarter 1 | Quarter 2 | Quarter 3 | Quarter 4 | Year  |
+----------+-----------+-----------+-----------+-----------+-------+
| Apples   | 10000     | 5000      | 8000      | 6000      | 29000 |
+----------+-----------+-----------+-----------+-----------+-------+
| Pears    | 2000      | 3000      | 4000      | 5000      | 14000 |
+----------+-----------+-----------+-----------+-----------+-------+
| Bananas  | 6000      | 6000      | 6500      | 6000      | 24500 |
+----------+-----------+-----------+-----------+-----------+-------+
| Oranges  | 500       | 300       | 200       | 700       | 1700  |
+----------+-----------+-----------+-----------+-----------+-------+

整行、整列讀取:sheet.row[1]、sheet.column[1]

import pyexcel as p

sheet = p.get_sheet(file_name='tables.xlsx')
# 第二行 (索引從0開始的)
print(sheet.row[1])
# 第二列
print(sheet.column[1])
# 多少列
print(sheet.number_of_columns())
# 多少行
print(sheet.number_of_rows())

# 輸出
# ['Apples', 10000, 5000, 8000, 6000, 29000]
# ['Quarter 1', 10000, 2000, 6000, 500]
# 6
# 5

自定義列名代替索引

sheet = p.get_sheet(file_name='tables.xlsx')

# 自定義列名代替索引
sheet.name_columns_by_row(0)
print(sheet[1, "Quarter 1"])
print(sheet[1, 1])
print(sheet.column["Quarter 1"])
# 輸出
# 2000
# 2000
# [10000, 2000, 6000, 500]

玩轉數據操作

sheet = p.get_sheet(file_name="tables.xlsx", name_columns_by_row=0)
# 列名
print(list(sheet.colnames))
# 數據以字典格式讀出
print(sheet.to_dict())
print(dict(sheet.to_dict()))

# 按行獲取數據,不包括headers
print(list(sheet.rows()))
# 反序
print(list(sheet.rrows()))

# 按列獲取數據,不包括headers
print(list(sheet.columns()))
# 反序
print(list(sheet.rcolumns()))


# 數據扁平化,放在一個list中
print(list(sheet.enumerate()))
print(list(sheet.reverse()))
print(list(sheet.vertical()))
print(list(sheet.rvertical()))

輸出:

['Product', 'Quarter 1', 'Quarter 2', 'Quarter 3', 'Quarter 4', 'Year']
OrderedDict([('Product', ['Apples', 'Pears', 'Bananas', 'Oranges']), ('Quarter 1', [10000, 2000, 6000, 500]), ('Quarter 2', [5000, 3000, 6000, 300]), ('Quarter 3', [8000, 4000, 6500, 200]), ('Quarter 4', [6000, 5000, 6000, 700]), ('Year', [29000, 14000, 24500, 1700])])
{'Product': ['Apples', 'Pears', 'Bananas', 'Oranges'], 'Quarter 1': [10000, 2000, 6000, 500], 'Quarter 2': [5000, 3000, 6000, 300], 'Quarter 3': [8000, 4000, 6500, 200], 'Quarter 4': [6000, 5000, 6000, 700], 'Year': [29000, 14000, 24500, 1700]}
[['Apples', 10000, 5000, 8000, 6000, 29000], ['Pears', 2000, 3000, 4000, 5000, 14000], ['Bananas', 6000, 6000, 6500, 6000, 24500], ['Oranges', 500, 300, 200, 700, 1700]]
[['Oranges', 500, 300, 200, 700, 1700], ['Bananas', 6000, 6000, 6500, 6000, 24500], ['Pears', 2000, 3000, 4000, 5000, 14000], ['Apples', 10000, 5000, 8000, 6000, 29000]]
[['Apples', 'Pears', 'Bananas', 'Oranges'], [10000, 2000, 6000, 500], [5000, 3000, 6000, 300], [8000, 4000, 6500, 200], [6000, 5000, 6000, 700], [29000, 14000, 24500, 1700]]
[[29000, 14000, 24500, 1700], [6000, 5000, 6000, 700], [8000, 4000, 6500, 200], [5000, 3000, 6000, 300], [10000, 2000, 6000, 500], ['Apples', 'Pears', 'Bananas', 'Oranges']]
['Apples', 10000, 5000, 8000, 6000, 29000, 'Pears', 2000, 3000, 4000, 5000, 14000, 'Bananas', 6000, 6000, 6500, 6000, 24500, 'Oranges', 500, 300, 200, 700, 1700]
[1700, 700, 200, 300, 500, 'Oranges', 24500, 6000, 6500, 6000, 6000, 'Bananas', 14000, 5000, 4000, 3000, 2000, 'Pears', 29000, 6000, 8000, 5000, 10000, 'Apples']
['Apples', 'Pears', 'Bananas', 'Oranges', 10000, 2000, 6000, 500, 5000, 3000, 6000, 300, 8000, 4000, 6500, 200, 6000, 5000, 6000, 700, 29000, 14000, 24500, 1700]
[1700, 24500, 14000, 29000, 700, 6000, 5000, 6000, 200, 6500, 4000, 8000, 300, 6000, 3000, 5000, 500, 6000, 2000, 10000, 'Oranges', 'Bananas', 'Pears', 'Apples']

sheet = p.get_sheet(file_name="tables.xlsx", name_columns_by_row=0)
# 更新整個列
sheet.column["Product"] = [11, 12, 13, 14]

# 刪除整列,可以添加多個column["Product","Quarter 2"]
del sheet.column["Product"]

# 添加多列
extra_data = [["Column 4", "Column 5"], [10, 13],
              [11, 14], [12, 15], [12, 15]]
sheet2 = p.Sheet(extra_data)
sheet.column += sheet2
print(sheet.content)
# 輸出
+-----------+-----------+-----------+-----------+-------+----------+----------+
| Quarter 1 | Quarter 2 | Quarter 3 | Quarter 4 | Year  | Column 4 | Column 5 |
+===========+===========+===========+===========+=======+==========+==========+
| 10000     | 5000      | 8000      | 6000      | 29000 | 10       | 13       |
+-----------+-----------+-----------+-----------+-------+----------+----------+
| 2000      | 3000      | 4000      | 5000      | 14000 | 11       | 14       |
+-----------+-----------+-----------+-----------+-------+----------+----------+
| 6000      | 6000      | 6500      | 6000      | 24500 | 12       | 15       |
+-----------+-----------+-----------+-----------+-------+----------+----------+
| 500       | 300       | 200       | 700       | 1700  | 12       | 15       |
+-----------+-----------+-----------+-----------+-------+----------+----------+

處理數據的空格等其他符號

import pyexcel as p

data = [
    ["        Version", "        Comments", "       Author  "],
    ["  v0.0.1       ", " Release versions", "            Eda"],
    ["  v0.0.2  ", "Useful updates    ", "   Freud"]
]
sheet = p.Sheet(data)
print(sheet.content)


def cleanse_data(v):
    """Return the cell text *v* with its surrounding padding removed.

    HTML ``&nbsp;`` entities and non-breaking spaces (U+00A0) are first
    turned into ordinary spaces, then leading and trailing whitespace is
    stripped. Spaces inside the text are kept, so ``" Release versions"``
    becomes ``"Release versions"`` rather than ``"Releaseversions"``.

    Args:
        v: A string cell value.

    Returns:
        The cleaned string.
    """
    # Normalise both spellings of a non-breaking space to a plain space so
    # that padding is removed by strip() while interior text is untouched.
    v = v.replace("&nbsp;", " ").replace("\u00a0", " ")
    # A single strip() trims both ends; a preceding rstrip() adds nothing.
    return v.strip()


sheet.map(cleanse_data)
print(sheet.content)
# 輸出
+-----------------+------------------------------+----------------------+
|         Version |         Comments             |        Author   |
+-----------------+------------------------------+----------------------+
|   v0.0.1        |  Release versions            |             Eda |
+-----------------+------------------------------+----------------------+
|   v0.0.2   | Useful updates     |    Freud        |
+-----------------+------------------------------+----------------------+
+---------+------------------+--------+
| Version | Comments         | Author |
+---------+------------------+--------+
| v0.0.1  | Release versions | Eda    |
+---------+------------------+--------+
| v0.0.2  | Useful updates   | Freud  |
+---------+------------------+--------+

過濾掉爲空的行

sheet = p.Sheet([[1, 2, 3], ['', '', ''], ['', '', ''], [1, 2, 3]])


def filter_row(row_index, row):
    """Tell whether *row* holds nothing but empty-string cells.

    Args:
        row_index: Position of the row; accepted but not used here.
        row: Iterable of cell values.

    Returns:
        True when no cell differs from ``''`` (an empty row included),
        otherwise False.
    """
    has_content = any(cell != '' for cell in row)
    return not has_content


del sheet.row[filter_row]
print(sheet)
# 輸出
pyexcel sheet:
+---+---+---+
| 1 | 2 | 3 |
+---+---+---+
| 1 | 2 | 3 |
+---+---+---+

過濾奇數行,並打印

sheet = p.get_sheet(file_name="tables.xlsx", name_columns_by_row=0)
print(sheet.content)
# 過濾奇數行
sheet.filter(row_indices=[0, 2])
print(sheet.content)
# 輸出
+---------+-----------+-----------+-----------+-----------+-------+
| Product | Quarter 1 | Quarter 2 | Quarter 3 | Quarter 4 | Year  |
+=========+===========+===========+===========+===========+=======+
| Apples  | 10000     | 5000      | 8000      | 6000      | 29000 |
+---------+-----------+-----------+-----------+-----------+-------+
| Pears   | 2000      | 3000      | 4000      | 5000      | 14000 |
+---------+-----------+-----------+-----------+-----------+-------+
| Bananas | 6000      | 6000      | 6500      | 6000      | 24500 |
+---------+-----------+-----------+-----------+-----------+-------+
| Oranges | 500       | 300       | 200       | 700       | 1700  |
+---------+-----------+-----------+-----------+-----------+-------+
+---------+-----------+-----------+-----------+-----------+-------+
| Product | Quarter 1 | Quarter 2 | Quarter 3 | Quarter 4 | Year  |
+=========+===========+===========+===========+===========+=======+
| Bananas | 6000      | 6000      | 6500      | 6000      | 24500 |
+---------+-----------+-----------+-----------+-----------+-------+
| Oranges | 500       | 300       | 200       | 700       | 1700  |
+---------+-----------+-----------+-----------+-----------+-------+

操作book

通過book獲取單元格值:book[sheet_index][row, column] 或 book["sheet_name"][row, column]
讀寫多個sheet

import pyexcel as p

# 寫book
content = {
    'Sheet_1':
        [
            [1.0, 2.0, 3.0],
            [4.0, 5.0, 6.0],
            [7.0, 8.0, 9.0]
        ],
    'Sheet_2':
        [
            ['X', 'Y', 'Z'],
            [1.0, 2.0, 3.0],
            [4.0, 5.0, 6.0]
        ],
    'Sheet_3':
        [
            ['O', 'P', 'Q'],
            [3.0, 2.0, 1.0],
            [4.0, 3.0, 2.0]
        ]
}
book = p.get_book(bookdict=content)
book.save_as("output.xlsx")
# 讀book
book = p.get_book(file_name="output.xlsx")
sheets = book.to_dict()
for name in sheets.keys():
    print(name)

print(book.sheet_by_name("Sheet_1"))
print(book.Sheet_1)
print(book["Sheet_1"])

合併多個sheet成爲一個sheet

import glob

import pyexcel

# Name of the combined file; it also matches "*.csv", so it has to be
# skipped when the script is run again in the same directory.
OUTPUT_FILE = "merged.csv"

merged = pyexcel.Sheet()
# glob returns names in arbitrary order; sort them so the row order of the
# merged file is reproducible.
for file in sorted(glob.glob("*.csv")):
    if file == OUTPUT_FILE:
        continue
    # Append every row of this file to the combined sheet.
    merged.row += pyexcel.get_sheet(file_name=file)
merged.save_as(OUTPUT_FILE)

假設你有兩個excel表格,每個有三張sheet表。你可以合併它們,得到一個新的Excel表;也可以只合併其中的某幾張sheet:

# The snippet refers to the package by its full name, so import it here.
import pyexcel

book1 = pyexcel.get_book(file_name="book1.xls")
book2 = pyexcel.get_book(file_name="book2.xlsx")

# The four statements below are independent alternatives shown side by
# side; each one rebinds merged_book, so only the last result is kept.

# Whole book + whole book.
merged_book = book1 + book2
# Single sheet + single sheet.
merged_book = book1["Sheet 1"] + book2["Sheet 2"]
# Single sheet + whole book.
merged_book = book1["Sheet 1"] + book2
# Whole book + single sheet.
merged_book = book1 + book2["Sheet 2"]

讀取不同的數據類型

1. 獲取一個字典列表

import pyexcel as p

records = p.get_records(file_name="tables.xlsx")
for i in records:
    print(dict(i))

輸出:

{'Product': 'Apples', 'Quarter 1': 10000, 'Quarter 2': 5000, 'Quarter 3': 8000, 'Quarter 4': 6000, 'Year': 29000}
{'Product': 'Pears', 'Quarter 1': 2000, 'Quarter 2': 3000, 'Quarter 3': 4000, 'Quarter 4': 5000, 'Year': 14000}
{'Product': 'Bananas', 'Quarter 1': 6000, 'Quarter 2': 6000, 'Quarter 3': 6500, 'Quarter 4': 6000, 'Year': 24500}
{'Product': 'Oranges', 'Quarter 1': 500, 'Quarter 2': 300, 'Quarter 3': 200, 'Quarter 4': 700, 'Year': 1700}

2. 獲取一個列表

import pyexcel as p

my_array = p.get_array(file_name="tables.xlsx", start_row=1)
print(my_array)

輸出:

[['Apples', 10000, 5000, 8000, 6000, 29000], ['Pears', 2000, 3000, 4000, 5000, 14000], ['Bananas', 6000, 6000, 6500, 6000, 24500], ['Oranges', 500, 300, 200, 700, 1700]]

3. 獲取一個字典

import pyexcel as p

# name_columns_by_row=0  將第一行設爲頭
my_dict = p.get_dict(file_name="tables.xlsx", name_columns_by_row=0)
print(dict(my_dict))

輸出:

{'Product': ['Apples', 'Pears', 'Bananas', 'Oranges'], 'Quarter 1': [10000, 2000, 6000, 500], 'Quarter 2': [5000, 3000, 6000, 300], 'Quarter 3': [8000, 4000, 6500, 200], 'Quarter 4': [6000, 5000, 6000, 700], 'Year': [29000, 14000, 24500, 1700]}

4. 獲取所有sheet的字典

import pyexcel as p

book_dict = p.get_book_dict(file_name="tables.xlsx")
for key, item in book_dict.items():
    print({key: item})

輸出:

{'Sheet1': [['Product', 'Quarter 1', 'Quarter 2', 'Quarter 3', 'Quarter 4', 'Year'], ['Apples', 10000, 5000, 8000, 6000, 29000], ['Pears', 2000, 3000, 4000, 5000, 14000], ['Bananas', 6000, 6000, 6500, 6000, 24500], ['Oranges', 500, 300, 200, 700, 1700]]}
{'Sheet2': [['Product', 'Quarter 1', 'Quarter 2', 'Quarter 3', 'Quarter 4', 'Year'], ['Apples', 10000, 5000, 8000, 6000, 29000], ['Pears', 2000, 3000, 4000, 5000, 14000], ['Bananas', 6000, 6000, 6500, 6000, 24500], ['Oranges', 500, 300, 200, 700, 1700], ['Totals', 18500, 14300, 18700, 17700, 69200]]}


一行寫入(不同的數據類型)

1. 列表(array)

import pyexcel as p

data = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
p.save_as(array=data, dest_file_name="example.xlsx")
# 驗證寫入
results = p.get_sheet(file_name="example.xlsx")
print(results)

#pyexcel_sheet1:
#+---+---+---+
#| 1 | 2 | 3 |
#+---+---+---+
#| 4 | 5 | 6 |
#+---+---+---+
#| 7 | 8 | 9 |
#+---+---+---+

在這裏插入圖片描述
也可保存CSV文件:

p.save_as(array=data, dest_file_name="example.csv", dest_delimiter=':')
# 驗證寫入
with open("example.csv") as f:
    for line in f.readlines():
        print(line.rstrip())
# 1:2:3
# 4:5:6
# 7:8:9

2. 字典列表(records)

records = [
    {"year": 1903, "country": "Germany", "speed": "206.7km/h"},
    {"year": 1964, "country": "Japan", "speed": "210km/h"},
    {"year": 2008, "country": "China", "speed": "350km/h"}
]
p.save_as(records=records, dest_file_name='high_speed_rail.xlsx')

在這裏插入圖片描述

3. 單個鍵值對字典(adict)

henley_on_thames_facts = {
    "area": "5.58 square meters",
    "population": "11,619",
    "civial parish": "Henley-on-Thames",
    "latitude": "51.536",
    "longitude": "-0.898"
}
p.save_as(adict=henley_on_thames_facts, dest_file_name='henley.xlsx')

在這裏插入圖片描述

4. 一個單維數組字典(adict)

ccs_insights = {
    "year": ["2017", "2018", "2019", "2020", "2021"],
    "smart phones": [1.53, 1.64, 1.74, 1.82, 1.90],
    "feature phones": [0.46, 0.38, 0.30, 0.23, 0.17]
}
p.save_as(adict=ccs_insights, dest_file_name='ccs.xlsx')

在這裏插入圖片描述

5. 寫入多個sheet(bookdict)

a_dictionary_of_two_dimensional_arrays = {
    'Sheet 1':
        [
            [1.0, 2.0, 3.0],
            [4.0, 5.0, 6.0],
            [7.0, 8.0, 9.0]
        ],
    'Sheet 2':
        [
            ['X', 'Y', 'Z'],
            [1.0, 2.0, 3.0],
            [4.0, 5.0, 6.0]
        ],
    'Sheet 3':
        [
            ['O', 'P', 'Q'],
            [3.0, 2.0, 1.0],
            [4.0, 3.0, 2.0]
        ]
}
# 如果想保持有序,則需要傳入有序的字典
# data = OrderedDict()
# data.update({"Sheet 2": a_dictionary_of_two_dimensional_arrays['Sheet 2']})
# data.update({"Sheet 1": a_dictionary_of_two_dimensional_arrays['Sheet 1']})
# data.update({"Sheet 3": a_dictionary_of_two_dimensional_arrays['Sheet 3']})
# p.save_book_as(bookdict=data, dest_file_name="book.xlsx")

p.save_book_as(
    bookdict=a_dictionary_of_two_dimensional_arrays,
    dest_file_name="book.xlsx"
)

將會看到book.xlsx中生成了Sheet 1、Sheet 2、Sheet 3三張表
在這裏插入圖片描述

一行轉換文件格式

# xls 轉換爲csv
p.save_as(file_name="birth.xls", dest_file_name="birth.csv")
# xls 轉爲後綴爲xlsx
p.save_as(file_name="birth.xls", dest_file_name="birth.xlsx") # change the file extension

合併文件

將目錄中的所有excel文件合併到一個文件中,每個文件成爲一個工作表

from pyexcel.cookbook import merge_all_to_a_book
import glob
merge_all_to_a_book(glob.glob("*.xlsx"), "output.xlsx")

在這裏插入圖片描述

拆分爲單個Excel文件

output.xlsx中有多個sheet,可拆分爲多個xlsx文件,命名規則是:源文件中的sheet名 + "_" + "split_output.xlsx"。例如output.xlsx中有sheet1、sheet2,則生成sheet1_split_output.xlsx和sheet2_split_output.xlsx

from pyexcel.cookbook import split_a_book
split_a_book("output.xlsx", "split_output.xlsx")

如果你只想抽出當中一個作爲單獨的文件則用:

from pyexcel.cookbook import extract_a_sheet_from_a_book
extract_a_sheet_from_a_book("output.xlsx", "Sheet 1", "split_output.xls")

處理大文件

處理大文件時,需要多加一行p.free_resources(),其他用法類似上邊介紹的,只是要用的方法前加一個i
例如:

# 注意比之前的方法多一個i
records = p.iget_records(file_name="your_file.xls")
# 每次後邊跟一行,釋放內存
p.free_resources()

處理數據庫數據

將數據庫表中的數據保存在Excel中(注意:在運行之前一定要先創建好Category.xlsx,並且添加好表頭,否則會報錯!)

from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Integer, Column, String
from sqlalchemy.orm import sessionmaker
import pyexcel as p


# 創建連接
engine = create_engine(r'sqlite:///test.db')
# 聲明映射
Base = declarative_base()
# 創建會話
Session = sessionmaker(bind=engine)


class Book(Base):
    """ORM model mapped to the ``books`` table."""

    __tablename__ = 'books'

    id = Column(Integer, autoincrement=True, primary_key=True, nullable=False)
    name = Column(String, doc="書名", comment="書名")
    # presumably the id of a Category row; no ForeignKey is declared here
    category_id = Column(Integer, nullable=False)

    def to_json(self):
        """Return the instance attributes as a plain dict.

        The result is a new dict that leaves out ``_sa_instance_state``.
        The key must not be deleted from ``self.__dict__`` itself: that
        attribute is SQLAlchemy's per-instance bookkeeping object, and
        removing it from the live instance damages the ORM object.

        Returns:
            dict: Attribute names mapped to their current values.
        """
        return {
            key: value
            for key, value in self.__dict__.items()
            if key != "_sa_instance_state"
        }

    def to_dict(self):
        """Return ``{column name: value}`` for every column of the table.

        A column whose attribute is missing on the instance maps to None.
        """
        return {c.name: getattr(self, c.name, None)
                for c in self.__table__.columns}


class Category(Base):
    """ORM model mapped to the ``categories`` table."""

    __tablename__ = "categories"
    id = Column(Integer, autoincrement=True, primary_key=True, nullable=False)
    name = Column(String, doc="分類", comment="分類")

    def to_json(self):
        """Return the instance attributes as a plain dict.

        The result is a new dict that leaves out ``_sa_instance_state``.
        The key must not be deleted from ``self.__dict__`` itself: that
        attribute is SQLAlchemy's per-instance bookkeeping object, and
        removing it from the live instance damages the ORM object.

        Returns:
            dict: Attribute names mapped to their current values.
        """
        return {
            key: value
            for key, value in self.__dict__.items()
            if key != "_sa_instance_state"
        }

    def to_dict(self):
        """Return ``{column name: value}`` for every column of the table.

        A column whose attribute is missing on the instance maps to None.
        """
        return {c.name: getattr(self, c.name, None)
                for c in self.__table__.columns}


# Create every mapped table that does not exist yet.
Base.metadata.create_all(engine)
session = Session()

# Sample Category objects; they are only constructed here, not persisted.
category = Category(**{"name": "歷史"})
category1 = Category(**{"name": "軍事"})
category2 = Category(**{"name": "小說"})
category3 = Category(**{"name": "傳記"})

# Sample Book objects; likewise only constructed, not persisted.
# NOTE(review): category_id is passed as a string although the column is
# declared Integer - confirm this is intended.
book = Book(**{"name": "水滸傳", "category_id": "1"})
book1 = Book(**{"name": "西遊記", "category_id": "2"})
book2 = Book(**{"name": "紅樓夢", "category_id": "3"})
book3 = Book(**{"name": "三國演義", "category_id": "2"})
book4 = Book(**{"name": "崑崙", "category_id": "1"})

# Persisting the sample objects is disabled; uncomment to add them.
# session.add_all([category, category1, category2, category3,book,book1,book2,book3,book4])
# session.flush()
# session.commit()

# Takes Category.xlsx as the source and the Category table (through the
# session) as the destination, with row 0 used as the column names.
# presumably this is why the file and its header row must exist beforehand
p.save_as(file_name="Category.xlsx", name_columns_by_row=0, dest_session=session, dest_table=Category)
# Read the Category table back as a sheet and print it.
sheet = p.get_sheet(session=session, table=Category)
print(sheet)
# Write the sheet to a file.
sheet.save_as("Category.xlsx")

參考

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章