安裝
pip install pyexcel
支持不同的格式(xls,xlsx等),需要安裝不同的插件,pyexcel不考慮字體、樣式、公式和圖表。當你更新保存一個有格式的excel時,將會失去所有格式。 插件參考,需要寫有格式的參考xlsxwriter、openpyxl
演示數據
原有數據表:tables.xlsx
sheet1
sheet2
操作sheet
讀取單元格有兩種方式:直接使用sheet[row, column],或者使用sheet['A1'](不區分大小寫,也可以寫成sheet['a1'])。
import pyexcel as p

# Load the workbook as a single sheet object.
sheet = p.get_sheet(file_name='tables.xlsx')
print(sheet["A1"])  # Product
# Overwrite the value of one cell.
sheet["A1"] = "products"
print(sheet[0, 0])  # products
print(sheet["a1"])  # products
# Print the whole sheet.
print(sheet.content)
+----------+-----------+-----------+-----------+-----------+-------+
| products | Quarter 1 | Quarter 2 | Quarter 3 | Quarter 4 | Year |
+----------+-----------+-----------+-----------+-----------+-------+
| Apples | 10000 | 5000 | 8000 | 6000 | 29000 |
+----------+-----------+-----------+-----------+-----------+-------+
| Pears | 2000 | 3000 | 4000 | 5000 | 14000 |
+----------+-----------+-----------+-----------+-----------+-------+
| Bananas | 6000 | 6000 | 6500 | 6000 | 24500 |
+----------+-----------+-----------+-----------+-----------+-------+
| Oranges | 500 | 300 | 200 | 700 | 1700 |
+----------+-----------+-----------+-----------+-----------+-------+
整行、整列讀取:sheet.row[1]、sheet.column[1]
import pyexcel as p
sheet = p.get_sheet(file_name='tables.xlsx')
# 第二行 (索引從0開始的)
print(sheet.row[1])
# 第二列
print(sheet.column[1])
# 多少列
print(sheet.number_of_columns())
# 多少行
print(sheet.number_of_rows())
# 輸出
# ['Apples', 10000, 5000, 8000, 6000, 29000]
# ['Quarter 1', 10000, 2000, 6000, 500]
# 6
# 5
自定義列名代替索引
sheet = p.get_sheet(file_name='tables.xlsx')
# 自定義列名代替索引
sheet.name_columns_by_row(0)
print(sheet[1, "Quarter 1"])
print(sheet[1, 1])
print(sheet.column["Quarter 1"])
# 輸出
# 2000
# 2000
# [10000, 2000, 6000, 500]
玩轉數據操作
sheet = p.get_sheet(file_name="tables.xlsx", name_columns_by_row=0)
# 列名
print(list(sheet.colnames))
# 數據以字典格式讀出
print(sheet.to_dict())
print(dict(sheet.to_dict()))
# 按行獲取數據,不包括headers
print(list(sheet.rows()))
# 反序
print(list(sheet.rrows()))
# 按列獲取數據,不包括headers
print(list(sheet.columns()))
# 反序
print(list(sheet.rcolumns()))
# 數據扁平化,放在一個list中
print(list(sheet.enumerate()))
print(list(sheet.reverse()))
print(list(sheet.vertical()))
print(list(sheet.rvertical()))
輸出:
['Product', 'Quarter 1', 'Quarter 2', 'Quarter 3', 'Quarter 4', 'Year']
OrderedDict([('Product', ['Apples', 'Pears', 'Bananas', 'Oranges']), ('Quarter 1', [10000, 2000, 6000, 500]), ('Quarter 2', [5000, 3000, 6000, 300]), ('Quarter 3', [8000, 4000, 6500, 200]), ('Quarter 4', [6000, 5000, 6000, 700]), ('Year', [29000, 14000, 24500, 1700])])
{'Product': ['Apples', 'Pears', 'Bananas', 'Oranges'], 'Quarter 1': [10000, 2000, 6000, 500], 'Quarter 2': [5000, 3000, 6000, 300], 'Quarter 3': [8000, 4000, 6500, 200], 'Quarter 4': [6000, 5000, 6000, 700], 'Year': [29000, 14000, 24500, 1700]}
[['Apples', 10000, 5000, 8000, 6000, 29000], ['Pears', 2000, 3000, 4000, 5000, 14000], ['Bananas', 6000, 6000, 6500, 6000, 24500], ['Oranges', 500, 300, 200, 700, 1700]]
[['Oranges', 500, 300, 200, 700, 1700], ['Bananas', 6000, 6000, 6500, 6000, 24500], ['Pears', 2000, 3000, 4000, 5000, 14000], ['Apples', 10000, 5000, 8000, 6000, 29000]]
[['Apples', 'Pears', 'Bananas', 'Oranges'], [10000, 2000, 6000, 500], [5000, 3000, 6000, 300], [8000, 4000, 6500, 200], [6000, 5000, 6000, 700], [29000, 14000, 24500, 1700]]
[[29000, 14000, 24500, 1700], [6000, 5000, 6000, 700], [8000, 4000, 6500, 200], [5000, 3000, 6000, 300], [10000, 2000, 6000, 500], ['Apples', 'Pears', 'Bananas', 'Oranges']]
['Apples', 10000, 5000, 8000, 6000, 29000, 'Pears', 2000, 3000, 4000, 5000, 14000, 'Bananas', 6000, 6000, 6500, 6000, 24500, 'Oranges', 500, 300, 200, 700, 1700]
[1700, 700, 200, 300, 500, 'Oranges', 24500, 6000, 6500, 6000, 6000, 'Bananas', 14000, 5000, 4000, 3000, 2000, 'Pears', 29000, 6000, 8000, 5000, 10000, 'Apples']
['Apples', 'Pears', 'Bananas', 'Oranges', 10000, 2000, 6000, 500, 5000, 3000, 6000, 300, 8000, 4000, 6500, 200, 6000, 5000, 6000, 700, 29000, 14000, 24500, 1700]
[1700, 24500, 14000, 29000, 700, 6000, 5000, 6000, 200, 6500, 4000, 8000, 300, 6000, 3000, 5000, 500, 6000, 2000, 10000, 'Oranges', 'Bananas', 'Pears', 'Apples']
sheet = p.get_sheet(file_name="tables.xlsx", name_columns_by_row=0)
# 更新整個列
sheet.column["Product"] = [11, 12, 13, 14]
# 刪除整列,可以添加多個column["Product","Quarter 2"]
del sheet.column["Product"]
# 添加多列
extra_data = [["Column 4", "Column 5"], [10, 13],
[11, 14], [12, 15], [12, 15]]
sheet2 = p.Sheet(extra_data)
sheet.column += sheet2
print(sheet.content)
# 輸出
+-----------+-----------+-----------+-----------+-------+----------+----------+
| Quarter 1 | Quarter 2 | Quarter 3 | Quarter 4 | Year | Column 4 | Column 5 |
+===========+===========+===========+===========+=======+==========+==========+
| 10000 | 5000 | 8000 | 6000 | 29000 | 10 | 13 |
+-----------+-----------+-----------+-----------+-------+----------+----------+
| 2000 | 3000 | 4000 | 5000 | 14000 | 11 | 14 |
+-----------+-----------+-----------+-----------+-------+----------+----------+
| 6000 | 6000 | 6500 | 6000 | 24500 | 12 | 15 |
+-----------+-----------+-----------+-----------+-------+----------+----------+
| 500 | 300 | 200 | 700 | 1700 | 12 | 15 |
+-----------+-----------+-----------+-----------+-------+----------+----------+
處理數據的空格等其他符號
import pyexcel as p
data = [
[" Version", " Comments", " Author "],
[" v0.0.1 ", " Release versions", " Eda"],
[" v0.0.2 ", "Useful updates ", " Freud"]
]
sheet = p.Sheet(data)
print(sheet.content)
def cleanse_data(v):
    """Return cell value *v* without ``&nbsp;`` entities or surrounding whitespace.

    Non-string values are returned unchanged.
    """
    if not isinstance(v, str):
        return v
    # Drop only the literal "&nbsp;" entity: replacing " " would also delete
    # the spaces inside a value such as "Release versions".
    return v.replace("&nbsp;", "").strip()
sheet.map(cleanse_data)
print(sheet.content)
# 輸出
+-----------------+------------------------------+----------------------+
| Version | Comments | Author |
+-----------------+------------------------------+----------------------+
| v0.0.1 | Release versions | Eda |
+-----------------+------------------------------+----------------------+
| v0.0.2 | Useful updates | Freud |
+-----------------+------------------------------+----------------------+
+---------+------------------+--------+
| Version | Comments | Author |
+---------+------------------+--------+
| v0.0.1 | Release versions | Eda |
+---------+------------------+--------+
| v0.0.2 | Useful updates | Freud |
+---------+------------------+--------+
過濾掉爲空的行
sheet = p.Sheet([[1, 2, 3], ['', '', ''], ['', '', ''], [1, 2, 3]])
def filter_row(row_index, row):
    """Return True when no cell in *row* differs from the empty string.

    *row_index* is part of the signature but is not consulted.
    An empty *row* yields True.
    """
    return not any(element != '' for element in row)
del sheet.row[filter_row]
print(sheet)
# 輸出
pyexcel sheet:
+---+---+---+
| 1 | 2 | 3 |
+---+---+---+
| 1 | 2 | 3 |
+---+---+---+
過濾奇數行,並打印
sheet = p.get_sheet(file_name="tables.xlsx", name_columns_by_row=0)
print(sheet.content)
# 過濾奇數行
sheet.filter(row_indices=[0, 2])
print(sheet.content)
# 輸出
+---------+-----------+-----------+-----------+-----------+-------+
| Product | Quarter 1 | Quarter 2 | Quarter 3 | Quarter 4 | Year |
+=========+===========+===========+===========+===========+=======+
| Apples | 10000 | 5000 | 8000 | 6000 | 29000 |
+---------+-----------+-----------+-----------+-----------+-------+
| Pears | 2000 | 3000 | 4000 | 5000 | 14000 |
+---------+-----------+-----------+-----------+-----------+-------+
| Bananas | 6000 | 6000 | 6500 | 6000 | 24500 |
+---------+-----------+-----------+-----------+-----------+-------+
| Oranges | 500 | 300 | 200 | 700 | 1700 |
+---------+-----------+-----------+-----------+-----------+-------+
+---------+-----------+-----------+-----------+-----------+-------+
| Product | Quarter 1 | Quarter 2 | Quarter 3 | Quarter 4 | Year |
+=========+===========+===========+===========+===========+=======+
| Pears | 2000 | 3000 | 4000 | 5000 | 14000 |
+---------+-----------+-----------+-----------+-----------+-------+
| Oranges | 500 | 300 | 200 | 700 | 1700 |
+---------+-----------+-----------+-----------+-----------+-------+
操作book
通過book獲取單元格值:book[sheet_index][row, column]或book["sheet_name"][row, column]
讀寫多個sheet
import pyexcel as p

# Write a book: each key is a sheet name, each value is that sheet's rows.
content = {
    'Sheet_1': [
        [1.0, 2.0, 3.0],
        [4.0, 5.0, 6.0],
        [7.0, 8.0, 9.0],
    ],
    'Sheet_2': [
        ['X', 'Y', 'Z'],
        [1.0, 2.0, 3.0],
        [4.0, 5.0, 6.0],
    ],
    'Sheet_3': [
        ['O', 'P', 'Q'],
        [3.0, 2.0, 1.0],
        [4.0, 3.0, 2.0],
    ],
}
book = p.get_book(bookdict=content)
book.save_as("output.xlsx")

# Read the book back from the file that was just written.
book = p.get_book(file_name="output.xlsx")
sheets = book.to_dict()
# Print every key of the dict form of the book.
for name in sheets:
    print(name)

# Three ways to reach the sheet named "Sheet_1".
print(book.sheet_by_name("Sheet_1"))
print(book.Sheet_1)
print(book["Sheet_1"])
合併多個sheet成爲一個sheet
import glob

import pyexcel

# Start from an empty sheet and append the rows of every CSV file found
# in the current directory.
merged = pyexcel.Sheet()
for file in glob.glob("*.csv"):
    # Skip the output of a previous run so it is not merged into itself.
    if file == "merged.csv":
        continue
    merged.row += pyexcel.get_sheet(file_name=file)
merged.save_as("merged.csv")
假設你有兩個excel表格,每個有三張sheet表。你可以合併它們,得到一個新的Excel表(也可以只合併其中個別的sheet):
import pyexcel

book1 = pyexcel.get_book(file_name="book1.xls")
book2 = pyexcel.get_book(file_name="book2.xlsx")
# The four statements below are independent alternatives; each one simply
# rebinds merged_book.
# Whole book + whole book.
merged_book = book1 + book2
# One sheet + one sheet.
merged_book = book1["Sheet 1"] + book2["Sheet 2"]
# One sheet + whole book.
merged_book = book1["Sheet 1"] + book2
# Whole book + one sheet.
merged_book = book1 + book2["Sheet 2"]
讀取不同的數據類型
1. 獲取一個字典列表
import pyexcel as p

records = p.get_records(file_name="tables.xlsx")
# Each item is converted with dict() before printing.
for record in records:
    print(dict(record))
輸出:
{'Product': 'Apples', 'Quarter 1': 10000, 'Quarter 2': 5000, 'Quarter 3': 8000, 'Quarter 4': 6000, 'Year': 29000}
{'Product': 'Pears', 'Quarter 1': 2000, 'Quarter 2': 3000, 'Quarter 3': 4000, 'Quarter 4': 5000, 'Year': 14000}
{'Product': 'Bananas', 'Quarter 1': 6000, 'Quarter 2': 6000, 'Quarter 3': 6500, 'Quarter 4': 6000, 'Year': 24500}
{'Product': 'Oranges', 'Quarter 1': 500, 'Quarter 2': 300, 'Quarter 3': 200, 'Quarter 4': 700, 'Year': 1700}
2. 獲取一個列表
import pyexcel as p
my_array = p.get_array(file_name="tables.xlsx", start_row=1)
print(my_array)
輸出:
[['Apples', 10000, 5000, 8000, 6000, 29000], ['Pears', 2000, 3000, 4000, 5000, 14000], ['Bananas', 6000, 6000, 6500, 6000, 24500], ['Oranges', 500, 300, 200, 700, 1700]]
3. 獲取一個字典
import pyexcel as p
# name_columns_by_row=0 將第一行設爲頭
my_dict = p.get_dict(file_name="tables.xlsx", name_columns_by_row=0)
print(dict(my_dict))
輸出:
{'Product': ['Apples', 'Pears', 'Bananas', 'Oranges'], 'Quarter 1': [10000, 2000, 6000, 500], 'Quarter 2': [5000, 3000, 6000, 300], 'Quarter 3': [8000, 4000, 6500, 200], 'Quarter 4': [6000, 5000, 6000, 700], 'Year': [29000, 14000, 24500, 1700]}
4. 獲取所有sheet的字典
import pyexcel as p

book_dict = p.get_book_dict(file_name="tables.xlsx")
# Print one single-entry dict per key of book_dict.
for key, item in book_dict.items():
    print({key: item})
輸出:
{'Sheet1': [['Product', 'Quarter 1', 'Quarter 2', 'Quarter 3', 'Quarter 4', 'Year'], ['Apples', 10000, 5000, 8000, 6000, 29000], ['Pears', 2000, 3000, 4000, 5000, 14000], ['Bananas', 6000, 6000, 6500, 6000, 24500], ['Oranges', 500, 300, 200, 700, 1700]]}
{'Sheet2': [['Product', 'Quarter 1', 'Quarter 2', 'Quarter 3', 'Quarter 4', 'Year'], ['Apples', 10000, 5000, 8000, 6000, 29000], ['Pears', 2000, 3000, 4000, 5000, 14000], ['Bananas', 6000, 6000, 6500, 6000, 24500], ['Oranges', 500, 300, 200, 700, 1700], ['Totals', 18500, 14300, 18700, 17700, 69200]]}
一行寫入(不同的數據類型)
1. 列表(array)
import pyexcel as p
data = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
p.save_as(array=data, dest_file_name="example.xlsx")
# 驗證寫入
results = p.get_sheet(file_name="example.xlsx")
print(results)
#pyexcel_sheet1:
#+---+---+---+
#| 1 | 2 | 3 |
#+---+---+---+
#| 4 | 5 | 6 |
#+---+---+---+
#| 7 | 8 | 9 |
#+---+---+---+
也可保存CSV文件:
p.save_as(array=data, dest_file_name="example.csv", dest_delimiter=':')
# Verify the write by echoing the file line by line.
with open("example.csv") as f:
    for line in f:
        print(line.rstrip())
# 1:2:3
# 4:5:6
# 7:8:9
2. 字典列表(records)
records = [
{"year": 1903, "country": "Germany", "speed": "206.7km/h"},
{"year": 1964, "country": "Japan", "speed": "210km/h"},
{"year": 2008, "country": "China", "speed": "350km/h"}
]
p.save_as(records=records, dest_file_name='high_speed_rail.xlsx')
3. 單個鍵值對字典(adict)
henley_on_thames_facts = {
"area": "5.58 square meters",
"population": "11,619",
"civial parish": "Henley-on-Thames",
"latitude": "51.536",
"longitude": "-0.898"
}
p.save_as(adict=henley_on_thames_facts, dest_file_name='henley.xlsx')
4. 一個單維數組字典(adict)
ccs_insights = {
"year": ["2017", "2018", "2019", "2020", "2021"],
"smart phones": [1.53, 1.64, 1.74, 1.82, 1.90],
"feature phones": [0.46, 0.38, 0.30, 0.23, 0.17]
}
p.save_as(adict=ccs_insights, dest_file_name='ccs.xlsx')
5. 寫入多個sheet(bookdict)
a_dictionary_of_two_dimensional_arrays = {
'Sheet 1':
[
[1.0, 2.0, 3.0],
[4.0, 5.0, 6.0],
[7.0, 8.0, 9.0]
],
'Sheet 2':
[
['X', 'Y', 'Z'],
[1.0, 2.0, 3.0],
[4.0, 5.0, 6.0]
],
'Sheet 3':
[
['O', 'P', 'Q'],
[3.0, 2.0, 1.0],
[4.0, 3.0, 2.0]
]
}
# 如果想保持有序,則需要傳入有序的字典
# data = OrderedDict()
# data.update({"Sheet 2": a_dictionary_of_two_dimensional_arrays['Sheet 2']})
# data.update({"Sheet 1": a_dictionary_of_two_dimensional_arrays['Sheet 1']})
# data.update({"Sheet 3": a_dictionary_of_two_dimensional_arrays['Sheet 3']})
# p.save_book_as(bookdict=data, dest_file_name="book.xlsx")
p.save_book_as(
bookdict=a_dictionary_of_two_dimensional_arrays,
dest_file_name="book.xlsx"
)
將會看到生成了Sheet 1、Sheet 2、Sheet 3三個工作表
一行轉換文件格式
# xls 轉換爲csv
p.save_as(file_name="birth.xls", dest_file_name="birth.csv")
# xls 轉爲後綴爲xlsx
p.save_as(file_name="birth.xls", dest_file_name="birth.xlsx") # change the file extension
合併文件
將目錄中的所有excel文件合併到一個文件中,每個文件成爲一個工作表
from pyexcel.cookbook import merge_all_to_a_book
import glob
merge_all_to_a_book(glob.glob("*.xlsx"), "output.xlsx")
拆分爲單個Excel文件
output.xlsx中有多個sheet,可拆分爲多個xlsx文件,命名規則是源文件中的sheet名 + "_" + "split_output.xlsx"。例如output.xlsx中有sheet1、sheet2,則生成sheet1_split_output.xlsx和sheet2_split_output.xlsx
from pyexcel.cookbook import split_a_book
split_a_book("output.xlsx", "split_output.xlsx")
如果你只想抽出當中一個作爲單獨的文件則用:
from pyexcel.cookbook import extract_a_sheet_from_a_book
extract_a_sheet_from_a_book("output.xlsx", "Sheet 1", "split_output.xls")
處理大文件
處理大文件時,需要多加一行p.free_resources(),其他用法類似上邊介紹的,只是要用的方法名前加一個i
例如:
# 注意比之前的方法多一個i
records = p.iget_records(file_name="your_file.xls")
# 每次後邊跟一行,釋放內存
p.free_resources()
處理數據庫數據
將數據庫表中的數據保存在Excel中(注意:在運行之前一定要先創建好Category.xlsx,並且添加好表頭,否則會報錯!)
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Integer, Column, String
from sqlalchemy.orm import sessionmaker
import pyexcel as p
# 創建連接
engine = create_engine(r'sqlite:///test.db')
# 聲明映射
Base = declarative_base()
# 創建會話
Session = sessionmaker(bind=engine)
class Book(Base):
    """Declarative model mapped to the ``books`` table."""

    __tablename__ = 'books'
    id = Column(Integer, autoincrement=True, primary_key=True, nullable=False)
    name = Column(String, doc="書名", comment="書名")
    category_id = Column(Integer, nullable=False)

    def to_json(self):
        """Return the instance attributes as a new plain dict.

        The ``_sa_instance_state`` entry is left out of the result.
        """
        # Build a copy instead of deleting the key from self.__dict__,
        # which would remove the attribute from the live instance.
        return {key: value for key, value in self.__dict__.items()
                if key != "_sa_instance_state"}

    def to_dict(self):
        """Return a dict mapping each table column name to this instance's value."""
        return {c.name: getattr(self, c.name, None)
                for c in self.__table__.columns}
class Category(Base):
    """Declarative model mapped to the ``categories`` table."""

    __tablename__ = "categories"
    id = Column(Integer, autoincrement=True, primary_key=True, nullable=False)
    name = Column(String, doc="分類", comment="分類")

    def to_json(self):
        """Return the instance attributes as a new plain dict.

        The ``_sa_instance_state`` entry is left out of the result.
        """
        # Build a copy instead of deleting the key from self.__dict__,
        # which would remove the attribute from the live instance.
        return {key: value for key, value in self.__dict__.items()
                if key != "_sa_instance_state"}

    def to_dict(self):
        """Return a dict mapping each table column name to this instance's value."""
        return {c.name: getattr(self, c.name, None)
                for c in self.__table__.columns}
# Create every table that does not exist yet.
Base.metadata.create_all(engine)
session = Session()
category = Category(name="歷史")
category1 = Category(name="軍事")
category2 = Category(name="小說")
category3 = Category(name="傳記")
# category_id is declared as an Integer column, so integers are passed.
book = Book(name="水滸傳", category_id=1)
book1 = Book(name="西遊記", category_id=2)
book2 = Book(name="紅樓夢", category_id=3)
book3 = Book(name="三國演義", category_id=2)
book4 = Book(name="崑崙", category_id=1)
# Uncomment to insert the sample rows above:
# session.add_all([category, category1, category2, category3,book,book1,book2,book3,book4])
# session.flush()
# session.commit()
# Feed Category.xlsx (first row used as column names) to the Category table
# through the session.
p.save_as(file_name="Category.xlsx", name_columns_by_row=0, dest_session=session, dest_table=Category)
# Read the Category table as a sheet.
sheet = p.get_sheet(session=session, table=Category)
print(sheet)
# Save the sheet to a file.
sheet.save_as("Category.xlsx")
session.close()