1、提取word內的所有表格內容,寫到excel內
需先安裝 python-docx 和 xlwt(pip install python-docx xlwt)
# coding=utf-8
from docx import Document
from xlwt import Workbook

# Input/output locations (placeholders; adjust before running).
# NOTE: python-docx reads the .docx format; a legacy binary .doc file has to
# be converted to .docx before it can be opened here.
xl_path = "./path/excel.xls"
word_path = "./path/word.doc"

# Read every table of the Word document as a list of rows, each row being a
# list of cell texts.
doc = Document(word_path)
tables = [
    [[cell.text for cell in row.cells] for row in table.rows]
    for table in doc.tables
]
# Drop tables that contain no rows at all.
tables_2 = list(filter(None, tables))

# Write each table to its own worksheet: "sheet0", "sheet1", ...
workbook = Workbook(encoding='utf-8')
for sheet_index, table in enumerate(tables_2):
    worksheet = workbook.add_sheet(f'sheet{sheet_index}', cell_overwrite_ok=True)
    # enumerate() yields the real position of every row/cell. Looking the
    # position up with list.index() returns the FIRST equal element, so
    # repeated rows or repeated cell texts would land on the wrong cell and
    # overwrite earlier data.
    for r, row in enumerate(table):
        for c, text in enumerate(row):
            worksheet.write(r, c, text)
workbook.save(xl_path)
2、把多個字段名相同的 Excel 文件合併成一個
import os

import pandas as pd

# Root location that is walked recursively; every file found below it is
# read as an Excel workbook. os.walk() yields nothing for a path that is not
# a directory, so this must point at a folder.
xl_path = r'D:\path\excel.xls'

# Collect the frames first and concatenate once: DataFrame.append() returned a
# new frame (the original discarded it, leaving the result empty) and was
# removed in pandas 2.0.
frames = []
for parent, _dirnames, filenames in os.walk(xl_path):
    for filename in filenames:
        frames.append(pd.read_excel(os.path.join(parent, filename)))

# pd.concat() raises on an empty list, so fall back to an empty frame.
res = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

# Raw string keeps the backslashes literal (the path value is unchanged).
# The `encoding` argument of to_excel() was dropped because newer pandas
# versions no longer accept it.
# NOTE(review): writing the legacy .xls format needs the xlwt engine, which
# pandas 2.0+ no longer ships support for; switch to .xlsx there — confirm
# against the installed pandas version.
res.to_excel(r'D:\path\new_excel.xls')