Python腳本之讀寫Excel【二】

之前分享過一期《Excel 讀寫【一】》,這裡再分享一些其他的收穫。

一些嘗試

openpyxl

A. 普通寫入內容 【可寫可讀模式】

        # Sample inputs for the writer methods below: `header` is the title row,
        # `data` holds one list of row tuples per worksheet (four worksheets here).
        header = ['ID', 'Product', 'Status', 'Fee', 'Tr updated']
        data = [[(10, 'zyoooo00', '100', 50, '2020-05-16'), (1, 'zyoooo01', '10', 5, '2020-05-15')], [(11, 'zyoooo1', '100', 50, '2020-05-16') , (10, 'zyoooo11', '10', 5, '2020-05-15')], [(12, 'zyoooo2', '100', 50, '2020-05-16'), (10, 'zyoooo0', '10', 5, '2020-05-15')], [(3, 'zyoooo3', '100', 50, '2020-05-16'), (10, 'zyoooo0', '10', 5, '2020-05-15')]]
    def insert_excel_data(self, header, all_data):
        """
        Create a new workbook and write one worksheet per entry of ``all_data``.

        Row 1 of every worksheet receives ``header``; data rows start at row 2.
        Each value is converted with ``str()`` before it is written, and every
        data row contributes exactly ``len(header)`` cells. The workbook is saved
        to a hard-coded, timestamped ``.xlsx`` path.

        :param header: column titles, e.g. ['ID', 'Product', 'Status', 'Fee', 'Tr updated']
        :param all_data: [list1, list2, list3, list4]; list1 holds the rows of the
            first worksheet. Each row must have at least ``len(header)`` items.
        :return: None
        :raises IndexError: if a row has fewer items than ``header``
        """
        import openpyxl
        workbook = openpyxl.Workbook()

        for sheet_index, sheet_rows in enumerate(all_data):
            sheet = workbook.create_sheet(index=sheet_index)
            Log.info(sheet)

            for column, title in enumerate(header, start=1):
                sheet.cell(row=1, column=column, value=str(title))

            for row_number, row_values in enumerate(sheet_rows, start=2):
                # Index explicitly (instead of zip) so a short row still fails loudly.
                for column, _title in enumerate(header, start=1):
                    sheet.cell(row=row_number, column=column, value=str(row_values[column - 1]))

            Log.info('當前sheet數據生成成功')

        # Workbook() starts with a default worksheet named "Sheet". Remove it only
        # when real worksheets were created: a workbook without any worksheet
        # cannot be saved.
        if len(workbook.sheetnames) > 1:
            workbook.remove(workbook["Sheet"])

        ex_time = time.strftime("%Y%m%d_%H%M%S")
        workbook.save(r'D:\work\gys\test_' + ex_time + '.xlsx')

        print('excel創建成功')
    def insert_excel_data_a(self, header, all_data):
        """
        Create a new workbook and write one worksheet per entry of ``all_data``.

        Loop-based variant: the number of columns written per row follows
        ``len(header)``. Row 1 of every worksheet receives ``header``; data rows
        start at row 2. Each value is converted with ``str()`` before it is
        written. The workbook is saved to a hard-coded, timestamped ``.xlsx`` path.

        :param header: column titles, e.g. ['ID', 'Product', 'Status', 'Fee', 'Tr updated']
        :param all_data: [list1, list2, list3, list4]; list1 holds the rows of the
            first worksheet. Each row must have at least ``len(header)`` items.
        :return: None
        :raises IndexError: if a row has fewer items than ``header``
        """
        import openpyxl
        workbook = openpyxl.Workbook()

        for sheet_index, sheet_rows in enumerate(all_data):
            sheet = workbook.create_sheet(index=sheet_index)
            Log.info(sheet)

            for column, title in enumerate(header, start=1):
                sheet.cell(row=1, column=column, value=str(title))

            for row_number, row_values in enumerate(sheet_rows, start=2):
                for position, _title in enumerate(header):
                    sheet.cell(row=row_number, column=position + 1, value=str(row_values[position]))

            Log.info('當前sheet數據生成成功')

        # The default worksheet "Sheet" created by Workbook() is surplus once real
        # worksheets exist. Keep it when nothing was created, otherwise the
        # workbook would have no worksheet left and could not be saved.
        if len(workbook.sheetnames) > 1:
            workbook.remove(workbook["Sheet"])

        ex_time = time.strftime("%Y%m%d_%H%M%S")
        workbook.save(r'D:\work\gys\test_' + ex_time + '.xlsx')

        print('excel創建成功')

B.只寫模式

其實這個方式也比較推薦;但我的電腦配置沒那麼好(一跑起來,內存要用近 90%),所以我最推薦的還是前面的 insert_excel_new()。

    def insert_excel(self, all_data, header):
        """
        Create a new workbook in openpyxl's write-only mode, one worksheet per
        entry of ``all_data``.

        ``header`` is appended as the first row of every worksheet, followed by
        that worksheet's rows. The caller's ``all_data`` is left untouched: the
        header is written directly instead of being inserted into each row list.
        The workbook is saved to a hard-coded, timestamped ``.xlsx`` path.

        :param all_data: [list1, list2, list3, list4]; list1 holds the rows of the
            first worksheet
        :param header: column titles, e.g. ['ID', 'Product', 'Status', 'Fee', 'Tr updated']
        :return: None
        """
        import openpyxl
        workbook = openpyxl.Workbook(write_only=True)
        for sheet_index, sheet_rows in enumerate(all_data):
            sheet = workbook.create_sheet(index=sheet_index)
            Log.info(sheet)

            # Write the header row first; do not mutate the caller's row list.
            sheet.append(header)
            for row in sheet_rows:
                sheet.append(row)
            Log.info('當前sheet數據生成成功')

        ex_time = time.strftime("_%Y%m%d_%H%M%S")
        workbook.save(r'D:\work\gys\test_' + ex_time + '.xlsx')

        print('excel創建成功')

pandas

    def get_excel_data_new_1(self, file, sheet_index=0):
        """
        Load one worksheet with pandas and print its rows as a list of lists.

        :param file: Excel file handed to ``pd.read_excel``
        :param sheet_index: value passed as ``sheet_name`` (0 by default)
        :return: None; the converted rows are only printed
        """
        import numpy as np
        import pandas as pd

        frame = pd.read_excel(file, sheet_name=sheet_index)
        rows = np.array(frame).tolist()

        print(rows)
    def get_excel_data_new_2(self, file, sheet_name):
        """
        Load every worksheet with pandas, pick one by name and print its rows
        as a list of lists.

        :param file: Excel file handed to ``pd.read_excel``
        :param sheet_name: key used to select the worksheet from the loaded result
        :return: None; the converted rows are only printed
        """
        import numpy as np
        import pandas as pd

        # sheet_name=None loads all worksheets; the result is indexed by name below.
        frames_by_name = pd.read_excel(file, sheet_name=None)
        rows = np.array(frames_by_name[sheet_name]).tolist()

        print(rows)

xlwings

xlwings 有時候似乎需要事先打開對應的 Excel 文件。
【下方代碼:若已打開此 Excel,不報錯;若沒打開,就報錯 "pywintypes.com_error: (-2147221164, '沒有註冊類', None, None)"】

    def get_excel_data_new_3(self, file, sheet_index=0):
        """
        Read the table that starts at cell A1 with xlwings and print it row by row.

        The row and column counts of the expanded range are printed first, then
        the list of row values.

        :param file: workbook handed to ``xw.Book``
        :param sheet_index: index into ``wb.sheets`` (0 by default)
        :return: None; the collected rows are only printed
        """
        import xlwings as xw
        from openpyxl.utils import get_column_letter

        book = xw.Book(file)
        sheet = book.sheets[sheet_index]

        table = sheet.range('a1').expand('table')
        tot_rows = table.rows.count
        print(tot_rows)
        tot_ncols = table.columns.count
        print(tot_ncols)

        # get_column_letter turns a 1-based column number into its letter(s)
        # (1 -> 'A', 267 -> 'JG'), so it also works beyond 26 columns, unlike a
        # chr()-based conversion.
        last_column = get_column_letter(tot_ncols)

        data = [
            sheet.range('A{shu}:{lie}{shu}'.format(shu=row_number, lie=last_column)).value
            for row_number in range(1, tot_rows + 1)
        ]

        print(data)

一種有缺陷的思路

需求:將Excel轉CSV
思路:快速獲取Excel的內容,再使用 pandas 的 to_csv() 保存;

openpyxl 的 Worksheet.columns 屬性不能在只讀模式下使用。

    def delete_header(self, data_list):
        """
        Split a column into its header and its remaining values.

        The first element is taken as the header; every element equal to it is
        dropped from the returned values (not just the first one).

        :param data_list: non-empty list whose first element is the header
        :return: tuple ``(values_without_header, header)``
        """
        header = data_list[0]

        remaining = []
        for value in data_list:
            if value != header:
                remaining.append(value)

        return remaining, header

    def get_excel_data_4(self, file):
        """
        Convert an Excel workbook whose worksheets all share the same header
        into a single CSV file.

        The values of every worksheet are collected column by column, the header
        value is stripped from each column with ``delete_header``, and the result
        is written with ``DataFrame.to_csv`` next to the source file as
        ``<name>_<HHMMSS>.csv``.

        Known limitation: columns are stored in a dict keyed by header text, so
        when two columns share the same header the later one replaces the
        earlier one.

        :param file: path of the Excel file (string)
        :return: None
        """
        import os

        from openpyxl import load_workbook
        import pandas as pd
        import xlrd

        # xlrd is only used to learn how many columns the first worksheet has.
        tot_ncols = xlrd.open_workbook(file).sheet_by_index(0).ncols

        all_data = [list() for _ in range(tot_ncols)]

        excel = load_workbook(file, read_only=True)
        try:
            for sheet_name in excel.sheetnames:
                print('當前sheet Name是 {}'.format(sheet_name))
                # Index access replaces the deprecated get_sheet_by_name().
                sheet = excel[sheet_name]

                for row in sheet.values:
                    for column in range(tot_ncols):
                        all_data[column].append(row[column])
        finally:
            # A read-only workbook keeps the file open until closed explicitly.
            excel.close()

        data_dict = dict()
        for column_values in all_data:
            values, header = self.delete_header(column_values)
            data_dict[header] = values

        print(data_dict)

        data_df = pd.DataFrame(data_dict)
        new_time = time.strftime("_%H%M%S")
        # Swap the extension instead of str.replace('.xlsx', ...): replace() leaves
        # the path unchanged for any other extension, which would make to_csv
        # overwrite the source workbook.
        root, _extension = os.path.splitext(file)
        new_file = ''.join([root, new_time, '.csv'])

        data_df.to_csv(new_file, index=False)

缺陷:表頭若是有重複字段,在生成 data_dict 的時候,後寫入的同名 key 會把前面的 key-value 覆蓋掉(字典的 key 不能重複),所以用 pandas 的 to_csv() 生成的 CSV 文件會缺少列;
【我的Excel文件 確實是有 2個表頭字段是一樣的】;

獲取Excel的sheet總數量 和 sheet_name

    def read_excel_sheetname(self, file):
        """
        Print the worksheet names and the worksheet count of an Excel file in
        five different ways: openpyxl, xlwings, xlrd ``sheet_names()``,
        xlrd ``sheets()`` and pandas ``ExcelFile``.

        Each variant prints the list of names followed by the count.

        :param file: path of the Excel file
        :return: None
        """
        from openpyxl import load_workbook
        import xlwings as xw
        import xlrd
        import pandas as pd

        # openpyxl: a read-only workbook must be closed explicitly.
        excel = load_workbook(file, read_only=True, keep_vba=False)
        try:
            names = excel.sheetnames
            print(names)
            print(len(names), 'zyooooxie_csdn')
        finally:
            excel.close()

        # xlwings: the xlsx file was opened beforehand, so it is left open here.
        excel = xw.Book(file)
        chang = len(excel.sheets)
        print([s.name for s in excel.sheets])
        print(chang, 'zyooooxie_csdn')

        # xlrd: one parsed workbook serves both the sheet_names() and sheets() variants.
        excel = xlrd.open_workbook(file)
        names = excel.sheet_names()
        print(names)
        print(len(names), 'zyooooxie_csdn')

        all_sheets = excel.sheets()
        print([a.name for a in all_sheets])
        print(len(all_sheets), 'zyooooxie_csdn')

        # pandas: the context manager closes the underlying file handle.
        with pd.ExcelFile(file) as excel:
            names = excel.sheet_names
            print(names)
            print(len(names), 'zyooooxie_csdn')

這些方法我並沒有對大數據量的 Excel 做過實踐;若對大數據量的處理有興趣,可以看下我寫的第一期。

交流技術 歡迎+QQ 153132336 zy
個人博客 https://blog.csdn.net/zyooooxie

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章