Python脚本之读写Excel【二】

之前分享过一期《Excel 读写【一】》,这儿分享下其他的收获。

一些尝试

openpyxl

A. 普通写入内容 【可写可读模式】

        # Sample inputs for the writer methods in this post:
        # `header` is the title row; `data` holds four inner lists (one per
        # worksheet), each containing two 5-field row tuples.
        header = ['ID', 'Product', 'Status', 'Fee', 'Tr updated']
        data = [[(10, 'zyoooo00', '100', 50, '2020-05-16'), (1, 'zyoooo01', '10', 5, '2020-05-15')], [(11, 'zyoooo1', '100', 50, '2020-05-16') , (10, 'zyoooo11', '10', 5, '2020-05-15')], [(12, 'zyoooo2', '100', 50, '2020-05-16'), (10, 'zyoooo0', '10', 5, '2020-05-15')], [(3, 'zyoooo3', '100', 50, '2020-05-16'), (10, 'zyoooo0', '10', 5, '2020-05-15')]]
    def insert_excel_data(self, header, all_data):
        """
        Create a new workbook and write one worksheet per entry of ``all_data``.

        Row 1 of every sheet holds ``header``; the data rows start at row 2.
        Every value is written as text (``str(value)``). The workbook is saved
        to the hard-coded folder used below as ``test_<YYYYmmdd_HHMMSS>.xlsx``.

        :param header: column titles, e.g. ['ID', 'Product', 'Status', 'Fee', 'Tr updated']
        :param all_data: [list1, list2, list3, list4]; list1 holds all rows of the first sheet
        :return: None
        """
        import openpyxl
        workbook = openpyxl.Workbook()

        for sheet_index, rows in enumerate(all_data):
            sheet = workbook.create_sheet(index=sheet_index)
            Log.info(sheet)

            for column, title in enumerate(header, start=1):
                sheet.cell(row=1, column=column, value=str(title))

            for row_number, row in enumerate(rows, start=2):
                # Write every value of the row instead of exactly five
                # hard-coded columns, so rows of any width are handled.
                for column, value in enumerate(row, start=1):
                    sheet.cell(row=row_number, column=column, value=str(value))

            Log.info('当前sheet数据生成成功')

        # Workbook() starts with a default sheet named "Sheet". Drop it once
        # real sheets exist; keep it when all_data is empty, because a
        # workbook without any sheet cannot be saved.
        if len(workbook.sheetnames) > 1:
            workbook.remove(workbook["Sheet"])

        ex_time = time.strftime("%Y%m%d_%H%M%S")
        workbook.save(r'D:\work\gys\test_' + ex_time + '.xlsx')

        print('excel创建成功')
    def insert_excel_data_a(self, header, all_data):
        """
        Create a new workbook with one worksheet per entry of ``all_data``.

        Each sheet gets ``header`` in row 1 followed by its data rows; for
        every data row exactly ``len(header)`` values are written, each
        converted to text with ``str``.

        :param header: column titles, e.g. ['ID', 'Product', 'Status', 'Fee', 'Tr updated']
        :param all_data: [list1, list2, list3, list4]; list1 holds all rows of the first sheet
        :return: None
        """
        import openpyxl
        workbook = openpyxl.Workbook()
        column_numbers = range(1, len(header) + 1)

        for position, rows in enumerate(all_data):
            sheet = workbook.create_sheet(index=position)
            Log.info(sheet)

            # Title row.
            for column, title in enumerate(header, start=1):
                sheet.cell(row=1, column=column, value=str(title))

            # Data rows begin directly below the title row.
            for row_number, record in enumerate(rows, start=2):
                for column in column_numbers:
                    sheet.cell(row=row_number, column=column, value=str(record[column - 1]))

            Log.info('当前sheet数据生成成功')

        # Remove the extra default sheet that Workbook() creates.
        workbook.remove(workbook["Sheet"])

        stamp = time.strftime("%Y%m%d_%H%M%S")
        workbook.save(r'D:\work\gys\test_' + stamp + '.xlsx')

        print('excel创建成功')

B.只写模式

其实这个也是比较推荐的; 但我电脑配置没那么好(一跑起来,内存要用近90%),所以最推荐的是 前面的 insert_excel_new()

    def insert_excel(self, all_data, header):
        """
        Create a new workbook in openpyxl's write-only mode and fill it.

        One worksheet is created per entry of ``all_data``; ``header`` is
        appended as the first row of each sheet, followed by that sheet's
        rows. The input lists are not modified.

        :param header: column titles, e.g. ['ID', 'Product', 'Status', 'Fee', 'Tr updated']
        :param all_data: [list1, list2, list3, list4]; list1 holds all rows of the first sheet
        :return: None
        """
        import openpyxl
        workbook = openpyxl.Workbook(write_only=True)
        for sheet_index, rows in enumerate(all_data):
            sheet = workbook.create_sheet(index=sheet_index)
            Log.info(sheet)

            # Append the header straight to the sheet. Inserting it into the
            # caller's list mutated all_data, so a second call would have
            # written the header row twice.
            sheet.append(header)
            for row in rows:
                sheet.append(row)
            Log.info('当前sheet数据生成成功')

        ex_time = time.strftime("_%Y%m%d_%H%M%S")
        workbook.save(r'D:\work\gys\test_' + ex_time + '.xlsx')

        print('excel创建成功')

pandas

    def get_excel_data_new_1(self, file, sheet_index=0):
        """
        Read one worksheet with pandas and print its rows as nested lists.

        :param file: Excel file to read
        :param sheet_index: sheet selector, passed to ``pd.read_excel`` as ``sheet_name``
        :return: None (the rows are only printed)
        """
        import numpy as np
        import pandas as pd

        frame = pd.read_excel(file, sheet_name=sheet_index)

        # DataFrame -> ndarray -> plain Python lists, one inner list per row.
        rows = np.array(frame).tolist()
        print(rows)
    def get_excel_data_new_2(self, file, sheet_name):
        """
        Read the worksheet called ``sheet_name`` with pandas and print its rows.

        Only the requested sheet is parsed; the other sheets of the workbook
        are not loaded.

        :param file: Excel file to read
        :param sheet_name: name of the worksheet to read
        :return: None (the rows are only printed)
        :raises KeyError: if the workbook has no sheet named ``sheet_name``
        """
        import numpy as np
        import pandas as pd

        with pd.ExcelFile(file) as book:
            if sheet_name not in book.sheet_names:
                # Same exception type the former dict lookup raised.
                raise KeyError(sheet_name)
            # sheet_name=None would parse every sheet just to keep one.
            frame = book.parse(sheet_name)

        data = np.array(frame).tolist()

        print(data)

xlwings

xlwings 好像有时候需要 打开某个Excel文件。
【下方代码:若事先打开了此 Excel 文件,则不报错;若没打开,就报错 "pywintypes.com_error: (-2147221164, '没有注册类', None, None)"】

    def get_excel_data_new_3(self, file, sheet_index=0):
        """
        Read the table that starts at cell A1 with xlwings and print it.

        The row count and column count of the expanded table are printed
        first, then the list holding one range value per row.

        :param file: Excel file to open with xlwings
        :param sheet_index: index of the worksheet to read (default 0)
        :return: None (the data is only printed)
        """
        import xlwings as xw
        from openpyxl.utils import get_column_letter

        book = xw.Book(file)
        sheet = book.sheets[sheet_index]

        table = sheet.range('a1').expand('table')
        row_total = table.rows.count
        print(row_total)
        column_total = table.columns.count
        print(column_total)

        # get_column_letter turns a 1-based column number into its letter(s),
        # e.g. 1 -> 'A' and 267 -> 'JG' (column_index_from_string is the
        # inverse). A chr(97 + n) lookup would only cover the first 26 columns.
        last_column = get_column_letter(column_total)

        data = [
            sheet.range('A{shu}:{lie}{shu}'.format(shu=row_number, lie=last_column)).value
            for row_number in range(1, row_total + 1)
        ]

        print(data)

一种有缺陷的思路

需求:将Excel转CSV
思路:快速获取Excel的内容,再使用 pandas 的 to_csv() 保存;

openpyxl 的 Worksheet.columns 属性不能在只读模式下使用。

    def delete_header(self, data_list):
        """
        Split a column into its header and its remaining values.

        The first element is taken as the header; every element equal to it
        (not only the first one) is left out of the returned values.

        :param data_list: values of one column, header first
        :return: tuple ``(values_without_header, header)``
        """
        header = data_list[0]

        remaining = []
        for value in data_list:
            if value != header:
                remaining.append(value)

        return remaining, header

    def get_excel_data_4(self, file):
        """
        Convert a workbook whose sheets all share the same header row to CSV.

        All sheets are read column by column with openpyxl in read-only mode,
        ``delete_header`` separates each column's title from its values, and
        the result is written next to ``file`` as ``<name>_<HHMMSS>.csv``.

        Known limitation: the columns are collected in a dict keyed by title,
        so when two columns share a title the later one replaces the earlier.

        :param file: path of the .xlsx file to convert
        :return: None
        """
        import os

        from openpyxl import load_workbook
        import pandas as pd

        workbook = load_workbook(file, read_only=True)
        try:
            all_data = None
            for name in workbook.sheetnames:
                print('当前sheet Name是 {}'.format(name))
                sheet = workbook[name]

                for row in sheet.values:
                    if all_data is None:
                        # Column count comes from the first row read, so the
                        # file no longer has to be opened a second time with
                        # xlrd (xlrd 2.x cannot read .xlsx at all).
                        all_data = [[] for _ in row]
                    for index, column in enumerate(all_data):
                        column.append(row[index])
        finally:
            # Read-only workbooks keep the file open until closed explicitly.
            workbook.close()

        # Split every column once into (values, title); delete_header drops
        # each value equal to the title, which also removes the header rows
        # repeated by the later sheets.
        split_columns = [self.delete_header(column) for column in (all_data or [])]

        data_dict = dict()
        for values, title in split_columns:
            data_dict[title] = values

        print(data_dict)

        data_df = pd.DataFrame(data_dict)
        new_time = time.strftime("_%H%M%S")
        # Build the target from the path stem: str.replace('.xlsx', ...) gave
        # back the input path when the suffix differed, so to_csv would have
        # overwritten the source workbook.
        root, _ext = os.path.splitext(file)
        new_file = ''.join([root, new_time, '.csv'])

        data_df.to_csv(new_file, index=False)

缺陷:用 pandas 的 to_csv() 去生成 CSV 文件时,表头若是有重复字段,在生成 data_dict 的时候(data_dict[header[f]] = data[f]),后面的同名 key 会把前面的 key-value 覆盖掉;
【我的Excel文件 确实是有 2个表头字段是一样的】;

获取Excel的sheet总数量 和 sheet_name

    def read_excel_sheetname(self, file):
        """
        Print the sheet names and the sheet count of a workbook, five ways.

        The same information is obtained with openpyxl, xlwings, xlrd (by
        sheet names and by sheet objects) and pandas, and printed after each
        step.

        :param file: path of the Excel file
        :return: None
        """
        from openpyxl import load_workbook
        import xlwings as xw
        import xlrd
        import pandas as pd

        def report(names):
            # One names line followed by one count line per library.
            print(names)
            print(len(names), 'zyooooxie_csdn')

        # openpyxl, read-only mode.
        openpyxl_book = load_workbook(file, read_only=True, keep_vba=False)
        report(openpyxl_book.sheetnames)

        # xlwings; the xlsx file had been opened beforehand.
        # NOTE: the book is not closed here.
        xlwings_book = xw.Book(file)
        report([sheet.name for sheet in xlwings_book.sheets])

        # xlrd, via the list of sheet names.
        xlrd_book = xlrd.open_workbook(file)
        report(xlrd_book.sheet_names())

        # xlrd again, via the sheet objects.
        xlrd_sheets = xlrd.open_workbook(file).sheets()
        report([sheet.name for sheet in xlrd_sheets])

        # pandas.
        pandas_book = pd.ExcelFile(file)
        report(pandas_book.sheet_names)

这些方法我并没有对大数据量的 Excel 做实践;若对大数据量的读写有兴趣,可以看下我写的第一期;

交流技术 欢迎+QQ 153132336 zy
个人博客 https://blog.csdn.net/zyooooxie

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章