《Python數據分析基礎 Clinton W.Brownley》第三章最後一段代碼,修正版

#!/usr/local/bin/python3
# -*- coding: utf8 -*-


import pandas as pd
import glob
import os
import sys

# Column order of the summary sheet written to the output workbook.
SUMMARY_COLUMNS = ['workbook', 'worksheet',
                   'worksheet_total', 'worksheet_average',
                   'workbook_total', 'workbook_average']


def format_average(total, count):
    """Return ``total / count`` formatted with two decimals.

    An empty selection (``count == 0``) yields ``'nan'`` instead of raising
    ZeroDivisionError, so one empty worksheet does not abort the whole run.
    """
    if count == 0:
        return '%.2f' % float('nan')
    return '%.2f' % (total / count)


def summarize_workbook(workbook_name, worksheets):
    """Build the per-worksheet summary rows for a single workbook.

    Args:
        workbook_name: File name stored in the ``workbook`` column.
        worksheets: Mapping of worksheet name -> DataFrame; every frame must
            contain a ``'Sale Amount'`` column.

    Returns:
        A DataFrame with one row per worksheet and the columns listed in
        SUMMARY_COLUMNS.  Totals are numeric; averages are two-decimal
        strings.  The workbook-level total and average are repeated on every
        row of the workbook.

    Raises:
        KeyError: If a worksheet has no ``'Sale Amount'`` column.
    """
    rows = []
    workbook_total = 0
    workbook_number_of_sales = 0

    for worksheet_name, worksheet in worksheets.items():
        sale_amounts = worksheet.loc[:, 'Sale Amount']
        total_sales = sale_amounts.sum()
        number_of_sales = len(sale_amounts)

        workbook_total += total_sales
        workbook_number_of_sales += number_of_sales

        rows.append({'workbook': workbook_name,
                     'worksheet': worksheet_name,
                     'worksheet_total': total_sales,
                     'worksheet_average': format_average(total_sales,
                                                         number_of_sales)})

    # Build the frame once, after the loop, rather than re-concatenating on
    # every iteration.
    summary = pd.DataFrame(rows, columns=SUMMARY_COLUMNS[:4])

    # Assign plain scalars: wrapping a Series/DataFrame in str() would write
    # the pandas repr (index and dtype lines included) into the cell.
    summary['workbook_total'] = workbook_total
    summary['workbook_average'] = format_average(workbook_total,
                                                 workbook_number_of_sales)
    return summary


def build_summary(workbook_paths):
    """Read every workbook in *workbook_paths* and stack their summaries.

    Returns an empty frame that still carries SUMMARY_COLUMNS when no
    workbook is given, because ``pd.concat`` raises on an empty list.
    """
    frames = []
    for workbook_path in workbook_paths:
        # sheet_name=None loads every worksheet as {sheet name: DataFrame}.
        worksheets = pd.read_excel(workbook_path, sheet_name=None,
                                   index_col=None)
        frames.append(summarize_workbook(os.path.basename(workbook_path),
                                         worksheets))

    if not frames:
        return pd.DataFrame(columns=SUMMARY_COLUMNS)
    return pd.concat(frames, axis=0, ignore_index=True)


def main(argv=None):
    """Command-line entry point: ``script.py <input_folder> <output_file>``.

    Summarizes every workbook in ``<input_folder>`` matching ``s*.xls*`` and
    writes the result to the ``sums_and_averages`` sheet of ``<output_file>``.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) != 2:
        sys.exit('Usage: %s <input_folder> <output_file>'
                 % os.path.basename(sys.argv[0]))
    input_path, output_path = args

    # Sorted so the row order of the output is reproducible across runs.
    all_workbooks = sorted(glob.glob(os.path.join(input_path, 's*.xls*')))
    all_data_concatenated = build_summary(all_workbooks)

    # The context manager saves and closes the file even if to_excel raises;
    # ExcelWriter.save() no longer exists in pandas >= 2.0.
    with pd.ExcelWriter(output_path) as writer:
        all_data_concatenated.to_excel(writer, sheet_name='sums_and_averages',
                                       index=False)


if __name__ == '__main__':
    main()
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章