文章目錄

需求

excel報表自動彙總。

現狀

解決方案

1.基於Python的xlwings包，完成報表數據轉換、換算、彙總。
2.使用PyInstaller將代碼打包，降低使用門檻。

環境 & 工具

Win7
Excel
Anaconda3

xlwings 0.16.0
pandas 0.23.3
PyInstaller 3.6

主函數代碼流程圖

代碼

# -*- coding: utf-8 -*-
'''
autoGP.py
/ 假設Metrics爲模版輸出
'''

from xlwings import constants
import xlwings as xw
import pandas as pd
import datetime
import time
import os


def now():
    """Return the current reading of the high-resolution performance counter."""
    return time.perf_counter()


# Folder that is searched for the input and output workbooks.
PATH = r'\Desktop\Metrics'


def check(strDat):
    """Validate that the input is a target month written as ``YYYYMM``.

    Parameters
    ----------
    strDat : str
        Text typed by the user, e.g. ``'202005'`` for May 2020.

    Raises
    ------
    Exception
        If the input is not exactly six ASCII digits forming a valid
        year/month. A plain ``Exception`` is kept on purpose so callers
        that treat ``ValueError`` specially are not affected.

    Returns
    -------
    str
        The unchanged input when it is valid.
    """
    message = '請按要求輸入,如當前爲5月，則輸入:202005'
    # ``strptime('%Y%m')`` on its own also accepts a one-digit month such as
    # '20205', which later never matches any file name; insist on the full
    # six-digit form before parsing.
    if (not isinstance(strDat, str)
            or len(strDat) != 6
            or any(ch not in '0123456789' for ch in strDat)):
        raise Exception(message)
    try:
        # Rejects impossible months such as '202013'.
        datetime.datetime.strptime(strDat, '%Y%m')
    except ValueError:
        raise Exception(message)
    return strDat

def getM(dat):
    """Return the abbreviated month name (``%b``) of a ``YYYYMM`` string."""
    parsed = datetime.datetime.strptime(dat, '%Y%m')
    return format(parsed, '%b')

def getFil(dat):
    """Collect the file names of every workbook needed for month ``dat``.

    Parameters
    ----------
    dat : str
        Target month as ``YYYYMM``.

    Raises
    ------
    FileNotFoundError
        If any of the required workbooks (or the sub-folder that holds the
        book-rate and daily files) cannot be found under ``PATH``.

    Returns
    -------
    list of str
        File names only (no directories), in this order: the
        ``P4P+KA GP_<dat>`` file(s) found directly in ``PATH``, the
        ``GP Analysis-<dat>`` / ``P4P & KA Schedules <dat> - Janice`` files
        found anywhere below ``PATH``, the book-rate file, and the most
        recently modified daily (``日環比``) file.
    """
    # Summary workbook (SZ + HK), located directly in PATH.
    lis = [f for f in os.listdir(PATH)
           if 'P4P+KA GP_' + dat == f.split('.')[0]]
    if not lis:
        raise FileNotFoundError('指定路徑下："P4P+KA GP_%s.xlsx"文件不存在'
                                '\n補充文件後重新運行。' % dat)
    # Walk the tree once and reuse the result; the walk is the expensive part.
    walked = list(os.walk(PATH))
    # SZ and HK source workbooks, searched in PATH and all sub-folders.
    fil = [f for entry in walked for f in entry[2]
           if 'GP Analysis-' + dat == f.split('.')[0]
           or 'P4P & KA Schedules ' + dat + ' - Janice' == f.split('.')[0]]
    if len(fil) < 2:
        raise FileNotFoundError('指定路徑下："GP Analysis-%s.xlsx"'
                                '或"P4P & KA Schedules %s"文件不存在'
                                '\n補充文件後重新運行。' % (dat, dat))
    lis += fil
    # The book-rate and daily files are looked up in the first sub-folder
    # reported by os.walk; fail with a readable message if there is none.
    if len(walked) < 2:
        raise FileNotFoundError('指定路徑下沒有子文件夾，'
                                "無法查找 'book rate' 和 *日環比* 文件。"
                                '補充後重新運行。')
    subdir = walked[1][0]
    sub_files = os.listdir(subdir)
    # Book rate: try the target month first, then step forward 20 days at a
    # time (offsets 0..180) until a matching "Ex rate MM-YYYY" file is found.
    base = datetime.datetime.strptime(dat, '%Y%m')
    for days in range(0, 200, 20):
        name = ('Ex rate '
                + (base + datetime.timedelta(days)).strftime('%m-%Y')
                + '_book rate.xlsx')
        if name in sub_files:
            break
    else:
        raise FileNotFoundError("指定路徑下沒找到 'book rate' 文件."
                                "補充文件後重新運行。")
    lis.append(name)
    # Daily file: names containing '日環比', skipping names that contain '~'.
    daily = [f for f in sub_files if '日環比' in f and '~' not in f]
    if not daily:
        raise FileNotFoundError('指定路徑下沒找到 *日環比* 文件.'
                                '補充文件後重新運行')
    # Keep the most recently modified one.
    newest = max(daily,
                 key=lambda x: os.path.getmtime(os.path.join(subdir, x)))
    lis.append(newest)
    return lis

def getSZ(wb, dat):
    """Read the SZ GP sheet into a DataFrame with one row per account.

    Parameters
    ----------
    wb : xlwings.Book
        The SZ GP workbook; it must contain a sheet named ``dat``.
    dat : str
        Sheet name (the target month as ``YYYYMM``).

    Raises
    ------
    ValueError
        If the header row ``A1:AV1`` has no ``賬戶名稱`` column. The workbook
        is closed before the error is raised.

    Returns
    -------
    pandas.DataFrame
        Columns ``A:AV`` summed per ``賬戶名稱``, with ``賬戶名稱`` restored
        as a regular column.
    """
    sht = wb.sheets[dat]
    cntR = sht[0, 0].current_region.rows.count
    header = sht['A1:AV1'].value
    # Validate the header before building the frame.
    if '賬戶名稱' not in header:
        wb.close()
        raise ValueError('SZ GP表中 *賬戶名稱* 不存在或錯誤，'
                         '請檢查excel後重新運行。')
    # ndim=2 keeps a list-of-rows shape even when there is only one data
    # row; without it xlwings returns a flat list and DataFrame() fails.
    rows = sht['A2:AV' + str(cntR)].options(ndim=2).value
    df = pd.DataFrame(rows, columns=header)
    # Collapse duplicate accounts by summing their rows.
    df = df.groupby(['賬戶名稱']).sum()
    df.reset_index(inplace=True)
    return df

def getHK(wb, dat):
    """Read the HK P4P sheet into a DataFrame with one row per user name.

    Parameters
    ----------
    wb : xlwings.Book
        The HK workbook; it must contain a sheet named ``'P4P ' + dat``.
    dat : str
        Target month as ``YYYYMM``.

    Raises
    ------
    ValueError
        If the header row (sheet row index 4) has no ``用戶名`` column. The
        workbook is closed before the error is raised.

    Returns
    -------
    pandas.DataFrame
        The table summed per ``用戶名``, with ``用戶名`` restored as a
        regular column.
    """
    sht = wb.sheets['P4P ' + dat]
    region = sht[4, 0].current_region
    cntR = region.rows.count
    cntC = region.columns.count
    header = sht[4, :cntC].value
    # Validate the header before building the frame.
    if '用戶名' not in header:
        wb.close()
        raise ValueError("HK GP表中 *用戶名* 不存在，請檢查excel後重新動行。")
    # ndim=2 keeps a list-of-rows shape even when there is only one data
    # row; without it xlwings returns a flat list and DataFrame() fails.
    rows = sht[5:cntR+4, :cntC].options(ndim=2).value
    df1 = pd.DataFrame(rows, columns=header)
    # Collapse duplicate users by summing their rows.
    df1 = df1.groupby(['用戶名']).sum()
    df1.reset_index(inplace=True)
    return df1

def result(sz, hk, f, strDat):
    """Write the SZ and HK revenue/GP figures into the ``P4P`` summary sheet.

    Parameters
    ----------
    sz : pandas.DataFrame
        SZ data with columns ``賬戶名稱``, ``p4p spending /1.06`` and ``GP_``.
    hk : pandas.DataFrame
        HK data with columns ``用戶名``, ``+HK Sales(RMB)`` and
        ``HK GP(RMB)``.
    f : str
        Name of the already-open summary workbook.
    strDat : str
        Target month as ``YYYYMM``; its abbreviated month name selects the
        ``Revenue <Mon> HK/SZ`` and ``GP <Mon> HK/SZ`` columns.

    Raises
    ------
    ValueError
        If ``用戶名`` or one of the four month columns is missing from the
        header row. The workbook is closed before re-raising.

    Returns
    -------
    None
    """
    try:
        wb = xw.books(f)
        sht = wb.sheets['P4P']
        table = sht[1, 0].current_region
        cntR = table.rows.count
        cntC = table.columns.count
        # Header row of the summary table.
        lis = sht[1, :cntC].value
        col = lis.index('用戶名') + 1
        # Existing rows, up to and including the user-name column.
        existing = pd.DataFrame(sht[2:cntR, :col].value,
                                columns=sht[1, :col].value)
        # Left-join the SZ figures, then the HK figures, on the user name.
        with_sz = pd.merge(existing,
                           sz[['賬戶名稱', 'p4p spending /1.06', 'GP_']],
                           left_on='用戶名', right_on='賬戶名稱', how='left')
        df4 = pd.merge(with_sz,
                       hk[['用戶名', "+HK Sales(RMB)", 'HK GP(RMB)']],
                       on='用戶名', how='left')
        df4.fillna(0, inplace=True)
        # Target column titles for this month; all must exist before any
        # value is written.
        month = getM(strDat)
        header = ['Revenue ' + month + ' HK',
                  'Revenue ' + month + ' SZ',
                  'GP ' + month + ' HK',
                  'GP ' + month + ' SZ']
        for title in header:
            if title not in lis:
                raise ValueError('檢查[%s]表頭應爲： %s' % (wb.name, title))
        # Source column for each target title, in the same order.
        sources = ['+HK Sales(RMB)', 'p4p spending /1.06',
                   'HK GP(RMB)', 'GP_']
        for title, source in zip(header, sources):
            target = sht[2, lis.index(title)]
            target.options(transpose=True).value = df4[source].values
        # Fill the region column for rows that have a user name (column C)
        # but no region yet (column A).
        last_region = sht['A2'].end('down').get_address(False, False)
        num_region = len(sht['A2:' + last_region].value) + 1
        last_user = sht['C2'].end('down').get_address(False, False)
        num_user = len(sht['C2:' + last_user]) + 1
        for row in range(num_region + 1, num_user + 1):
            cell = sht['A' + str(row)]
            # User names containing 'cny' are labelled SZ, all others HK.
            cell.value = 'SZ' if 'cny' in cell.offset(0, 2).value else 'HK'
        wb.save()
    except ValueError:
        wb.close()
        raise
    
def calc_sz(wb, strDat):
    """Add formula columns for SZ spending and GP to the SZ sheet.

    Two columns are appended right after the ``A1:AT1`` header range:

    ``p4p spending /1.06``
        ``([Total P4P Spending(Inc 6%VAT)] + [新產品消費(Inc 6%VAT)]
        + [點擊調整(Inc 6%VAT)] + [原生總消費]) / 1.06``
    ``GP_``
        ``([p4p spending /1.06] * 1.06 - ([Cost（Inc 6% VAT)]
        + [原生信息流Cost（Inc 6% VAT)]
        + [原生信息流Rebate&贈送（Inc 6% VAT)]
        + [Rebate&贈送（Inc 6% VAT)] + [V Cost(Inc 6% VAT)])) / 1.06``

    Parameters
    ----------
    wb : xlwings.Book
        The SZ GP workbook.
    strDat : str
        Target month as ``YYYYMM``; also the name of the sheet to process.

    Raises
    ------
    ValueError
        If any of the nine required header titles is missing.

    Returns
    -------
    None
    """
    sht = wb.sheets[strDat]
    # Remove any active filter before working on the sheet.
    sht.api.AutoFilterMode = False
    header = sht['A1:AT1'].value

    def cell_ref(title):
        # Relative address of the first data row under the given header.
        return sht[1, header.index(title)].get_address(False, False)

    spending_titles = ['Total P4P Spending(Inc 6%VAT)',
                       '新產品消費(Inc 6%VAT)',
                       '點擊調整(Inc 6%VAT)',
                       '原生總消費']
    cost_titles = ['Cost（Inc 6% VAT)',
                   '原生信息流Cost（Inc 6% VAT)',
                   '原生信息流Rebate&贈送（Inc 6% VAT)',
                   'Rebate&贈送（Inc 6% VAT)',
                   'V Cost(Inc 6% VAT)']
    # Every required title must be present; report the first missing one.
    for title in spending_titles + cost_titles:
        if title not in header:
            raise ValueError('請檢查excel表頭值： %s' % title)
    # Spending column, placed in the first column after the header range.
    first_new = len(header)
    sht[0, first_new].value = 'p4p spending /1.06'
    spending_sum = '+'.join(cell_ref(t) for t in spending_titles)
    sht[1, first_new].formula = '=(' + spending_sum + ')/1.06'
    # GP column, placed right after the spending column.
    sht[0, first_new + 1].value = 'GP_'
    spending_cell = sht[1, first_new].get_address(False, False)
    cost_sum = '+'.join(cell_ref(t) for t in cost_titles)
    sht[1, first_new + 1].formula = ('=(' + spending_cell + '*1.06-('
                                     + cost_sum + '))/1.06')
    # Fill down only as many rows as have a float in the first column
    # (the original comment describes this as excluding the KA rows).
    cntR = sht['A1'].current_region.rows.count
    numeric_rows = sum(1 for v in sht[1:cntR, 0].value
                       if isinstance(v, float))
    seed = sht[1, first_new:first_new + 2]
    destination = sht[1:numeric_rows + 1, first_new:first_new + 2]
    seed.api.AutoFill(destination.api, constants.AutoFillType.xlFillCopy)
    # Recalculate so the new formulas have values.
    wb.app.calculate()

def calc_hk(wb, strDat, rate='1.2'):
    """Add the converted sales, rebate and GP formula columns to the HK sheet.

    Two columns (``+HK Sales`` and ``+HK Sales(RMB)``) are inserted after
    ``HK Sales``; ``Client Rebate(RMB)``, ``HK GP`` and ``HK GP(RMB)`` are
    appended after the last column. The ``(RMB)`` columns divide by ``rate``.

    Parameters
    ----------
    wb : xlwings.Book
        The HK workbook; it must contain a sheet named ``'P4P ' + strDat``.
    strDat : str
        Target month as ``YYYYMM``.
    rate : str, optional
        Divisor inserted verbatim into the formulas. The default is '1.2'.

    Raises
    ------
    ValueError
        If the header cells at positions 2 to 6 are not the expected titles.

    Returns
    -------
    None
    """
    sht = wb.sheets['P4P ' + strDat]
    # Remove any active filter before working on the sheet.
    sht.api.AutoFilterMode = False
    # Index just past the last table row (header sits on row index 4).
    cntR = sht[4, 1].current_region.rows.count + 4
    cntC = sht[4, 1].current_region.columns.count
    # Positional header check: (column index, accepted titles).
    header = sht[4, :cntC].value
    expected = ((2, ('用戶名',)),
                (3, ('HK Sales',)),
                (4, ('HK Service',)),
                (5, ('HK Media Cost',)),
                (6, ('HK Client Rebate ', 'HK Client Rebate')))
    for position, accepted in expected:
        if header[position] not in accepted:
            raise ValueError("檢查[%s]表頭： %s" % (wb.name, header[position]))
    # Make room for two new columns right after 'HK Sales'.
    for _ in range(2):
        sht.api.Columns(5).Insert()
    # '+HK Sales' is the negated 'HK Sales' value.
    sht[4, 4].value = '+HK Sales'
    hk_sales_cell = sht[5, 3].get_address(False, False)
    sht[5, 4].formula = '=-' + hk_sales_cell
    # '+HK Sales(RMB)' divides it by the rate.
    sht[4, 5].value = '+HK Sales(RMB)'
    plus_sales_cell = sht[5, 4].get_address(False, False)
    sht[5, 5].formula = '=' + plus_sales_cell + '/' + rate
    sht[5, 4:6].api.AutoFill(sht[5:cntR, 4:6].api,
                             constants.AutoFillType.xlFillCopy)
    # Column count changed after the insert; re-read it.
    cntC = sht[4, 1].current_region.columns.count
    # 'Client Rebate(RMB)' converts the last existing column.
    sht[4, cntC].value = 'Client Rebate(RMB)'
    rebate_cell = sht[5, cntC-1].get_address(False, False)
    sht[5, cntC].formula = '=' + rebate_cell + '/' + rate
    # 'HK GP' = +HK Sales - HK Media Cost - HK Client Rebate, located by
    # their (whitespace-stripped) header titles.
    header = tuple(title.strip() for title in sht[4, :cntC + 1].value)
    sht[4, cntC + 1].value = 'HK GP'
    gp_terms = [sht[5, header.index(title)].get_address(False, False)
                for title in ('+HK Sales', 'HK Media Cost',
                              'HK Client Rebate')]
    sht[5, cntC + 1].formula = '=' + '-'.join(gp_terms)
    # 'HK GP(RMB)' divides HK GP by the rate.
    sht[4, cntC + 2].value = 'HK GP(RMB)'
    gp_cell = sht[5, cntC + 1].get_address(False, False)
    sht[5, cntC + 2].formula = '=' + gp_cell + '/' + rate
    # Fill the three appended formulas down the table.
    sht[5, cntC:cntC + 3].api.AutoFill(sht[5:cntR, cntC:cntC + 3].api,
                                       constants.AutoFillType.xlFillCopy)
    # Format the three new header cells: font size copied from the previous
    # header cell, bold, plus border index 9 with line style 1 / weight 3.
    new_titles = sht[4, cntC:cntC + 3]
    new_titles.api.Font.Size = sht[4, cntC - 1].api.Font.Size
    new_titles.api.Font.Bold = True
    new_titles.api.Borders(9).LineStyle = 1
    new_titles.api.Borders(9).Weight = 3
    sht.autofit()
    # Recalculate so the new formulas have values.
    wb.app.calculate()

def getRate(wb, strDat):
    """Look up the book rate for the target month in the book-rate workbook.

    The sheet named ``strDat`` is used when it exists. Otherwise the sheet
    names ``strDat + k`` and then ``strDat - k`` are tried for k = 1..999.
    NOTE(review): the offset is applied to the integer ``YYYYMM`` value, not
    to calendar months (e.g. 202001 - 1 gives 202000).

    Parameters
    ----------
    wb : xlwings.Book
        The book-rate workbook.
    strDat : str
        Target month as ``YYYYMM``.

    Returns
    -------
    str or None
        ``str()`` of the value found next to the matching date, or ``None``
        when no sheet or no matching date is found.
    """
    def getValue(shtName):
        # Scan the second column from row index 3 for a date in the target
        # month and return the value in the column to its right.
        sht = wb.sheets[shtName]
        cntR = sht[3, 1].current_region.rows.count
        for offset, cell in enumerate(sht[3:cntR+3, 1].value):
            if (isinstance(cell, datetime.datetime)
                    and cell.strftime('%Y%m') == strDat):
                return str(sht[3 + offset, 2].value)

    sheet_names = [sheet.name for sheet in wb.sheets]
    # Exact sheet for the target month.
    if strDat in sheet_names:
        return getValue(strDat)
    # Otherwise probe neighbouring numbers, higher before lower.
    base = int(strDat)
    for step in range(1, 1000):
        for candidate in (str(base + step), str(base - step)):
            if candidate in sheet_names:
                return getValue(candidate)
                
def writeTo(strDat):
    """Refresh the region / port / user-name columns of the summary sheet.

    Existing rows of the ``P4P`` sheet in ``P4P+KA GP_<strDat>`` are merged
    with the accounts from the daily workbook plus any SZ / HK accounts that
    have non-zero figures, de-duplicated by user name and written back
    starting at cell ``A3``.

    Parameters
    ----------
    strDat : str
        Target month as ``YYYYMM``.

    Returns
    -------
    None
    """
    def getBasicInfo():
        # Read region, user name and port (rows from index 9 on) from the
        # 'P4P消費' sheet of the daily workbook.
        daily_ = [f for f in getFil(strDat) if '日環比' in f]
        wb = xw.books(daily_[0].split('.')[0])
        sht = wb.sheets['P4P消費']
        cntC = sht['A1'].current_region.columns.count
        cntR = sht['A1'].current_region.rows.count
        lis = sht[0, :cntC].value
        region = sht[9:cntR, lis.index('區域')].value
        user = sht[9:cntR, lis.index('用戶名')].value
        port = sht[9:cntR, lis.index('端口')].value
        return region, user, port

    def getFilName(region):
        # Open workbook whose file name contains the given keyword.
        lis_fil = [f for f in getFil(strDat) if region in f]
        wb = xw.books(lis_fil[0].split('.')[0])
        return wb

    def dropDuplicates_1(basic_users):
        # Daily user names followed by SZ / HK accounts whose figures do not
        # sum to zero, with duplicates removed (first occurrence kept).
        wb_sz = getFilName('GP Analysis')
        df_sz = getSZ(wb_sz, strDat)
        s_sz = df_sz.loc[(df_sz['p4p spending /1.06']+df_sz['GP_'])!=0
                         , '賬戶名稱']
        wb_hk = getFilName('P4P & KA')
        df_hk = getHK(wb_hk, strDat)
        s_hk = df_hk.loc[(df_hk['+HK Sales(RMB)']+df_hk['HK GP(RMB)'])!=0
                         , '用戶名']
        # pd.concat replaces Series.append, which pandas 2.0 removed.
        b_user = pd.concat([pd.Series(basic_users), s_sz, s_hk])
        b_user.drop_duplicates(inplace=True)
        return b_user.values

    def addNull(dic):
        # Pad the shorter columns with None so all have the same length.
        max_ = max(len(v) for v in dic.values())
        for k, v in dic.items():
            if len(v) < max_:
                # list(v) so padding also works when v is a numpy array.
                dic[k] = list(v) + [None] * (max_ - len(v))

    columns = ['區域', '端口', '用戶名']
    wb = xw.books('P4P+KA GP_' + strDat)
    sht = wb.sheets['P4P']
    cntC = sht['A2'].current_region.columns.count
    cntR = sht['A2'].current_region.rows.count
    header = sht[1, :cntC].value
    # Rows already present in the summary sheet.
    old_data = pd.DataFrame(sht[2:cntR, header.index('區域'):
                                header.index('用戶名')+1].value
                            , columns=columns)
    # Read the daily workbook once; every call re-scans the folders and
    # re-reads the sheet.
    region, user, port = getBasicInfo()
    dic = {'區域': region,
           '端口': port,
           '用戶名': dropDuplicates_1(user)}
    addNull(dic)
    # Append the new rows and keep the first row per user name.
    new_data = pd.DataFrame(dic, columns=columns)
    old_data = pd.concat([old_data, new_data]).drop_duplicates('用戶名')
    sht['A3'].value = old_data.values
       
def main():
    """Run the whole consolidation for a month entered by the user.

    Prompts for the target month, opens every workbook returned by
    ``getFil``, adds the SZ and HK formula columns, then updates the summary
    workbook's basic info and spending / GP figures and saves the SZ and HK
    workbooks.

    Raises
    ------
    ValueError
        Re-raised after closing the SZ / HK workbooks that were opened.
        Any other exception is printed instead of propagated.

    Returns
    -------
    None
    """
    # Pre-bind so the error handler never hits an unbound name when the
    # failure happens before a workbook was opened.
    wb = None
    wb1 = None
    try:
        strDat = check(input('輸入目標月度(如當前爲5月: 202005)'))
        # Resolve the file list and folders once instead of on every use.
        files = getFil(strDat)
        walked = list(os.walk(PATH))
        root_dir = walked[0][0]
        sub_dir = walked[1][0]
        for n, f in enumerate(files):
            print(n, f)
            # The first file lives in PATH itself, the rest in the first
            # sub-folder.
            folder = root_dir if n == 0 else sub_dir
            full_path = os.path.join(folder, f)
            if not os.path.exists(full_path):
                raise FileNotFoundError('文件不存在: %s' % f)
            xw.Book(full_path)
            if 'GP Analysis' in f:
                # SZ workbook: add formulas, then read the result.
                wb1 = xw.books(f)
                calc_sz(wb1, strDat)
                data_sz = getSZ(wb1, strDat)
            elif 'P4P & KA Schedules' in f:
                # HK workbook: needs the book rate before converting.
                wb = xw.books(f)
                bookRate = [x for x in files if 'book rate' in x][0]
                xw.Book(os.path.join(sub_dir, bookRate))
                rate = getRate(xw.books(bookRate.split('.')[0]), strDat)
                calc_hk(wb, strDat, rate)
                data_hk = getHK(wb, strDat)
            elif 'P4P+KA' in f:
                target = f
        # Update the account list, then the monthly figures.
        writeTo(strDat)
        result(data_sz, data_hk, target, strDat)
    except ValueError:
        for book in (wb, wb1):
            if book is not None:
                book.close()
        raise
    except Exception as e:
        print(e)
    else:
        wb.save()
        wb1.save()
        
if __name__ == '__main__':
    # Time the whole run and report the elapsed value.
    started = now()
    main()
    elapsed = now() - started
    print("程序運行結束，耗時： %s" % elapsed)
    # Wait a minute before exiting (presumably so the console output of the
    # packaged executable stays visible; confirm).
    time.sleep(60)

代碼打包

# Anaconda Prompt

PyInstaller -F autoGP.py

注意事項

1.所有輸入excel必須按模版要求進行整理；
2.要提前對所有輸入的參數進行檢查，以便出現異常時給出友好的提示；
3.準備操作流程指南：

xlwings - 報表自動換算、彙總

文章目錄

需求

現狀

解決方案

環境 & 工具

主函數代碼流程圖

代碼

代碼打包

注意事項

商務統計_8 數值描述度量 - 集中趨勢

商務統計_7 用圖表演示數據 - 定量數據

Java_2 面向對象

小結 pandas 時間序列

xlwings - 報表自動換算、彙總

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結