需求
excel報表自動彙總。
現狀
解決方案
1.基於Python的xlwings包,完成報表數據轉換、換算、彙總。
2.使用PyInstaller將代碼打包,降低使用門檻。
環境 & 工具
Win7
Excel
Anaconda3
- xlwings 0.16.0
- pandas 0.23.3
- PyInstaller 3.6
主函數代碼流程圖
代碼
# -*- coding: utf-8 -*-
'''
autoGP.py
/ 假設Metrics爲模版輸出
'''
from xlwings import constants
import xlwings as xw
import pandas as pd
import datetime
import time
import os
def now():
    """Return the current reading of the performance counter, in seconds."""
    return time.perf_counter()


# Folder that holds the monthly workbooks (raw string, Windows-style path).
PATH = r'\Desktop\Metrics'
def check(strDat):
    """Validate the target month typed by the user.

    Parameters
    ----------
    strDat : str
        Target month written as exactly six digits, ``YYYYMM``
        (for example ``'202005'``).

    Returns
    -------
    str
        ``strDat`` unchanged when it is valid.

    Raises
    ------
    Exception
        When the input is not a six-digit ``YYYYMM`` value naming a real
        month. The generic exception type is kept so existing callers that
        catch it keep working.
    """
    try:
        # strptime('%Y%m') on its own accepts five-digit input such as
        # '20205' (the month is parsed from the single digit '5'); such a
        # value would later fail to match any file name, so the six-digit
        # shape is enforced explicitly.
        if len(strDat) != 6 or not strDat.isdecimal():
            raise ValueError(strDat)
        # Reject impossible months such as '202013' or '202000'.
        datetime.datetime.strptime(strDat, '%Y%m')
    except (TypeError, ValueError):
        raise Exception('請按要求輸入,如當前爲5月,則輸入:202005') from None
    return strDat
def getM(dat):
    """Return the abbreviated month name (``%b``) of a ``YYYYMM`` string."""
    parsed = datetime.datetime.strptime(dat, '%Y%m')
    return parsed.strftime('%b')
def getFil(dat):
    """Collect the file names of every workbook needed for month ``dat``.

    Parameters
    ----------
    dat : str
        Target month written as ``YYYYMM``.

    Returns
    -------
    list of str
        Bare file names (no directory part) in this order:

        1. the summary workbook(s) ``P4P+KA GP_<dat>`` listed directly in
           ``PATH``;
        2. every ``GP Analysis-<dat>`` and
           ``P4P & KA Schedules <dat> - Janice`` workbook found anywhere
           under ``PATH``;
        3. the book-rate workbook found in the first sub-folder of ``PATH``;
        4. the most recently modified daily ring-ratio workbook from that
           same sub-folder.

    Raises
    ------
    FileNotFoundError
        When any of the required workbooks, or the sub-folder itself, is
        missing.
    """
    # Summary workbook (SZ + HK), looked up directly inside PATH.
    summary_stem = 'P4P+KA GP_' + dat
    lis = [f for f in os.listdir(PATH) if f.split('.')[0] == summary_stem]
    if not lis:
        raise FileNotFoundError(
            '指定路徑下:"P4P+KA GP_%s.xlsx"文件不存在'
            '\n補充文件後重新運行。' % dat)

    # SZ and HK GP workbooks, searched through the whole tree.
    wanted_stems = ('GP Analysis-' + dat,
                    'P4P & KA Schedules ' + dat + ' - Janice')
    fil = [f for entry in os.walk(PATH) for f in entry[2]
           if f.split('.')[0] in wanted_stems]
    if len(fil) < 2:
        raise FileNotFoundError(
            '指定路徑下:"GP Analysis-%s.xlsx"'
            '或"P4P & KA Schedules %s"文件不存在'
            '\n補充文件後重新運行。' % (dat, dat))
    lis += fil

    # The second directory yielded by os.walk is the first sub-folder of
    # PATH. It is resolved once, lazily, instead of materialising the whole
    # walk on every loop iteration and every sort-key call.
    walker = os.walk(PATH)
    next(walker, None)
    first_sub = next(walker, None)
    if first_sub is None:
        raise FileNotFoundError('指定路徑下沒找到子文件夾.補充文件後重新運行。')
    sub_dir = first_sub[0]
    sub_listing = os.listdir(sub_dir)

    # Book rate: the file name carries a month that may be later than the
    # target one, so candidate names are probed in 20-day steps from the
    # first day of the target month (offsets 0, 20, ..., 180 days).
    first_day = datetime.datetime.strptime(dat, '%Y%m')
    for offset in range(0, 200, 20):
        stamp = (first_day + datetime.timedelta(offset)).strftime('%m-%Y')
        name = 'Ex rate ' + stamp + '_book rate.xlsx'
        if name in sub_listing:
            break
    else:
        raise FileNotFoundError(
            "指定路徑下沒找到 'book rate' 文件.補充文件後重新運行。")
    lis.append(name)

    # Daily ring-ratio workbook: ignore names containing '~' and keep the
    # most recently modified one.
    lis_1 = [f for f in sub_listing if '日環比' in f and '~' not in f]
    if not lis_1:
        raise FileNotFoundError(
            '指定路徑下沒找到 *日環比* 文件.補充文件後重新運行')
    lis.append(max(
        lis_1, key=lambda x: os.path.getmtime(os.path.join(sub_dir, x))))
    return lis
def getSZ(wb, dat):
    """Read the SZ GP sheet named ``dat`` and total it per account name.

    Parameters
    ----------
    wb : xlwings Book
        The SZ GP workbook.
    dat : str
        Name of the sheet to read.

    Returns
    -------
    pandas.DataFrame
        Columns ``A:AV`` of the sheet, summed per ``賬戶名稱``, with the
        account name restored as an ordinary column.

    Raises
    ------
    ValueError
        When the header row ``A1:AV1`` has no ``賬戶名稱`` column; the
        workbook is closed before raising.
    """
    sht = wb.sheets[dat]
    last_row = sht[0, 0].current_region.rows.count
    titles = sht['A1:AV1'].value
    # Header validation
    if '賬戶名稱' not in titles:
        wb.close()
        raise ValueError('SZ GP表中 *賬戶名稱* 不存在或錯誤,請檢查excel後重新運行。')
    body = sht['A2:AV' + str(last_row)].value
    frame = pd.DataFrame(body, columns=titles)
    # Collapse duplicated accounts into a single summed row
    return frame.groupby(['賬戶名稱']).sum().reset_index()
def getHK(wb, dat):
    """Read the HK sheet ``'P4P ' + dat`` and total it per user name.

    Parameters
    ----------
    wb : xlwings Book
        The HK GP workbook.
    dat : str
        Suffix of the sheet name.

    Returns
    -------
    pandas.DataFrame
        The table whose header is on the fifth row, summed per ``用戶名``,
        with the user name restored as an ordinary column.

    Raises
    ------
    ValueError
        When the header row has no ``用戶名`` column; the workbook is closed
        before raising.
    """
    sht = wb.sheets['P4P ' + dat]
    table = sht[4, 0].current_region
    n_rows = table.rows.count
    n_cols = table.columns.count
    titles = sht[4, :n_cols].value
    # Header validation
    if '用戶名' not in titles:
        wb.close()
        raise ValueError("HK GP表中 *用戶名* 不存在,請檢查excel後重新動行。")
    # Data starts on the row right below the header (row index 5)
    frame = pd.DataFrame(sht[5:n_rows + 4, :n_cols].value, columns=titles)
    # Collapse duplicated users into a single summed row
    return frame.groupby(['用戶名']).sum().reset_index()
def result(sz, hk, f, strDat):
    """Write the SZ and HK revenue / GP figures into sheet ``P4P`` of ``f``.

    Parameters
    ----------
    sz : pandas.DataFrame
        SZ figures; columns ``賬戶名稱``, ``p4p spending /1.06`` and ``GP_``
        are used.
    hk : pandas.DataFrame
        HK figures; columns ``用戶名``, ``+HK Sales(RMB)`` and
        ``HK GP(RMB)`` are used.
    f : str
        Name of the already opened summary workbook.
    strDat : str
        Target month as ``YYYYMM``; its abbreviated month name selects the
        four destination columns.

    Raises
    ------
    ValueError
        When ``用戶名`` or one of the four monthly column headers is missing
        from the header row. The workbook is closed before re-raising.

    Returns
    -------
    None.
    """
    try:
        wb = xw.books(f)
        sht = wb.sheets['P4P']
        table = sht[1, 0].current_region
        n_rows = table.rows.count
        n_cols = table.columns.count
        # Header row of the summary sheet
        lis = sht[1, :n_cols].value
        user_col = lis.index('用戶名') + 1
        # Existing rows, up to and including the user-name column
        existing = pd.DataFrame(sht[2:n_rows, :user_col].value,
                                columns=sht[1, :user_col].value)
        # Attach SZ, then HK figures; users without figures get 0
        sz_part = sz[['賬戶名稱', 'p4p spending /1.06', 'GP_']]
        hk_part = hk[['用戶名', "+HK Sales(RMB)", 'HK GP(RMB)']]
        merged = pd.merge(existing, sz_part,
                          left_on='用戶名', right_on='賬戶名稱', how='left')
        merged = pd.merge(merged, hk_part, on='用戶名', how='left')
        merged = merged.fillna(0)
        # (destination header, source column), in write order
        month = getM(strDat)
        targets = (
            ('Revenue ' + month + ' HK', '+HK Sales(RMB)'),
            ('Revenue ' + month + ' SZ', 'p4p spending /1.06'),
            ('GP ' + month + ' HK', 'HK GP(RMB)'),
            ('GP ' + month + ' SZ', 'GP_'),
        )
        # Validate every destination header before writing anything
        for title, _ in targets:
            if title not in lis:
                raise ValueError('檢查[%s]表頭應爲: %s' % (wb.name, title))
        # Write each series downwards from the third row
        for title, source in targets:
            cell = sht[2, lis.index(title)]
            cell.options(transpose=True).value = merged[source].values
        # Fill the region column for rows that have a user but no region:
        # 'SZ' when the user name contains 'cny', otherwise 'HK'
        region_end = sht['A2'].end('down').get_address(False, False)
        user_end = sht['C2'].end('down').get_address(False, False)
        num_region = len(sht['A2:' + region_end].value) + 1
        num_user = len(sht['C2:' + user_end]) + 1
        for row in range(num_region + 1, num_user + 1):
            region_cell = sht['A' + str(row)]
            if 'cny' in region_cell.offset(0, 2).value:
                region_cell.value = 'SZ'
            else:
                region_cell.value = 'HK'
        wb.save()
    except ValueError:
        wb.close()
        raise
def calc_sz(wb, strDat):
    """Append the SZ spending and GP formula columns to sheet ``strDat``.

    Two columns are written right after the header range ``A1:AT1``:

    ``p4p spending /1.06``
        (Total P4P Spending + 新產品消費 + 點擊調整 + 原生總消費) / 1.06
    ``GP_``
        (spending * 1.06 - (Cost + 原生信息流Cost + 原生信息流Rebate&贈送
        + Rebate&贈送 + V Cost)) / 1.06

    Parameters
    ----------
    wb : xlwings Book
        The SZ GP workbook.
    strDat : str
        Target month as ``YYYYMM``; also the name of the sheet.

    Raises
    ------
    ValueError
        When one of the nine source headers is missing from ``A1:AT1``.

    Returns
    -------
    None.
    """
    sht = wb.sheets[strDat]
    # Drop any active auto-filter first
    sht.api.AutoFilterMode = False
    header = sht['A1:AT1'].value

    def cell_of(title):
        # Relative address of the first data cell under ``title``
        return sht[1, header.index(title)].get_address(False, False)

    spending_titles = ('Total P4P Spending(Inc 6%VAT)',
                       '新產品消費(Inc 6%VAT)',
                       '點擊調整(Inc 6%VAT)',
                       '原生總消費')
    cost_titles = ('Cost(Inc 6% VAT)',
                   '原生信息流Cost(Inc 6% VAT)',
                   '原生信息流Rebate&贈送(Inc 6% VAT)',
                   'Rebate&贈送(Inc 6% VAT)',
                   'V Cost(Inc 6% VAT)')
    # Validate every required header, in the order listed above
    for title in spending_titles + cost_titles:
        if title not in header:
            raise ValueError('請檢查excel表頭值: %s' % title)

    width = len(header)
    # Spending column
    sht[0, width].value = 'p4p spending /1.06'
    sht[1, width].formula = (
        '=(' + '+'.join(cell_of(t) for t in spending_titles) + ')/1.06')
    # GP column
    sht[0, width + 1].value = 'GP_'
    sht[1, width + 1].formula = (
        '=(' + sht[1, width].get_address(False, False) + '*1.06-('
        + '+'.join(cell_of(t) for t in cost_titles) + '))/1.06')
    # Fill down only as many rows as have a numeric first column (the
    # original comment describes this as excluding the KA rows)
    cntR = sht['A1'].current_region.rows.count
    numeric_rows = [v for v in sht[1:cntR, 0].value if isinstance(v, float)]
    source = sht[1, width:width + 2]
    destination = sht[1:len(numeric_rows) + 1, width:width + 2]
    source.api.AutoFill(destination.api, constants.AutoFillType.xlFillCopy)
    # Recalculate
    wb.app.calculate()
def calc_hk(wb, strDat, rate='1.2'):
    """Add the HK sales / rebate / GP formula columns to the HK sheet.

    Parameters
    ----------
    wb : xlwings Book
        The HK GP workbook.
    strDat : str
        Target month as ``YYYYMM``; the sheet used is ``'P4P ' + strDat``.
    rate : str, optional
        Divisor inserted verbatim into the ``(RMB)`` formulas.
        The default is '1.2'.

    Raises
    ------
    ValueError
        When the headers at positions 2-6 of the header row are not
        ``用戶名``, ``HK Sales``, ``HK Service``, ``HK Media Cost`` and
        ``HK Client Rebate`` (with or without one trailing space).

    Returns
    -------
    None.
    """
    sht = wb.sheets['P4P ' + strDat]
    # Drop any active auto-filter first
    sht.api.AutoFilterMode = False
    table = sht[4, 1].current_region
    # Exclusive end row of the data, as a sheet row index
    last_row = table.rows.count + 4
    width = table.columns.count
    # Header validation: (position, accepted spellings)
    header = sht[4, :width].value
    expected = (
        (2, ('用戶名',)),
        (3, ('HK Sales',)),
        (4, ('HK Service',)),
        (5, ('HK Media Cost',)),
        (6, ('HK Client Rebate ', 'HK Client Rebate')),
    )
    for position, accepted in expected:
        if header[position] not in accepted:
            raise ValueError(
                "檢查[%s]表頭: %s" % (wb.name, header[position]))
    # Make room for two new columns right after 'HK Sales'
    for _ in range(2):
        sht.api.Columns(5).Insert()
    # '+HK Sales' is the sign-flipped 'HK Sales'
    sht[4, 4].value = '+HK Sales'
    sht[5, 4].formula = '=-' + sht[5, 3].get_address(False, False)
    # '+HK Sales(RMB)' is '+HK Sales' divided by the rate
    sht[4, 5].value = '+HK Sales(RMB)'
    sht[5, 5].formula = '=' + sht[5, 4].get_address(False, False) + '/' + rate
    sht[5, 4:6].api.AutoFill(sht[5:last_row, 4:6].api,
                             constants.AutoFillType.xlFillCopy)
    # Re-measure the table width now that two columns were inserted
    width = sht[4, 1].current_region.columns.count
    # 'Client Rebate(RMB)': the table's last column divided by the rate
    sht[4, width].value = 'Client Rebate(RMB)'
    sht[5, width].formula = (
        '=' + sht[5, width - 1].get_address(False, False) + '/' + rate)
    # 'HK GP' = +HK Sales - HK Media Cost - HK Client Rebate
    header = tuple(title.strip() for title in sht[4, :width + 1].value)
    sales = sht[5, header.index('+HK Sales')].get_address(False, False)
    media = sht[5, header.index('HK Media Cost')].get_address(False, False)
    rebate = sht[5, header.index('HK Client Rebate')].get_address(False, False)
    sht[4, width + 1].value = 'HK GP'
    sht[5, width + 1].formula = '=' + sales + '-' + media + '-' + rebate
    # 'HK GP(RMB)' is 'HK GP' divided by the rate
    sht[4, width + 2].value = 'HK GP(RMB)'
    sht[5, width + 2].formula = (
        '=' + sht[5, width + 1].get_address(False, False) + '/' + rate)
    # Fill the three new trailing columns downwards
    sht[5, width:width + 3].api.AutoFill(sht[5:last_row, width:width + 3].api,
                                         constants.AutoFillType.xlFillCopy)
    # Format the new header cells: same font size as the neighbouring
    # header, bold, plus a border on edge index 9
    new_titles = sht[4, width:width + 3]
    new_titles.api.Font.Size = sht[4, width - 1].api.Font.Size
    new_titles.api.Font.Bold = True
    new_titles.api.Borders(9).LineStyle = 1
    new_titles.api.Borders(9).Weight = 3
    sht.autofit()
    # Recalculate
    wb.app.calculate()
def getRate(wb, strDat):
    """Look up the book rate of month ``strDat`` in the book-rate workbook.

    Sheets are expected to be named with ``YYYYMM``-like numbers. The sheet
    named ``strDat`` is tried first, then the sheets whose numeric name is
    ``strDat`` +1, -1, +2, -2, ... up to a distance of 999. Inside a sheet,
    the second column is scanned from the fourth row down for a date in the
    target month (or a cell equal to ``strDat``), and the value next to it
    is the rate.

    Parameters
    ----------
    wb : xlwings Book
        The book-rate workbook.
    strDat : str
        Target month as ``YYYYMM``.

    Returns
    -------
    str
        The rate converted with ``str`` so it can be spliced into a formula.

    Raises
    ------
    LookupError
        When no sheet contains a non-empty rate for ``strDat``. Previously
        the function returned ``None`` (or the string ``'None'`` for an empty
        cell), which only failed later with an unrelated error.
    """
    def getValue(shtName):
        # Return the rate stored in sheet ``shtName`` or None when absent.
        sht = wb.sheets[shtName]
        cntR = sht[3, 1].current_region.rows.count
        for n, j in enumerate(sht[3:cntR+3, 1].value):
            if isinstance(j, datetime.datetime):
                j = j.strftime('%Y%m')
            if strDat == j:
                value = sht[3 + n, 2].value
                if value is not None:
                    return str(value)
        return None

    sheet_names = {i.name for i in wb.sheets}
    base = int(strDat)

    def candidates():
        # Exact month first, then alternately later / earlier sheet names.
        yield strDat
        for i in range(1, 1000):
            yield str(base + i)
            yield str(base - i)

    for name in candidates():
        if name in sheet_names:
            found = getValue(name)
            # Keep searching when the nearest sheet lacks the target month.
            if found is not None:
                return found
    raise LookupError(
        "'book rate' 表中沒找到 %s 的匯率,請檢查excel後重新運行。" % strDat)
def writeTo(strDat):
    """Refresh the region / port / user-name columns of the summary sheet.

    Existing rows of sheet ``P4P`` in workbook ``P4P+KA GP_<strDat>`` are
    kept; users listed in the daily ring-ratio workbook and users with
    non-zero SZ or HK figures are appended when not already present. The
    combined table is written back starting at cell ``A3``.

    Parameters
    ----------
    strDat : str
        Target month as ``YYYYMM``.

    Returns
    -------
    None.
    """
    columns = ['區域', '端口', '用戶名']
    # Resolve the input files once; every helper below reuses this list
    # instead of scanning the folders again.
    files = getFil(strDat)

    def getFilName(keyword):
        # Opened workbook whose file name contains ``keyword``.
        lis_fil = [f for f in files if keyword in f]
        return xw.books(lis_fil[0].split('.')[0])

    def getBasicInfo():
        # Region, user name and port columns of the daily workbook
        # (sheet 'P4P消費', from the tenth row down).
        sht = getFilName('日環比').sheets['P4P消費']
        cntC = sht['A1'].current_region.columns.count
        cntR = sht['A1'].current_region.rows.count
        lis = sht[0, :cntC].value
        region = sht[9:cntR, lis.index('區域')].value
        user = sht[9:cntR, lis.index('用戶名')].value
        port = sht[9:cntR, lis.index('端口')].value
        return region, user, port

    def activeUsers():
        # Users whose SZ or HK figures do not sum to zero.
        df_sz = getSZ(getFilName('GP Analysis'), strDat)
        s_sz = df_sz.loc[(df_sz['p4p spending /1.06'] + df_sz['GP_']) != 0,
                         '賬戶名稱']
        df_hk = getHK(getFilName('P4P & KA'), strDat)
        s_hk = df_hk.loc[(df_hk['+HK Sales(RMB)'] + df_hk['HK GP(RMB)']) != 0,
                         '用戶名']
        return list(s_sz) + list(s_hk)

    wb = xw.books('P4P+KA GP_' + strDat)
    sht = wb.sheets['P4P']
    cntC = sht['A2'].current_region.columns.count
    cntR = sht['A2'].current_region.rows.count
    header = sht[1, :cntC].value
    # Rows already present in the summary sheet
    old_data = pd.DataFrame(sht[2:cntR, header.index('區域'):
                                header.index('用戶名')+1].value,
                            columns=columns)
    # Users from the daily workbook. Region and port stay on the same row
    # as their user, so duplicated or blank user names can no longer shift
    # them onto the wrong user.
    region, user, port = getBasicInfo()
    basic = pd.DataFrame({'區域': region, '端口': port, '用戶名': user},
                         columns=columns)
    # Users known only from the SZ / HK figures have no region or port yet
    extra_users = activeUsers()
    blanks = [None] * len(extra_users)
    extra = pd.DataFrame({'區域': blanks, '端口': list(blanks),
                          '用戶名': extra_users},
                         columns=columns)
    # Merge and keep the first occurrence of every user. pd.concat is used
    # because DataFrame.append / Series.append no longer exist in pandas 2.
    merged = pd.concat([old_data, basic, extra], ignore_index=True)
    merged = merged.drop_duplicates('用戶名')
    sht['A3'].value = merged.values
def main():
    """Run the monthly GP roll-up end to end.

    Asks for the target month, opens every workbook returned by ``getFil``,
    adds the SZ and HK formula columns, refreshes the user list of the
    summary sheet and writes the monthly figures into it.

    Raises
    ------
    ValueError
        Re-raised after the SZ and HK workbooks have been closed (best
        effort). Every other exception is printed and swallowed.

    Returns
    -------
    None.
    """
    def _close_quietly(book):
        # Best-effort cleanup: the book may never have been opened, or a
        # helper may already have closed it before raising.
        if book is None:
            return
        try:
            book.close()
        except Exception:
            pass

    # Bound up front so the handlers below never hit an unbound name, which
    # would hide the error that actually occurred.
    wb = wb1 = None
    try:
        # Input and validation
        strDat = check(input('輸入目標月度(如當前爲5月: 202005)'))
        files = getFil(strDat)
        # The first file lives in PATH itself, the others in the first
        # sub-folder yielded by os.walk; both are resolved once.
        walker = os.walk(PATH)
        root_dir = next(walker, (PATH,))[0]
        first_sub = next(walker, None)
        if first_sub is None:
            raise FileNotFoundError('指定路徑下沒找到子文件夾.補充文件後重新運行。')
        sub_dir = first_sub[0]
        for n, f in enumerate(files):
            print(n, f)
            full_path = os.path.join(root_dir if n == 0 else sub_dir, f)
            if not os.path.exists(full_path):
                raise FileNotFoundError('文件不存在: %s' % f)
            xw.Book(full_path)
            if 'GP Analysis' in f:
                wb1 = xw.books(f)
                calc_sz(wb1, strDat)
                data_sz = getSZ(wb1, strDat)
            elif 'P4P & KA Schedules' in f:
                wb = xw.books(f)
                # Open the book-rate workbook and read the month's rate
                bookRate = [x for x in files if 'book rate' in x][0]
                xw.Book(os.path.join(sub_dir, bookRate))
                rate = getRate(xw.books(bookRate.split('.')[0]), strDat)
                # Convert the HK figures
                calc_hk(wb, strDat, rate)
                data_hk = getHK(wb, strDat)
            elif 'P4P+KA' in f:
                target = f
        # Update the basic user information, then the monthly figures
        writeTo(strDat)
        result(data_sz, data_hk, target, strDat)
    except ValueError:
        _close_quietly(wb)
        _close_quietly(wb1)
        raise
    except Exception as e:
        print(e)
    else:
        if wb is not None:
            wb.save()
        if wb1 is not None:
            wb1.save()
if __name__ == '__main__':
    # Time the whole run and report it
    started = now()
    main()
    elapsed = now() - started
    print("程序運行結束,耗時: %s" % elapsed)
    # Pause for a minute before exiting (presumably so the console window
    # of the packaged executable stays readable)
    time.sleep(60)
代碼打包
# Anaconda Prompt
PyInstaller -F autoGP.py
注意事項
1.所有輸入excel必須按模版要求進行整理;
2.要提前對所有輸入的參數進行檢查,以便出現異常時給出友好的提示;
3.準備操作流程指南: