需求
每週郵件發送:近兩週 top30 的客戶周環比
環境 & 工具
Win7
Python 3.6
Excel
流程
代碼
- 訪問配置文件
# _*_ coding: utf-8 _*_
'''
// getConfig.py
從配置文件中讀取信息
'''
from configparser import ConfigParser
class Conf():
    """Reader for the INI-style configuration file at ``self._path``.

    Every getter re-parses the file, so edits to the file are picked up on
    the next call.  Lookups go through ``ConfigParser.get`` and therefore
    raise ``configparser.NoSectionError`` / ``NoOptionError`` when the
    requested section or option is absent (including when the file itself
    cannot be read, because ``ConfigParser.read`` skips unreadable files).
    """

    def __init__(self):
        self._path = r'H:\SZ_數據\Python\c.s.conf'

    def _load(self):
        """Return a freshly parsed ``ConfigParser`` for ``self._path``."""
        parser = ConfigParser()
        parser.read(self._path)
        return parser

    def getEmail(self, sec, smt, email, pw):
        """Return ``(server, sender address, password)`` from section *sec*.

        *smt*, *email* and *pw* are the option names to look up, in that
        order; the values are returned as a 3-tuple of strings.
        """
        parser = self._load()
        return (parser.get(sec, smt),
                parser.get(sec, email),
                parser.get(sec, pw))

    def getToEmail(self, sec, toEmail):
        """Return the value of option *toEmail* in section *sec*."""
        return self._load().get(sec, toEmail)

    def getInfo(self, sec, *options):
        """Return the values of *options* in section *sec* as a tuple.

        The values come back in the order the option names were given, so
        the result can be fed straight into ``%``-formatting (this is how
        ``db.getUrl`` consumes it).
        """
        parser = self._load()
        return tuple(parser.get(sec, option) for option in options)
- 發送郵件
# _*_ coding: utf-8 _*_
'''
// sendEmail.py
Send email
'''
import smtplib
from getConfig import Conf
from email.header import Header
from email.mime.text import MIMEText
from email.utils import parseaddr, formataddr
from email.mime.multipart import MIMEMultipart
from email.mime.application import MIMEApplication
def _format_addr(s):
# 格式化郵件地址
# Header 如是中文必須編碼
#
name, addr = parseaddr(s)
return formataddr((Header(name, 'utf-8').encode(), addr))
def sendEmail(subject, message, files=None, to_addr='newIOSys'):
    """Send a plain-text e-mail, optionally with file attachments.

    Parameters:
        subject: subject line; encoded as UTF-8 in the header.
        message: plain-text body, sent as UTF-8.
        files: optional iterable of file paths to attach.  Paths that are
            not existing files are reported on stdout and skipped.
        to_addr: option name in the ``to_addr`` config section whose value
            is the comma-separated recipient list.

    Sender server, address and password are read from the ``mail_baidu``
    config section.  The connection uses port 25 upgraded with STARTTLS.
    Only a failure of ``sendmail`` itself is caught and printed; connection
    and login errors propagate to the caller.
    """
    import os

    # login info
    conf = Conf()
    smt, fro, pw = conf.getEmail('mail_baidu', 'sender server', 'email',
                                 'password')
    to = conf.getToEmail('to_addr', to_addr)

    # header of email
    msg = MIMEMultipart()
    msg['From'] = fro
    msg['To'] = to
    msg['Subject'] = Header(subject, 'utf-8').encode()

    # main body
    msg.attach(MIMEText(message, 'plain', 'utf-8'))

    # attachments
    for path in files or ():
        if not os.path.isfile(path):
            # Keep the best-effort behaviour, but make the skip visible.
            print('Skip attachment (not a file): {}'.format(path))
            continue
        with open(path, 'rb') as f:
            part = MIMEApplication(f.read())
        part.add_header('Content-Disposition', 'attachment',
                        filename=os.path.basename(path))
        msg.attach(part)

    # send
    with smtplib.SMTP(smt, 25) as smtp:
        smtp.ehlo()
        smtp.starttls()
        # EHLO again so the session is re-negotiated over TLS.
        smtp.ehlo()
        smtp.set_debuglevel(1)
        smtp.login(fro, pw)
        try:
            smtp.sendmail(fro, to.split(','), msg.as_string())
        except Exception as e:
            print('Failed send: {}'.format(e))
        else:
            print('Success send.')
- 構造訪問數據庫URL
# _*_ coding: utf-8 _*_
'''
// db.py
'''
from getConfig import Conf
def getUrl(sec, acc, pw, ip, port, db):
    """Build the ``mssql+pymssql`` connection URL from the config file.

    *sec* is the config section; *acc*, *pw*, *ip*, *port* and *db* are the
    option names passed on to ``Conf.getInfo``.  The values it returns fill
    the URL template in the order user, password, host, port, database.

    NOTE(review): the values are interpolated as-is; credentials containing
    characters such as ``@`` or ``:`` would presumably need URL-escaping
    before use -- confirm against the actual config contents.
    """
    template = 'mssql+pymssql://%s:%s@%s:%s/%s'
    settings = Conf().getInfo(sec, acc, pw, ip, port, db)
    return template % settings
- 主代碼
# _*_ coding: utf-8 _*_
'''
// lt_top30Cash.py
'''
import time
import pandas as pd
from db import getUrl
from datetime import date, timedelta
from sqlalchemy import create_engine
# Location of the config file.
# NOTE(review): not referenced again in the code visible here.
PATH = r'H:\SZ_數據\Python\c.s.conf'
# Connection URL assembled by db.getUrl from the 'SQL Server' config section.
URL = getUrl('SQL Server', 'acc', 'pw', 'ip', 'port', 'db')
# Ask the operator for the QTD start and end dates at import time; the prompt
# shows the expected form, e.g. (20200401,20200430).
# NOTE(review): eval() executes whatever is typed at the prompt. That is
# acceptable only while the script is run interactively by a trusted operator;
# consider explicit parsing if this is ever fed from another source.
ST_DAT, ED_DAT = eval(input('輸入:QTD起始日,終止日;如(20200401,20200430):'))
# Time helpers
def now():
    """Return the current ``time.perf_counter()`` reading."""
    return time.perf_counter()


def _days_ago(n):
    """Return the date *n* days before today."""
    return date.today() - timedelta(n)


def dat(n):
    """Return the date *n* days ago formatted as ``YYYYMMDD``."""
    return _days_ago(n).strftime('%Y%m%d')


def mDat(n):
    """Return the date *n* days ago formatted as ``MM.DD``."""
    return _days_ago(n).strftime('%m.%d')


def qDat(n):
    """Return the quarter (1-4) of the date *n* days ago."""
    return (_days_ago(n).month - 1) // 3 + 1


def yDat(n):
    """Return the four-digit year of the date *n* days ago, as a string."""
    return _days_ago(n).strftime('%Y')
def getData(sql, url):
    """Run *sql* against the database at *url* and return its rows.

    Parameters:
        sql: the statement to execute.
        url: connection URL handed to ``create_engine``.

    Returns:
        A ``map`` iterator yielding each fetched row as a ``list``.  All
        rows are fetched before the connection is released, so the iterator
        can be consumed after this function returns.
    """
    # Use the caller-supplied URL; the original ignored it in favour of the
    # module-level constant.
    engine = create_engine(url)
    try:
        with engine.begin() as conn:
            rows = conn.execute(sql).fetchall()
    finally:
        # Each call builds its own engine, so release its pool explicitly.
        engine.dispose()
    return map(list, rows)
def week(data):
    """Pivot weekly spending rows into one row per user.

    *data* is an iterable of 4-item rows matching the columns
    (用戶名, 類別, 金額, 周).  The result is indexed by 用戶名 and has
    three-level columns (金額, 類別, 周), aggregated with ``pivot_table``'s
    default aggregation.
    """
    frame = pd.DataFrame(list(data), columns=('用戶名', '類別', '金額', '周'))
    return pd.pivot_table(
        frame,
        values=['金額'],
        index=['用戶名'],
        columns=['類別', '周'],
    )
def getP4P(df):
    """Add per-week 'P4P' totals and flatten the columns, in place.

    *df* is the pivot produced by ``week``: indexed by 用戶名 with
    three-level columns (金額, 類別, 周).  For every week a column
    ('金額', 'P4P', week) is added holding the sum over all categories
    present for that week.  Missing (NaN) cells count as 0, so a user with
    spending in only some categories still gets a correct total.

    The three-level columns are then flattened to ``類別 + 周`` strings
    (e.g. category 'P4P' and week '本週' become 'P4P本週') and the index is
    moved into a regular 用戶名 column.

    Returns None; *df* is modified in place.
    """
    # Weeks in order of first appearance (e.g. week-before-last, last, this).
    weeks = df.columns.get_level_values(2).unique()
    # Snapshot the category columns before any P4P column is appended.
    category_cols = list(df.columns)
    for d in weeks:
        cols_of_week = [c for c in category_cols if c[2] == d]
        # sum(axis=1) skips NaN, unlike chained `+`, which would turn the
        # whole total into NaN when any single category is missing.
        df[('金額', 'P4P', d)] = df[cols_of_week].sum(axis=1)
    # Flatten from the actual column tuples so labels always match the data,
    # even when some (category, week) combination is absent.
    df.columns = [c[1] + c[2] for c in df.columns]
    df.reset_index(inplace=True)
def merge(basicInfo, qtd, data):
    """Join customer info, QTD totals and weekly spending into one frame.

    Parameters:
        basicInfo: iterable of 4-item rows, labelled
            (用戶名, 廣告主, 二級行業, 區域).
        qtd: iterable of 2-item rows, labelled (用戶名, QTD).
        data: DataFrame with a 用戶名 column (the weekly spending table).

    Returns:
        A DataFrame with one row per basicInfo row, left-joined with *qtd*
        and *data* on 用戶名.  Region values that start with 'HK' followed by
        more text are collapsed to 'HK', and every missing value is replaced
        by 0.
    """
    info_cols = ['用戶名', '廣告主', '二級行業', '區域']
    qtd_cols = ['用戶名', 'QTD']
    df = pd.merge(pd.DataFrame(list(basicInfo), columns=info_cols),
                  pd.DataFrame(list(qtd), columns=qtd_cols),
                  how='left', on='用戶名')
    # region -> HK.  regex=True must be explicit: newer pandas treats the
    # pattern as a literal string by default, which would match nothing.
    df['區域'] = df['區域'].str.replace(r'^HK.+', 'HK', regex=True)
    # weekly spending
    df = pd.merge(df, data, how='left', on='用戶名')
    return df.fillna(0)
def rank(df):
    """Attach last-week and this-week rankings to *df*, in place.

    Last week's rank orders rows by P4P上上週 + P4P上週 (descending) and is
    stored in a 上週排名 column.  This week's rank orders rows by
    P4P上週 + P4P本週 (descending) and becomes the index, named 本週排名.
    The temporary 'sum' column and every column whose name contains '上上'
    are removed afterwards.

    Returns None; *df* is modified in place.
    """
    def _order_by_total(first, second, label):
        # Sort by the two-week total and renumber the index 1..n as the rank.
        df['sum'] = df[first] + df[second]
        df.sort_values('sum', ascending=False, inplace=True)
        df.reset_index(drop=True, inplace=True)
        df.index = list(range(1, len(df) + 1))
        df.index.name = label

    # last week's rank, kept as an ordinary column
    _order_by_total('P4P上上週', 'P4P上週', '上週排名')
    df.reset_index(inplace=True)

    # this week's rank, kept as the index
    _order_by_total('P4P上週', 'P4P本週', '本週排名')

    obsolete = ['sum'] + [name for name in df.columns if '上上' in name]
    df.drop(columns=obsolete, inplace=True)
def ringRatio(df):
    """Add week-over-week growth columns to *df*, in place.

    環比增長 is P4P本週 minus P4P上週; 環比增長率 is that difference divided
    by P4P上週.  Returns None.
    """
    previous = df['P4P上週']
    growth = df['P4P本週'] - previous
    df['環比增長'] = growth
    df['環比增長率'] = growth / previous
def fmt(df):
    """Write the first 30 rows of *df* to an Excel report and format it.

    The workbook is written with pandas (header on the third row, panes
    frozen below it) and then reopened through xlwings to add a label row
    above the header, borders, auto-fitted column widths, a bold heading
    area and number formats.

    Returns the path of the saved workbook.
    """
    # Output
    path = (r'H:\SZ_數據\Download\Top30Cash(' + mDat(14)
            + '_' + mDat(1) + ').xlsx')
    with pd.ExcelWriter(path) as writer:
        df[:30].to_excel(writer, startrow=2, freeze_panes=(3, 0))

    # Post-process the sheet
    import xlwings as xw
    wb = xw.Book(path)
    try:
        sht = wb.sheets[0]
        # Measure the table before any label is written above it.
        region = sht['A3'].current_region
        cntRow = region.rows.count
        cntCol = region.columns.count

        # Labels on the row above the header
        sht[0, 0].value = 'P4P'
        for n, v in enumerate(sht[2, :cntCol].value):
            if not isinstance(v, str):
                # An empty or non-text header cell has nothing to label.
                continue
            if '排名' in v:
                sht[1, n].clear()
            elif '上' in v:
                sht[1, n].value = mDat(14) + '-' + mDat(7)
            elif '本' in v:
                sht[1, n].value = mDat(7) + '-' + mDat(1)
            elif 'QTD' in v:
                # Year and quarter of the report's last day (yesterday), the
                # same anchor the file name uses.  The column index `n` must
                # not be used as a day offset here.
                sht[1, n].value = yDat(1) + 'Q' + str(qDat(1)) + '現金'
            elif '環比' in v:
                sht[1, n].value = 'P4P現金'

        # Borders: Excel border indices 7..12
        for b in range(7, 13):
            sht[1:cntRow, :cntCol].current_region.api.Borders(b).weight = 2
        # Column widths
        sht[:, :cntCol].autofit()
        # Bold heading rows
        sht[:3, :cntCol].api.Font.Bold = True
        # Number formats: thousands separators everywhere, percent in the
        # last column
        sht[3:, :cntCol-1].api.NumberFormat = '#,##0'
        sht[3:, cntCol-1].api.NumberFormat = '0.0%'

        wb.save()
    finally:
        # Close the workbook even when formatting fails part-way.
        wb.close()
    return path
def main():
    """Build the Top-30 report end to end and e-mail it.

    Steps: query customer info, QTD totals and three-week spending; pivot
    the spending by week; add P4P totals; merge everything; aggregate per
    (廣告主, 二級行業, 區域); rank; compute week-over-week growth; write the
    Excel file; send it as an attachment.
    """
    queries = (
        # basicInfo
        "SELECT 用戶名, 廣告主, 信譽成長值, 區域 FROM basicInfo",
        # QTD
        "SELECT * FROM getCashSUM('%s', '%s')" % (ST_DAT, ED_DAT),
        # Spending of the last three weeks.
        # NOTE(review): the SQL shown elsewhere in this file defines
        # getThreeWeekCash; confirm getThrWeekCash exists in the database.
        '''SELECT * FROM getThrWeekCash(%s)''' % dat(21),
    )
    basicInfo, qtd, data = [getData(query, URL) for query in queries]

    # weekly pivot, then P4P totals (added in place)
    weekly = week(data)
    getP4P(weekly)

    # merge, then aggregate per advertiser
    merged = merge(basicInfo, qtd, weekly)
    report = merged.groupby(['廣告主', '二級行業', '區域']).sum().reset_index()

    # ranking and week-over-week ratio (both in place)
    rank(report)
    ringRatio(report)

    # Excel output
    path = fmt(report)

    # e-mail the report
    from sendEmail import sendEmail
    sendEmail('Top 30廣告主現金', ' 見附件。', [path])
if __name__ == '__main__':
    # Time the whole run and report it in minutes.
    started = now()
    main()
    elapsed_min = (now() - started) / 60
    print('Runtime: {:.3f} min'.format(elapsed_min))
Q/A
# S1 在DataFrame中進行數據聚合
# 客戶起始消費日不一致,聚合起始點不一致,聚合結果錯誤
# sql 函數替代
#
df.resample('7D').sum()
-- S2: spending of the three most recent weeks
-- Drop the inline table-valued function first if it already exists.
IF OBJECT_ID('getThreeWeekCash', 'IF') IS NOT NULL
    DROP FUNCTION getThreeWeekCash
GO
-- getThreeWeekCash(@st): per 用戶名 and 類別, the summed 金額 for three
-- consecutive 7-day windows starting at @st, labelled 上上週 / 上週 / 本週
-- in the 周 column.  Only the three listed categories are included.
CREATE FUNCTION [dbo].[getThreeWeekCash]
(
    @st varchar(10)
)
RETURNS TABLE
AS
RETURN(
    -- window 1: [@st, @st + 7 days)
    SELECT 用戶名, 類別, sum(金額) AS 金額, '上上週' AS '周'
    FROM 現金
    WHERE 日期 >= @st
        AND 日期 < DATEADD(D, 7, @st)
        AND 類別 IN ('搜索點擊', '新產品', '自主投放')
    GROUP BY 用戶名, 類別
    -- UNION ALL: the branches differ in the constant 周 label and each is
    -- already grouped, so no duplicate rows can exist and the
    -- de-duplication step of plain UNION would only add cost.
    UNION ALL
    -- window 2: [@st + 7 days, @st + 14 days)
    SELECT 用戶名, 類別, sum(金額) AS 金額, '上週' AS '周'
    FROM 現金
    WHERE 日期 >= DATEADD(D, 7, @st)
        AND 日期 < DATEADD(D, 14, @st)
        AND 類別 IN ('搜索點擊', '新產品', '自主投放')
    GROUP BY 用戶名, 類別
    UNION ALL
    -- window 3: [@st + 14 days, @st + 21 days)
    SELECT 用戶名, 類別, sum(金額) AS 金額, '本週' AS '周'
    FROM 現金
    WHERE 日期 >= DATEADD(D, 14, @st)
        AND 日期 < DATEADD(D, 21, @st)
        AND 類別 IN ('搜索點擊', '新產品', '自主投放')
    GROUP BY 用戶名, 類別
)
- 迭代器可以直接在
pd.DataFrame
中自動迭代
# 3.6 不支持
# pd.DataFrame(list(tu))
In [35]: tu = (i for i in range(10))
tu
Out[37]: <generator object <genexpr> at 0x0000023319776B48>
pd.DataFrame(tu)
Out[36]:
0
0 0
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
- 正則
ddf
Out[156]:
0
0 ac
1 ad
0 cc
0 acdd
1 aacdf
ddf[0].str.replace(r'^a.+', 'a')
Out[155]:
0 a
1 a
0 cc
0 a
1 a
Name: 0, dtype: object
- 排序
# pandas >= 1.0(sort_values 支持 ignore_index 參數)
df.sort_values('sum', inplace=True, ascending=False, ignore_index=True)
# 較舊的 pandas(沒有 ignore_index 參數)
df.sort_values('sum', inplace=True, ascending=False)
df.reset_index(drop=True, inplace=True)
- 存儲過程
-- Open question: can a stored procedure be called directly across
-- databases?  Example call:
--   [Output].dbo.pr_check 'pr_checkFunc'
--
-- pr_check: drop the stored procedure named @obj if it exists.
CREATE PROC [dbo].[pr_check]
    @obj nvarchar(50)
AS
BEGIN
    -- The object name is spliced into the statement text because DROP PROC
    -- cannot take its target from a variable.
    DECLARE @sql nvarchar(max) =
        'IF OBJECT_ID(''' + @obj + ''', ''P'') IS NOT NULL DROP PROC ' + @obj
    EXEC sp_executesql
        @stmt = @sql,
        @params = N'@obj nvarchar(50)',
        @obj = @obj
END
GO
-- Remove any earlier pr_checkFunc so the CREATE below can run.
EXEC pr_check 'pr_checkFunc'
GO
-- pr_checkFunc: drop the inline table-valued function named @name if it
-- exists; the generated statement is printed before it is executed.
CREATE PROC pr_checkFunc
    @name nvarchar(50)
AS
BEGIN
    DECLARE @sql nvarchar(max) =
        'IF OBJECT_ID(''' + @name + ''', ''IF'') IS NOT NULL DROP FUNCTION ' + @name
    PRINT @sql
    EXEC sp_executesql
        @stmt = @sql,
        @params = N'@name nvarchar(50)',
        @name = @name
END
GO