【綜合案例】sqlalchemy + pandas + xlwings + smtplib + configparser

需求

每週郵件發送:近兩週 top30 的客戶周環比

環境 & 工具

Win7
Python 3.6
Excel

流程

在這裏插入圖片描述

代碼

  • 訪問配置文件
# _*_ coding: utf-8 _*_
'''
// getConfig.py
從配置文件中讀取信息
'''

from configparser import ConfigParser

class Conf():
    """Reader for the project's INI-style configuration file.

    The file location is fixed in ``__init__``. Every getter re-reads the
    file, so edits made between calls are picked up.
    """

    def __init__(self):
        self._path = r'H:\SZ_數據\Python\c.s.conf'

    def _values(self, sec, *options):
        """Return the values of *options* in section *sec* as a tuple."""
        fil = ConfigParser()
        # ConfigParser.read() silently skips files it cannot open, so a
        # missing file shows up as NoSectionError from get() below.
        fil.read(self._path)
        return tuple(fil.get(sec, opt) for opt in options)

    def getEmail(self, sec, smt, email, pw):
        """Return ``(server, sender address, password)`` read from *sec*.

        *smt*, *email* and *pw* are the option names to look up.
        """
        return self._values(sec, smt, email, pw)

    def getToEmail(self, sec, toEmail):
        """Return the value of option *toEmail* in section *sec*."""
        return self._values(sec, toEmail)[0]

    def getInfo(self, sec, acc, pw, ip, port, db):
        """Return the five database settings read from *sec* as a tuple.

        The arguments are option names; the values come back in the order
        ``(acc, pw, ip, port, db)``, which is the order db.getUrl feeds into
        its connection-URL template.
        """
        return self._values(sec, acc, pw, ip, port, db)
        

  • 發送郵件
# _*_ coding: utf-8 _*_
'''
// sendEmail.py
Send email

'''

import smtplib
from getConfig import Conf
from email.header import Header
from email.mime.text import MIMEText
from email.utils import parseaddr, formataddr
from email.mime.multipart import MIMEMultipart
from email.mime.application import MIMEApplication

def _format_addr(s):
    # 格式化郵件地址
    # Header 如是中文必須編碼
    #
    name, addr = parseaddr(s)
    return formataddr((Header(name, 'utf-8').encode(), addr))

def sendEmail(subject, message, files=None, to_addr='newIOSys'):
    """Send a plain-text e-mail, optionally with file attachments.

    The SMTP server, sender address and password are read from the
    ``mail_baidu`` config section; the recipients are read from option
    *to_addr* of the ``to_addr`` section.

    Args:
        subject: Subject line; encoded as UTF-8 so non-ASCII text is allowed.
        message: Plain-text body.
        files: A path or an iterable of paths to attach, or ``None`` for no
            attachments. Entries that are not existing files are skipped
            and reported on stdout.
        to_addr: Option name holding the comma-separated recipient list.

    Send failures are printed rather than raised; connection, TLS and login
    errors propagate to the caller.
    """
    import os

    # login info
    conf = Conf()
    smt, fro, pw = conf.getEmail('mail_baidu', 'sender server', 'email',
                                 'password')
    to = conf.getToEmail('to_addr', to_addr)

    # header of email
    msg = MIMEMultipart()
    msg['From'] = fro
    msg['To'] = to
    msg['Subject'] = Header(subject, 'utf-8').encode()

    # main body
    msg.attach(MIMEText(message, 'plain', 'utf-8'))

    # attachments
    if files is None:
        files = []
    elif isinstance(files, str):
        # A bare string would otherwise be iterated character by character.
        files = [files]
    for path in files:
        if not os.path.isfile(path):
            print('Attachment skipped (not a file): {}'.format(path))
            continue
        with open(path, 'rb') as f:
            part = MIMEApplication(f.read())
        part.add_header('Content-Disposition', 'attachment',
                        filename=os.path.basename(path))
        msg.attach(part)

    # Drop blank entries produced by stray or trailing commas in the config.
    recipients = [addr.strip() for addr in to.split(',') if addr.strip()]

    # send
    with smtplib.SMTP(smt, 25) as smtp:
        smtp.ehlo()
        smtp.starttls()
        # The session must be re-identified after switching to TLS.
        smtp.ehlo()
        smtp.set_debuglevel(1)
        smtp.login(fro, pw)
        try:
            smtp.sendmail(fro, recipients, msg.as_string())
        except Exception as e:
            print('Failed send: {}'.format(e))
        else:
            print('Success send.')

  • 構造訪問數據庫URL
# _*_ coding: utf-8 _*_
'''
// db.py
'''
from getConfig import Conf

def getUrl(sec, acc, pw, ip, port, db):
    """Build the ``mssql+pymssql`` SQLAlchemy URL from the config file.

    Args:
        sec: Config section holding the database settings.
        acc, pw, ip, port, db: Option names for account, password, host,
            port and database name within *sec*.

    Returns:
        The connection URL string.
    """
    from urllib.parse import quote

    conf = Conf()
    user, password, host, port_no, database = conf.getInfo(
        sec, acc, pw, ip, port, db)
    # Characters such as '@', ':' or '/' in the credentials would otherwise
    # be read as URL delimiters, so percent-encode them.
    return 'mssql+pymssql://%s:%s@%s:%s/%s' % (
        quote(str(user), safe=''), quote(str(password), safe=''),
        host, port_no, database)
    
  • 主代碼
# _*_ coding: utf-8 _*_
'''
// lt_top30Cash.py

'''
import ast
import time
from datetime import date, timedelta

import pandas as pd
from sqlalchemy import create_engine

from db import getUrl

PATH = r'H:\SZ_數據\Python\c.s.conf'
URL = getUrl('SQL Server', 'acc', 'pw', 'ip', 'port', 'db')
# ast.literal_eval accepts the "20200401,20200430" / "(20200401,20200430)"
# literals the prompt asks for, but unlike eval() it cannot execute arbitrary
# expressions typed at the console.
ST_DAT, ED_DAT = ast.literal_eval(
    input('輸入:QTD起始日,終止日;如(20200401,20200430):').strip())


# Time helpers
def now():
    """Return a high-resolution timestamp in seconds, for timing the run."""
    return time.perf_counter()


def dat(n):
    """Return the date *n* days before today formatted as ``YYYYMMDD``."""
    return (date.today() - timedelta(n)).strftime('%Y%m%d')


def mDat(n):
    """Return the date *n* days before today formatted as ``MM.DD``."""
    return (date.today() - timedelta(n)).strftime('%m.%d')


def qDat(n):
    """Return the quarter (1-4) of the date *n* days before today."""
    return (int((date.today() - timedelta(n)).strftime('%m')) - 1) // 3 + 1


def yDat(n):
    """Return the four-digit year of the date *n* days before today."""
    return (date.today() - timedelta(n)).strftime('%Y')

def getData(sql, url):
    """Run *sql* against the database at *url* and return its rows.

    Args:
        sql: SQL statement to execute.
        url: SQLAlchemy connection URL. The original ignored this argument
            and always used the module-level ``URL``.

    Returns:
        An iterator yielding each result row as a list. All rows are
        fetched before the connection is released, so the iterator can be
        consumed afterwards.
    """
    with create_engine(url).begin() as conn:
        rows = conn.execute(sql).fetchall()
    return map(list, rows)

def week(data):
    """Pivot weekly spend rows into one row per user.

    *data* is an iterable of ``(user, category, amount, week)`` rows. The
    result is indexed by user, with a column MultiIndex of
    ``('金額', category, week)``.
    """
    header = ('用戶名', '類別', '金額', '周')
    records = pd.DataFrame(list(data), columns=header)
    pivoted = records.pivot_table(values=['金額'], index=['用戶名'],
                                  columns=['類別', '周'])
    return pivoted

def getP4P(df):
    """Add a per-week ``P4P`` total and flatten the column names, in place.

    *df* is the pivot produced by ``week``: columns are
    ``('金額', category, week)`` and the index is the user name.

    For every week a ``P4P`` column holding the sum over all categories is
    appended. Missing category/week cells count as 0, so a user lacking one
    category still gets the sum of the others (plain ``+`` turned the whole
    total into NaN). Columns are then renamed to ``category + week`` and
    the user name index becomes a regular column. Returns ``None``.
    """
    # Week labels, in order of first appearance.
    weeks = df.columns.get_level_values(2).unique()
    for wk in weeks:
        parts = [col for col in df.columns
                 if col[0] == '金額' and col[2] == wk and col[1] != 'P4P']
        df[('金額', 'P4P', wk)] = df.loc[:, parts].sum(axis=1)
    # Derive each flat name from its own column tuple, so names cannot drift
    # from the data when some category/week combination is absent.
    df.columns = [cls + wk for _, cls, wk in df.columns]
    df.reset_index(inplace=True)

def merge(basicInfo, qtd, data):
    """Join customer info, QTD totals and weekly spend on the user name.

    Args:
        basicInfo: Iterable of 4-field rows, labelled here as user,
            advertiser, second-level industry and region.
        qtd: Iterable of ``(user, QTD)`` rows.
        data: DataFrame with a ``用戶名`` column (the weekly figures).

    Returns:
        A DataFrame with one row per *basicInfo* row (left joins). Any region
        starting with ``HK`` is collapsed to ``HK``, and every missing value
        is filled with 0.
    """
    col1 = ['用戶名', '廣告主', '二級行業', '區域']
    col2 = ['用戶名', 'QTD']
    df = pd.merge(pd.DataFrame(list(basicInfo), columns=col1),
                  pd.DataFrame(list(qtd), columns=col2),
                  how='left', on='用戶名')
    # region -> HK. regex=True is explicit because newer pandas treats the
    # pattern as a literal string by default, which would match nothing.
    df['區域'] = df['區域'].str.replace(r'^HK.+', 'HK', regex=True)
    # weekly figures
    df = pd.merge(df, data, how='left', on='用戶名')
    df.fillna(0, inplace=True)
    return df
    
def rank(df):
    """Rank rows by two-week P4P spend, in place.

    The previous rank (week-before-last + last week) is stored in the
    ``上週排名`` column. The current rank (last week + this week) becomes the
    1-based index, named ``本週排名``, and the rows end up sorted by it. The
    helper ``sum`` column and every column whose name contains ``上上`` are
    dropped. Returns ``None``.
    """
    def _order_by_total(first, second, label):
        # Sort descending by the two-week total and number the rows from 1.
        df['sum'] = df[first] + df[second]
        df.sort_values('sum', inplace=True, ascending=False)
        df.reset_index(drop=True, inplace=True)
        df.index = list(range(1, len(df) + 1))
        df.index.name = label

    # previous ranking, kept as an ordinary column
    _order_by_total('P4P上上週', 'P4P上週', '上週排名')
    df.reset_index(inplace=True)
    # current ranking, kept as the index
    _order_by_total('P4P上週', 'P4P本週', '本週排名')

    obsolete = ['sum'] + [name for name in df.columns if '上上' in name]
    df.drop(columns=obsolete, inplace=True)

def ringRatio(df):
    """Add week-over-week growth and growth-rate columns to *df* in place.

    ``環比增長`` is this week's P4P minus last week's; ``環比增長率`` is that
    difference divided by last week's P4P. Returns ``None``.
    """
    current, previous = df['P4P本週'], df['P4P上週']
    growth = current - previous
    df['環比增長'] = growth
    df['環比增長率'] = growth / previous

def fmt(df):
    """Write the first 30 rows of *df* to Excel and format the sheet.

    The workbook is written with pandas, then reopened through xlwings to
    add the label rows, borders, column widths, bold headers and number
    formats.

    Returns:
        The path of the saved workbook.
    """
    import xlwings as xw

    # Output
    path = (r'H:\SZ_數據\Download\Top30Cash(' + mDat(14) + '_' + mDat(1)
            + ').xlsx')
    with pd.ExcelWriter(path) as writer:
        # The header lands on sheet row 3; rows 1-2 are left for the labels.
        df[:30].to_excel(writer, startrow=2, freeze_panes=(3, 0))

    # Post-process the sheet
    wb = xw.Book(path)
    try:
        sht = wb.sheets[0]
        table = sht['A3'].current_region
        cntRow = table.rows.count
        cntCol = table.columns.count
        # labels
        sht[0, 0].value = 'P4P'
        for n, v in enumerate(sht[2, :cntCol].value):
            if not isinstance(v, str):
                # Empty or numeric header cell: nothing to label.
                continue
            if '排名' in v:
                sht[1, n].clear()
            elif '上' in v:
                # NOTE(review): mDat(7) is also the first day of the
                # this-week label below - confirm the intended end date.
                sht[1, n].value = mDat(14) + '-' + mDat(7)
            elif '本' in v:
                sht[1, n].value = mDat(7) + '-' + mDat(1)
            elif 'QTD' in v:
                # Use yesterday (the last day of the reporting window, as in
                # the file name). The original passed the column index n as
                # the day offset, tying the label to the column position.
                sht[1, n].value = yDat(1) + 'Q' + str(qDat(1)) + '現金'
            elif '環比' in v:
                sht[1, n].value = 'P4P現金'
        # borders: same index range as before, applied to one fixed region
        bordered = sht[1:cntRow, :cntCol].current_region
        for b in range(7, 13):
            bordered.api.Borders(b).weight = 2
        # column width
        sht[:, :cntCol].autofit()
        # bold the three header rows
        sht[:3, :cntCol].api.Font.Bold = True
        # number formats: thousands separators, last column as a percentage
        sht[3:, :cntCol-1].api.NumberFormat = '#,##0'
        sht[3:, cntCol-1].api.NumberFormat = '0.0%'
        wb.save()
    finally:
        # Close the workbook even when formatting fails part-way.
        wb.close()
    return path
    
def main():
    """Build the Top-30 weekly cash report and e-mail it.

    Steps: query the three data sets, pivot the weekly spend, add the P4P
    totals, merge with customer info and QTD, aggregate per advertiser,
    rank, compute week-over-week change, write the workbook, send it.

    NOTE(review): the first query selects 信譽成長值 as its third column,
    while ``merge`` labels that column 二級行業 - confirm which is intended.
    """
    # customer master data
    basic_sql = "SELECT 用戶名, 廣告主, 信譽成長值, 區域 FROM basicInfo"
    # quarter-to-date totals for the range entered at start-up
    qtd_sql = "SELECT * FROM getCashSUM('%s', '%s')" % (ST_DAT, ED_DAT)
    # spend for the three weeks starting 21 days ago
    weekly_sql = '''SELECT * FROM getThrWeekCash(%s)''' % dat(21)

    basic_rows = getData(basic_sql, URL)
    qtd_rows = getData(qtd_sql, URL)
    weekly_rows = getData(weekly_sql, URL)

    # weekly pivot plus P4P totals (getP4P mutates in place)
    weekly = week(weekly_rows)
    getP4P(weekly)

    # one row per user, then aggregate per advertiser / industry / region
    report = merge(basic_rows, qtd_rows, weekly)
    report = report.groupby(['廣告主', '二級行業', '區域']).sum()
    report.reset_index(inplace=True)

    # ranking and week-over-week change (both mutate in place)
    rank(report)
    ringRatio(report)

    # write the workbook, then mail it
    workbook_path = fmt(report)
    from sendEmail import sendEmail
    sendEmail('Top 30廣告主現金', '    見附件。', [workbook_path])
    
if __name__ == '__main__':
    # Time the whole run and report it in minutes.
    started = now()
    main()
    elapsed_min = (now() - started) / 60
    print('Runtime: {:.3f} min'.format(elapsed_min))
    

Q/A

# S1 在DataFrame中進行數據聚合
# 客戶起始消費日不一致,聚合起始點不一致,聚合結果錯誤
# sql 函數替代
#
df.resample('7D').sum()
-- S2: spend for the three consecutive 7-day windows starting at @st.
-- Returns one row per (user, category, week label); the labels are
-- '上上週' (days 0-6), '上週' (days 7-13) and '本週' (days 14-20).
-- UNION ALL is used because the branches cannot overlap: each carries a
-- different week label and is already unique per (user, category), so the
-- de-duplication done by plain UNION was wasted work.
-- NOTE(review): the Python script calls getThrWeekCash - confirm which name
-- is actually deployed.
IF OBJECT_ID('getThreeWeekCash', 'IF') IS NOT NULL
	DROP FUNCTION getThreeWeekCash
GO
CREATE FUNCTION [dbo].[getThreeWeekCash]
(
	@st varchar(10)
)
	RETURNS TABLE
AS
	RETURN(
		SELECT 用戶名, 類別, sum(金額) AS 金額, '上上週' AS '周'
			FROM 現金
			WHERE 日期 >= @st
				AND 日期 < DATEADD(D, 7, @st)
				AND 類別 IN ('搜索點擊', '新產品', '自主投放')
			GROUP BY 用戶名, 類別
		UNION ALL
		SELECT 用戶名, 類別, sum(金額) AS 金額, '上週' AS '周'
			FROM 現金
			WHERE 日期 >= DATEADD(D, 7, @st)
				AND 日期 < DATEADD(D, 14, @st)
				AND 類別 IN ('搜索點擊', '新產品', '自主投放')
			GROUP BY 用戶名, 類別
		UNION ALL
		SELECT 用戶名, 類別, sum(金額) AS 金額, '本週' AS '周'
			FROM 現金
			WHERE 日期 >= DATEADD(D, 14, @st)
				AND 日期 < DATEADD(D, 21, @st)
				AND 類別 IN ('搜索點擊', '新產品', '自主投放')
			GROUP BY 用戶名, 類別
	)
  • 迭代器可以直接在pd.DataFrame中自動迭代
# 3.6 不支持
# pd.DataFrame(list(tu))
In [35]: tu = (i for i in range(10))

tu
Out[37]: <generator object <genexpr> at 0x0000023319776B48>

pd.DataFrame(tu)
Out[36]: 
   0
0  0
1  1
2  2
3  3
4  4
5  5
6  6
7  7
8  8
9  9

  • 正則
ddf
Out[156]: 
       0
0     ac
1     ad
0     cc
0   acdd
1  aacdf
ddf[0].str.replace(r'^a.+', 'a')
Out[155]: 
0     a
1     a
0    cc
0     a
1     a
Name: 0, dtype: object
  • 排序
# 3.7
df.sort_values('sum', inplace=True, ascending=False, ignore_index=True)
# 3.6
df.sort_values('sum', inplace=True, ascending=False)
df.reset_index(drop=True, inplace=True)
  • 存儲過程
-- 不同數據庫之間,存儲過程不能直接調用 ?
-- [Output].dbo.pr_check 'pr_checkFunc'
--
-- pr_check: drop the stored procedure named @obj if it exists.
-- The name is concatenated into a dynamic statement, so callers must pass a
-- trusted identifier.
CREATE PROC [dbo].[pr_check]
	@obj nvarchar(50)
AS
BEGIN
	DECLARE @sql nvarchar(max)
	-- Builds: IF OBJECT_ID('<name>', 'P') IS NOT NULL DROP PROC <name>
	SET @sql = 'IF OBJECT_ID(''' + @obj + ''', ''P'') IS NOT NULL DROP PROC ' + @obj
	-- The parameter list is passed along, but the generated text embeds the
	-- name directly and never references @obj.
	EXEC sp_executesql @sql, N'@obj nvarchar(50)', @obj
END
GO
EXEC pr_check 'pr_checkFunc'
GO
-- pr_checkFunc: drop the function named @name if it exists; OBJECT_ID is
-- queried with object type 'IF'.
-- The name is concatenated into a dynamic statement, so callers must pass a
-- trusted identifier.
CREATE PROC pr_checkFunc
	@name nvarchar(50)
AS
BEGIN
	DECLARE @sql nvarchar(max)
	-- Builds: IF OBJECT_ID('<name>', 'IF') IS NOT NULL DROP FUNCTION <name>
	SET @sql = 'IF OBJECT_ID(''' + @name + ''', ''IF'') IS NOT NULL DROP FUNCTION ' + @name
	-- Echo the generated statement before running it.
	PRINT @sql
	-- The parameter list is passed along, but the generated text embeds the
	-- name directly and never references @name.
	EXEC sp_executesql @sql, N'@name nvarchar(50)',@name 
END
GO
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章