【案例】用 pandas 讀取 excel 數據並上載到 MSSQL Server

環境 & 工具

Python 3.6
Spyder
Win7

代碼

# -*- coding: utf-8 -*-
'''

'''

import os
import time
import functools
import pandas as pd
from auto_ave import getPath
from datetime import datetime, timedelta, date


def now():
    """Return the current value of the performance counter, in seconds."""
    return time.perf_counter()

def dat(n):
    """Return the calendar date ``n`` days before today.

    n: number of days to go back; 0 gives today's date.
    """
    return date.today() - timedelta(n)

# Reference date used by the rest of the script: yesterday, evaluated once
# when the module is loaded.
D = dat(1)

def log(func):
    """Decorator that prints a "Call <name>():" line before each call.

    func: the callable to wrap.
    Returns a wrapper that forwards all positional and keyword arguments
    to ``func`` and hands back its result unchanged. The wrapper keeps the
    metadata of ``func`` (name, docstring, ...) via ``functools.wraps``.
    """
    def announce_then_call(*args, **kw):
        print("Call %s():" % func.__name__)
        result = func(*args, **kw)
        return result

    # Same effect as decorating the inner function with @functools.wraps(func).
    return functools.wraps(func)(announce_then_call)

@log
def readAve(shtName):
    """Read one worksheet, clean it and split it at the 'Notes' column.

    shtName: name of the worksheet to load from the workbook returned by
        ``getPath()``.
    Returns a pair of DataFrames: the columns up to and including 'Notes',
    and the columns from 'Notes' to the end. Label slicing is inclusive,
    so 'Notes' appears in both halves.
    """
    # skiprows=1: the first row of the sheet is skipped when reading.
    frame = pd.read_excel(getPath(), sheet_name=shtName, skiprows=1)
    # dataCleaning works in place on the frame.
    dataCleaning(frame)
    head = frame.loc[:, :'Notes']
    tail = frame.loc[:, 'Notes':]
    return head, tail

def getQ(dat):
    """Return the quarter label ('Q1' .. 'Q4') of the given date.

    Used to filter out spend columns that do not belong to the current
    quarter.

    dat: a ``datetime.date``; subclasses such as ``datetime.datetime`` are
        accepted as well.
    Raises TypeError if ``dat`` is not a date.
    """
    if not isinstance(dat, date):
        # Raise explicitly: a bare ``raise`` outside an ``except`` block only
        # produces "RuntimeError: No active exception to reraise".
        raise TypeError('getQ() expects a date, got %s' % type(dat).__name__)
    # Months 1-3 -> 1, 4-6 -> 2, 7-9 -> 3, 10-12 -> 4.
    return 'Q%d' % ((dat.month - 1) // 3 + 1)

def dataCleaning(df):
    """Normalise the worksheet DataFrame in place before it is uploaded.

    Steps, in order:
      1. copy '首次消費日' into three date columns (see consistentType);
      2. add '用戶名1', a copy of '用戶名', as an identifier column so the
         two separately uploaded halves can be merged again;
      3. rename date headers to '%Y%m%d' strings, drop date columns that
         are not in the same quarter as ``D``, and strip round brackets
         from all column names.

    df: DataFrame to clean; it is modified in place and must contain the
        columns '首次消費日' and '用戶名'.
    Returns None.
    """

    def dropBracket(lis):
        # Remove round brackets from the names: '(' becomes a space and
        # ')' is deleted.
        return tuple(s.replace('(', ' ').replace(')', '') for s in lis)

    def uniqueName(base, taken):
        # Return ``base`` if it is still free, otherwise the first free
        # name among base_1, base_2, ...
        name, n = base, 0
        while name in taken:
            n += 1
            name = base + '_' + str(n)
        return name

    def transferDat():
        # Convert date headers (datetime or str) to '%Y%m%d' strings and
        # drop the date columns that are outside the quarter of D.
        newCols = []
        for col in df.columns:
            if isinstance(col, date):
                if getQ(col) == getQ(D):
                    newCols.append(col.strftime('%Y%m%d'))
                else:
                    # Spend outside the current quarter: remove the column.
                    df.drop(columns=col, inplace=True)
                continue
            try:
                # NOTE(review): string headers in this format are presumably
                # duplicated date headers that the Excel reader renamed with
                # a '.N' suffix - confirm. They are not filtered by quarter.
                col_ = datetime.strptime(col, '%Y-%m-%d %H:%M:%S.%f'
                                         ).strftime('%Y%m%d')
            except ValueError:
                # Not a date-like header: keep the name unchanged.
                newCols.append(col)
            else:
                # Several headers can map to the same day; suffix them so
                # the resulting column names stay unique.
                newCols.append(uniqueName(col_, newCols))
        return dropBracket(newCols)

    def consistentType(df):
        # Copies '首次消費日' into '首次消費日_1', 'Campaign Start Date'
        # (overwriting it if present) and 'Campaign Start Date_1'.
        # NOTE(review): the original note says the goal is to unify the
        # column type to datetime.datetime (datetime.time(0, 0) -> None);
        # confirm that overwriting 'Campaign Start Date' is intended.
        df['首次消費日_1'] = df['首次消費日']
        df['Campaign Start Date'] = df['首次消費日']
        df['Campaign Start Date_1'] = df['首次消費日']

    def dropZeroSpending(df):
        # Drop the rows whose 2019 and 2020 year-to-date spend add up to 0
        # (2020 = current year, 2019 = previous year). Currently unused.
        index = df[df['2019YTD'] + df['2020YTD'] == 0].index
        df.drop(index=index, inplace=True)

    consistentType(df)
    # dropZeroSpending(df) is deliberately not called: it saves little time
    # and makes the output less predictable.
    # Identifier column: the frame is uploaded in two parts, and this copy
    # makes it easy to merge them again.
    df['用戶名1'] = df['用戶名']
    # Date conversion of the headers.
    df.columns = transferDat()

@log
def upLoad(getTable, shtName):
    """Load one worksheet and write its two halves to the database.

    getTable: callable taking the sheet name and returning a pair of
        DataFrames (for example ``readAve``).
    shtName: worksheet name; it must contain one of the keywords '搜',
        '新' or '原', which selects the table-name prefix.
    The tables are named <prefix><D as YYYYMMDD>_1 and ..._2 and are
    replaced if they already exist.
    Raises ValueError if ``shtName`` matches none of the keywords.
    """
    # Keyword -> table prefix, checked in this order; the first hit wins.
    prefixes = (('搜', 'P4P_'), ('新', 'NP_'), ('原', 'Infeeds_'))

    def getName():
        stamp = D.strftime('%Y%m%d')
        for keyword, prefix in prefixes:
            if keyword in shtName:
                return prefix + stamp + '_1', prefix + stamp + '_2'
        # Fail with a clear message instead of implicitly returning None,
        # which would only surface as an unpacking TypeError in the caller.
        raise ValueError('no table prefix defined for sheet %r' % (shtName,))

    # Resolve the table names first so a bad sheet name fails before the
    # workbook is read.
    table1, table2 = getName()
    t, t1 = getTable(shtName)
    # Both halves carry the 'Notes' column; remove the extra one from the
    # first half. A non-mutating drop avoids modifying a slice in place.
    t = t.drop(columns='Notes')
    # Write both tables inside a single transaction.
    with connect().begin() as en:
        t.to_sql(table1, con=en, if_exists='replace', index=False,
                 chunksize=1000)
        t1.to_sql(table2, con=en, if_exists='replace', index=False,
                  chunksize=1000)

def connect(section='Output', conf_path=r'H:\SZ_數據\Python\c.s.conf'):
    """Create a SQLAlchemy engine from the login data in a config file.

    section: name of the config section that holds the keys 'acc', 'pw',
        'ip', 'port' and 'dbname'.
    conf_path: path of the INI-style config file.
    Returns the engine built from 'mssql+pymssql://acc:pw@ip:port/dbname'.
    Raises FileNotFoundError if the config file cannot be read. Any error
    is printed with the '連接失敗' prefix and then re-raised.
    """
    def loginInfo(section):
        from configparser import ConfigParser
        conf = ConfigParser()
        # ConfigParser.read() silently skips unreadable files and returns
        # the list of files it actually parsed, so check it explicitly.
        if not conf.read(conf_path):
            raise FileNotFoundError(
                'config file not found or unreadable: %s' % conf_path)
        return tuple(conf.get(section, key)
                     for key in ('acc', 'pw', 'ip', 'port', 'dbname'))

    try:
        credentials = loginInfo(section)
        # Imported after the config is read so that a configuration problem
        # is reported before the database driver stack is loaded.
        from sqlalchemy import create_engine
        engine = create_engine(
            'mssql+pymssql://%s:%s@%s:%s/%s' % credentials)
    except Exception as e:
        print('連接失敗： %s' % e)
        raise
    return engine

if __name__ == '__main__':
    # Upload each listed worksheet in turn. The tuple must open with an
    # ASCII parenthesis; a full-width one is a syntax error.
    for shtName in ('搜索', '原生'):
        upLoad(readAve, shtName)

Q/A

spyder中調試正常，cmd - R 命令行下運行異常
小結：

命令行運行模式下，當 df 中無對應數據時，報錯；
注意：數據審查；

# excel -> mssql
# Replace the '-' placeholder in the date columns with None before uploading.
# NOTE(review): `df` is not defined in this snippet; presumably it is the
# DataFrame read from the workbook - confirm.
datList = ['開戶日期', '首次消費日', '收取年服務費時間', '主體資質到期日',
           '加V繳費到期日']
for colName in datList:
    # Only the rows whose cell equals '-' in this column are overwritten.
    df.loc[df[colName] == '-', colName] = None
********
# ValueError: could not convert string to Timestamp

上載時，字段名中有英文小括號時引發異常：ProgrammingError: (102, b"Incorrect syntax near '('.DB-Lib error message 20018, severity 15:\nGeneral SQL Server error: Check messages from the SQL Server\n")

ProgrammingError
因編程錯誤而引發的異常，例如，找不到或已經存在表，SQL語句中的語法錯誤，指定的參數數量錯誤等。
此錯誤是DBAPI錯誤，起源於數據庫驅動程序（DBAPI），而不是SQLAlchemy本身。
驅動程序有時也會在數據庫連接被斷開、或無法連接到數據庫的情況下引發 ProgrammingError。

小結： pandas 上載數據至 MSSQL注意事項（to_sql）：
1. 列數 < 256
2. 標準化字段命名
  應符合 MSSQL 對常規標識符的使用規則:a-z,A-Z,_,@,#,$等；
  不能有特殊符號，如'(</'等；
3. 統一各列的數據類型
  每一列只能存儲一種數據類型的數據；
for 循環的流程圖
文本操作

# Cheat sheet of the basic text-file methods; only read() is executed here.
with open('t.txt', 'r', encoding='utf-8') as f:
    f.read()  # read([size]): returns one string; without size it reads up to the end of the file
    # f.readlines()  # reads the whole file at once and returns a list with one string per line
    # f.readline()  # reads a single line
    # f.seek(offset[, whence])  # offset is in bytes for binary files and may be negative;
    #                           # in text mode use 0 or a value returned by tell().
    #                           # whence: 0 = start of file (default), 1 = current position, 2 = end of file
    # f.tell()  # returns the current position of the file pointer
    # f.write(s)  # writes to the file; in text mode the argument must be a str
    # f.close()  # not needed inside `with`: the file is closed when the block exits

文件複製

# shutil.copy(source, destination)  複製單個文件；使用相對路徑時以當前工作目錄爲基準，可配合 os.chdir() 使用
# shutil.copytree(source, destination)  遞歸複製整個目錄樹（目標目錄默認不能已存在）
# os.rename(name, newname)  修改文件名

【案例】用 pandas 讀取 excel 數據並上載到 MSSQL Server

環境 & 工具

代碼

Q/A

今天！通義靈碼在北京、成都、杭州三城開講啦

【BI 可視化插件】怎麼做？手把手教你實現

商務統計_8 數值描述度量 - 集中趨勢

商務統計_7 用圖表演示數據 - 定量數據

Java_2 面向對象

小結 pandas 時間序列

xlwings - 報表自動換算、彙總

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結