一、準備工作
1). 安裝python,installed version:2.7.9 in Windows.
3). excel文檔規範化
4). 安裝xlrd
什麼是xlrd? xlrd是python中專門用於抽取Microsoft Excel表格數據的庫。xlrd v0.9.3, 下載地址:https://pypi.python.org/pypi/xlrd
安裝步驟:下載tar.gz,並解壓到目錄:xlrd\xlrd-0.9.3.tar\xlrd-0.9.3,然後轉到該目錄下,執行:python setup.py install
二、編寫腳本
# -*- coding: cp936 -*- import xlrd class DataComparison(): compare_result = '' f_dir = r'' datasrc = '' datadst = '' f1 = '' f2 = '' col_sets_sheet1 = [] col_sets_sheet2 = [] f1_sheet_index = 0 f2_sheet_index = 0 if_with_title = '1' def __init__(self, dataList): ############### Set Parameters ############# compare_result = dataList[0] datasrc = dataList[1] datadst = dataList[2] # Read the 2 excel files to be compared. # Set data file name f1 = dataList[3] f2 = dataList[4] # Set col numbers in the 1st sheet col_sets_sheet1 = dataList[5] # col_sets_sheet2 = dataList[6] # # Set sheet in files to be compared. # The 1st sheet as default f1_sheet_index = dataList[7] f2_sheet_index = dataList[8] # Set if excel sheet has title(only 1st row as title) if_with_title = dataList[9] ############### Load Data & Compare ############# print "Start Loading..." book1 = xlrd.open_workbook(f1) book2 = xlrd.open_workbook(f2) print "Loading Files OK!" b1sheet = book1.sheet_by_index(f1_sheet_index) b2sheet = book2.sheet_by_index(f2_sheet_index) print "Ranging Data" col_1_b1sheet = b1sheet.col_values(col_sets_sheet1[0]) col_2_b1sheet = b1sheet.col_values(col_sets_sheet1[1]) col_1_b2sheet = b2sheet.col_values(col_sets_sheet2[0]) col_2_b2sheet = b2sheet.col_values(col_sets_sheet2[1]) if if_with_title == '1': print "Removing titles" col1_b1sheet = col_1_b1sheet[1:] col2_b1sheet = col_2_b1sheet[1:] col1_b2sheet = col_1_b2sheet[1:] col2_b2sheet = col_2_b2sheet[1:] print "Round columns with number type" for i, val in enumerate(col2_b1sheet): col2_b1sheet[i] = round(val,2) for i, val in enumerate(col2_b2sheet): col2_b2sheet[i] = round(val,2) # Get sum value of number cols sum1 = sum(col2_b1sheet) sum2 = sum(col2_b2sheet) print "Do zipping..." 
zip1 = zip(col1_b1sheet, col2_b1sheet) zip2 = zip(col1_b2sheet, col2_b2sheet) print "Sort ziped list" zip1.sort() zip2.sort() print "Compare rows in summary" len1 = len(zip1) len2 = len(zip2) print len1, '--', len2 compare_result = compare_result+":\n 總數比對:" if len1 == len2: print "記錄總數比較一致!" compare_result = compare_result+"結果一致,均爲"+str(len1)+"條;" else: print "記錄總數不一致!" compare_result = compare_result+"結果不一致,"+ datasrc +"爲"+str(len1)+"條,"+datadst +"爲"+str(len2)+"條;" print sum1, '--', sum2 if round(sum1,2) == round(sum2,2): print "總金額比較一致!" compare_result = compare_result+"總金額比較一致,"+"均爲"+str(sum1)+";\n" else: print "總金額不一致!" compare_result = compare_result+"總金額不一致,"+datasrc +"爲"+str(sum1)+","+datadst +"爲"+str(sum2)+";\n" print "Get different rows in detail" cnt = 0 for c in range(len(zip1)): if zip1[c] != zip2[c]: cnt += 1 print zip1[c], '--', zip2[c] if cnt >0: print cnt, "行比較有差異!" compare_result = compare_result + " 明細比對:存在"+str(cnt)+"條差異。" else: print "未發現明細差異數據" compare_result = compare_result + " 明細比對:無差異。" print compare_result self.compare_result = compare_result print "Finished!" def getResult(self): return self.compare_result datasrc = "源數據1" datadst = "源數據2" f1_sheet_index = 0 f2_sheet_index = 0 if_with_title = '1' compare_result = "比對結果:" f1 = r'D:\tmp\src001.xlsx' f2 = r'D:\tmp\tmp001.xlsx' col_sets_sheet1 = [2,4] #取第3、5列 col_sets_sheet2 = [1,2] #取第2、3列 # 組裝dataList dataList = [compare_result, datasrc, datadst, f1, f2, col_sets_sheet1, col_sets_sheet2, f1_sheet_index, f2_sheet_index, if_with_title] dc1 = DataComparison(dataList) # 打印比較結果 print dc1.getResult()
總結:
本次主要練習了使用xlrd讀取xls文件,並對文件記錄進行加工、比較。下一步將練習xlwt並增加對文件寫入的相關操作。