import xlrd as xr
import os,pickle
import cx_Oracle
import sys,re,datetime,csv
from xlrd import xldate_as_datetime, xldate_as_tuple
import time
def trim(s):
    """Collapse every run of whitespace in *s* into a single space.

    Leading and trailing whitespace is dropped as a consequence, because only
    the non-whitespace tokens are kept and re-joined.

    Args:
        s: The string to normalise.

    Returns:
        The non-whitespace tokens of *s* joined by single spaces.
    """
    # Raw string: '\S' in a plain literal is an invalid escape sequence and
    # triggers a warning on current Python versions.
    tokens = re.findall(r'\S+', s)
    print("替換:" + s)
    return " ".join(tokens)
def query_sql(v_sql):
    """Execute a query on a fresh Oracle connection and return all rows.

    The statement is parsed first; a parse error is only printed, after which
    the statement is executed anyway (so a real error still propagates from
    ``execute``).

    Args:
        v_sql: Complete SQL text to run; no bind variables are supplied.

    Returns:
        The result of ``cursor.fetchall()``.

    Raises:
        cx_Oracle.DatabaseError: If connecting, executing or fetching fails.
    """
    conn = cx_Oracle.connect('user/pass@host:ip/sid')
    try:
        c = conn.cursor()
        try:
            try:
                # Parse the statement up front so syntax problems are reported.
                c.parse(v_sql)
            except cx_Oracle.DatabaseError as e:
                print(e)
            c.execute(v_sql)
            return c.fetchall()
        finally:
            # Release the cursor even when execute/fetchall raises.
            c.close()
    finally:
        # Release the connection even when anything above raises.
        conn.close()
def insert_batch_sql(v_sql, data):
    """Insert many rows with one ``executemany`` call and commit.

    The statement is parsed first; a parse error is only printed, after which
    the batch is executed anyway (so a real error still propagates).

    Args:
        v_sql: INSERT statement containing bind placeholders.
        data: Sequence of rows, each a sequence of bind values.

    Raises:
        cx_Oracle.DatabaseError: If connecting, executing or committing fails.
    """
    if not data:
        # Nothing to insert: skip the connection round trip entirely.
        return
    conn = cx_Oracle.connect('user/pass@host:ip/sid')
    try:
        c = conn.cursor()
        try:
            try:
                c.parse(v_sql)
            except cx_Oracle.DatabaseError as e:
                print(e)
            c.executemany(v_sql, data)  # bulk insert
            conn.commit()
        finally:
            # Release the cursor even when executemany/commit raises.
            c.close()
    finally:
        # Release the connection even when anything above raises.
        conn.close()
def insert_log(log_msg):
    """Write one log record (message plus current timestamp) to ``t_log``.

    Args:
        log_msg: Text stored in the ``:rzxx`` bind of the insert.

    Raises:
        cx_Oracle.DatabaseError: If connecting, inserting or committing fails.
    """
    conn = cx_Oracle.connect('user/pass@host:ip/sid')
    try:
        c = conn.cursor()
        try:
            param = {'rzxx': log_msg, 'rzsj': datetime.datetime.now()}
            c.execute('insert into t_log values(seq_t.nextval,:rzxx,:rzsj)', param)
            conn.commit()
        finally:
            # Release the cursor even when execute/commit raises.
            c.close()
    finally:
        # Release the connection even when anything above raises.
        conn.close()
# Import Excel data: compare the column count of the database table with the Excel sheet and skip the file when they differ
def excel_check_and_imp(filename, tablename, col_nums):
    """Validate a workbook's column count and import it when it matches.

    Args:
        filename: Path of the Excel workbook.
        tablename: Target table name, forwarded to ``batch_to_ora``.
        col_nums: Number of columns the target table has.

    Returns:
        1 when the workbook was handed to ``batch_to_ora``; -1 when it was
        skipped (the mismatch is printed and logged through ``insert_log``).
    """
    data = xr.open_workbook(filename)
    # NOTE(review): both outcomes return inside the loop, so only the first
    # sheet is ever validated, while batch_to_ora imports every sheet.
    # Confirm whether later sheets should be checked as well.
    for sheet in data.sheet_names():
        table = data.sheet_by_name(sheet)
        if table.ncols == col_nums:
            batch_to_ora(filename, tablename)
            return 1
        log_msg = "[跳過原因2]文件:["+filename+"],sheet["+sheet+"]和表["+tablename+"]列數不一致"
        print("文件:[%s],sheet[%s]和表[%s]列數不一致,跳過"%(filename,sheet,tablename))
        # Show the first data row as a diagnostic, but only if it exists:
        # row_values(1) raises IndexError on a sheet with fewer than two rows.
        if table.nrows > 1:
            print(table.row_values(1))
        insert_log(log_msg)
        return -1
    # No sheets at all: report "skipped" rather than falling through with
    # None, which the caller's `< 0` comparison cannot handle.
    return -1
# Check whether the given path contains Excel files
def fold_files_check(path):
    """Scan a folder and import every Excel file found in it.

    For each directory entry:
      * names without ".xls" are logged and skipped;
      * the first two characters of the name select the target table
        (``01`` -> ``T1`` ... ``23`` -> ``T23``); names whose prefix is not a
        number in that range are logged and skipped;
      * the table's column count is read from ``sys.dba_tab_cols`` and the
        file is passed to ``excel_check_and_imp``.

    Args:
        path: Folder to scan. A message is printed when it is not an
            existing directory.

    Returns:
        None.
    """
    if not os.path.isdir(path):
        print('文件夾:(%s)不存在!' % (path))
        return

    # Target tables T1..T23, indexed by the numeric file-name prefix.
    table_names = ['T%d' % n for n in range(1, 24)]

    for i, filename in enumerate(os.listdir(path)):
        print('==================正在檢查第%s個文件=========================' % (i + 1))
        if filename.find(".xls") < 0:
            log_msg = "[跳過原因1]文件:[" + filename + "]爲非excel文件"
            print('文件:[%s]爲非excel文件,跳過' % (filename))
            insert_log(log_msg)
            continue

        full_path = os.path.join(path, filename)
        log_msg = "文件:[" + full_path + "]文件格式檢查通過,開始導入"
        print('文件:[%s]文件格式檢查通過' % (full_path))
        insert_log(log_msg)

        # Derive the table from the two-character prefix. A non-numeric
        # prefix used to raise ValueError, and 0 or a negative number
        # silently indexed from the end of the list; both are now skipped.
        try:
            table_index = int(filename[0:2])
        except ValueError:
            table_index = 0
        if not 1 <= table_index <= len(table_names):
            log_msg = "[跳過原因3]文件:[" + filename + "]文件名前兩位不是有效的表序號"
            print('文件:[%s]文件名前兩位不是有效的表序號,跳過' % (filename))
            insert_log(log_msg)
            continue
        tablename = table_names[table_index - 1]

        # Number of columns of the target table (count(1) yields one row).
        v_sys_sql = "select count(1) from sys.dba_tab_cols a where a.owner='RAM' AND A.TABLE_NAME='" + tablename.upper() + "'"
        col_nums = query_sql(v_sys_sql)[0][0]

        # Import when the check passes; a skipped file (-1) has already been
        # logged by excel_check_and_imp, so the result needs no handling.
        excel_check_and_imp(full_path, tablename, col_nums)
def _load_table_columns(tablename):
    """Return ``(cols, vals, coltype)`` for table *tablename* owned by RAM.

    ``cols`` is the parenthesised column list, ``vals`` the parenthesised
    positional bind list (``:1,:2,...``), and ``coltype`` holds one entry per
    column, in column_id order.
    """
    where = " from sys.dba_tab_cols a where a.owner='RAM' AND A.TABLE_NAME='" + tablename.upper() + "'"
    names_sql = ("select listagg(column_name,',') WITHIN GROUP (order by column_id),"
                 "listagg(':'||column_id,',') WITHIN GROUP (order by column_id)" + where)
    rows = query_sql(names_sql)
    if not rows or rows[-1][0] is None:
        raise ValueError("no columns found for table %r" % (tablename,))
    cols = "(" + rows[-1][0] + ")"
    vals = "(" + rows[-1][1] + ")"

    # Order by column_id so the types line up with the column list above;
    # without ORDER BY the row order is not guaranteed.
    types_sql = "select DATA_TYPE" + where + " order by column_id"
    coltype = []
    for type_row in query_sql(types_sql):
        data_type = type_row[0] or ''
        if "VARCHAR2" in data_type:
            coltype.append("VARCHAR2")
        elif "NUMBER" in data_type:
            coltype.append("NUMBER")
        elif "DATE" in data_type:
            coltype.append("DATE")
        else:
            # Keep a slot for every column. Dropping unrecognised types
            # would shift the types of all following columns.
            coltype.append(data_type)
    return cols, vals, coltype


def _convert_cell(raw, ctype, col_type, datemode):
    """Convert one Excel cell value into the value bound for its column."""
    value = raw
    if ctype == xr.XL_CELL_DATE:
        # NOTE(review): the format used to be '%Y-%d-%m' (day and month
        # swapped); confirm that year-month-day matches the session's
        # NLS_DATE_FORMAT, since the string is converted by the database.
        value = xr.xldate_as_datetime(raw, datemode).strftime('%Y-%m-%d %H:%M:%S')
    if raw == '':
        return None
    if col_type == 'VARCHAR2':
        # Text column: always bind a string; expand scientific notation
        # (e.g. 1e+16) into plain digits.
        try:
            text = str(value)
            if "e+" in text:
                text = "%.f" % float(value)
            return text
        except (TypeError, ValueError, OverflowError):
            return str(value)
    if col_type == 'NUMBER':
        # Numeric column: Excel may deliver the number as text ('96.99'),
        # so convert explicitly. Decimals are rounded to two places.
        try:
            text = str(value)
            if "." in text:
                return float("%.2f" % float(value))
            return int(text)
        except (TypeError, ValueError, OverflowError):
            return str(value)
    return value


def batch_to_ora(filename, tablename):
    """Load every sheet of an Excel workbook into an Oracle table in batches.

    The first row of each sheet is treated as a header and skipped. Rows are
    converted cell by cell according to the target column type and inserted
    through ``insert_batch_sql`` whenever ``batch_limit`` (module global) rows
    have accumulated; the remainder is inserted at the end of the sheet.
    Progress is printed and written through ``insert_log``.

    Args:
        filename: Path of the Excel workbook.
        tablename: Name of the target table (owner RAM).

    Raises:
        ValueError: If no column metadata is found for *tablename*.
    """
    data = xr.open_workbook(filename)

    # Table metadata does not depend on the sheet, so fetch it once.
    cols, vals, coltype = _load_table_columns(tablename)
    sql = "INSERT INTO " + tablename + " " + cols + " VALUES " + vals + " "

    for sheet in data.sheet_names():
        table = data.sheet_by_name(sheet)
        nrows = table.nrows
        total_rows = max(nrows - 1, 0)  # data rows, header excluded
        batch = []
        excel_cols_num_imp = 0

        for i in range(1, nrows):
            row_cells = zip(table.row_values(i), table.row_types(i))
            tab_row = [
                _convert_cell(
                    raw,
                    ctype,
                    # Sheets wider than the table have no type for the extra
                    # cells; pass them through unconverted.
                    coltype[j] if j < len(coltype) else None,
                    data.datemode,
                )
                for j, (raw, ctype) in enumerate(row_cells)
            ]
            batch.append(tab_row)
            print(tab_row)

            if len(batch) >= batch_limit:
                print("正在導入文件:[%s],sheet:[%s]總共[%s]行,已導入[%s]行" % (filename,sheet, total_rows, excel_cols_num_imp))
                insert_batch_sql(sql, batch)
                excel_cols_num_imp += len(batch)
                batch = []
                log_msg = "文件:[" + filename + "],sheet["+sheet+"]總共[" + str(total_rows)+ "]行,已導入["+str(excel_cols_num_imp)+"]行"
                print("文件:[%s],sheet:[%s]總共[%s]行,已導入[%s]行"%(filename,sheet,total_rows,excel_cols_num_imp))
                insert_log(log_msg)

        # Insert whatever is left over (fewer rows than one full batch).
        print("正在導入文件:[%s],sheet:[%s]總共[%s]行,已導入[%s]行,導入完成" % (filename,sheet, total_rows, excel_cols_num_imp))
        if batch:
            insert_batch_sql(sql, batch)
            excel_cols_num_imp += len(batch)
        # Same bracket layout as the per-batch log line above (the original
        # final message had unbalanced brackets and omitted the sheet name).
        log_msg = "文件:[" + filename + "],sheet[" + sheet + "]總共[" + str(total_rows) + "]行,已導入[" + str(excel_cols_num_imp) + "]行,導入完成"
        print("文件:[%s],sheet:[%s]總共[%s]行,已導入[%s]行,導入完成" % (filename, sheet,total_rows, excel_cols_num_imp))
        insert_log(log_msg)
# Number of rows batch_to_ora accumulates before sending one batch to
# insert_batch_sql; read there as a module-level global.
batch_limit = 10000
# Script entry: import every Excel file found in this folder.
# NOTE(review): the call is not under an `if __name__ == "__main__":` guard,
# so it also runs whenever this module is imported.
fold_files_check("C:\\Users\\sp\\Desktop\\data\\******")
# Disabled alternatives: a fixed test folder, and taking the folder from the
# first command-line argument.
#fold_files_check("C:\\test")
# if __name__=="__main__":
# cmd=sys.argv[1]
# p1=cmd.split(",")[0]
# fold_files_check(p1)