python xlrd數據寫入oracle

import xlrd as xr
import os,pickle
import cx_Oracle
import sys,re,datetime,csv
from xlrd import xldate_as_datetime, xldate_as_tuple
import time



def trim(s):
    r = re.findall('[\S]+', s)
    print ("替換:"+s)
    return " ".join(r)

def query_sql(v_sql):
      conn = cx_Oracle.connect('user/pass@host:ip/sid')
      c = conn.cursor()  # 獲取cursor
      try:
            # 解析sql語句
            c.parse(v_sql)
      # 捕獲SQL異常
      except cx_Oracle.DatabaseError as e:
            print(e)
      c.execute(v_sql)  # 使用cursor進行各種操作
      row = c.fetchall()  # 可以調用cursor.fetchall()一次取完所有結果,或者cursor.fetchone()一次取一行結果
      c.close()  # 關閉cursor
      conn.close()  # 關閉連接
      return row

def insert_batch_sql(v_sql, data):
    conn = cx_Oracle.connect('user/pass@host:ip/sid')
    c = conn.cursor()
    try:
        c.parse(v_sql)
    except cx_Oracle.DatabaseError as e:
        print(e)
    c.executemany(v_sql,data) #批量寫入
    conn.commit()
    c.close()
    conn.close()



def insert_log(log_msg):
      conn = cx_Oracle.connect('user/pass@host:ip/sid')
      c = conn.cursor()
      dt =datetime.datetime.now()
      param = {'rzxx': log_msg,'rzsj':dt}
      c.execute('insert into t_log values(seq_t.nextval,:rzxx,:rzsj)', param);
      conn.commit()
      c.close()
      conn.close()

#excel寫入數據,檢查數據庫表的列數和excel是否一致,不一致的跳過
def excel_check_and_imp(filename,tablename,col_nums):
      data = xr.open_workbook(filename)
      sheets = data.sheet_names()
      for sheet in sheets:
          table = data.sheet_by_name(sheet)
          ncols = table.ncols
          if ncols != col_nums:
                log_msg = "[跳過原因2]文件:["+filename+"],sheet["+sheet+"]和表["+tablename+"]列數不一致"
                print("文件:[%s],sheet[%s]和表[%s]列數不一致,跳過"%(filename,sheet,tablename))
                print(table.row_values(1))
                insert_log(log_msg)
                return -1
          else:
                batch_to_ora(filename,tablename)
                return 1
#檢查路徑下是否有excel文件
def fold_files_check(path):
   if os.path.exists(path):
         filenames =  os.listdir(path)
         for i,filename in enumerate(filenames):
               if i == 0:
                     iSpecialFile = i + 1
               sFileName = filename
               print('==================正在檢查第%s個文件=========================' % (i + 1))
               if   sFileName.find(".xls") <0 :
                     log_msg="[跳過原因1]文件:[" + filename + "]爲非excel文件"
                     print('文件:[%s]爲非excel文件,跳過' % (filename))
                     insert_log(log_msg)
                     continue
               else:
                     log_msg = "文件:[" + (path+"\\"+filename) + "]文件格式檢查通過,開始導入"
                     print('文件:[%s]文件格式檢查通過' % (path+"\\"+filename))
                     insert_log(log_msg)
               #通過filename確定表名
               list_table=['T1','T2','T3','T4','T5','T6','T7','T8','T9','T10','T11','T12','T13','T14','T15','T16','T17','T18','T19','T20','T21','T22','T23']
               tablename=(list_table[int(filename[0:2])-1])

               # 獲取表的列數
               v_sys_sql = "select count(1)  from sys.dba_tab_cols a where a.owner='RAM' AND A.TABLE_NAME='" + tablename.upper() + "'"
               cols_name = query_sql(v_sys_sql)
               for col in cols_name:
                     col_nums = col[0]
               #檢查通過的進行導入
               if excel_check_and_imp(path+"\\"+filename,tablename,col_nums) <0 :
                     continue
   else:
         print('文件夾:(%s)不存在!' % (path))

def batch_to_ora(filename,tablename):
      #讀excel
      data = xr.open_workbook(filename)
      sheets = data.sheet_names()
      for sheet in sheets:
          table = data.sheet_by_name(sheet)
      #table = data.sheets()[0]
          nrows = table.nrows
          ncols = table.ncols
          #準備變量
          list = [] #批量寫入
          num = 0
          #插入表的SQL
          sql = "INSERT INTO  :tab  :cols VALUES  :vals "
          #查詢系統視圖,獲取列名,並組裝SQL
          v_sys_sql = "select listagg(column_name,',') WITHIN GROUP (order by column_id),listagg(':'||column_id,',') WITHIN GROUP (order by column_id)  from sys.dba_tab_cols a where a.owner='RAM' AND A.TABLE_NAME='"+tablename.upper()+"'"
          cols_name = query_sql(v_sys_sql)
          for col in cols_name:
                cols = "(" + col[0] + ")"
                vals = "(" + col[1] + ")"

          #獲取所有數據類型列表
          v_sys_sql2 = "select DATA_TYPE from sys.dba_tab_cols a where a.owner='RAM' AND A.TABLE_NAME='" + tablename.upper() + "'"
          coltype = []
          col_types = query_sql(v_sys_sql2)
          for i in range(len(col_types)):
                str_type=str(col_types[i])
                if str_type.find("VARCHAR2")>0:
                      coltype.append("VARCHAR2")
                elif str_type.find("NUMBER")>0:
                      coltype.append("NUMBER")
                elif str_type.find("DATE")>0:
                      coltype.append("DATE")
          #組裝SQL
          sql=sql.replace(":cols", cols).replace(":vals", vals).replace(":tab",tablename)

          # for iRow in range(1, nrows):
          #       for iCol in range(ncols):
          #             sCell = table.cell_value(iRow, iCol)
          #             # ctype: 0 empty,1 string, 2 number, 3 date, 4 boolean, 5 error
          #             ctype = table.cell(iRow, iCol).ctype
          #             if ctype == 3:
          #                   date = xr.xldate_as_datetime(sCell,data.datemode)

          #批量寫入,6000一批
          excel_cols_num_imp = 0
          for i in range(1,nrows):
             tab_row = table.row_values(i)
             for j, x in enumerate(tab_row):
                   if table.cell(i, j).ctype==3:
                    tab_row[j]=xr.xldate_as_datetime(tab_row[j],data.datemode).strftime('%Y-%d-%m %H:%M:%S')
                   if x=='':
                       tab_row[j] = None
                   # if table.cell(i, j).ctype==1 and  trim(x)== '':
                   #    tab_row[j] = None
                   # elif coltype[j] == 'NUMBER':
                   #     if trim(x) == '':
                   #         tab_row[j]=None
                   #如果數據類型爲varchar,並且excel爲科學技術法,則進行轉換
                   elif coltype[j] == 'VARCHAR2':
                      try:
                        v=str(tab_row[j])
                        #科學計數法轉換成字符串
                        if v.find("e+") != -1:
                              v = "%.f" % float(tab_row[j])
                              tab_row[j] = str(v)
                      except Exception as e:
                            tab_row[j] = str(tab_row[j])
                    #如果爲number,且excel爲字符,則強制轉換爲Int類型
                   #TypeError: expecting string or bytes object:一般出現這個問題,是因爲excel讀出來爲'96.99',帶引號的,要使用float轉一下
                   elif  coltype[j] == 'NUMBER':
                       try:
                           v = str(tab_row[j])
                           if v.find(".") != -1:
                               v = "%.2f" % float(tab_row[j])
                               tab_row[j] = float(v)
                           else:
                               tab_row[j] = int(v)
                       except Exception as e:
                           tab_row[j] = str(tab_row[j])
             list.append(tab_row)
             print(tab_row)
             num += 1
             if num>=batch_limit:
                   print("正在導入文件:[%s],sheet:[%s]總共[%s]行,已導入[%s]行" % (filename,sheet, nrows - 1, excel_cols_num_imp))
                   insert_batch_sql(sql, list)
                   excel_cols_num_imp = excel_cols_num_imp + num
                   num = 0
                   list.clear()
                   log_msg = "文件:[" + filename + "],sheet["+sheet+"]總共[" + str(nrows-1)+ "]行,已導入["+str(excel_cols_num_imp)+"]行"
                   print("文件:[%s],sheet:[%s]總共[%s]行,已導入[%s]行"%(filename,sheet,nrows-1,excel_cols_num_imp))
                   insert_log(log_msg)
           #剩下的小於批量數的再次插入
          print("正在導入文件:[%s],sheet:[%s]總共[%s]行,已導入[%s]行,導入完成" % (filename,sheet, nrows - 1, excel_cols_num_imp))
          insert_batch_sql(sql, list)
          excel_cols_num_imp = excel_cols_num_imp + num
          list.clear()
          log_msg = "文件:[" +filename+ "總共" + str(nrows-1) + "]行,已導入[" + str(excel_cols_num_imp) + "]行,導入完成"
          print("文件:[%s],sheet:[%s]總共[%s]行,已導入[%s]行,導入完成" % (filename, sheet,nrows-1, excel_cols_num_imp))
          insert_log(log_msg)
batch_limit = 10000
fold_files_check("C:\\Users\\sp\\Desktop\\data\\******")
#fold_files_check("C:\\test")
# if __name__=="__main__":
#       cmd=sys.argv[1]
#       p1=cmd.split(",")[0]
#       fold_files_check(p1)

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章