Reading MySQL and SQL Server data with PySpark, and writing data to MySQL with pymysql

This post covers two tasks: reading data from MySQL and SQL Server databases with PySpark, and writing data back into a MySQL database with pymysql.
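
Both examples follow the same pattern: Spark's JDBC reader pulls a status row out of the source database, and pymysql performs the write back to MySQL. As a minimal sketch of the read side (the host, database, table, and credentials below are placeholders, not values from the examples):

from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Placeholder connection details -- replace with your own server and account;
# the matching JDBC driver jar must be on the Spark classpath
url = 'jdbc:mysql://db-host:3306/some_db'
properties = {'user': 'some_user', 'password': 'some_password',
              'driver': 'com.mysql.jdbc.Driver'}

# Wrapping a query in parentheses with an alias lets spark.read.jdbc treat it
# as a table, so only the rows the query selects travel over JDBC
query = "(select id, status from some_db.t_status order by id desc limit 1) t"
df = spark.read.jdbc(url=url, table=query, properties=properties)
df.show()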

Example 1

import datetime
import pymysql
from pyspark.sql import SparkSession

# `spark` already exists in the pyspark shell; create it explicitly when
# running as a standalone script (JDBC driver jars must be on the classpath)
spark = SparkSession.builder.getOrCreate()

#Connect to the SQL Server database and fetch the latest status row
url='jdbc:sqlserver://10.178.13.1:1433;DatabaseName=DW'
driver='com.microsoft.sqlserver.jdbc.SQLServerDriver'
user='rw-etl'
password='rw-etl.aac'
table="(select top 1 convert(varchar(10),parsedate,120) as today_date,parseday as task_shift from DW.dbo.aps_2080_mesdc_bianzu_info_flag order by id desc) temp"
properties={'user':user,'password':password,'driver':driver}
df = spark.read.jdbc(url=url,table=table,properties=properties).collect()
run_task_date=df[0]['today_date']
run_task_shift=df[0]['task_shift']

#Inspect the latest status recorded in the source database
print(run_task_date)
print(run_task_shift)

#Decide whether the shift is day or night: the night shift runs at 21:30, the day shift at 09:30
if run_task_shift=='night':
    run_task_date=run_task_date+' 21:30:00'
else:
    run_task_date=run_task_date+' 09:30:00'

#Inspect the concrete timestamp after adjusting for the shift
print(run_task_date)

#print("··········從目標數據庫獲取數據·············")
#連接mysql數據庫獲取數據
url2='jdbc:mysql://10.133.0.46:3306/aac_lens_analysis'
user='rw_aps'
password='rw_aps.aac'
table1="(select * from aac_lens_analysis.t_palo_status order by id desc limit 1) temp"
properties2={'user':user,'password':password}
df1 = spark.read.jdbc(url=url2,table=table1,properties=properties2).collect()
last_task_date=df1[0]['task_date']
last_task_shift=df1[0]['task_shift']

#Inspect the latest status recorded in the target database
print(last_task_date)
print(last_task_shift)

#Convert the timestamps from strings to datetime objects
run_date_time=datetime.datetime.strptime(run_task_date,'%Y-%m-%d %H:%M:%S')
last_date_time=datetime.datetime.strptime(last_task_date,'%Y-%m-%d %H:%M:%S')

#timedelta(days=0.5) adds half a day (12 hours) to the last recorded time
last_date_now=last_date_time+datetime.timedelta(days=0.5)

#If the latest status in the source matches the target, the source produced no new data
if run_task_date==last_task_date and run_task_shift==last_task_shift:
    message="staging table received no data from the compute platform"
    #Flip the shift for the placeholder record (the source reports 'night'/'day')
    if run_task_shift=='day':
        run_task_shift='night'
    else:
        run_task_shift='day'

    #Use pymysql to connect to MySQL and insert the placeholder status row
    db=pymysql.connect(host="10.133.0.46",port=3306,user='rw_aps',password='rw_aps.aac',db='aac_lens_analysis')
    cursor=db.cursor()
    sql1="insert into aac_lens_analysis.t_palo_status (task_date,task_shift,message) values ('%s','%s','%s')"%(last_date_now,run_task_shift,message)
    cursor.execute(sql1)
    db.commit()
    cursor.close()
    db.close()
else:
    #message="staging table received the data; the warehouse starts extracting"
    message="start"
    db=pymysql.connect(host="10.133.0.46",port=3306,user='rw_aps',password='rw_aps.aac',db='aac_lens_analysis')
    cursor=db.cursor()
    sql1="insert into aac_lens_analysis.t_palo_status (task_date,task_shift,message) values ('%s','%s','%s')"%(run_date_time,run_task_shift,message)
    cursor.execute(sql1)
    db.commit()
    cursor.close()
    db.close()
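
The INSERT above builds SQL by string formatting, which works for trusted values. pymysql also supports parameterized execution, where the driver quotes and escapes each value itself; a minimal sketch of the same insert with placeholders (the sample values are illustrative only):

import datetime
import pymysql

# Illustrative values standing in for run_date_time / run_task_shift / message
task_date = datetime.datetime(2020, 1, 1, 9, 30)
task_shift = 'day'
message = 'start'

db = pymysql.connect(host="10.133.0.46", port=3306, user='rw_aps',
                     password='rw_aps.aac', db='aac_lens_analysis')
cursor = db.cursor()
# %s placeholders are filled by execute(); pymysql converts the datetime
# and escapes any quotes inside the strings
sql = ("insert into aac_lens_analysis.t_palo_status "
       "(task_date,task_shift,message) values (%s,%s,%s)")
cursor.execute(sql, (task_date, task_shift, message))
db.commit()
cursor.close()
db.close()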

Example 2

import datetime
import pymysql
from pyspark.sql import SparkSession

# Create the SparkSession when running outside the pyspark shell
spark = SparkSession.builder.getOrCreate()

#Connect to the SQL Server database
url='jdbc:sqlserver://10.178.13.1:1433;DatabaseName=DW'
driver='com.microsoft.sqlserver.jdbc.SQLServerDriver'
user='rw-etl'
password='rw-etl.aac'
table="(select top 1 convert(varchar(10),parsedate,120) as today_date,parseday as task_shift from DW.dbo.aps_2080_mesdc_bianzu_info_flag order by id desc) temp"
properties={'user':user,'password':password,'driver':driver}
df = spark.read.jdbc(url=url,table=table,properties=properties).collect()
run_task_date=df[0]['today_date']
run_task_shift=df[0]['task_shift']
if run_task_shift=='night':
    run_task_date=run_task_date+' 21:30:00'
else:
    run_task_date=run_task_date+' 09:30:00'
print("---------打印日期和班次-----------")
print(run_task_shift)
print(run_task_date)
    
site_code='2080-2090-5060'
site_id='5-7-8'
task_name='lens_bianzu_mtf_info'
user='F Y'
message='success'

# Use pymysql to connect to MySQL and write a row into the status table
db=pymysql.connect(host="10.133.0.46",port=3306,user='rw_aps',password='rw_aps.aac',db='aac_lens_analysis')
cursor=db.cursor()
sql1="insert into aac_lens_analysis.t_palo_status (site_code,site_id,task_no,task_date,task_shift,message,user) values ('%s','%s','%s','%s','%s','%s','%s')"%(site_code,site_id,task_name,run_task_date,run_task_shift,message,user)
cursor.execute(sql1)
db.commit()
cursor.close()
db.close()
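
Both examples repeat the same connect/cursor/commit/close sequence. When several jobs write to the status table, it can be factored into a small helper; the sketch below is one possible shape (the function name and dict-based interface are assumptions, not part of the original code) and uses with/try-finally so the cursor and connection are closed even if the insert fails:

import pymysql

def insert_status(row):
    # Hypothetical helper: build the column list and one %s placeholder per value
    cols = ','.join(row)
    marks = ','.join(['%s'] * len(row))
    sql = "insert into aac_lens_analysis.t_palo_status (%s) values (%s)" % (cols, marks)
    db = pymysql.connect(host="10.133.0.46", port=3306, user='rw_aps',
                         password='rw_aps.aac', db='aac_lens_analysis')
    try:
        # the `with` block closes the cursor even if execute() raises
        with db.cursor() as cursor:
            cursor.execute(sql, tuple(row.values()))
        db.commit()
    finally:
        db.close()

insert_status({'task_date': '2020-01-01 09:30:00',
               'task_shift': 'day',
               'message': 'success'})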