PySpark 讀寫 MySQL 數據庫（PySpark reading from / writing to a MySQL database）
from pyspark.sql import SparkSession
# Import only the schema type classes actually used below
# (the original also imported LongType/IntegerType, which were unused).
from pyspark.sql.types import StructType, StructField, StringType

# Bug fix: the original snippet referenced `spark` before any SparkSession
# existed. Create (or reuse) the session first.
spark = SparkSession.builder.appName("palo_status_demo").getOrCreate()

# Sample data: one status record for a palo load task.
data = spark.sparkContext.parallelize(
    [
        ('2080-2090-5060', '5-7-8', 'lens_bianzu_mtf_info', '2020-04-24',
         'night', '1', 'start', 'yf')
    ]
)

# Every column is stored as a nullable string.
fields = [StructField("site_code", StringType(), True),
          StructField("site_id", StringType(), True),
          StructField("task_no", StringType(), True),
          StructField("task_date", StringType(), True),
          StructField("task_shift", StringType(), True),
          StructField("palo_status", StringType(), True),
          StructField("message", StringType(), True),
          StructField("user", StringType(), True)]

schema = StructType(fields)

# Build the DataFrame from the RDD plus the explicit schema.
data_df = spark.createDataFrame(data, schema)
data_df.collect()

# Register a temporary view so the data can be queried with SQL.
# `registerTempTable` is deprecated since Spark 2.0;
# `createOrReplaceTempView` is its documented replacement.
data_df.createOrReplaceTempView("t_palo_status")

# Row count of the DataFrame
data_df.count()
# Pretty-print the data
data_df.show()

#"SparkSession.sql,使用sql方法返回的是dataframe"

data_df2=spark.sql("select * from t_palo_status")
# DataFrame[site_code: string, site_id: string, task_no: string, task_date: string, 
# task_shift: string, palo_status: string, message: string, user: string]

data_df2.collect() #返回的是列表
# [Row(site_code=u'2080-2090-5060', site_id=u'5-7-8', task_no=u'lens_bianzu_mtf_info', task_date=u'2020-04-24',
# task_shift=u'day', palo_status=u'1', message=u'start', user=u'yf')]


#"spark讀取mysql數據庫"
#需要將mysql-jar驅動放到spark/jars
url='jdbc:mysql://10.133.0.46/aac_lens_analysis'
user='rw_aps'
password='rw_aps.aac'
table="(select * from t_palo_status_test order by id desc limit 1) temp"
properties={'user':user,'password':password}
#df1是列表
df1 = spark.read.jdbc(url=url,table=table,properties=properties).collect()

print(df1[0]['site_id'])
#5-7-8


#"spark寫入mysql數據庫"
prop={"user":"rw_aps","password":"rw_aps.aac","driver":"com.mysql.jdbc.Driver"}
data_df.write.jdbc("jdbc:mysql://10.133.0.46:3306/aac_lens_analysis",'t_palo_status_test','append', prop)

pyspark.sql.SparkSession 類的用法（usage of the SparkSession class）

from pyspark.sql import SparkSession

# Bug fix: the original was missing the line-continuation backslash after
# `SparkSession.builder`, making the snippet a SyntaxError.  Wrapping the
# chain in parentheses avoids fragile backslash continuations entirely.
spark = (
    SparkSession.builder
    .master("local")                                     # run locally
    .appName("Word Count")                               # job name in the UI
    .config("spark.some.config.option", "some-value")    # arbitrary config
    .getOrCreate()
)

getOrCreate: 得到一個現成的 SparkSession ,如果沒有就生成一個。
