# PySpark: reading from and writing to a MySQL database
from pyspark.sql import SparkSession
# 導入類型
from pyspark.sql.types import StructType, StructField, LongType, StringType,IntegerType
# NOTE(review): assumes `spark` is an existing SparkSession (see the builder
# example at the bottom of this file) -- it is never created before this point.
# Build an RDD with a single sample row for the palo-status table.
data = spark.sparkContext.parallelize(
    [
        ('2080-2090-5060', '5-7-8', 'lens_bianzu_mtf_info', '2020-04-24', 'night', '1', 'start', 'yf')
    ]
)
# Explicit schema: every column is a nullable string.
fields = [StructField("site_code", StringType(), True),
          StructField("site_id", StringType(), True),
          StructField("task_no", StringType(), True),
          StructField("task_date", StringType(), True),
          StructField("task_shift", StringType(), True),
          StructField("palo_status", StringType(), True),
          StructField("message", StringType(), True),
          StructField("user", StringType(), True)
          ]
schema = StructType(fields)
# Create the DataFrame from the RDD plus the schema.
data_df = spark.createDataFrame(data, schema)
data_df.collect()
# Register a temporary view so the data can be queried with SQL.
# (registerTempTable has been deprecated since Spark 2.0; createOrReplaceTempView
# is the supported replacement and registers the same view name.)
data_df.createOrReplaceTempView("t_palo_status")
# Row count of the DataFrame.
data_df.count()
# Print the data.
data_df.show()
# SparkSession.sql returns a DataFrame.
data_df2 = spark.sql("select * from t_palo_status")
# DataFrame[site_code: string, site_id: string, task_no: string, task_date: string,
# task_shift: string, palo_status: string, message: string, user: string]
data_df2.collect()  # collect() returns a list of Row objects
# [Row(site_code=u'2080-2090-5060', site_id=u'5-7-8', task_no=u'lens_bianzu_mtf_info', task_date=u'2020-04-24',
# task_shift=u'day', palo_status=u'1', message=u'start', user=u'yf')]
#"spark讀取mysql數據庫"
#需要將mysql-jar驅動放到spark/jars
url='jdbc:mysql://10.133.0.46/aac_lens_analysis'
user='rw_aps'
password='rw_aps.aac'
table="(select * from t_palo_status_test order by id desc limit 1) temp"
properties={'user':user,'password':password}
#df1是列表
df1 = spark.read.jdbc(url=url,table=table,properties=properties).collect()
print(df1[0]['site_id'])
#5-7-8
#"spark寫入mysql數據庫"
prop={"user":"rw_aps","password":"rw_aps.aac","driver":"com.mysql.jdbc.Driver"}
data_df.write.jdbc("jdbc:mysql://10.133.0.46:3306/aac_lens_analysis",'t_palo_status_test','append', prop)
# Notes on the pyspark.sql.SparkSession class
# Build (or fetch) a SparkSession via the fluent builder API.
# BUG FIX(review): the original `spark = SparkSession.builder` line had no
# trailing backslash, so the chained `.master(...)` lines were syntax errors.
from pyspark.sql import SparkSession
spark = SparkSession.builder \
    .master("local") \
    .appName("Word Count") \
    .config("spark.some.config.option", "some-value") \
    .getOrCreate()
# getOrCreate: returns the existing SparkSession if one is already active, otherwise creates a new one.