一、pyspark安裝
略
# Check the installed PySpark version and create a SparkSession —
# the unified entry point for DataFrame and SQL functionality.
import pyspark

pyspark.__version__
# >>> '2.4.3'

# Fix: the class is `SparkSession` (capital S); `sparkSession` raises ImportError.
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("your app name").getOrCreate()

# `spark.sparkContext` is a SparkContext (the handle used to *build* RDDs),
# not an RDD itself — bind it to the conventional name `sc`.
sc = spark.sparkContext
rdd = sc  # NOTE(review): original (misleading) name kept as an alias for any later code that uses it.
二、pyspark rdd
三、pyspark dataframe
# --- Read a MySQL table into a Spark DataFrame over JDBC ---
# rewriteBatchedStatements=true speeds up batched INSERTs on write;
# serverTimezone avoids the MySQL Connector/J 8+ timezone handshake error.
url = "jdbc:mysql://localhost:3306?rewriteBatchedStatements=true&serverTimezone=Asia/Shanghai"
properties = {
    "user": "root",
    # "password": "your-password",
    "driver": "com.mysql.cj.jdbc.Driver",
}
table = "your_database_name.your_table_name"
# Or push down an arbitrary query. The subquery MUST carry an alias, and the
# alias must not be a reserved word — `table` is reserved in MySQL and would
# cause a SQL syntax error. Use a neutral alias such as `tmp`:
# table = "(select * from your_database_name.your_table_name) tmp"
spark_frame = spark.read.jdbc(url, table, properties=properties)
spark_frame.show()

# --- Write the DataFrame back to MySQL over JDBC ---
(
    spark_frame.write
    .format("jdbc")
    .options(**{**properties, "url": url, "dbtable": "your_write_database_name.your_write_table_name"})
    .mode("append")  # one of: "overwrite", "ignore", "append" (also "error"/"errorifexists")
    .save()
)