10-SparkSQL: Reading a MySQL Data Source and Writing the Results Back to MySQL

I. Create the test tables t_user2, user_t, and t_result

1. The t_user2 table structure is as follows:

CREATE TABLE `t_user2` (
  `id` int(11) DEFAULT NULL COMMENT 'id',
  `name` varchar(64) DEFAULT NULL COMMENT 'username',
  `password` varchar(64) DEFAULT NULL COMMENT 'password',
  `age` int(11) DEFAULT NULL COMMENT 'age'
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

2. The user_t table structure is as follows:

CREATE TABLE `user_t` (
  `id` int(11) DEFAULT NULL COMMENT 'id',
  `name` varchar(64) DEFAULT NULL COMMENT 'name',
  `password` varchar(64) DEFAULT NULL COMMENT 'password',
  `address` varchar(64) DEFAULT NULL COMMENT 'address',
  `age` int(11) DEFAULT NULL COMMENT 'age'
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

3. The t_result table structure is as follows:

CREATE TABLE `t_result` (
  `id` int(11) DEFAULT NULL COMMENT 'id',
  `name` varchar(64) DEFAULT NULL COMMENT 'name',
  `password` varchar(64) DEFAULT NULL COMMENT 'password',
  `address` varchar(64) DEFAULT NULL COMMENT 'address',
  `age` int(11) DEFAULT NULL COMMENT 'age'
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

4. Insert the test data:

INSERT INTO `t_user2` VALUES (12, 'cassie', '1234562', 25);
INSERT INTO `t_user2` VALUES (11, 'zhangs', '123456', 25);
INSERT INTO `t_user2` VALUES (23, 'zhangs', '2321312', 34);
INSERT INTO `t_user2` VALUES (22, 'tom', 'sadfdsa', 23);
 
 
INSERT INTO `user_t` VALUES (1, 'zhangs', '123456', NULL, 25);
INSERT INTO `user_t` VALUES (2, 'zhangs', '123456', NULL, 252);

II. Create a Maven project and add the MySQL driver and the Spark-related jars

mysql-connector-java.5.1.24.jar
spark-assembly-1.6.2-hadoop2.6.0.jar
spark-examples-1.6.2-hadoop2.6.0.jar

Note: with Maven, the equivalent dependencies are declared in pom.xml (which pulls in Spark 2.3.0 artifacts rather than the standalone 1.6.2 jars listed above). Its contents are as follows:

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.itxiaobai</groupId>
    <artifactId>00-SparkSql</artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencyManagement>
        <dependencies>
            <dependency>
                <groupId>io.netty</groupId>
                <artifactId>netty-all</artifactId>
                <version>4.1.18.Final</version>
            </dependency>
        </dependencies>
    </dependencyManagement>
    <dependencies>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.47</version>
        </dependency>
        <dependency>
            <groupId>com.google.code.gson</groupId>
            <artifactId>gson</artifactId>
            <version>2.8.5</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.7.5</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.7.5</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.7.5</version>
        </dependency>
        <!-- spark-sql dependency -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>2.3.0</version>
        </dependency>
        <!-- spark-core dependency -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>2.3.0</version>
        </dependency>
        <!-- scala dependency -->
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>2.11.7</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>1.7.10</version>
        </dependency>
        <dependency>
            <groupId>commons-lang</groupId>
            <artifactId>commons-lang</artifactId>
            <version>2.5</version>
        </dependency>
        <dependency>
            <groupId>commons-logging</groupId>
            <artifactId>commons-logging</artifactId>
            <version>1.1.3</version>
        </dependency>
    </dependencies>
</project>

III. Create the Scala class that runs the job:

SparkSqlMysqlDatasource.scala
package sql
 
import java.util.Properties
 
import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.{SparkConf, SparkContext}
 
/**
  * To submit the job in a production environment:
  * spark-submit --class sql.SparkSqlMysqlDatasource --master yarn-cluster --executor-memory 2G --num-executors 2 --driver-memory 1g --executor-cores 1  /data1/e_heyutao/sparktest/sparkEnn.jar
  *
  */
object SparkSqlMysqlDatasource {
  // database configuration
  lazy val url = "jdbc:mysql://your_ip:3306/my_test"
  lazy val username = "root"
  lazy val password = "secret_password"
 
  def main(args: Array[String]) {
//    val sparkConf = new SparkConf().setAppName("sparkSqlTest").setMaster("local[2]").set("spark.app.id", "sql")
    val sparkConf = new SparkConf().setAppName("sparkSqlTest").setMaster("yarn-cluster").set("spark.app.id", "sqlTest")
    // serialization settings
    sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    sparkConf.set("spark.kryoserializer.buffer", "256m")
    sparkConf.set("spark.kryoserializer.buffer.max", "2046m")
    sparkConf.set("spark.akka.frameSize", "500")
    sparkConf.set("spark.rpc.askTimeout", "30")
    // create the SparkContext
    val sc = new SparkContext(sparkConf)
    // create the SQLContext
    val sqlContext = new SQLContext(sc)
 
    // import implicit conversions so the Spark SQL built-in functions can be used
    import sqlContext.implicits._
    
    // build the JDBC connection string and properties
    val uri = url + "?user=" + username + "&password=" + password + "&useUnicode=true&characterEncoding=UTF-8"
    val prop = new Properties()
    // Note: when running on a cluster this line must be added, otherwise a "MySQL driver not found" error is thrown
    prop.put("driver", "com.mysql.jdbc.Driver")
    // load the MySQL tables as DataFrames
    val df_test1: DataFrame = sqlContext.read.jdbc(uri, "user_t", prop)
    val df_test2: DataFrame = sqlContext.read.jdbc(uri, "t_user2", prop)
 
    // select the required columns from the DataFrame
    df_test2.select("id", "name", "age").collect()
      .foreach(row => {
        println("id  " + row(0) + " ,name  " + row(1) + ", age  " + row(2))
      })
    // register as a temporary table (registerTempTable is deprecated in Spark 2.x in favor of createOrReplaceTempView)
    df_test1.registerTempTable("temp_table")
 
    val total_sql = "select * from temp_table "
    val total_df: DataFrame = sqlContext.sql(total_sql)
    
    // write the result back to the database
    val properties=new Properties()
    properties.setProperty("user","root")
    properties.setProperty("password","secret_password")
    total_df.write.mode("append").jdbc("jdbc:mysql://your_ip:3306/my_test?useUnicode=true&characterEncoding=UTF-8","t_result",properties)
 
    /**
      * Note: the accepted save modes can be seen in the DataFrameWriter source:
      * def mode(saveMode: String): DataFrameWriter = {
      *   this.mode = saveMode.toLowerCase match {
      *     case "overwrite" => SaveMode.Overwrite
      *     case "append" => SaveMode.Append
      *     case "ignore" => SaveMode.Ignore
      *     case "error" | "default" => SaveMode.ErrorIfExists
      *     case _ => throw new IllegalArgumentException(s"Unknown save mode: $saveMode. " +
      *       "Accepted modes are 'overwrite', 'append', 'ignore', 'error'.")
      *   }
      *   this
      * }
      */
 
    // group by name and compute the average age
    total_df.groupBy("name").avg("age").collect().foreach(x => {
      println("name " + x(0))
      println("age " + x(1))
    })
 
  }
}

Output:

id  12 ,name  cassie, age  25
id  11 ,name  zhangs, age  25
id  23 ,name  zhangs, age  34
id  22 ,name  tom, age  23
name zhangs
age    138.5

The four "id/name/age" lines come from the select on t_user2. The name/age pair is the group-by average over temp_table (i.e. user_t), whose two 'zhangs' rows have ages 25 and 252, giving (25 + 252) / 2 = 138.5.
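Since the pom above pulls in Spark 2.3.0, the same flow can also be written against the Spark 2.x entry point. The following is only a minimal sketch, assuming the same placeholder host, database, user and password as above; SparkSession replaces SQLContext, and createOrReplaceTempView replaces the deprecated registerTempTable.

package sql

import java.util.Properties

import org.apache.spark.sql.SparkSession

object SparkSqlMysqlDatasourceV2 {
  def main(args: Array[String]): Unit = {
    // SparkSession is the Spark 2.x replacement for SQLContext
    val spark = SparkSession.builder()
      .appName("sparkSqlTest")
      .master("local[2]") // for cluster runs, drop this and pass --master yarn to spark-submit
      .getOrCreate()

    val url = "jdbc:mysql://your_ip:3306/my_test?useUnicode=true&characterEncoding=UTF-8"
    val prop = new Properties()
    prop.put("user", "root")
    prop.put("password", "secret_password")
    prop.put("driver", "com.mysql.jdbc.Driver") // still required when running on a cluster

    // load the MySQL table as a DataFrame
    val userT = spark.read.jdbc(url, "user_t", prop)

    // createOrReplaceTempView replaces the deprecated registerTempTable
    userT.createOrReplaceTempView("temp_table")
    val totalDf = spark.sql("select * from temp_table")

    // append the result to t_result, exactly as in the program above
    totalDf.write.mode("append").jdbc(url, "t_result", prop)

    // group by name and compute the average age
    totalDf.groupBy("name").avg("age").show()

    spark.stop()
  }
}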

IV. Check the t_result table in the database: the data just read from MySQL has been inserted into it (a quick Spark-side check is sketched below).
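Instead of opening a MySQL client, the table can also be checked from Spark itself. This is only a minimal sketch, reusing the same placeholder connection details as above:

import java.util.Properties

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

object VerifyResult {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("verifyResult").setMaster("local[2]"))
    val sqlContext = new SQLContext(sc)

    val uri = "jdbc:mysql://your_ip:3306/my_test?user=root&password=secret_password&useUnicode=true&characterEncoding=UTF-8"
    val prop = new Properties()
    prop.put("driver", "com.mysql.jdbc.Driver")

    // t_result should now contain the rows copied from user_t
    sqlContext.read.jdbc(uri, "t_result", prop).show()

    sc.stop()
  }
}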


