Spark RDD 數據到 MySQL

數據
www.xzdream.cn    1    2    江西
www.xzdream.cn    3    4    廣東
www.xzdream.cn    1    2    西藏
www.xzdream.cn    3    4    浙江
將日誌文件 put 到hdfs
package com.xzdream.spark


import java.sql.DriverManager


import org.apache.spark.{SparkConf, SparkContext}


/**
  * Log App
  */
/**
  * Log App: reads tab-separated access-log lines (domain \t n \t n \t province),
  * counts visits per province, ranks them descending, and writes the province
  * names into the MySQL table `py` in batched inserts.
  *
  * Usage: SparkContextApp <input-path>  (local file:// path or hdfs:// path)
  */
object SparkContextApp {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("LogApp").setMaster("local[2]")
    val sc = new SparkContext(sparkConf)

    // e.g. file:///Users/hadoop/scala/spark_demo1/src/main/logs/2020-5-11.log
    val filePath = args(0)
    val lines = sc.textFile(filePath)

    // Province visit counts, sorted by count descending.
    // BUG FIX: the fallback key was previously the Char '-' while the main
    // branch produced a String key, widening the RDD to (Any, Int) and keeping
    // the '-' bucket distinct from any "-" string key. Both branches now
    // produce (String, Int).
    val res = lines.map { line =>
      val fields = line.split("\t")
      if (fields.length == 4) (fields(3), 1) else ("-", 1)
    }.reduceByKey(_ + _).sortBy(_._2, ascending = false)

    // One JDBC connection per partition; inserts are batched and committed
    // once per partition. try/finally ensures the statement and connection
    // are released even when an insert fails (the original leaked both).
    res.foreachPartition { partition =>
      val conn = DriverManager.getConnection(
        "jdbc:mysql://127.0.0.1:3306/hive_db?user=root&password=123456&useUnicode=true&characterEncoding=UTF-8")
      try {
        conn.setAutoCommit(false)
        val pstmt = conn.prepareStatement("insert into py(name) values(?)")
        try {
          partition.foreach { case (name, _) =>
            pstmt.setString(1, name)
            pstmt.addBatch()
          }
          pstmt.executeBatch()
          conn.commit()
        } finally {
          pstmt.close()
        }
      } finally {
        conn.close()
      }
    }

    sc.stop()
  }
}
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.xzdream.spark</groupId>
  <artifactId>spark_demo1</artifactId>
  <version>1.0</version>
  <inceptionYear>2008</inceptionYear>
  <properties>
    <scala.version>2.11.8</scala.version>
  </properties>


  <repositories>
    <repository>
      <id>scala-tools.org</id>
      <name>Scala-Tools Maven2 Repository</name>
      <url>http://scala-tools.org/repo-releases</url>
    </repository>
  </repositories>


  <pluginRepositories>
    <pluginRepository>
      <id>scala-tools.org</id>
      <name>Scala-Tools Maven2 Repository</name>
      <url>http://scala-tools.org/repo-releases</url>
    </pluginRepository>
  </pluginRepositories>


  <dependencies>
    <!--scala依賴-->
    <dependency>
      <groupId>org.scala-lang</groupId>
      <artifactId>scala-library</artifactId>
      <version>${scala.version}</version>
    </dependency>


    <!--spark core 依賴-->
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_2.11</artifactId>
      <version>2.4.0</version>
    </dependency>


    <dependency>
      <groupId>mysql</groupId>
      <artifactId>mysql-connector-java</artifactId>
      <version>5.1.27</version>
    </dependency>
  </dependencies>


  <build>
    <sourceDirectory>src/main/scala</sourceDirectory>
    <testSourceDirectory>src/test/scala</testSourceDirectory>
    <plugins>
      <plugin>
        <groupId>org.scala-tools</groupId>
        <artifactId>maven-scala-plugin</artifactId>
        <executions>
          <execution>
            <goals>
              <goal>compile</goal>
              <goal>testCompile</goal>
            </goals>
          </execution>
        </executions>
        <configuration>
          <scalaVersion>${scala.version}</scalaVersion>
          <args>
            <arg>-target:jvm-1.8</arg>
          </args>
        </configuration>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-eclipse-plugin</artifactId>
        <configuration>
          <downloadSources>true</downloadSources>
          <buildcommands>
            <buildcommand>ch.epfl.lamp.sdt.core.scalabuilder</buildcommand>
          </buildcommands>
          <additionalProjectnatures>
            <projectnature>ch.epfl.lamp.sdt.core.scalanature</projectnature>
          </additionalProjectnatures>
          <classpathContainers>
            <classpathContainer>org.eclipse.jdt.launching.JRE_CONTAINER</classpathContainer>
            <classpathContainer>ch.epfl.lamp.sdt.launching.SCALA_CONTAINER</classpathContainer>
          </classpathContainers>
        </configuration>
      </plugin>
    </plugins>
  </build>
  <reporting>
    <plugins>
      <plugin>
        <groupId>org.scala-tools</groupId>
        <artifactId>maven-scala-plugin</artifactId>
        <configuration>
          <scalaVersion>${scala.version}</scalaVersion>
        </configuration>
      </plugin>
    </plugins>
  </reporting>
</project>
提交任務
./spark-submit --master yarn --class com.xzdream.spark.SparkContextApp /Users/hadoop/scala/spark_demo1/target/spark_demo1-1.0.jar hdfs:///spark/logs

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章