數據
www.xzdream.cn 1 2 江西
www.xzdream.cn 3 4 廣東
www.xzdream.cn 1 2 西藏
www.xzdream.cn 3 4 浙江
將日誌文件 put 到 HDFS
package com.xzdream.spark
import java.sql.DriverManager
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Log App: counts visits per province from tab-separated access logs and
 * writes the provinces, ordered by visit count (descending), into MySQL.
 *
 * Expected input: lines with exactly 4 tab-separated fields, where field 3
 * (0-based) is the province. Malformed lines are bucketed under "-".
 *
 * Usage: SparkContextApp <input-path>   (file:// or hdfs:// URI)
 */
object SparkContextApp {
  def main(args: Array[String]): Unit = {
    // Fail fast with a clear message instead of ArrayIndexOutOfBoundsException.
    require(args.length >= 1, "usage: SparkContextApp <input-path>")

    val sparkConf = new SparkConf().setAppName("LogApp").setMaster("local[2]")
    val sc = new SparkContext(sparkConf)
    try {
      // e.g. file:///Users/hadoop/scala/spark_demo1/src/main/logs/2020-5-11.log
      val lines = sc.textFile(args(0))

      // Top provinces by visit count.
      // BUG FIX: the original else branch returned ('-', 1) — a (Char, Int) —
      // which widened the pair type to (Any, Int); use the String "-" instead.
      val res = lines.map { line =>
        val splits = line.split("\t")
        if (splits.length == 4) (splits(3), 1) else ("-", 1)
      }.reduceByKey(_ + _).sortBy(_._2, ascending = false)

      // Persist to MySQL: one JDBC connection per partition, batched inserts.
      res.foreachPartition { partition =>
        val conn = DriverManager.getConnection(
          "jdbc:mysql://127.0.0.1:3306/hive_db?user=root&password=123456&useUnicode=true&characterEncoding=UTF-8")
        try {
          conn.setAutoCommit(false) // commit the whole partition atomically
          val pstmt = conn.prepareStatement("insert into py(name) values(?)")
          try {
            partition.foreach { case (name, _) =>
              pstmt.setString(1, name)
              pstmt.addBatch()
            }
            pstmt.executeBatch()
            conn.commit()
          } finally {
            pstmt.close() // release statement even if the batch fails
          }
        } finally {
          conn.close() // always release the connection (was leaked on error)
        }
      }
    } finally {
      sc.stop() // stop the context even when a stage throws
    }
  }
}
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.xzdream.spark</groupId>
  <artifactId>spark_demo1</artifactId>
  <version>1.0</version>
  <inceptionYear>2008</inceptionYear>
  <properties>
    <scala.version>2.11.8</scala.version>
  </properties>
  <!-- NOTE: scala-tools.org is defunct; these artifacts are on Maven Central.
       URLs switched to https because Maven 3.8+ blocks plain-http repositories. -->
  <repositories>
    <repository>
      <id>scala-tools.org</id>
      <name>Scala-Tools Maven2 Repository</name>
      <url>https://scala-tools.org/repo-releases</url>
    </repository>
  </repositories>
  <pluginRepositories>
    <pluginRepository>
      <id>scala-tools.org</id>
      <name>Scala-Tools Maven2 Repository</name>
      <url>https://scala-tools.org/repo-releases</url>
    </pluginRepository>
  </pluginRepositories>
  <dependencies>
    <!-- Scala standard library -->
    <dependency>
      <groupId>org.scala-lang</groupId>
      <artifactId>scala-library</artifactId>
      <version>${scala.version}</version>
    </dependency>
    <!-- Spark core (Scala 2.11 build, matching scala.version) -->
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_2.11</artifactId>
      <version>2.4.0</version>
    </dependency>
    <!-- MySQL JDBC driver, used by the foreachPartition sink -->
    <dependency>
      <groupId>mysql</groupId>
      <artifactId>mysql-connector-java</artifactId>
      <version>5.1.27</version>
    </dependency>
  </dependencies>
  <build>
    <sourceDirectory>src/main/scala</sourceDirectory>
    <testSourceDirectory>src/test/scala</testSourceDirectory>
    <plugins>
      <plugin>
        <groupId>org.scala-tools</groupId>
        <artifactId>maven-scala-plugin</artifactId>
        <executions>
          <execution>
            <goals>
              <goal>compile</goal>
              <goal>testCompile</goal>
            </goals>
          </execution>
        </executions>
        <configuration>
          <scalaVersion>${scala.version}</scalaVersion>
          <args>
            <!-- was jvm-1.5: not supported by Scala 2.11, and Spark 2.4
                 requires Java 8 — target 1.8 instead -->
            <arg>-target:jvm-1.8</arg>
          </args>
        </configuration>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-eclipse-plugin</artifactId>
        <configuration>
          <downloadSources>true</downloadSources>
          <buildcommands>
            <buildcommand>ch.epfl.lamp.sdt.core.scalabuilder</buildcommand>
          </buildcommands>
          <additionalProjectnatures>
            <projectnature>ch.epfl.lamp.sdt.core.scalanature</projectnature>
          </additionalProjectnatures>
          <classpathContainers>
            <classpathContainer>org.eclipse.jdt.launching.JRE_CONTAINER</classpathContainer>
            <classpathContainer>ch.epfl.lamp.sdt.launching.SCALA_CONTAINER</classpathContainer>
          </classpathContainers>
        </configuration>
      </plugin>
    </plugins>
  </build>
  <reporting>
    <plugins>
      <plugin>
        <groupId>org.scala-tools</groupId>
        <artifactId>maven-scala-plugin</artifactId>
        <configuration>
          <scalaVersion>${scala.version}</scalaVersion>
        </configuration>
      </plugin>
    </plugins>
  </reporting>
</project>
提交任務
./spark-submit --master yarn --class com.xzdream.spark.SparkContextApp /Users/hadoop/scala/spark_demo1/target/spark_demo1-1.0.jar hdfs:///spark/logs