Building on the previous two blog posts:
https://blog.csdn.net/weixin_40096730/article/details/102782195
https://blog.csdn.net/weixin_40096730/article/details/89921176
package com.likexinTest.BigData.Spark

import org.apache.spark.{SparkConf, SparkContext}

object WordCount {
  def main(args: Array[String]): Unit = {
    // 1. Create the SparkConf and set the application name
    val conf = new SparkConf().setAppName("WC")
    // 2. Create the SparkContext, the entry point for submitting a Spark application
    val sc = new SparkContext(conf)
    // 3. Use sc to create the RDD and run the transformations and the action
    //    The whole job could also be written as a single chain:
    //    sc.textFile(args(0)).flatMap(_.split(" ")).map((_, 1))
    //      .reduceByKey(_ + _, 1).sortBy(_._2, false).saveAsTextFile(args(1))

    // Read the input file
    val line = sc.textFile(args(0))
    // Split each line into words (flatten)
    val words = line.flatMap(_.split(" "))
    // Map each word to a pair (word, 1)
    val k2v = words.map((_, 1))
    // Sum the counts for each word
    val result = k2v.reduceByKey(_ + _)
    // Write the result
    result.saveAsTextFile(args(1))
    // 4. Shut down the SparkContext
    sc.stop()
  }
}
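Before packaging, it can be handy to run the job locally from the IDE first. Below is a minimal local-mode sketch of my own (not part of the original project); the object name WordCountLocal and the input path data/hello.txt are just placeholders:

package com.likexinTest.BigData.Spark

import org.apache.spark.{SparkConf, SparkContext}

// Hypothetical local-mode variant for quick testing; names and paths are placeholders
object WordCountLocal {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("WC-local").setMaster("local[*]")
    val sc = new SparkContext(conf)
    sc.textFile("data/hello.txt")     // assumed local input file
      .flatMap(_.split(" "))
      .map((_, 1))
      .reduceByKey(_ + _)
      .collect()                      // only for small test data: pull results to the driver
      .foreach(println)
    sc.stop()
  }
}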
The pom.xml file:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.LikexinTest1</groupId>
    <artifactId>SparkDemo1</artifactId>
    <version>1.0</version>
    <dependencies>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>2.1.1</version>
        </dependency>
    </dependencies>
    <build>
        <finalName>WordCount</finalName>
        <plugins>
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>3.2.2</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>compile</goal>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>3.0.0</version>
                <configuration>
                    <archive>
                        <manifest>
                            <mainClass>com.likexinTest.BigData.Spark.WordCount</mainClass>
                        </manifest>
                    </archive>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
Package the project!! Running the Maven package phase invokes the scala-maven-plugin and then the maven-assembly-plugin, which builds WordCount-jar-with-dependencies.jar under target/.
Here the following error appeared: scala.reflect.internal.MissingRequirementError: object java.lang.Object in compiler mirror not found.
The fix is to point the project at the correct JDK path (after installing the JDK, selecting it lets the path be detected automatically).
Packaging complete!!!
Copy the jar into a directory on the server.
In IDEA, right-click your object and choose Copy Reference to get its fully qualified class name (used for --class below).
Now read the test file on HDFS and write the WordCount result to the out directory.
Run spark-submit:
bin/spark-submit \
--class com.likexinTest.BigData.Spark.WordCount \
--master yarn \
--deploy-mode client \
/usr/local/BigDataApp/spark-2.1.1-bin-hadoop2.7/likexinTest/WordCount-jar-with-dependencies.jar \
hdfs://192.168.187.100:9000/hello.txt \
hdfs://192.168.187.100:9000/out
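Note that saveAsTextFile fails if the output directory already exists, so remove hdfs://192.168.187.100:9000/out before re-running. To peek at the result afterwards, a quick check from spark-shell (my own sketch, assuming the same NameNode address and output path) is:

// Read the part files back and print a few lines (uses spark-shell's built-in sc)
sc.textFile("hdfs://192.168.187.100:9000/out/part-*").take(10).foreach(println)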
The output: