本文的主線 環境 => 開發 => 打包 => 提交
本文的示例代碼參考sparkapp
環境
cd /opt/services
wget https://downloads.lightbend.com/scala/2.11.8/scala-2.11.8.tgz
tar xf scala-2.11.8.tgz
mv scala-2.11.8 scala
vim ~/.zshrc
# export PATH=$PATH:/opt/services/scala/bin
. ~/.zshrc
scala -version
# Scala code runner version 2.11.8 -- Copyright 2002-2016, LAMP/EPFL
開發
mkdir sparkapp
mkdir -p sparkapp/src/main/scala
vim sparkapp/src/main/scala/SimpleApp.scala
/* SimpleApp.scala */
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf
/**
 * Minimal Spark application: counts how many lines of a text file contain
 * the letter "a" and how many contain the letter "b", then prints both counts.
 */
object SimpleApp {
  /**
   * Entry point invoked by spark-submit.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val logFile = "file:///opt/services/spark/README.md"
    val conf = new SparkConf().setAppName("Simple Application")
    val sc = new SparkContext(conf)
    try {
      // Request a minimum of 2 partitions and cache the RDD, because it is scanned twice below.
      val logData = sc.textFile(logFile, 2).cache()
      val numAs = logData.filter(line => line.contains("a")).count()
      val numBs = logData.filter(line => line.contains("b")).count()
      println("Lines with a: %s, Lines with b: %s".format(numAs, numBs))
    } finally {
      // Always shut the context down so the application releases its resources and exits cleanly,
      // even if one of the actions above throws.
      sc.stop()
    }
  }
}
打包
cd /opt/services/
wget https://github.com/sbt/sbt/releases/download/v1.4.7/sbt-1.4.7.tgz
tar xf sbt-1.4.7.tgz
mkdir -p ~/.sbt
vim ~/.sbt/repositories
[repositories]
local
huaweicloud-maven: https://repo.huaweicloud.com/repository/maven/
maven-central: https://repo1.maven.org/maven2/
sbt-plugin-repo: https://repo.scala-sbt.org/scalasbt/sbt-plugin-releases, [organization]/[module]/(scala_[scalaVersion]/)(sbt_[sbtVersion]/)[revision]/[type]s/[artifact](-[classifier]).[ext]
vim ~/.zshrc
# export PATH=$PATH:/opt/services/sbt/bin
. ~/.zshrc
sbt -version
vim sparkapp/simple.sbt
name := "Simple Project"
version := "1.0"
scalaVersion := "2.11.8"
libraryDependencies += "org.apache.spark" %% "spark-core" % "2.1.0"
cd sparkapp
sbt package
# target/scala-2.11/simple-project_2.11-1.0.jar
提交
詳細參考Spark搭建 之 單機模式
/opt/services/spark/bin/spark-submit --class "SimpleApp" target/scala-2.11/simple-project_2.11-1.0.jar
# Lines with a: 61, Lines with b: 30