The sample code in this article is based on SparkOSS.
Environment
Maven
mvn --version
# Apache Maven 3.6.3
mv settings.xml ~/.m2/
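The contents of settings.xml are not shown above; a minimal sketch, assuming its purpose is to point Maven at the Aliyun mirror (a common reason to replace ~/.m2/settings.xml in this setup):
<settings>
  <mirrors>
    <mirror>
      <id>aliyun</id>
      <name>Aliyun public mirror</name>
      <mirrorOf>central</mirrorOf>
      <url>https://maven.aliyun.com/repository/public</url>
    </mirror>
  </mirrors>
</settings>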
Scala
scala -version
# Scala code runner version 2.11.8 -- Copyright 2002-2016, LAMP/EPFL
Development
Project
- Create a new project => Maven => Create from archetype (a CLI equivalent is sketched after this list)
- Configure the project => Project Structure => Global Libraries
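The same skeleton can also be generated from the command line; a sketch assuming the net.alchim31.maven scala-archetype-simple archetype (the otherwise unused <inceptionYear>2008</inceptionYear> in the pom below is likely a leftover of such an archetype):
# Hypothetical CLI equivalent of the IDE wizard
mvn archetype:generate \
  -DarchetypeGroupId=net.alchim31.maven \
  -DarchetypeArtifactId=scala-archetype-simple \
  -DgroupId=org.example -DartifactId=spark-oss -Dversion=1.0-SNAPSHOT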
vim pom.xml
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>org.example</groupId>
  <artifactId>spark-oss</artifactId>
  <version>1.0-SNAPSHOT</version>
  <inceptionYear>2008</inceptionYear>
  <properties>
    <scala.version>2.11.8</scala.version>
  </properties>
  <dependencies>
    <dependency>
      <groupId>org.scala-lang</groupId>
      <artifactId>scala-library</artifactId>
      <version>${scala.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_2.11</artifactId>
      <version>2.4.7</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-sql_2.11</artifactId>
      <version>2.4.7</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>2.7.3</version>
    </dependency>
    <!-- Support for the OSS data source -->
    <dependency>
      <groupId>com.aliyun.emr</groupId>
      <artifactId>emr-core</artifactId>
      <version>1.5.0</version>
    </dependency>
    <dependency>
      <groupId>com.aliyun.oss</groupId>
      <artifactId>aliyun-sdk-oss</artifactId>
      <version>3.4.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.httpcomponents</groupId>
      <artifactId>httpcore</artifactId>
      <version>4.4</version>
    </dependency>
    <dependency>
      <groupId>org.apache.httpcomponents</groupId>
      <artifactId>httpclient</artifactId>
      <version>4.4.1</version>
    </dependency>
  </dependencies>
</project>
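Every artifact above has to agree on the Scala binary version (the _2.11 suffix of the Spark artifacts must match the 2.11.x scala.version); the resolved versions can be checked with a standard Maven command:
mvn dependency:tree -Dincludes=org.scala-lang,org.apache.spark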
vim src/main/scala/org/example/App.scala
package org.example

object App {
  def main(args: Array[String]): Unit = {
    println("Hello World!")
  }
}
rm -rf src/test
- Run the project
vim src/main/scala/org/example/App.scala
package org.example

import org.apache.spark.{SparkConf, SparkContext}

object App {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("SparkOSS")
      .setMaster("local[2]")
    // Resolve the oss:// scheme through the EMR NativeOssFileSystem
    conf.set("spark.hadoop.fs.oss.impl", "com.aliyun.fs.oss.nat.NativeOssFileSystem")
    conf.set("spark.hadoop.mapreduce.job.run-local", "true")
    // OSS credentials (redacted)
    conf.set("spark.hadoop.fs.oss.accessKeyId", "***")
    conf.set("spark.hadoop.fs.oss.accessKeySecret", "***")
    val sc = new SparkContext(conf)

    // Read directly from OSS: oss://<bucket>.<endpoint>/<path>
    val input = sc.textFile("oss://share-yl.oss-cn-hangzhou.aliyuncs.com/README.md")
    val numAs = input.filter(line => line.contains("a")).count()
    val numBs = input.filter(line => line.contains("b")).count()
    println("Lines with a: %s, Lines with b: %s".format(numAs, numBs))
    sc.stop()
  }
}
- Run the project
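The pom also pulls in spark-sql_2.11, which this example never touches; a minimal sketch of the same count through the SparkSession API, under the same (redacted) credential assumptions as above, not part of the original example:
import org.apache.spark.sql.SparkSession

object AppSql {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("SparkOSS-SQL")
      .master("local[2]")
      .config("spark.hadoop.fs.oss.impl", "com.aliyun.fs.oss.nat.NativeOssFileSystem")
      .config("spark.hadoop.mapreduce.job.run-local", "true")
      .config("spark.hadoop.fs.oss.accessKeyId", "***") // redacted, as above
      .config("spark.hadoop.fs.oss.accessKeySecret", "***")
      .getOrCreate()

    // Dataset[String] with one element per line; same filter/count logic as the RDD version
    val input = spark.read.textFile("oss://share-yl.oss-cn-hangzhou.aliyuncs.com/README.md")
    println(s"Lines with a: ${input.filter(_.contains("a")).count()}")
    spark.stop()
  }
}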
Package
vim pom.xml
# unchanged code omitted
<build>
  <finalName>spark-oss</finalName>
  <plugins>
    <plugin>
      <groupId>org.apache.maven.plugins</groupId>
      <artifactId>maven-compiler-plugin</artifactId>
      <version>3.6.1</version>
      <configuration>
        <source>1.8</source>
        <target>1.8</target>
      </configuration>
    </plugin>
    <plugin>
      <groupId>org.apache.maven.plugins</groupId>
      <artifactId>maven-assembly-plugin</artifactId>
      <version>3.1.1</version>
      <configuration>
        <descriptorRefs>
          <descriptorRef>jar-with-dependencies</descriptorRef>
        </descriptorRefs>
      </configuration>
      <executions>
        <execution>
          <id>make-assembly</id>
          <phase>package</phase>
          <goals>
            <goal>single</goal>
          </goals>
        </execution>
      </executions>
    </plugin>
  </plugins>
</build>
# unchanged code omitted
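Neither pom snippet shows a Scala compiler plugin, so it presumably sits in the omitted parts; if it was trimmed away, mvn clean package will skip src/main/scala entirely. A sketch of the usual addition, assuming net.alchim31.maven's scala-maven-plugin:
<!-- Hypothetical addition: compile Scala sources during the build -->
<plugin>
  <groupId>net.alchim31.maven</groupId>
  <artifactId>scala-maven-plugin</artifactId>
  <version>3.4.6</version>
  <executions>
    <execution>
      <goals>
        <goal>compile</goal>
        <goal>testCompile</goal>
      </goals>
    </execution>
  </executions>
</plugin>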
mvn clean package
/opt/services/spark/bin/spark-submit --class "org.example.App" target/spark-oss-jar-with-dependencies.jar
# TODO: Exception in thread "main" java.lang.RuntimeException: java.lang.ClassNotFoundException: Class com.aliyun.fs.oss.nat.NativeOssFileSystem not found
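A hypothetical first debugging step (not from the original): check whether the class made it into the fat jar at all, and if it did not, hand the emr-core jar to spark-submit explicitly:
# Is NativeOssFileSystem inside the assembly?
jar tf target/spark-oss-jar-with-dependencies.jar | grep NativeOssFileSystem
# If it is missing, ship emr-core alongside the application jar
/opt/services/spark/bin/spark-submit --class "org.example.App" \
  --jars ~/.m2/repository/com/aliyun/emr/emr-core/1.5.0/emr-core-1.5.0.jar \
  target/spark-oss-jar-with-dependencies.jar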