pom依賴
<!-- Spark core; the _2.11 artifact suffix is the Scala binary version this build targets. -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>1.6.1</version>
</dependency>
<!-- Spark Streaming; same Scala suffix and same version (1.6.1) as spark-core above. -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.11</artifactId>
<version>1.6.1</version>
</dependency>
代碼
import com.google.common.base.Optional;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function0;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import scala.Tuple2;
/**
 * Stateful word count over a socket text stream, written against the Spark Streaming 1.6 Java API.
 *
 * <p>Each batch of lines read from {@code HOST:PORT} is split into words, mapped to
 * {@code (word, 1)} pairs, and folded into a running per-word total with
 * {@code updateStateByKey}. The streaming context is obtained through
 * {@link JavaStreamingContext#getOrCreate}, so it is rebuilt from the checkpoint directory when
 * checkpoint data exists there and created fresh otherwise.
 */
public class MyStreamIng {
    private static final String APP_NAME = "streaming";
    // A receiver-based input stream occupies one local thread, so at least two are needed.
    private static final String MASTER = "local[2]";
    private static final String HOST = "mini1";
    private static final int PORT = 8888;
    private static final long BATCH_SECONDS = 5;
    private static final int STATE_PARTITIONS = 10;
    // Used when no checkpoint directory is passed on the command line.
    private static final String DEFAULT_CHECKPOINT_DIR = "C:\\workspace\\sparkTest\\src\\test\\java\\data\\";

    /**
     * Starts the streaming job and blocks until it terminates.
     *
     * @param args optional; {@code args[0]} overrides the default checkpoint directory
     */
    public static void main(String[] args) {
        String checkpointDir = args.length > 0 ? args[0] : DEFAULT_CHECKPOINT_DIR;
        JavaStreamingContext ssc =
                JavaStreamingContext.getOrCreate(checkpointDir, createContext(APP_NAME, checkpointDir));
        ssc.start();
        ssc.awaitTermination();
    }

    /**
     * Returns the factory that {@code getOrCreate} invokes when no usable checkpoint exists.
     *
     * @param appName       Spark application name
     * @param checkpointDir directory where streaming state and metadata are checkpointed
     * @return a factory producing a fully wired, not-yet-started streaming context
     */
    public static Function0<JavaStreamingContext> createContext(final String appName, final String checkpointDir) {
        return new Function0<JavaStreamingContext>() {
            @Override
            public JavaStreamingContext call() throws Exception {
                return buildContext(appName, checkpointDir);
            }
        };
    }

    /** Builds the context and wires the socket -> words -> pairs -> running-total pipeline. */
    private static JavaStreamingContext buildContext(String appName, String checkpointDir) {
        SparkConf sparkConf = new SparkConf().setMaster(MASTER).setAppName(appName);
        JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(BATCH_SECONDS));
        // updateStateByKey below requires a checkpoint directory to be configured.
        ssc.checkpoint(checkpointDir);

        JavaReceiverInputDStream<String> lines = ssc.socketTextStream(HOST, PORT);
        JavaDStream<String> words = lines.flatMap(new SplitWords());
        JavaPairDStream<String, Integer> pairs = words.mapToPair(new ToWordOne()).cache();
        JavaPairDStream<String, Integer> totals =
                pairs.updateStateByKey(new AddToRunningTotal(), STATE_PARTITIONS);
        totals.print();
        return ssc;
    }

    /** Splits a line on single spaces, dropping the empty tokens produced by blank lines or repeated spaces. */
    private static final class SplitWords implements FlatMapFunction<String, String> {
        private static final long serialVersionUID = 1L;

        @Override
        public Iterable<String> call(String line) throws Exception {
            List<String> words = new ArrayList<String>();
            for (String token : line.split(" ")) {
                if (!token.isEmpty()) {
                    words.add(token);
                }
            }
            return words;
        }
    }

    /** Maps a word to the pair {@code (word, 1)}. */
    private static final class ToWordOne implements PairFunction<String, String, Integer> {
        private static final long serialVersionUID = 1L;

        @Override
        public Tuple2<String, Integer> call(String word) throws Exception {
            return new Tuple2<String, Integer>(word, 1);
        }
    }

    /** Adds the current batch's counts for a key to its previous total (0 when there is none). */
    private static final class AddToRunningTotal
            implements Function2<List<Integer>, Optional<Integer>, Optional<Integer>> {
        private static final long serialVersionUID = 1L;

        @Override
        public Optional<Integer> call(List<Integer> batchValues, Optional<Integer> previous) throws Exception {
            int total = previous.isPresent() ? previous.get() : 0;
            for (Integer value : batchValues) {
                total += value;
            }
            return Optional.of(total);
        }
    }
}
使用有狀態的計算(例如用 updateStateByKey 計算累計量)時,狀態數據會被保存到 checkpoint 中,以縮短 RDD 的依賴鏈;因此,計算累計量必須先設置檢查點目錄。