package kafka;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;
import java.util.Properties;
import java.util.Random;
import java.util.UUID;
/**
 * Simulates a real-time stream of words.
 *
 * <p>Roughly every {@code SEND_INTERVAL_MS} milliseconds one random lowercase
 * letter ('a'..'z') is published to the Kafka topic {@code "wordcount"}, keyed by
 * a freshly generated random UUID. The program runs until its main thread is
 * interrupted (or the JVM is killed).
 */
public class GenerateWords {

    /** Kafka topic the generated words are published to. */
    private static final String TOPIC = "wordcount";

    /** Pause before each record is produced, in milliseconds. */
    private static final long SEND_INTERVAL_MS = 500L;

    /** Code point of the first candidate letter, 'a' (97). */
    private static final int FIRST_LETTER = 'a';

    /** Number of candidate letters ('a'..'z'). */
    private static final int LETTER_COUNT = 26;

    /**
     * Entry point: configures a String/String Kafka producer and publishes random
     * single-letter words in a loop.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        Properties props = new Properties();
        props.setProperty("bootstrap.servers", "hadoop01:9092,hadoop02:9092,hadoop03:9092");
        props.setProperty("key.serializer", StringSerializer.class.getName());
        props.setProperty("value.serializer", StringSerializer.class.getName());
        // Acknowledgement level requested for every send. Valid values: all, -1, 0, 1.
        //   0      : the producer does not wait for any acknowledgement
        //   1      : the partition leader acknowledges the write
        //   -1/all : the leader acknowledges only after the followers have replicated it
        // Set explicitly because the client default differs between Kafka versions.
        props.setProperty("acks", "1");

        // One generator for the whole run instead of a new instance per record.
        Random random = new Random();

        // try-with-resources guarantees close(), which also flushes buffered records,
        // on every exit path.
        try (KafkaProducer<String, String> kafkaProducer = new KafkaProducer<>(props)) {
            while (!Thread.currentThread().isInterrupted()) {
                Thread.sleep(SEND_INTERVAL_MS);
                String key = UUID.randomUUID().toString();
                // Pick a random one-letter "word".
                char word = (char) (FIRST_LETTER + random.nextInt(LETTER_COUNT));
                System.out.println("word=" + word);
                ProducerRecord<String, String> record =
                        new ProducerRecord<>(TOPIC, key, String.valueOf(word));
                // send() is asynchronous; the callback surfaces delivery failures
                // that would otherwise be dropped silently.
                kafkaProducer.send(record, (metadata, exception) -> {
                    if (exception != null) {
                        System.err.println("failed to send record=" + record);
                        exception.printStackTrace();
                    }
                });
                System.out.println("record=" + record);
            }
        } catch (InterruptedException e) {
            // Interruption is the shutdown signal: restore the flag and leave the
            // loop instead of swallowing it and spinning forever.
            Thread.currentThread().interrupt();
        }
    }
}
package kafka
import java.lang
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}
/**
 * Spark Streaming job that consumes the words published to the Kafka topic
 * "wordcount" and prints, for every 2-second batch, how many times each word
 * occurred in that batch.
 *
 * Counts are computed per batch only; nothing is accumulated across batches.
 */
object ConsumerWords {

  /**
   * Builds the streaming context, subscribes to the topic and runs until the
   * context is terminated.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Local import: the offset-handling traits are only needed inside this method.
    import org.apache.spark.streaming.kafka010.{CanCommitOffsets, HasOffsetRanges}

    // Fall back to local mode only when no master was supplied (e.g. by
    // spark-submit), instead of unconditionally overriding it.
    val conf = new SparkConf()
      .setAppName("consumerwords")
      .setIfMissing("spark.master", "local[*]")
    // Pull data from Kafka in 2-second batches.
    val ssc = new StreamingContext(conf, Seconds(2))
    // Consumer group id.
    val groupid = "day_001"
    // Kafka consumer configuration.
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "hadoop01:9092,hadoop02:9092,hadoop03:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> groupid,
      // Start from the earliest offset when the group has no committed offset.
      "auto.offset.reset" -> "earliest",
      // Auto-commit is disabled; offsets are committed manually after each batch below.
      "enable.auto.commit" -> (false: lang.Boolean)
    )
    // Direct stream over the "wordcount" topic.
    val stream = KafkaUtils.createDirectStream[String, String](
      ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](Array("wordcount"), kafkaParams)
    )
    stream.foreachRDD { rdd =>
      // Offset ranges are only available on the RDD handed out by the direct
      // stream, so capture them before any transformation.
      val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges

      // Per-batch word count. The reduced result is brought to the driver so the
      // output appears on the driver console rather than in executor logs.
      rdd.map(crd => (crd.value(), 1))
        .reduceByKey(_ + _)
        .collect()
        .foreach(println)

      // Commit only after the batch has been processed, so a restart resumes from
      // the last processed position instead of re-reading the topic from the
      // earliest offset (at-least-once delivery).
      stream.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
    }
    ssc.start()
    ssc.awaitTermination()
  }
}
該方案只統計每個批次內的單詞個數,不能累計過去批次的結果;如需累計歷史單詞個數,可以藉助 updateStateByKey 算子實現(使用前需先通過 ssc.checkpoint 設置 checkpoint 目錄)。