import java.util.Properties

import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.functions.source.SourceFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011

import scala.util.Random

object SourceDemo {

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Data sources
    // 1. Read from a file
    val inpath = "D:\\programs\\sparkPrograms\\FlinkProgarm\\src\\main\\resources\\hello.txt"
    val stream1 = env.readTextFile(inpath)
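    // readTextFile is a bounded source: with the default PROCESS_ONCE mode the
    // stream ends once the file has been read. Uncomment to inspect the lines:
    // stream1.print("stream1")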
    // 2. Read from a socket stream
    val stream2 = env.socketTextStream("hadoop01", 7777)
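    // To feed this source while testing, open a netcat listener on the
    // (assumed) host first, e.g. run `nc -lk 7777` on hadoop01.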
    // 3. Read from Kafka
    val properties = new Properties()
    properties.setProperty("bootstrap.servers", "hadoop01:9092")
    properties.setProperty("group.id", "consumer-group")
    properties.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    properties.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    properties.setProperty("auto.offset.reset", "latest")
    val stream3 = env.addSource(
      new FlinkKafkaConsumer011[String]("sensor", new SimpleStringSchema(), properties))
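    // Note: FlinkKafkaConsumer011 needs the flink-connector-kafka-0.11 artifact on
    // the classpath (see the build sketch at the end of this file). The key/value
    // deserializer properties above are effectively ignored: Flink decodes record
    // values with the supplied DeserializationSchema (SimpleStringSchema here).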
    // stream3.print("stream3")

    // 4. Custom source
    val stream4: DataStream[SensorReading] = env.addSource(new MySensorSource())
    stream4.print("Stream4")
    env.execute()
  }
  // A single sensor reading: sensor id, event timestamp (ms), temperature
  case class SensorReading(id: String, timestamp: Long, temperature: Double)
  class MySensorSource extends SourceFunction[SensorReading] {

    // Flag indicating whether the source is still running. cancel() is invoked
    // from a different thread than run(), so the flag is marked @volatile.
    @volatile var running: Boolean = true
    override def run(sourceContext: SourceFunction.SourceContext[SensorReading]): Unit = {
      // Initialize a random number generator
      val rand = new Random()
      // Initial temperatures: nextGaussian() draws from a standard normal
      // distribution, so roughly 95% of values fall within +-2 sigma of the mean
      var curTemp = (1 to 10).map(
        i => ("sensor_" + i, 65 + rand.nextGaussian() * 20)
      )
      while (running) {
        // Update the temperatures with a small random walk
        curTemp = curTemp.map(
          t => (t._1, t._2 + rand.nextGaussian())
        )
        // Get the current timestamp
        val curTime = System.currentTimeMillis()
        // Emit the readings one by one via collect()
        curTemp.foreach(
          t => sourceContext.collect(SensorReading(t._1, curTime, t._2))
        )
        // Emit a new batch roughly every 100 ms
        Thread.sleep(100)
      }
    }

    override def cancel(): Unit = {
      running = false
    }
  }
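
  // Note: a source implementing only the plain SourceFunction interface, like
  // MySensorSource, always runs with parallelism 1. For a parallel custom source,
  // extend ParallelSourceFunction or RichParallelSourceFunction instead.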
}
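
// A minimal build.sbt sketch for this demo (artifact names from the Flink 1.7.x
// line; the version number is an assumption, match it to your cluster):
//
//   libraryDependencies ++= Seq(
//     "org.apache.flink" %% "flink-scala"                % "1.7.2",
//     "org.apache.flink" %% "flink-streaming-scala"      % "1.7.2",
//     "org.apache.flink" %% "flink-connector-kafka-0.11" % "1.7.2"
//   )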