Flink - Data Sources

Reading from a collection

    private static void radFromCollection(String[] args) throws Exception {
        // Parse the command-line arguments into a parameter object
        MultipleParameterTool params = MultipleParameterTool.fromArgs(args);
        // Batch execution environment (not used here):
//        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        // Create the stream execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Set the parallelism of each operator; the default is the number of CPU cores (in a local/test setup)
        env.setParallelism(2);
        // Set the maximum parallelism
        env.setMaxParallelism(6);

        // Read from a collection
        List<String> collectionData = Arrays.asList("a", "b", "c", "d");
        DataStreamSource<String> dataStreamSource = env.fromCollection(collectionData);
        // Read from individual elements:
        // env.fromElements("a", "b", "c", "d");
        dataStreamSource.print(); // equivalent to dataStreamSource.addSink(new PrintSinkFunction<>());

        env.execute();
    }
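When the collection holds generic elements such as tuples, the element type can also be given explicitly through the fromCollection overload that accepts a TypeInformation. A minimal sketch, assuming org.apache.flink.api.common.typeinfo.Types and org.apache.flink.api.java.tuple.Tuple2 are on the classpath:

        // Sketch: a collection of Tuple2 elements with explicit type information
        List<Tuple2<String, Integer>> tuples = Arrays.asList(Tuple2.of("a", 1), Tuple2.of("b", 2));
        DataStreamSource<Tuple2<String, Integer>> tupleSource =
                env.fromCollection(tuples, Types.TUPLE(Types.STRING, Types.INT));
        tupleSource.print();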

Reading from a file

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Read a text file line by line, with an explicit charset
        DataStreamSource<String> dataStreamSource = env.readTextFile("E:\\GIT\\flink-learn\\flink1\\word.txt", "utf-8");
        dataStreamSource.print();
        env.execute();
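readTextFile reads the file once, and the stream ends when the file has been fully consumed. For watching a path continuously, the environment also offers readFile with a FileProcessingMode; a minimal sketch, assuming Flink's TextInputFormat (org.apache.flink.api.java.io) and the same illustrative path:

        // Sketch: re-process the path whenever its contents change
        String path = "E:\\GIT\\flink-learn\\flink1\\word.txt";
        TextInputFormat format = new TextInputFormat(new org.apache.flink.core.fs.Path(path));
        DataStreamSource<String> monitored = env.readFile(
                format,
                path,
                FileProcessingMode.PROCESS_CONTINUOUSLY,
                1000L); // scan interval in milliseconds
        monitored.print();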

Reading from Kafka

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Kafka consumer configuration
        Properties properties = new Properties();
        properties.put("bootstrap.servers", "10.1.5.130:9092");
        properties.put("zookeeper.connect", "10.2.5.135:2181");
        properties.put("group.id", "my-flink");
        properties.put("auto.offset.reset", "latest");
        properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

        FlinkKafkaConsumer010<String> kafkaConsumer010 = new FlinkKafkaConsumer010<>(
                "flink",// topic
                new SimpleStringSchema(),
                properties
        );
        DataStreamSource<String> dataStreamSource = env.addSource(kafkaConsumer010);
        dataStreamSource.print();
        env.execute();
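FlinkKafkaConsumer010 is the legacy connector for Kafka 0.10. From Flink 1.12 onwards, the unified KafkaSource builder in flink-connector-kafka is the recommended way to consume Kafka; a minimal sketch reusing the broker, topic, and group values above:

        // Sketch: KafkaSource from org.apache.flink.connector.kafka.source (Flink 1.12+)
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers("10.1.5.130:9092")
                .setTopics("flink")
                .setGroupId("my-flink")
                .setStartingOffsets(OffsetsInitializer.latest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        // fromSource attaches a WatermarkStrategy; none is needed for plain processing time
        DataStreamSource<String> stream = env.fromSource(
                source, WatermarkStrategy.noWatermarks(), "kafka-source");
        stream.print();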

Reading from a custom Source

  • Implement org.apache.flink.streaming.api.functions.source.SourceFunction
    public static final class MyDataSource implements SourceFunction<String> {

        // volatile so that cancel(), which is called from another thread, is visible to run()
        private volatile boolean running = true;

        @Override
        public void run(SourceContext<String> sourceContext) throws Exception {
            Random random = new Random();
            while (running) {
                // Emit a random value together with the current time as its event timestamp
                double data = random.nextDouble() * 100;
                sourceContext.collectWithTimestamp(String.valueOf(data), System.currentTimeMillis());
                TimeUnit.SECONDS.sleep(1);
            }
        }

        @Override
        public void cancel() {
            this.running = false;
        }
    }
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<String> dataStreamSource = env.addSource(new MyDataSource());
        dataStreamSource.print();
        env.execute();
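A plain SourceFunction always runs with parallelism 1. If the custom source should emit from several parallel subtasks, the same logic can implement ParallelSourceFunction (same package as SourceFunction) instead; a minimal sketch under that assumption:

    // Sketch: each parallel subtask runs its own copy of run()
    public static final class MyParallelDataSource implements ParallelSourceFunction<String> {

        private volatile boolean running = true;

        @Override
        public void run(SourceContext<String> sourceContext) throws Exception {
            Random random = new Random();
            while (running) {
                sourceContext.collect(String.valueOf(random.nextDouble() * 100));
                TimeUnit.SECONDS.sleep(1);
            }
        }

        @Override
        public void cancel() {
            running = false;
        }
    }

    // Usage: env.addSource(new MyParallelDataSource()).setParallelism(2).print();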
