Flink Table API和SQL的分析及使用(二)

案例:读取Csv文件中的内容,打印到控制台

a.csv
zs,15
ww,18
ls,20

package com.example;

import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.StreamTableEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.sources.CsvTableSource;
import org.apache.flink.table.sources.TableSource;

/**
 * 读取Csv文件中的内容,打印到控制台
 */
package com.example;

import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.StreamTableEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.sources.CsvTableSource;
import org.apache.flink.table.sources.TableSource;

/**
 * Reads the rows of a CSV file through the Flink Table API and prints them to the console.
 *
 * <p>The file {@code src/main/resources/a.csv} is registered as the table {@code student}
 * with the columns {@code name} (STRING) and {@code age} (INT); every row selected from it
 * is converted to a {@link Student} and printed.
 */
public class FlinkTableCsv {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Hold the environment under its Java-specific type so that toAppendStream can be
        // called directly, without downcasting from the base StreamTableEnvironment later.
        org.apache.flink.table.api.java.StreamTableEnvironment tableEnv =
                StreamTableEnvironment.getTableEnvironment(env);

        // Describe the CSV schema and create the table source
        String[] fieldsName = {"name", "age"};
        TypeInformation<?>[] fieldsType = {Types.STRING, Types.INT};
        CsvTableSource csvSource = new CsvTableSource("src/main/resources/a.csv", fieldsName, fieldsType);

        // Register the table source under the name used by the query
        tableEnv.registerTableSource("student", csvSource);
        // Query every row of the registered table
        Table student = tableEnv.sqlQuery("select * from student");
        // Convert the result table into an append-only DataStream of Student objects
        DataStream<Student> csvStream = tableEnv.toAppendStream(student, Student.class);
        // Print through a single sink instance
        csvStream.print().setParallelism(1);

        env.execute("csvStream");
    }
}

案例:读取student.txt文件中的学生记录,统计记录总数和平均年龄,并把结果写入到student.csv文件中

student.txt
zhangsangsang,18
wangwu,20
liliuliu,17

package com.example;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.table.api.BatchTableEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.api.Types;
import org.apache.flink.table.sinks.CsvTableSink;

/**
 * Reads the student records in {@code student.txt}, computes the number of records and the
 * average age with a SQL query, and writes the single result row to {@code student.csv}.
 */
public class FlinkTableCount {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        // Hold the environment under its Java-specific type so that fromDataSet can be
        // called directly, without downcasting from the base BatchTableEnvironment later.
        org.apache.flink.table.api.java.BatchTableEnvironment tableEnv =
                TableEnvironment.getTableEnvironment(env);

        // Read the text file and map every line to a Student
        DataSource<String> dataSource = env.readTextFile("src/main/resources/student.txt");
        DataSet<Student> inputData = dataSource.map(new MapFunction<String, Student>() {
            @Override
            public Student map(String value) throws Exception {
                return parseStudent(value);
            }
        });

        // Convert the DataSet into a Table
        Table table = tableEnv.fromDataSet(inputData);
        // Register the table under the name used by the query
        tableEnv.registerTable("student", table);

        // Run the aggregation: record count and average age
        Table sqlQuery = tableEnv.sqlQuery("select count(1),avg(age) from student");

        // Create the CSV sink: "," as field delimiter, one output file, overwrite if present
        CsvTableSink csvTableSink = new CsvTableSink("src/main/resources/student.csv",",",1, FileSystem.WriteMode.OVERWRITE);
        // Register the sink together with the schema of the rows written to it
        tableEnv.registerTableSink("csvOutPutTable",
                new String[]{"count","avg_age"},
                new TypeInformation<?>[]{Types.LONG(), Types.INT()},
                csvTableSink);

        // Emit the query result into the registered sink
        sqlQuery.insertInto("csvOutPutTable");

        env.execute("sql-batch");
    }

    /**
     * Parses one {@code name,age} line into a {@link Student}.
     *
     * @param line a single line of the input file
     * @return the student described by the line
     * @throws IllegalArgumentException if the line has fewer than two comma-separated fields
     *         or the age is not an integer ({@link NumberFormatException})
     */
    private static Student parseStudent(String line) {
        String[] fields = line.split(",");
        if (fields.length < 2) {
            // Fail with the offending line instead of a bare ArrayIndexOutOfBoundsException
            throw new IllegalArgumentException("Malformed student record (expected name,age): " + line);
        }
        // Trim the age so that "name, 18" is accepted as well as "name,18"
        return new Student(fields[0], Integer.parseInt(fields[1].trim()));
    }
}

Student实体类

package com.example;

/**
 * Simple student bean with a name and an age.
 *
 * <p>Exposes a public no-argument constructor plus a getter and setter for each field.
 */
public class Student {
    private String name;
    private int age;

    /** Creates a student whose name is {@code null} and whose age is {@code 0}. */
    public Student() {
    }

    /**
     * Creates a student with the given values.
     *
     * @param name the student's name
     * @param age  the student's age
     */
    public Student(String name, int age) {
        this.age = age;
        this.name = name;
    }

    /** @return the student's name */
    public String getName() {
        return this.name;
    }

    /** @param name the new name */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the student's age */
    public int getAge() {
        return this.age;
    }

    /** @param age the new age */
    public void setAge(int age) {
        this.age = age;
    }

    /** @return the text {@code name:<name>,age:<age>} */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append("name:").append(name);
        text.append(",age:").append(age);
        return text.toString();
    }
}

pom文件

    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>1.7.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.11</artifactId>
            <version>1.7.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka-0.11_2.11</artifactId>
            <version>1.7.2</version>
        </dependency>
        <!--flink table核心包-->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table_2.11</artifactId>
            <version>1.7.2</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-scala_2.11</artifactId>
            <version>1.7.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-scala_2.11</artifactId>
            <version>1.7.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-core</artifactId>
            <version>1.7.2</version>
        </dependency>
    </dependencies>
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章