Flink: syncing Kafka data into HBase

The job below consumes records from Kafka with FlinkKafkaConsumer010 and writes each one to HBase through a custom RichSinkFunction (HbaseSink, shown after the main class).

package com.sitesh.test;
import java.io.*;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.state.filesystem.FsStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.CheckpointConfig.ExternalizedCheckpointCleanup;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010;
import org.apache.flink.util.Collector;
import org.apache.kafka.common.serialization.StringDeserializer;

public class KafkaToHbaseMain {
    private static final Properties props = new Properties();
    // Initialize the configuration: load the job properties from the given file path
    public static void init(String path) throws Exception {
        try (InputStream input = new FileInputStream(new File(path))) {
            props.load(input);
        }
    }

    // Simple word splitter emitting (word, 1) pairs; not used by the Kafka-to-HBase pipeline below
    public static final class LineSplitter
            implements FlatMapFunction<String, Tuple2<String, Integer>> {
        private static final long serialVersionUID = 1L;

        @Override
        public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
            String[] tokens = value.toLowerCase().split("\\W+");
            for (String token : tokens) {
                if (token.length() > 0) {
                    out.collect(new Tuple2<>(token, 1));
                }
            }
        }
    }

    public static void main(String[] args)
            throws Exception
    {
        if (args.length != 1) {
            System.err.println("Usage: KafkaToHbaseMain <config file>");
            System.exit(1);
        }
        try {
            init(args[0]);
        } catch (Exception e) {
            e.printStackTrace();
        }
        // Read job settings from the loaded properties file
        String brokers = props.getProperty("kafka.bootstrap.servers").trim();
        String groupId = props.getProperty("kafka.group.id").trim();
        String topics = props.getProperty("kafka.R.topics").trim();


        String hbase_zk_quorum = props.getProperty("hbase.zookeeper.quorum").trim();
        String hbase_zk_port = props.getProperty("hbase.zookeeper.property.clientPort").trim();
        String tableName = props.getProperty("hbase.table.name").trim();

        String keyIDPos = props.getProperty("record.keyID.field.Pos").trim();
        String routeIDPos = props.getProperty("record.routeID.field.Pos").trim();


        String optypePos = props.getProperty("record.op.type.field.Pos").trim();


        String checkpoint = props.getProperty("dfs.flink.checkpoint.path").trim();



        // Reuse the Properties object as the Kafka consumer configuration
        props.clear();
        props.put("bootstrap.servers", brokers);
        props.put("group.id", groupId);
        props.put("enable.auto.commit", "true");
        props.put("auto.commit.interval.ms", "5000");
        props.put("auto.offset.reset", "earliest");
        props.put("session.timeout.ms", "10000");
        props.put("key.deserializer", StringDeserializer.class.getName());
        props.put("value.deserializer", StringDeserializer.class.getName());
        List<String> R_topics = Arrays.asList(topics.split(","));


        // Pass the HBase settings to the sink through Flink's global job parameters
        Configuration conf = new Configuration();
        conf.setString("hbase.zookeeper.quorum", hbase_zk_quorum);
        conf.setString("hbase.zookeeper.property.clientPort", hbase_zk_port);
        conf.setString("tableName", tableName);
        conf.setString("keyIDPos", keyIDPos);
        conf.setString("routeIDPos", routeIDPos);
        conf.setString("optypePos", optypePos);


        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Checkpoint every 2 seconds with exactly-once semantics
        env.enableCheckpointing(2000L);
        env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        env.getCheckpointConfig().setCheckpointTimeout(60000L);
        env.getCheckpointConfig().setMaxConcurrentCheckpoints(1);
        // Keep externalized checkpoints when the job is cancelled
        env.getCheckpointConfig().enableExternalizedCheckpoints(ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        // Store checkpoints on the filesystem path from the config
        env.setStateBackend(new FsStateBackend(checkpoint));

        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        env.getConfig().setGlobalJobParameters(conf);




        // Kafka source: consume the configured topics as raw strings
        FlinkKafkaConsumer010<String> consumer =
                new FlinkKafkaConsumer010<>(R_topics, new SimpleStringSchema(), props);

        // Commit offsets to Kafka on completed checkpoints and start from the group's committed offsets
        consumer.setCommitOffsetsOnCheckpoints(true);
        consumer.setStartFromGroupOffsets();



        DataStream<String> stream = env.addSource(consumer);

        // Write every record to HBase through the custom sink
        stream.addSink(new HbaseSink());
        try
        {
            env.execute("KafkaToSink_" + tableName);
        }
        catch (Exception ex)
        {
            Logger.getLogger(KafkaToHbaseMain.class.getName()).log(Level.SEVERE, null, ex);
            ex.printStackTrace();
        }
    }
}
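
For reference, the configuration file passed as the single command-line argument would look roughly like the one below. The keys are the ones read in main(); every value is only an illustrative placeholder and needs to be adapted to your environment:

kafka.bootstrap.servers=broker1:9092,broker2:9092
kafka.group.id=kafka-to-hbase
kafka.R.topics=topic_a,topic_b
hbase.zookeeper.quorum=zk1,zk2,zk3
hbase.zookeeper.property.clientPort=2181
hbase.table.name=test
record.keyID.field.Pos=1
record.routeID.field.Pos=28
record.op.type.field.Pos=4
dfs.flink.checkpoint.path=hdfs:///flink/checkpoints/kafka-to-hbase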

 

--------------------------

package com.sitesh.test;

import java.io.IOException;
import java.io.Serializable;

import com.sitesh.constant.PropertiesConstants;
import com.sitesh.util.HbaseUtil;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;


public class HbaseSink
        extends RichSinkFunction<String>
        implements Serializable {
    private static final Log LOG = LogFactory.getLog(HbaseSink.class);
    private HbaseUtil hbaseUtil;
    public TableName tableName;
    public Connection conn;
    public int keyIDPos;
    public int routeIDPos;
    public int optypePos;
    private Table htable;


    @Override
    public void open(Configuration param) throws Exception {
        super.open(param);

        // Read the settings passed in via the global job parameters
        Configuration globConf = (Configuration) getRuntimeContext().getExecutionConfig().getGlobalJobParameters();

        String hbase_zk_quorum = globConf.getString("hbase.zookeeper.quorum", "");
        String hbase_zk_port = globConf.getString("hbase.zookeeper.property.clientPort", "11001");
        this.tableName = TableName.valueOf(globConf.getString("tableName", "test"));
        // Positions are 1-based; keyID/routeID positions are offset by 9
        this.keyIDPos = globConf.getInteger("keyIDPos", 1) + 9;
        this.routeIDPos = globConf.getInteger("routeIDPos", 28) + 9;
        this.optypePos = globConf.getInteger("optypePos", 4);

        // Build the HBase client configuration and open the target table
        org.apache.hadoop.conf.Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", hbase_zk_quorum);
        conf.set("hbase.zookeeper.property.clientPort", hbase_zk_port);
        conf.setInt("hbase.rpc.timeout", 20000);
        conf.setInt("hbase.client.operation.timeout", 30000);
        conf.setInt("hbase.client.scanner.timeout.period", 200000);
        this.conn = ConnectionFactory.createConnection(conf);
        this.htable = this.conn.getTable(this.tableName);
        // 2 MB client-side write buffer
        this.htable.setWriteBufferSize(2097152L);
    }

    @Override
    public void invoke(String s) throws IOException {
        // Strip any trailing line breaks before splitting the record
        if (s.contains("\n") || s.contains("\r")) {
            s = s.trim();
        }
        // Split the record on the configured field separator
        String[] str = s.split(String.valueOf(PropertiesConstants.SEP));


        String keyID = str[this.keyIDPos - 1];
        // Reverse the keyID for the rowkey (spreads sequential keys across regions)
        keyID = new StringBuilder(keyID).reverse().toString();

        String routeID = str[this.routeIDPos - 1];
        String rowkey = keyID + "|" + routeID;
        String op_type = str[this.optypePos - 1];


        // Skip the first nine fields; everything after them is stored as the cell value
        int index = 0;
        for (int count = 0; count < 9; count++) {
            if ((index = s.indexOf(PropertiesConstants.SEP, index)) != -1) {
                index++; // step past the separator
            }
        }
        String value = StringUtils.substring(s, index);
        if (op_type.equalsIgnoreCase("INSERT") || op_type.equalsIgnoreCase("UPDATE")) {
            // Upsert: write the remaining fields into column family "c1", qualifier "f1"
            Put put = new Put(Bytes.toBytes(rowkey));
            put.addColumn(Bytes.toBytes("c1"), Bytes.toBytes("f1"), Bytes.toBytes(value));
            this.htable.put(put);
        } else if (op_type.equalsIgnoreCase("DELETE")) {
            Delete de = new Delete(Bytes.toBytes(rowkey));
            this.htable.delete(de);
        }
    }


    @Override
    public void close() throws Exception {
        if (this.htable != null) {
            this.htable.close();
        }
        if (this.conn != null) {
            this.conn.close();
        }
    }
}
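
Assuming the two classes are packaged into a job jar (the jar name and properties path below are placeholders), the job can be submitted with something like:

flink run -c com.sitesh.test.KafkaToHbaseMain kafka-to-hbase.jar /path/to/job.properties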