package com.sitesh.test;
import java.io.*;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.state.filesystem.FsStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010;
import org.apache.flink.util.Collector;
import org.apache.kafka.common.serialization.StringDeserializer;
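/**
 * Flink streaming job that reads delimited change records from Kafka 0.10
 * topics and hands them to {@link HbaseSink}, which writes them into HBase.
 * The job is configured by a single properties file passed on the command
 * line; see the illustrative configuration next to init().
 */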
public class KafkaToHbaseMain {
    private static final Properties props = new Properties();

    // Load the job configuration file into the shared Properties object.
    public static void init(String path) throws IOException {
        try (InputStream input = new FileInputStream(path)) {
            props.load(input);
        }
    }
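    /*
     * Illustrative configuration file. The keys are exactly the ones read in
     * main(); the values are placeholders, not from any real deployment:
     *
     *   kafka.bootstrap.servers=host1:9092,host2:9092
     *   kafka.group.id=kafka-to-hbase
     *   kafka.R.topics=topicA,topicB
     *   hbase.zookeeper.quorum=zk1,zk2,zk3
     *   hbase.zookeeper.property.clientPort=2181
     *   hbase.table.name=test
     *   record.keyID.field.Pos=1
     *   record.routeID.field.Pos=28
     *   record.op.type.field.Pos=4
     *   dfs.flink.checkpoint.path=hdfs:///flink/checkpoints/kafka-to-hbase
     */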
    public static final class LineSplitter
            implements FlatMapFunction<String, Tuple2<String, Integer>> {

        private static final long serialVersionUID = 1L;

        @Override
        public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
            String[] tokens = value.toLowerCase().split("\\W+");
            for (String token : tokens) {
                if (token.length() > 0) {
                    out.collect(new Tuple2<>(token, 1));
                }
            }
        }
    }
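    /*
     * Note: LineSplitter is not wired into the pipeline built in main(). If a
     * word count were wanted, it would be attached roughly like this (sketch
     * only, not part of the job):
     *
     *   stream.flatMap(new LineSplitter()).keyBy(0).sum(1).print();
     */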
    public static void main(String[] args) throws Exception {
        if (args.length != 1) {
            System.err.println("Usage: KafkaToHbaseMain <config file>");
            System.exit(1);
        }
        // Fail fast if the configuration cannot be read.
        init(args[0]);

        // Required settings from the configuration file.
        String brokers = props.getProperty("kafka.bootstrap.servers").trim();
        String groupId = props.getProperty("kafka.group.id").trim();
        String topics = props.getProperty("kafka.R.topics").trim();
        String hbase_zk_quorum = props.getProperty("hbase.zookeeper.quorum").trim();
        String hbase_zk_port = props.getProperty("hbase.zookeeper.property.clientPort").trim();
        String tableName = props.getProperty("hbase.table.name").trim();
        String keyIDPos = props.getProperty("record.keyID.field.Pos").trim();
        String routeIDPos = props.getProperty("record.routeID.field.Pos").trim();
        String optypePos = props.getProperty("record.op.type.field.Pos").trim();
        String checkpoint = props.getProperty("dfs.flink.checkpoint.path").trim();

        // Reuse the same Properties object as the Kafka consumer configuration.
        props.clear();
        props.setProperty("bootstrap.servers", brokers);
        props.setProperty("group.id", groupId);
        // With checkpointing enabled, offsets are committed on checkpoints
        // (see setCommitOffsetsOnCheckpoints below); auto-commit is then moot.
        props.setProperty("enable.auto.commit", "true");
        props.setProperty("auto.commit.interval.ms", "5000");
        props.setProperty("auto.offset.reset", "earliest");
        props.setProperty("session.timeout.ms", "10000");
        props.setProperty("key.deserializer", StringDeserializer.class.getName());
        props.setProperty("value.deserializer", StringDeserializer.class.getName());

        List<String> R_topics = Arrays.asList(topics.split(","));

        // Hand the HBase settings to the sink via the global job parameters.
        Configuration conf = new Configuration();
        conf.setString("hbase.zookeeper.quorum", hbase_zk_quorum);
        conf.setString("hbase.zookeeper.property.clientPort", hbase_zk_port);
        conf.setString("tableName", tableName);
        conf.setInteger("keyIDPos", Integer.parseInt(keyIDPos));
        conf.setInteger("routeIDPos", Integer.parseInt(routeIDPos));
        conf.setInteger("optypePos", Integer.parseInt(optypePos));

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Checkpoint every 2 s, exactly-once, one checkpoint at a time; retain
        // externalized checkpoints on cancellation so the job can be restored.
        env.enableCheckpointing(2000L);
        env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        env.getCheckpointConfig().setCheckpointTimeout(60000L);
        env.getCheckpointConfig().setMaxConcurrentCheckpoints(1);
        env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
        env.setStateBackend(new FsStateBackend(checkpoint));
        // Event time is declared but nothing downstream uses timestamps.
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.getConfig().setGlobalJobParameters(conf);

        FlinkKafkaConsumer010<String> consumer =
            new FlinkKafkaConsumer010<>(R_topics, new SimpleStringSchema(), props);
        consumer.setCommitOffsetsOnCheckpoints(true);
        consumer.setStartFromGroupOffsets();

        DataStream<String> stream = env.addSource(consumer);
        stream.addSink(new HbaseSink());

        try {
            env.execute("KafkaToSink_" + tableName);
        } catch (Exception ex) {
            Logger.getLogger(KafkaToHbaseMain.class.getName()).log(Level.SEVERE, null, ex);
            throw ex;
        }
    }
}
--------------------------
package com.sitesh.test;
import java.io.IOException;
import com.sitesh.constant.PropertiesConstants;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
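/**
 * RichSinkFunction that writes each delimited record into HBase. Judging by
 * the offsets used in open() and invoke(), a record is a nine-field metadata
 * header followed by the payload: the operation type is read from the header
 * (optypePos), keyID/routeID are 1-based positions within the payload (hence
 * the +9 shift in open()), and everything after the header is stored as the
 * cell value. INSERT/UPDATE become Puts and DELETE becomes a Delete, keyed by
 * reverse(keyID) + "|" + routeID.
 */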
public class HbaseSink extends RichSinkFunction<String> {

    private static final Log LOG = LogFactory.getLog(HbaseSink.class);

    private TableName tableName;
    private Connection conn;
    private int keyIDPos;
    private int routeIDPos;
    private int optypePos;
    private Table htable;
    @Override
    public void open(Configuration param) throws Exception {
        super.open(param);
        // Pull the job-wide parameters registered in KafkaToHbaseMain.
        Configuration globConf =
            (Configuration) getRuntimeContext().getExecutionConfig().getGlobalJobParameters();
        String hbase_zk_quorum = globConf.getString("hbase.zookeeper.quorum", "");
        String hbase_zk_port = globConf.getString("hbase.zookeeper.property.clientPort", "11001");
        this.tableName = TableName.valueOf(globConf.getString("tableName", "test"));
        // keyID/routeID positions are 1-based within the payload, so shift them
        // past the nine-field header; the op type sits inside the header itself.
        this.keyIDPos = globConf.getInteger("keyIDPos", 1) + 9;
        this.routeIDPos = globConf.getInteger("routeIDPos", 28) + 9;
        this.optypePos = globConf.getInteger("optypePos", 4);
        org.apache.hadoop.conf.Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", hbase_zk_quorum);
        conf.set("hbase.zookeeper.property.clientPort", hbase_zk_port);
        conf.setInt("hbase.rpc.timeout", 20000);
        conf.setInt("hbase.client.operation.timeout", 30000);
        conf.setInt("hbase.client.scanner.timeout.period", 200000);
        this.conn = ConnectionFactory.createConnection(conf);
        this.htable = this.conn.getTable(this.tableName);
        this.htable.setWriteBufferSize(2097152L); // 2 MB client-side write buffer
    }
    @Override
    public void invoke(String s) throws IOException {
        // Strip stray line terminators; "\r\n" is already covered by "\r".
        if (s.contains("\n") || s.contains("\r")) {
            s = s.trim();
        }
        String[] str = s.split(String.valueOf(PropertiesConstants.SEP));
        String keyID = str[this.keyIDPos - 1];
        // Reverse the keyID, a common trick to spread sequential keys across regions.
        keyID = new StringBuilder(keyID).reverse().toString();
        String routeID = str[this.routeIDPos - 1];
        String rowkey = keyID + "|" + routeID;
        String op_type = str[this.optypePos - 1];
        // Skip the nine header fields; everything after the ninth separator is
        // the value payload. SEP is assumed to be the single character '\007',
        // as the original code's "\007".length() step suggests.
        int index = 0;
        for (int count = 0; count < 9; count++) {
            if ((index = s.indexOf(PropertiesConstants.SEP, index)) != -1) {
                index += 1; // step past the separator
            }
        }
        String value = StringUtils.substring(s, index);
        if ("INSERT".equalsIgnoreCase(op_type) || "UPDATE".equalsIgnoreCase(op_type)) {
            Put put = new Put(Bytes.toBytes(rowkey));
            put.addColumn(Bytes.toBytes("c1"), Bytes.toBytes("f1"), Bytes.toBytes(value));
            this.htable.put(put);
        } else if ("DELETE".equalsIgnoreCase(op_type)) {
            Delete de = new Delete(Bytes.toBytes(rowkey));
            this.htable.delete(de);
        }
    }
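    /*
     * Illustrative record (SEP rendered as '|'; data entirely made up). With
     * the defaults optypePos=4 and keyIDPos=1+9:
     *
     *   m1|m2|m3|INSERT|m5|m6|m7|m8|m9|ID123|f2|f3|...
     *
     * op_type is "INSERT" (4th header field), keyID is "ID123" (10th field,
     * reversed to "321DI" in the rowkey), and the Put value is everything
     * after the ninth separator: "ID123|f2|f3|...".
     */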
    @Override
    public void close() throws Exception {
        if (this.htable != null) {
            this.htable.close();
        }
        if (this.conn != null) {
            this.conn.close();
        }
    }
}