In the previous article we described how to implement a custom Source by extending RichSourceFunction. In this one we continue with connectors: we read the source data through a connector and write it into another storage system. The idea is that Flink acts as a consumer of a Kafka topic and inserts the records into MySQL/HBase in real time. The data simulates a website's click logs with the fields city, loginTime (login time), OS (operating system), and phoneName (phone model). Let's get started.
1. Add the Maven Dependencies
<dependencies>
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>5.1.46</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-java</artifactId>
        <version>1.7.2</version>
        <!-- 'provided' is used at compile time but excluded at runtime,
             so it has to stay commented out for local testing -->
        <!--<scope>provided</scope>-->
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-streaming-java_2.11</artifactId>
        <version>1.7.2</version>
        <!--<scope>provided</scope>-->
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-kafka-0.11_2.11</artifactId>
        <!-- keep the connector version aligned with the Flink version -->
        <version>1.7.2</version>
        <!--<scope>provided</scope>-->
    </dependency>
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>1.2.51</version>
    </dependency>
</dependencies>
2. The Entity Class
This class wraps the records we receive from Kafka. We already explained the advantages of this approach in the earlier article 《Flink自定義DataSource之MysqlSource》, so we won't repeat them here.
package com.xpu.kafkatomysql;

/**
 * Entity class wrapping one click-log record.
 * create by xiax.xpu on @Date 2019/4/13 12:14
 */
public class Entity {
    public String phoneName;
    public String os;
    public String city;
    public String loginTime;

    public Entity() {
    }

    public Entity(String phoneName, String os, String city, String loginTime) {
        this.phoneName = phoneName;
        this.os = os;
        this.city = city;
        this.loginTime = loginTime;
    }

    @Override
    public String toString() {
        return "Entity{" +
                "phoneName='" + phoneName + '\'' +
                ", os='" + os + '\'' +
                ", city='" + city + '\'' +
                ", loginTime='" + loginTime + '\'' +
                '}';
    }

    public String getPhoneName() {
        return phoneName;
    }

    public void setPhoneName(String phoneName) {
        this.phoneName = phoneName;
    }

    public String getOs() {
        return os;
    }

    public void setOs(String os) {
        this.os = os;
    }

    public String getCity() {
        return city;
    }

    public void setCity(String city) {
        this.city = city;
    }

    public String getLoginTime() {
        return loginTime;
    }

    public void setLoginTime(String loginTime) {
        this.loginTime = loginTime;
    }
}
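fastjson reads and writes this bean through its public getters and setters, which is why the boilerplate above matters. As a quick standalone sanity check (a sketch, not part of the pipeline; the class name is just for illustration), you can round-trip an Entity through JSON the same way the producer and the Flink job below will:

package com.xpu.kafkatomysql;

import com.alibaba.fastjson.JSON;

public class EntityJsonCheck {
    public static void main(String[] args) {
        Entity entity = new Entity("HUAWEI", "Android 7.0", "北京", "2019-04-13 12:14:00");
        // serialize exactly as KafkaCreateData does before sending to the topic
        String json = JSON.toJSONString(entity);
        System.out.println(json);
        // deserialize exactly as the Flink map() operator does after consuming
        Entity parsed = JSON.parseObject(json, Entity.class);
        System.out.println(parsed);
    }
}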
3. Simulating a Kafka Producer
Here the mock records are generated in a loop. The relevant code is as follows:
package com.xpu.kafkatomysql;

import com.alibaba.fastjson.JSON;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Properties;

/**
 * Producer code simulating data sent from Java to a Kafka topic.
 *
 * create by xiax.xpu on @Date 2019/4/13 14:28
 */
public class KafkaCreateData {
    public static final String topic = "kafka_flink_mysql";
    public static String brokerList = "192.168.83.129:9092";

    public static void createData() {
        Entity entity = new Entity();
        Properties props = new Properties();
        // Kafka connection settings
        props.put("bootstrap.servers", brokerList);
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");   // key serializer
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer"); // value serializer
        props.put("request.required.acks", "1");
        KafkaProducer<String, String> producer = new KafkaProducer<>(props);
        // phone models
        String[] phoneArray = {"iPhone", "HUAWEI", "xiaomi", "moto", "vivo"};
        // operating systems
        String[] osArray = {"Android 7.0", "Mac OS", "Apple Kernel", "Windows", "kylin OS", "chrome"};
        // cities
        String[] cityArray = {"北京", "上海", "杭州", "南京", "西藏", "西安", "合肥", "葫蘆島"};
        // pick a random phone model
        String phoneName = phoneArray[(int) (Math.random() * phoneArray.length)];
        // pick a random OS
        String os = osArray[(int) (Math.random() * osArray.length)];
        // pick a random city
        String city = cityArray[(int) (Math.random() * cityArray.length)];
        // timestamp: the current time
        SimpleDateFormat sf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        String loginTime = sf.format(new Date());
        // load the values into the entity
        entity.setCity(city);
        entity.setLoginTime(loginTime);
        entity.setOs(os);
        entity.setPhoneName(phoneName);
        ProducerRecord<String, String> record = new ProducerRecord<>(topic, JSON.toJSONString(entity));
        producer.send(record);
        // a production setup would reuse one producer instead of creating one per
        // record; closing it here keeps this demo from leaking connections
        producer.close();
        System.out.println("Sent: " + JSON.toJSONString(entity));
    }

    public static void main(String[] args) throws InterruptedException {
        while (true) {
            createData();
            Thread.sleep(500);
        }
    }
}
(Screenshot: the producer running in IDEA)
On Linux, verify the topic with the console consumer that ships with Kafka:
./kafka-console-consumer.sh --bootstrap-server 192.168.83.129:9092 --topic kafka_flink_mysql
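With the producer running, the consumer should print one JSON record roughly every 500 ms, looking something like this (illustrative values; the field order reflects fastjson's default alphabetical sorting):

{"city":"杭州","loginTime":"2019-04-13 14:30:21","os":"Android 7.0","phoneName":"HUAWEI"}
{"city":"北京","loginTime":"2019-04-13 14:30:22","os":"Mac OS","phoneName":"vivo"}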
4. The Custom MysqlSink Class
In this part we implement a custom MysqlSink by extending the RichSinkFunction class, overriding the open(), invoke(), and close() methods to obtain the database connection and insert each record into the target table.
package com.xpu.kafkatomysql;

import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;

/**
 * create by xiax.xpu on @Date 2019/4/13 14:38
 */
public class MysqlSink extends RichSinkFunction<Entity> {
    private PreparedStatement ps = null;
    private Connection connection = null;
    String driver = "com.mysql.jdbc.Driver";
    String url = "jdbc:mysql://192.168.0.102:3306/flinktest?useUnicode=true&characterEncoding=UTF-8";
    String username = "sqoopuser";
    String password = "sqoopuser";

    /**
     * open() establishes the connection once, so we don't have to open
     * and release a connection on every invoke() call.
     * @param parameters
     * @throws Exception
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        // load the JDBC driver
        Class.forName(driver);
        // create the connection
        connection = DriverManager.getConnection(url, username, password);
        String sql = "insert into web_access (city,loginTime,os,phoneName) values (?,?,?,?)";
        ps = connection.prepareStatement(sql);
    }

    /**
     * invoke() is called once for each record to insert.
     * @param value
     * @param context
     * @throws Exception
     */
    @Override
    public void invoke(Entity value, Context context) throws Exception {
        ps.setString(1, value.city);
        ps.setString(2, value.loginTime);
        ps.setString(3, value.os);
        ps.setString(4, value.phoneName);
        System.out.println("insert into web_access (city,loginTime,os,phoneName) values ("
                + value.city + "," + value.loginTime + "," + value.os + "," + value.phoneName + ")");
        ps.executeUpdate();
    }

    @Override
    public void close() throws Exception {
        super.close();
        // close the statement before the connection it belongs to
        if (ps != null) {
            ps.close();
        }
        if (connection != null) {
            connection.close();
        }
    }
}
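Note that the sink assumes a web_access table already exists in the flinktest database. The DDL isn't shown in the original setup, so here is a minimal one-off helper with column types inferred from the insert statement above (the class name and VARCHAR sizes are assumptions; adjust them to your data):

package com.xpu.kafkatomysql;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;

// Hypothetical one-off helper: creates a web_access table compatible with MysqlSink.
public class CreateWebAccessTable {
    public static void main(String[] args) throws Exception {
        String url = "jdbc:mysql://192.168.0.102:3306/flinktest?useUnicode=true&characterEncoding=UTF-8";
        try (Connection conn = DriverManager.getConnection(url, "sqoopuser", "sqoopuser");
             Statement stmt = conn.createStatement()) {
            // column types are assumptions matched to the String fields of Entity
            stmt.executeUpdate(
                "CREATE TABLE IF NOT EXISTS web_access (" +
                "  city      VARCHAR(32)," +
                "  loginTime VARCHAR(32)," +
                "  os        VARCHAR(32)," +
                "  phoneName VARCHAR(32)" +
                ") DEFAULT CHARSET=utf8");
        }
    }
}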
5. The Flink Entry Program
package com.xpu.kafkatomysql;

import com.alibaba.fastjson.JSON;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;

import java.util.Properties;

/**
 * create by xiax.xpu on @Date 2019/4/13 14:50
 */
public class FlinkSubmitter {
    public static void main(String[] args) throws Exception {
        // get the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Kafka consumer configuration
        Properties props = new Properties();
        props.put("bootstrap.servers", "192.168.83.129:9092");
        props.put("zookeeper.connect", "192.168.83.129:2181"); // only the 0.8 connector needs this; unused here
        props.put("group.id", "flink-kafka-mysql"); // the Flink Kafka consumer expects a group id; the name is arbitrary
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");   // key deserializer
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); // value deserializer
        // We use the 0.11 connector here; the difference from the 0.9/0.10
        // connectors is that 0.11 supports Kafka's exactly-once semantics.
        SingleOutputStreamOperator<Entity> streamRecord = env.addSource(new FlinkKafkaConsumer011<>(
                "kafka_flink_mysql",
                new SimpleStringSchema(), // deserialize the raw bytes as Strings
                props)).map(string -> JSON.parseObject(string, Entity.class)).setParallelism(1);
        streamRecord.addSink(new MysqlSink());
        env.execute("KafkatoMysql");
    }
}
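One caveat on the exactly-once comment above: consistent offset handling on the consumer side also depends on Flink checkpointing being enabled; without it the consumer simply commits offsets periodically through the Kafka client. A minimal sketch, added right after creating the environment in FlinkSubmitter.main() (the 5000 ms interval is an arbitrary example):

// inside FlinkSubmitter.main(), right after getExecutionEnvironment():
// enable periodic checkpoints so Kafka offsets are tracked consistently
// with Flink's own state (the 5000 ms interval is just an example)
env.enableCheckpointing(5000);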
6. Testing
(Screenshot: the records sent to Kafka)
(Screenshot: MysqlSink obtaining the connection and the records about to be inserted)
(Screenshot: the rows in the MySQL web_access table)