测试阶段采用了三台虚拟机,一台为master,两台为slave,已经在集群安装好了flink,kafka,zookeeper,redis。flink没有采用高可用模式,所以不需要hadoop。需要注意:redis没有安装集群模式,而是在每台机器上单独安装的,因此master上的redis不会被用到;任务开始后,实际使用slave1还是slave2上的redis,由flink任务的调度结果决定。
服务开启zookeeper->kafka->flink
master进程:
slave进程:
1.代码编写
代码部分分为两块:
- 生产数据,往kafka的topic中写入,数据样式:T0,A7598,2019-08-07 18:23:20:038
- flink读取kafka数据再存入redis
第一部分,kafka:
项目目录
pom文件
<dependencies>
<!-- Kafka Java client used by the producer code below (KafkaProducer / ProducerRecord);
     0.11.0.3 matches the 0.11 broker line installed on the cluster -->
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
<version>0.11.0.3</version>
</dependency>
</dependencies>
代码
package com;
import org.apache.kafka.clients.producer.*;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Random;
import java.util.Properties;
/**
* 连接kafka将数据写入topic
* 保存至本地 data.txt
*/
/**
 * Generates 1000 test records, sends each to the Kafka topic "test",
 * and mirrors each record to a local file (/home/hadoop/Test/data.txt).
 *
 * Record format: T&lt;i&gt;,A&lt;random&gt;,yyyy-MM-dd HH:mm:ss:SSS
 */
public class KafkaProducerTest {
    public static void main(String[] args) throws IOException {
        // Kafka producer configuration
        Properties props = new Properties();
        props.put("bootstrap.servers", "192.168.37.140:9092,192.168.37.141:9092,192.168.37.142:9092");
        props.put("acks", "all");               // wait for full acknowledgement before the callback fires
        props.put("retries", 0);
        props.put("batch.size", 16384);
        props.put("linger.ms", 1);
        props.put("buffer.memory", 33554432);
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        String fileName = "/home/hadoop/Test/data.txt"; // local path on Linux
        File out = new File(fileName);

        // try-with-resources guarantees the producer and the writer are closed
        // on every exit path. (The original code could throw an NPE in its
        // finally block when the FileWriter was never created, and leaked the
        // producer whenever an exception escaped the try block.)
        try (Producer<String, String> producer = new KafkaProducer<>(props)) {
            if (!out.exists()) {
                out.createNewFile();
            }
            System.out.println("开始写入");
            try (FileWriter fw = new FileWriter(out)) {
                int len = 1000;
                for (int i = 0; i < len; i++) {
                    String name = "T" + i;          // record id
                    String content = getContent();  // random payload
                    String time = getDatetime();    // current timestamp
                    String value = String.format("%s,%s,%s", name, content, time);
                    // send to the Kafka topic "test"; the callback only logs failures
                    producer.send(new ProducerRecord<String, String>("test", value), new Callback() {
                        @Override
                        public void onCompletion(RecordMetadata metadata, Exception exception) {
                            if (exception != null) {
                                System.out.println("Failed to send message with exception " + exception);
                            }
                        }
                    });
                    // mirror the record to the local file
                    fw.write(name + "," + content + "," + time + "\r\n");
                    fw.flush();
                    Thread.sleep(100L); // throttle to ~10 records per second
                }
            }
            System.out.println("完成写入");
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt(); // restore the interrupt flag
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** @return a random payload of the form A0 .. A9999 */
    public static String getContent() {
        Random random = new Random();
        return "A" + random.nextInt(10000);
    }

    /** @return the current time formatted as yyyy-MM-dd HH:mm:ss:SSS */
    public static String getDatetime() {
        Date date = new Date();
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SSS");
        return dateFormat.format(date);
    }
}
--------------------------------------------------------------------------------------------------------------------------------------
第二部分,flink:
项目目录
pom文件
<dependencies>
<!-- All Flink artifacts must share the same Scala suffix; everything here is _2.11.
     The original declared flink-connector-redis_2.10, mixing Scala 2.10 and 2.11
     binaries on one classpath, which fails at runtime with NoSuchMethodError. -->
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-clients -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_2.11</artifactId>
<version>1.4.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-streaming-java -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java_2.11</artifactId>
<version>1.4.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-java -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
<version>1.4.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-connector-kafka-0.9 -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka-0.9_2.11</artifactId>
<version>1.4.0</version>
</dependency>
<!-- Redis sink connector, aligned to Scala 2.11 -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-redis_2.11</artifactId>
<version>1.1.5</version>
</dependency>
</dependencies>
代码部分
package com.scn;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer09;
import org.apache.flink.streaming.connectors.redis.RedisSink;
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommand;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommandDescription;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisMapper;
import org.apache.flink.streaming.util.serialization.SimpleStringSchema;
import java.util.Properties;
/**flink连接kafka,读取topic中的数据,存入redis
*
*/
/**
 * Flink job: consumes "name,content,time" CSV records from the Kafka topic
 * "test" (produced by KafkaProducerTest) and writes each record into Redis
 * with SET under the fixed key "flink".
 *
 * NOTE(review): the class name "FilnkCostKafka" is misspelled, but it is
 * referenced by the deployment command (flink run -c com.scn.FilnkCostKafka),
 * so it is kept unchanged.
 */
public class FilnkCostKafka {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(5000); // checkpoint every 5 seconds

        System.out.println("===============》 开始读取kafka中的数据 ==============》");
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "master:9092,slave1:9092,slave2:9092");
        properties.setProperty("zookeeper.connect", "master:2181,slave1:2181,slave2:2181");
        properties.setProperty("group.id", "kafka_to_redis");

        // Kafka source reading the topic "test"
        FlinkKafkaConsumer09<String> myConsumer =
                new FlinkKafkaConsumer09<String>("test", new SimpleStringSchema(), properties);
        DataStreamSource<String> kafkaData = env.addSource(myConsumer);

        // Parse each CSV line into (name, content, time).
        // The producer emits e.g. "T0,A7598,2019-08-07 18:23:20:038".
        // (The original named these fields userID/itemId/categoryId, which did
        // not match the actual data, and indexed split[] without a length check.)
        SingleOutputStreamOperator<Tuple3<String, String, String>> userData =
                kafkaData.map(new MapFunction<String, Tuple3<String, String, String>>() {
                    @Override
                    public Tuple3<String, String, String> map(String s) throws Exception {
                        // limit 3: only the first two commas split; the rest stays in the time field
                        String[] fields = s.split(",", 3);
                        if (fields.length < 3) {
                            // fail with a readable message instead of an ArrayIndexOutOfBoundsException
                            throw new IllegalArgumentException("Malformed record: " + s);
                        }
                        return new Tuple3<String, String, String>(fields[0], fields[1], fields[2]);
                    }
                });

        // Redis connection: 127.0.0.1 resolves on the task manager that runs the
        // sink, so the data lands in the local Redis of whichever slave gets the task
        FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig.Builder().setHost("127.0.0.1").build();
        // Attach the Redis sink to the parsed stream
        userData.addSink(new RedisSink<Tuple3<String, String, String>>(conf, new RedisExampleMapper()));

        env.execute("WordCount from Kafka data");
    }

    /** Maps each tuple to the Redis command: SET flink "&lt;name&gt;,&lt;content&gt;,&lt;time&gt;". */
    public static final class RedisExampleMapper implements RedisMapper<Tuple3<String, String, String>> {
        @Override
        public RedisCommandDescription getCommandDescription() {
            // RedisCommand.SET chooses the storage type; the second argument
            // (additional key) is only used by hash/sorted-set commands
            return new RedisCommandDescription(RedisCommand.SET, "flink");
        }

        @Override
        public String getKeyFromData(Tuple3<String, String, String> data) {
            // fixed key: every record overwrites the previous one, so Redis
            // only ever holds the most recent record
            return "flink";
        }

        @Override
        public String getValueFromData(Tuple3<String, String, String> data) {
            return data.f0 + "," + data.f1 + "," + data.f2;
        }
    }
}
2.使用IDEA工具将代码打成jar包,放入虚拟机中
3.创建kafka的topic
4.使用命令启动flink任务,运行flink.jar包
flink run -c com.scn.FilnkCostKafka flink.jar
在web界面上可以看见slave1在执行任务
5.java命令执行kafka的jar包,写入数据到kafka中
java -cp kafka.jar com.KafkaProducerTest
(注意:jar包路径写在 -cp 中,主类名放在最后;若依赖未打进jar,需将依赖一并加入类路径,例如 java -cp "kafka.jar:libs/*" com.KafkaProducerTest)
6.进入对应slave下面redis中查看数据