系統版本 windows7
JDK 11.0.7
kafka 2.12-2.5.0(與下文安裝目錄 kafka_2.12-2.5.0 一致)
zookeeper 3.5.8(與下文安裝目錄 apache-zookeeper-3.5.8-bin 一致)
Flink 1.10.0
需要安裝kafka、zookeeper、flink
1.進入到kafka安裝目錄,啓動kafka
cd D:\usually\kafka\kafka_2.12-2.5.0
bin\windows\zookeeper-server-start.bat config\zookeeper.properties
bin\windows\kafka-server-start.bat config\server.properties
2.進入zookeeper安裝目錄,啓動zookeeper
cd D:\usually\zookeeper\apache-zookeeper-3.5.8-bin\bin
點擊zkServer.cmd
3.進入flink安裝目錄,啓動flink
cd D:\flink\flink-1.10.0\bin
點擊start-cluster.bat
4.重新打開一個CMD窗口,運行flink job(代碼稍後補上),去消費kafka數據:
bin\flink.bat run -c FlinkStream.KafkaFlinkStream D:\testData\FlinkOnKafka-1.0-SNAPSHOT.jar test D:\testData
5.在idea上運行KafkaProducerTest 代碼,將數據寫入kafka
依賴:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>nd.baron</groupId>
    <artifactId>FlinkOnKafka</artifactId>
    <version>1.0-SNAPSHOT</version>
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>1.10.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_2.12</artifactId>
            <version>1.10.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.12</artifactId>
            <version>1.10.0</version>
        </dependency>
        <!-- Only the 0.11 Kafka connector is used by the job code (FlinkKafkaConsumer011).
             The universal connector (flink-connector-kafka_2.12) was removed: shipping both
             puts two conflicting connector implementations on the classpath. -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka-0.11_2.12</artifactId>
            <version>1.10.0</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-log4j12 -->
        <!-- Compile scope (was "test"): without a runtime SLF4J binding, all Flink/Kafka
             logging is silently dropped when running from the IDE or the shaded jar. -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>1.7.30</version>
        </dependency>
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.17</version>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.1.0</version>
                <configuration>
                    <createDependencyReducedPom>false</createDependencyReducedPom>
                </configuration>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <transformers>
                                <transformer
                                        implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <!-- When packaging, set this to the job's actual main class. -->
                                    <mainClass>FlinkStream.KafkaFlinkStream</mainClass>
                                </transformer>
                                <transformer
                                        implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                                    <resource>reference.conf</resource>
                                </transformer>
                            </transformers>
                            <filters>
                                <filter>
                                    <artifact>*:*:*:*</artifact>
                                    <!-- Strip signature files so the shaded jar is not rejected
                                         with "Invalid signature file digest". -->
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <!-- Flink 1.10 requires Java 8+; source/target 7 (the previous setting)
                         cannot compile against its APIs. -->
                    <source>8</source>
                    <target>8</target>
                    <encoding>utf8</encoding>
                </configuration>
            </plugin>
        </plugins>
    </build>
    <repositories>
        <repository>
            <id>spring-snapshots</id>
            <name>Spring Snapshots</name>
            <url>https://repo.spring.io/snapshot</url>
            <snapshots>
                <enabled>true</enabled>
            </snapshots>
        </repository>
        <repository>
            <id>spring-milestones</id>
            <name>Spring Milestones</name>
            <url>https://repo.spring.io/milestone</url>
        </repository>
    </repositories>
    <pluginRepositories>
        <pluginRepository>
            <id>spring-snapshots</id>
            <name>Spring Snapshots</name>
            <url>https://repo.spring.io/snapshot</url>
            <snapshots>
                <enabled>true</enabled>
            </snapshots>
        </pluginRepository>
        <pluginRepository>
            <id>spring-milestones</id>
            <name>Spring Milestones</name>
            <url>https://repo.spring.io/milestone</url>
        </pluginRepository>
    </pluginRepositories>
</project>
6.Kafka生產者代碼(KafkaProducerTest)如下,用於向kafka寫入數據:
package KafkaExtr;
import org.apache.kafka.clients.producer.*;
import java.util.Properties;
public final class KafkaProducerTest {

    /**
     * Sends one record per second to the Kafka topic "demo" on localhost:9092.
     * Each record value is CSV: "&lt;epochMillis&gt;,YSS-PC,&lt;freeMemoryBytes&gt;",
     * the format that the downstream Flink job (MessageSplitter / MessageWaterEmitter) parses.
     */
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092");
        props.put("acks", "all");
        props.put("retries", 0);
        props.put("batch.size", 16384);
        props.put("linger.ms", 1);
        props.put("buffer.memory", 33554432);
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        int totalMessageCount = 10000;
        // Key type is String (was Object) to match the configured StringSerializer.
        // try-with-resources guarantees producer.close() runs even if send/sleep
        // throws, flushing buffered records and releasing network resources
        // (the original leaked the producer on any exception).
        try (Producer<String, String> producer = new KafkaProducer<String, String>(props)) {
            for (int i = 0; i < totalMessageCount; i++) {
                String value = String.format("%d,%s,%d",
                        System.currentTimeMillis(), "YSS-PC", currentMemSize());
                System.out.println("發送數據-->" + value);
                producer.send(new ProducerRecord<String, String>("demo", value), new Callback() {
                    @Override
                    public void onCompletion(RecordMetadata recordMetadata, Exception e) {
                        if (null != e) {
                            System.out.println("Failed to send message with exception " + e);
                        }
                    }
                });
                Thread.sleep(1000); // throttle to ~1 msg/sec
            }
        }
    }

    /** Current free physical memory in bytes, delegated to MemoryUsageExtrator. */
    private static long currentMemSize() {
        return MemoryUsageExtrator.currentFreeMemorySizeInBytes();
    }
}
package KafkaExtr;
import com.sun.management.OperatingSystemMXBean;
import java.lang.management.ManagementFactory;
public final class MemoryUsageExtrator {

    /** Cached platform MXBean; fetched once instead of on every call. */
    private static final OperatingSystemMXBean mxBean =
            (OperatingSystemMXBean) ManagementFactory.getOperatingSystemMXBean();

    private MemoryUsageExtrator() {
        // utility class, not instantiable
    }

    /**
     * Get current free physical memory size in bytes.
     * Fix: the original declared the static {@code mxBean} field but never used it,
     * re-looking up the bean on every invocation.
     *
     * @return free RAM size in bytes
     */
    static long currentFreeMemorySizeInBytes() {
        return mxBean.getFreePhysicalMemorySize();
    }
}
7.其中flinkjob的代碼如下:
package FlinkStream;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.windowing.WindowFunction;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import org.apache.flink.util.Collector;
import java.util.Properties;
public final class KafkaFlinkStream {

    /**
     * Flink job: consumes "timestamp,host,freeMem" CSV records from Kafka topic "demo",
     * keys by host, and for each 10-second event-time window emits (host, avgFreeMem),
     * writing the results as text to the path given in {@code args[0]}.
     *
     * @param args args[0] = output path for the windowed averages
     */
    public static void main(String[] args) throws Exception {
        // Fail fast with a clear message instead of an ArrayIndexOutOfBoundsException
        // at writeAsText(args[0]) below.
        if (args.length < 1) {
            throw new IllegalArgumentException("Usage: KafkaFlinkStream <outputPath>");
        }
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(5000); // required for the Kafka source's offset/exactly-once guarantees
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        Properties props = new Properties();
        props.setProperty("bootstrap.servers", "localhost:9092");
        props.setProperty("group.id", "demo");
        props.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");   // key deserializer
        props.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); // value deserializer
        props.setProperty("auto.offset.reset", "latest"); // start from newest offsets when none are committed

        FlinkKafkaConsumer011<String> consumer = new FlinkKafkaConsumer011<String>(
                "demo",                   // Kafka topic to consume
                new SimpleStringSchema(), // records are plain UTF-8 strings
                props);
        // Event-time timestamps and punctuated watermarks come from the first CSV field.
        consumer.assignTimestampsAndWatermarks(new MessageWaterEmitter());

        DataStream<Tuple2<String, Long>> keyedStream = env
                .addSource(consumer)
                .flatMap(new MessageSplitter())
                .keyBy(0)
                .timeWindow(Time.seconds(10))
                // 10-second tumbling window: compute the per-key average (integer division).
                .apply(new WindowFunction<Tuple2<String, Long>, Tuple2<String, Long>, Tuple, TimeWindow>() {
                    @Override
                    public void apply(Tuple key, TimeWindow window, Iterable<Tuple2<String, Long>> input, Collector<Tuple2<String, Long>> out) {
                        long sum = 0L;
                        int count = 0;
                        String recordKey = null;
                        for (Tuple2<String, Long> record : input) {
                            recordKey = record.f0;
                            sum += record.f1;
                            count++;
                        }
                        // Emit a fresh tuple instead of mutating an input record in place:
                        // Flink may reuse the objects handed to a WindowFunction, so the
                        // original `result.f1 = sum / count` could corrupt shared state.
                        if (count > 0) {
                            out.collect(new Tuple2<String, Long>(recordKey, sum / count));
                        }
                    }
                });
        // Write the keyed averages to the output path (args[0]).
        keyedStream.writeAsText(args[0]);
        env.execute("Flink-Kafka sample");
    }
}
package FlinkStream;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;
/**
 * Parses "timestamp,host,freeMem" CSV records into (host, freeMem) tuples.
 * Malformed records — fewer than three fields, or a non-numeric third field —
 * are dropped instead of crashing the task (the original indexed parts[1]/parts[2]
 * unconditionally and could throw ArrayIndexOutOfBoundsException / NumberFormatException).
 */
public class MessageSplitter implements FlatMapFunction<String, Tuple2<String, Long>> {

    @Override
    public void flatMap(String value, Collector<Tuple2<String, Long>> out) throws Exception {
        if (value == null) {
            return;
        }
        String[] parts = value.split(",");
        if (parts.length < 3) {
            return; // not enough fields; skip the record
        }
        try {
            out.collect(new Tuple2<String, Long>(parts[1], Long.parseLong(parts[2])));
        } catch (NumberFormatException ignored) {
            // third field is not a valid long — skip this record, best-effort stream
        }
    }
}
package FlinkStream;
import org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks;
import org.apache.flink.streaming.api.watermark.Watermark;
import javax.annotation.Nullable;
/**
 * Punctuated watermark assigner for "timestamp,host,freeMem" CSV records:
 * the first field (epoch millis) is both the event timestamp and the watermark.
 */
public class MessageWaterEmitter implements AssignerWithPunctuatedWatermarks<String> {

    /**
     * Emits a watermark from the first CSV field of each record,
     * or no watermark when the record cannot be parsed.
     */
    @Nullable
    @Override
    public Watermark checkAndGetNextWatermark(String lastElement, long extractedTimestamp) {
        long ts = parseLeadingTimestamp(lastElement);
        return ts >= 0 ? new Watermark(ts) : null;
    }

    /**
     * Extracts the event timestamp (first CSV field) from a record;
     * falls back to 0 for malformed input, as the original did for records
     * without a comma. Fix: a non-numeric first field no longer throws
     * NumberFormatException and kills the task.
     */
    @Override
    public long extractTimestamp(String element, long previousElementTimestamp) {
        long ts = parseLeadingTimestamp(element);
        return ts >= 0 ? ts : 0L;
    }

    /** Returns the leading CSV field as a long, or -1 when it cannot be parsed. */
    private static long parseLeadingTimestamp(String record) {
        if (record == null || !record.contains(",")) {
            return -1L;
        }
        try {
            return Long.parseLong(record.split(",")[0]);
        } catch (NumberFormatException e) {
            return -1L; // malformed timestamp field
        }
    }
}