首先 jstorm的概念請參考官網:點擊打開鏈接,官網實例:點擊打開鏈接
運行Jstorm可分爲本地調試和分佈式環境
1.先說分佈式環境,首先搭建zookeeper集羣點擊打開鏈接
2.搭建kafka集羣點擊打開鏈接
3.搭建jstorm集羣點擊打開鏈接
4.開始貼代碼
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.skycomm</groupId>
<artifactId>demo</artifactId>
<version>1.0-SNAPSHOT</version>
<dependencies>
<!-- JStorm runtime: scope "provided" because the cluster supplies it;
     slf4j bindings excluded to avoid multiple-binding conflicts. -->
<dependency>
<groupId>com.alibaba.jstorm</groupId>
<artifactId>jstorm-core</artifactId>
<version>2.1.1</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-nop</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-jdk14</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>jcl-over-slf4j</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- storm-kafka provides KafkaSpout/SpoutConfig. Exclude storm-core:
     its backtype.storm classes would collide with jstorm-core's on the
     cluster classpath (jstorm ships the same packages). -->
<dependency>
<groupId>org.apache.storm</groupId>
<artifactId>storm-kafka</artifactId>
<version>0.9.6</version>
<exclusions>
<exclusion>
<groupId>org.apache.storm</groupId>
<artifactId>storm-core</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- Kafka client; zookeeper/log4j/slf4j-log4j12 excluded so the
     versions supplied by the cluster / our own logging setup win. -->
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka_2.11</artifactId>
<version>0.9.0.1</version>
<exclusions>
<exclusion>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</exclusion>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Shade plugin builds the fat jar submitted with "jstorm jar". -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>1.7.1</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<transformers>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
自定義Bolt入口
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import com.esotericsoftware.minlog.Log;
import java.io.Serializable;
import java.util.Map;
/**
* Title:
* <p>
* Description:TODO
* <p>
* Copyright:Copyright(c)2005
* <p>
* Company:
* <p>
* Author:lishuangjiang
* <p>
* Date:2018/4/12 10:10
*/
/**
 * Bolt that consumes tuples emitted by the Kafka spout, prints the payload
 * (field 0 as a String) and acks/fails the tuple accordingly.
 *
 * <p>Implements {@link Serializable} because Storm/JStorm serializes bolt
 * instances when shipping the topology to workers.</p>
 */
public class MyBolt implements IRichBolt, Serializable {
    private static final long serialVersionUID = 1L;
    /** Collector handed to us in prepare(); used to ack/fail tuples. */
    private OutputCollector collector;

    /**
     * Processes one tuple: logs its first field and acks it; on any
     * failure the tuple is failed so the spout can replay it.
     *
     * @param input tuple from the upstream Kafka spout; field 0 is the message string
     */
    public void execute(Tuple input) {
        try {
            String string = input.getString(0);
            System.out.println(string + "................................");
            collector.ack(input);
        } catch (Exception e) {
            // Fail first so the spout replays the message, then log with the
            // cause attached (printStackTrace was redundant with Log.error).
            collector.fail(input);
            Log.error("解析數據異常", e);
        }
    }

    /** This bolt emits nothing downstream, so it declares no output fields. */
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields());
    }

    @SuppressWarnings("rawtypes")
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
    }

    public void cleanup() {
    }

    /** No per-component configuration overrides. */
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}
自定義Topology入口
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.spout.SchemeAsMultiScheme;
import backtype.storm.topology.TopologyBuilder;
import storm.kafka.KafkaSpout;
import storm.kafka.SpoutConfig;
import storm.kafka.StringScheme;
import storm.kafka.ZkHosts;
import java.util.ArrayList;
import java.util.List;
/**
* Title:
* <p>
* Description:TODO
* <p>
* Copyright:Copyright(c)2005
* <p>
* Company:skycomm.com.cn
* <p>
* Author:lishuangjiang
* <p>
* Date:2018/4/12 10:00
*/
/**
 * Entry point that wires a KafkaSpout ("data") to {@code MyBolt} ("analyze")
 * and submits the topology either to a remote JStorm cluster (when args are
 * given) or to a LocalCluster for debugging (no args).
 *
 * <p>Arguments: args[0] = topology name, args[1] = kafka topic,
 * args[2] = spout parallelism, args[3] = bolt parallelism,
 * args[4] = max spout pending.</p>
 */
public class MyTopology {
    public static void main(String[] args) throws InterruptedException {
        String brokerZkStr = "119.23.20.*:2181,120.77.200.*:2181,39.108.5.*:2181";
        String brokerZkPath = "/brokers";
        // Kafka topic to consume.
        String topic = "testTopic";
        // zkRoot for storing the consumer offsets (empty = zookeeper root).
        String offset = "";
        // Consumer id; may be any unique name.
        String id = "testTopic";
        Integer workerNumSpout = 3;
        Integer workerNumBolt = 3;
        Integer maxSpoutPending = 2000;
        if (args.length > 1) {
            topic = args[1];
        }
        // BUGFIX: the original guarded both args[2] and args[3] with
        // "args.length > 2", throwing ArrayIndexOutOfBoundsException when
        // exactly 3 arguments were supplied. Guard each index separately.
        if (args.length > 2) {
            workerNumSpout = Integer.parseInt(args[2]);
        }
        if (args.length > 3) {
            workerNumBolt = Integer.parseInt(args[3]);
        }
        if (args.length > 4) {
            maxSpoutPending = Integer.parseInt(args[4]);
        }
        ZkHosts zk = new ZkHosts(brokerZkStr, brokerZkPath);
        SpoutConfig spoutConf = new SpoutConfig(zk, topic, offset, id);
        // Derive the plain host list (no ports) for SpoutConfig.zkServers.
        List<String> zkServices = new ArrayList<String>();
        for (String str : zk.brokerZkStr.split(",")) {
            zkServices.add(str.split(":")[0]);
        }
        spoutConf.zkServers = zkServices;
        spoutConf.zkPort = 2181;
        // Resume from the last committed offset instead of the beginning.
        spoutConf.forceFromStart = false;
        spoutConf.socketTimeoutMs = 60 * 1000;
        spoutConf.scheme = new SchemeAsMultiScheme(new StringScheme());

        TopologyBuilder builder = new TopologyBuilder();
        // Spout parallelism should match the topic's partition count for
        // best throughput (here it defaults to 3 unless overridden by args).
        builder.setSpout("data", new KafkaSpout(spoutConf), workerNumSpout);
        builder.setBolt("analyze", new MyBolt(), workerNumBolt).shuffleGrouping("data");

        Config config = new Config();
        config.setDebug(false);  // was duplicated in the original; set once
        config.setNumWorkers(workerNumSpout);
        // Conservative baseline; overridden below when maxSpoutPending > 0.
        config.setMaxSpoutPending(1);
        config.setNumAckers(0);
        if (maxSpoutPending > 0) {
            config.setMaxSpoutPending(maxSpoutPending);
        }
        System.out.println(" topic = " + topic + " workerNumSpout = " + workerNumSpout +
                " workerNumBolt = " + workerNumBolt + " maxSpoutPending = " + maxSpoutPending);
        if (args.length > 0) {
            try {
                // With arguments: submit to the remote (distributed) cluster.
                StormSubmitter.submitTopology(args[0], config, builder.createTopology());
            } catch (Exception e) {
                e.printStackTrace();
            }
        } else {
            // Without arguments: run inside an in-process LocalCluster.
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("ImsiTopology", config, builder.createTopology());
        }
    }
}
5.打成jar 上傳到分佈式環境運行
# jstorm jar XXXX.jar com.li.test.MyTopology myTopic
XXXX.jar 爲打包後的jar
com.li.test.MyTopology 爲入口類,即提交任務的類
myTopic 即爲提交的參數(args[0],用作topology名稱)
6.本地環境調試
不傳任何參數直接運行 main 方法即可,程序會走 LocalCluster 本地提交分支。
最後奉上源碼地址點擊打開鏈接