結合 fieldsGrouping分組模式的一個jstorm使用實例,指定分組模式可以保證被指定的字段,如果相同值 則tuple會分配給同個task處理。例如 一個tuple有兩個字段field1、field2,如果指定field1爲分組字段,現在有兩個tuple分別是tuple1、tuple2,tuple1中field1值爲a、field2值爲b,tuple2的field1值爲a、field2值爲c。則tuple1和tuple2會被分到相同的線程處理
1.創建spout數據源,實例中是不斷從Redis中拉取數據
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import com.demo.config.SpringBeans;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.data.redis.core.RedisTemplate;
import java.util.Map;
import java.util.UUID;
public class MySpout extends BaseRichSpout {
private SpoutOutputCollector collector;
private String componentId;
private int taskId;
private RedisTemplate<String,Object> redisTemplate;
private static final Logger logger = LoggerFactory.getLogger(MySpout.class);
public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
this.collector = collector;
componentId = context.getThisComponentId();
this.taskId = context.getThisTaskId();
redisTemplate = SpringBeans.getBean("redisTemplate",RedisTemplate.class);
}
public void nextTuple() {
try {
Object value = redisTemplate.opsForList().leftPop("student");
logger.info("MySpout nextTuple componentId:{} taskId:{} value:{}",componentId,taskId,value);
//value = "{\"name\":\"zhangsan\",\"address\":\"hangzhou\"}"
if (null == value){
Thread.sleep(1000);
return;
}
String uuid = UUID.randomUUID().toString();
collector.emit(new Values(value),uuid);
}catch (Exception e){
e.printStackTrace();
}
}
//設置輸出流的字段
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("student"));
}
}
2.創建第一個bolt
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Map;
public class FirstBolt implements IRichBolt {
private Logger logger = LoggerFactory.getLogger(FirstBolt.class);
private OutputCollector collector;
private Integer taskId;
public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
this.collector = collector;
this.taskId = context.getThisTaskId();
}
public void execute(Tuple input) {
try {
Map<String,String> student = (Map<String, String>) input.getValue(0);
//student = "{\"name\":\"zhangsan\",\"address\":\"hangzhou\"}"
String address = student.get("address");
//address = "hangzhou"
collector.emit(input,new Values(address,student));
//上面input(old tuple)和 new Values(new tuple)關聯一起,才能保證後續子tuple的ack對spout自作用
logger.info("FirstBolt execute taskId:{} value:{}",taskId,student);
}catch (Exception e){
e.printStackTrace();
}
}
public void cleanup() {
}
//設置輸出流的字段
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("address","student"));
//address 是分組字段。 new Fields 字段跟上面 execute 中collector.emit 的new Values字段對應
}
public Map<String, Object> getComponentConfiguration() {
return null;
}
}
3.創建第二個bolt
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Tuple;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Map;
public class SecondBolt extends BaseRichBolt {
    private static final Logger logger = LoggerFactory.getLogger(SecondBolt.class);

    private OutputCollector outputCollector;
    private Integer taskId;

    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.outputCollector = collector;
        this.taskId = context.getThisTaskId();
    }

    /**
     * Terminal bolt: logs the (address, student) pair received via
     * fieldsGrouping. Tuples sharing the same "address" value always arrive
     * at the same task instance of this bolt.
     */
    @Override
    public void execute(Tuple input) {
        try {
            String address = input.getString(0);
            //address="hangzhou"
            Map<String, String> student = (Map<String, String>) input.getValue(1);
            //student = "{\"name\":\"zhangsan\",\"address\":\"hangzhou\"}"
            logger.info("SecondBolt execute taskId:{} address:{} student:{}", taskId, address, student);
            // BaseRichBolt (unlike BaseBasicBolt) does NOT auto-ack: the tuple
            // must be acked explicitly or it is replayed after the message timeout.
            outputCollector.ack(input);
        } catch (Exception e) {
            logger.error("SecondBolt execute failed", e);
            outputCollector.fail(input);
        }
    }

    @Override
    public void cleanup() {
    }

    // This bolt emits nothing, so no output fields are declared.
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}
4.生成我們的topology
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.topology.SpoutDeclarer;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.tuple.Fields;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.autoconfigure.jdbc.JdbcTemplateAutoConfiguration;
import java.util.HashMap;
import java.util.Map;
@SpringBootApplication(exclude = {JdbcTemplateAutoConfiguration.class},
scanBasePackages = "com.demo")
public class MyTopology {
    /**
     * Boots the Spring context, wires my-spout -> first-bolt
     * (localOrShuffleGrouping) -> second-bolt (fieldsGrouping on "address"),
     * and submits the topology to a local in-process cluster.
     */
    public static void main(String[] args) {
        SpringApplication.run(MyTopology.class, args);

        // All custom topology configuration goes into this map.
        Map<String, Object> conf = new HashMap<>();

        // Builder that assembles the topology graph.
        TopologyBuilder builder = new TopologyBuilder();

        // Spout parallelism. Component names must not contain spaces.
        int spoutParal = 1;
        builder.setSpout("my-spout", new MySpout(), spoutParal);

        // Bolt parallelism.
        int boltParal = 3;
        // localOrShuffleGrouping("my-spout"): consume "my-spout"'s tuples,
        // preferring a target task inside the same worker process; otherwise
        // fall back to random (shuffle) distribution across tasks.
        builder.setBolt("first-bolt", new FirstBolt(), boltParal)
                .localOrShuffleGrouping("my-spout");

        // fieldsGrouping("first-bolt", "address"): tuples from first-bolt with
        // equal "address" values are always routed to the same second-bolt task.
        builder.setBolt("second-bolt", new SecondBolt(), boltParal)
                .fieldsGrouping("first-bolt", new Fields("address"));

        // Number of acker tasks that track tuple trees for ack/fail.
        int ackerParal = 1;
        Config.setNumAckers(conf, ackerParal);

        // Number of worker processes the whole topology will use.
        int workerNum = 1;
        conf.put(Config.TOPOLOGY_WORKERS, workerNum);

        // "distributed" mode allows the topology to run on a JStorm cluster.
        conf.put(Config.STORM_CLUSTER_MODE, "distributed");

        try {
            // Cluster submission (uncomment to deploy to a real cluster):
            // StormSubmitter.submitTopology("first-topology", conf,
            //         builder.createTopology());

            // Local mode: run the topology in-process for development/testing.
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("first-topology", conf,
                    builder.createTopology());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}