JStorm usage demo

A JStorm example built around the fieldsGrouping grouping mode. This grouping guarantees that tuples carrying the same value in the specified field are dispatched to the same task. For example, suppose a tuple has two fields, field1 and field2, and field1 is declared as the grouping field. Given two tuples, tuple1 (field1 = a, field2 = b) and tuple2 (field1 = a, field2 = c), tuple1 and tuple2 will be processed by the same thread because they share the same field1 value.
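
Under the hood, fieldsGrouping chooses the target task deterministically from the grouping field's value, conceptually a hash modulo the number of downstream tasks. The snippet below is only an illustration of that idea, not JStorm's actual implementation:

import java.util.Arrays;

public class FieldsGroupingSketch {

    //Pick a target task index from the grouping field's value.
    //Equal grouping values always map to the same task index.
    static int chooseTask(Object groupingValue, int numTasks) {
        return Math.abs(Arrays.asList(groupingValue).hashCode()) % numTasks;
    }

    public static void main(String[] args) {
        int numTasks = 3;
        System.out.println(chooseTask("a", numTasks)); //tuple1 (field1 = a)
        System.out.println(chooseTask("a", numTasks)); //tuple2 (field1 = a) -> same index
        System.out.println(chooseTask("b", numTasks)); //a different value may land elsewhere
    }
}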

1. Create the spout data source; in this example it continuously pulls data from Redis

import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import com.demo.config.SpringBeans;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.data.redis.core.RedisTemplate;

import java.util.Map;
import java.util.UUID;

public class MySpout extends BaseRichSpout {

    private SpoutOutputCollector collector;

    private String componentId;

    private int taskId;

    private RedisTemplate<String,Object> redisTemplate;

    private static final Logger logger = LoggerFactory.getLogger(MySpout.class);

    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        this.collector = collector;
        componentId = context.getThisComponentId();
        this.taskId = context.getThisTaskId();

        redisTemplate = SpringBeans.getBean("redisTemplate",RedisTemplate.class);
    }

    public void nextTuple() {
        try {
            Object value = redisTemplate.opsForList().leftPop("student");
            logger.info("MySpout nextTuple componentId:{} taskId:{} value:{}",componentId,taskId,value);
            //value = "{\"name\":\"zhangsan\",\"address\":\"hangzhou\"}"
            if (null == value){
                Thread.sleep(1000);
                return;
            }
            String uuid = UUID.randomUUID().toString();
            collector.emit(new Values(value),uuid);
        }catch (Exception e){
            logger.error("MySpout nextTuple error",e);
        }

    }

    //Declare the fields of the output stream
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("student"));
    }
}
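
Because JStorm serializes the spout and deserializes it on worker nodes, a Spring-injected RedisTemplate field would not survive; fetching the bean in open() through a static holder is the usual workaround. The SpringBeans helper is not shown in the original post; here is a plausible sketch (the class name and getBean signature match the demo's usage, the body is an assumption):

import org.springframework.beans.BeansException;
import org.springframework.context.ApplicationContext;
import org.springframework.context.ApplicationContextAware;
import org.springframework.stereotype.Component;

//Hypothetical implementation: holds the Spring context statically so that
//objects created by JStorm (spouts/bolts) can look up Spring-managed beans.
@Component
public class SpringBeans implements ApplicationContextAware {

    private static ApplicationContext context;

    @Override
    public void setApplicationContext(ApplicationContext applicationContext) throws BeansException {
        context = applicationContext;
    }

    public static <T> T getBean(String name, Class<T> requiredType) {
        return context.getBean(name, requiredType);
    }
}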


2. Create the first bolt

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Map;

public class FirstBolt implements IRichBolt {
    private static final Logger logger = LoggerFactory.getLogger(FirstBolt.class);

    private OutputCollector collector;

    private Integer taskId;

    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
        this.taskId = context.getThisTaskId();
    }

    public void execute(Tuple input) {
        try {
            Map<String,String> student = (Map<String, String>) input.getValue(0);
            //student = "{\"name\":\"zhangsan\",\"address\":\"hangzhou\"}"
            String address = student.get("address");
            //address = "hangzhou"
            collector.emit(input,new Values(address,student));
            //Anchoring: emitting with input (the old tuple) ties the new tuple to it,
            //so that acks of downstream child tuples take effect back at the spout
            logger.info("FirstBolt execute taskId:{} value:{}",taskId,student);
            collector.ack(input);
            //IRichBolt requires a manual ack; without it the tuple tree never completes
        }catch (Exception e){
            logger.error("FirstBolt execute error",e);
            collector.fail(input);
        }

    }

    public void cleanup() {

    }

    //Declare the fields of the output stream
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("address","student"));
        //"address" is the grouping field. The Fields here correspond, position by
        //position, to the Values emitted by collector.emit in execute above
    }

    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}
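
Because FirstBolt anchors its emit to the input tuple, success or failure of the whole tuple tree is reported back to the spout under the UUID message id the spout emitted with. MySpout above inherits empty ack/fail handlers from BaseRichSpout; below is a minimal sketch of how they could be overridden to re-queue failed messages. The pending map is hypothetical and not part of the original demo:

//Hypothetical additions inside MySpout (requires java.util.concurrent.ConcurrentHashMap).
//When emitting, remember the payload under its message id:
//    pending.put(uuid, value);
//    collector.emit(new Values(value), uuid);
private final Map<String, Object> pending = new ConcurrentHashMap<String, Object>();

@Override
public void ack(Object msgId) {
    //The tuple tree rooted at this message id completed successfully
    pending.remove(msgId);
}

@Override
public void fail(Object msgId) {
    //Processing failed or timed out: push the payload back onto the Redis list for retry
    Object value = pending.remove(msgId);
    if (value != null) {
        redisTemplate.opsForList().rightPush("student", value);
    }
}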

3. Create the second bolt

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Tuple;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Map;

public class SecondBolt extends BaseRichBolt {

    private static final Logger logger = LoggerFactory.getLogger(SecondBolt.class);

    private OutputCollector outputCollector;

    private Integer taskId;
    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.outputCollector = collector;
        this.taskId = context.getThisTaskId();
    }

    @Override
    public void execute(Tuple input) {
        try {
            String address = input.getString(0);
            //address="hangzhou"
            Map<String,String> student = (Map<String, String>) input.getValue(1);
            //student = "{\"name\":\"zhangsan\",\"address\":\"hangzhou\"}"
            logger.info("SecondBolt execute taskId:{} address:{} student:{}",taskId,address,student);
            outputCollector.ack(input);
            //BaseRichBolt (like IRichBolt) still requires a manual ack;
            //only IBasicBolt/BaseBasicBolt acks automatically
        }catch (Exception e){
            logger.error("SecondBolt execute error",e);
            outputCollector.fail(input);
        }

    }

    @Override
    public void cleanup() {

    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}
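
With boltParal = 3 there are three SecondBolt tasks, but because of fieldsGrouping every tuple with the same "address" value is logged under the same taskId, while the localOrShuffleGrouping into FirstBolt spreads tuples across all of its tasks. Watching the taskId values in the two bolts' log lines is the easiest way to verify the grouping behavior.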

4. Build the topology

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.tuple.Fields;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.autoconfigure.jdbc.JdbcTemplateAutoConfiguration;

import java.util.HashMap;
import java.util.Map;

@SpringBootApplication(exclude = {JdbcTemplateAutoConfiguration.class},
scanBasePackages = "com.demo")
public class MyTopology {
    public static void main(String[] args) {
        SpringApplication.run(MyTopology.class,args);

        Map<String, Object> conf = new HashMap<String, Object>();
        //All custom topology configuration goes into this Map

        TopologyBuilder builder = new TopologyBuilder();
        //Builder used to assemble the topology

        int spoutParal = 1;
        //Spout parallelism

        builder.setSpout("my-spout", new MySpout(), spoutParal);
        //Create the spout: new MySpout() is the actual spout instance and "my-spout" is
        //its name; note that the name must not contain spaces

        int boltParal = 3;
        //Bolt parallelism

        builder.setBolt("first-bolt", new FirstBolt(), boltParal)
                .localOrShuffleGrouping("my-spout");
        //Create the bolt: "first-bolt" is the bolt's name, new FirstBolt() is the bolt
        //instance, and boltParal is its parallelism. localOrShuffleGrouping("my-spout")
        //means this bolt consumes "my-spout"'s output, preferring a bolt task in the same
        //worker when one exists, otherwise shuffling (random round-robin) across tasks


        builder.setBolt("second-bolt",new SecondBolt(),boltParal)
                .fieldsGrouping("first-bolt",new Fields("address"));

        //Create the bolt: "second-bolt" is the bolt's name, new SecondBolt() is the bolt
        //instance, and boltParal is its parallelism.
        //fieldsGrouping("first-bolt",new Fields("address")) means this bolt consumes
        //"first-bolt"'s output grouped by the "address" field, so every upstream bolt task
        //(here, first-bolt) sends tuples with equal "address" values to the same downstream task

        int ackerParal = 1;
        Config.setNumAckers(conf, ackerParal);
        //Set the number of acker tasks

        int workerNum = 1;
        conf.put(Config.TOPOLOGY_WORKERS, workerNum);
        //Number of workers the whole topology will use

        conf.put(Config.STORM_CLUSTER_MODE, "distributed");
        //Set the topology mode to distributed so the topology can run on a JStorm cluster
        //(relevant for the StormSubmitter path below; LocalCluster still runs in-process)

        try {
//            StormSubmitter.submitTopology("first-topology", conf,
//                    builder.createTopology());

            
            //Local mode for testing; use the commented-out StormSubmitter call above
            //to submit to a real cluster instead
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("first-topology", conf,
                    builder.createTopology());

        } catch (Exception e) {
            e.printStackTrace();
        }
        //Submit the topology
    }
}
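
For the demo to produce output, the "student" list in Redis must contain data. A hedged seeding snippet, assuming the redisTemplate bean is configured with a JSON serializer so that the stored map comes back as the Map the bolts cast to (run it, for example, from a CommandLineRunner or a test):

//Hypothetical seeding code; the field values match the examples in the bolt comments.
//(Requires java.util.HashMap/Map and the demo's SpringBeans helper.)
RedisTemplate<String, Object> redisTemplate =
        SpringBeans.getBean("redisTemplate", RedisTemplate.class);

Map<String, String> student = new HashMap<String, String>();
student.put("name", "zhangsan");
student.put("address", "hangzhou");

//MySpout pops from the left of the list, so push onto the right (FIFO order).
redisTemplate.opsForList().rightPush("student", student);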

