Storm集成 JDBC

  • 創建 maven 工程,pom 文件如下:

    <dependencies>
    	<dependency>
    		<groupId>org.apache.storm</groupId>
    		<artifactId>storm-core</artifactId>
    		<version>1.0.3</version>
    		<scope>provided</scope>
    	</dependency>
    	<!-- 與jdbc集成 -->
    	<dependency>
    		<groupId>org.apache.storm</groupId>
    		<artifactId>storm-jdbc</artifactId>
    		<version>1.0.3</version>
    		<!-- 注意:storm-jdbc 並不隨 Storm 發行版提供,不能標記為 provided,
    		     否則提交到集羣時會拋出 ClassNotFoundException。
    		     它(以及 mysql-connector-java)必須打包進拓撲的 jar 中。 -->
    	</dependency>
    	<dependency>
    		<groupId>mysql</groupId>
    		<artifactId>mysql-connector-java</artifactId>
    		<version>5.1.43</version>
    	</dependency>
    </dependencies>
    
  • Spout 任務代碼如下:

    package storm;
    
    import java.util.Map;
    import java.util.Random;
    
    import org.apache.storm.spout.SpoutOutputCollector;
    import org.apache.storm.task.TopologyContext;
    import org.apache.storm.topology.OutputFieldsDeclarer;
    import org.apache.storm.topology.base.BaseRichSpout;
    import org.apache.storm.tuple.Fields;
    import org.apache.storm.tuple.Values;
    import org.apache.storm.utils.Utils;
    
    public class WordCountSpout extends BaseRichSpout {
    
    	private static final long serialVersionUID = 1571765705181254611L;
    
    	// 模擬數據
    	private String[] data = {"I love Beijing", "I love China", "Beijing is the capital of China"};
    	
    	// 用於往下一個組件發送消息
    	private SpoutOutputCollector collector;
    	
    	public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
    		this.collector = collector;
    	}
    
    	public void nextTuple() {
    		Utils.sleep(3000);
    		// 由Strom框架調用,用於接收外部數據源的數據
    		int random = (new Random()).nextInt(3);
    		String sentence = data[random];
    		
    		// 發送數據
    		System.out.println("發送數據:" + sentence);
    		this.collector.emit(new Values(sentence));
    	}
    
    	public void declareOutputFields(OutputFieldsDeclarer declarer) {
    		declarer.declare(new Fields("sentence"));
    	}
    }
    
  • 用於分詞的 Bolt 任務代碼如下:

    package storm;
    
    import java.util.Map;
    
    import org.apache.storm.task.OutputCollector;
    import org.apache.storm.task.TopologyContext;
    import org.apache.storm.topology.OutputFieldsDeclarer;
    import org.apache.storm.topology.base.BaseRichBolt;
    import org.apache.storm.tuple.Fields;
    import org.apache.storm.tuple.Tuple;
    import org.apache.storm.tuple.Values;
    
    public class WordCountSplitBolt extends BaseRichBolt {
    
    	private static final long serialVersionUID = -7399165475264468561L;
    
    	private OutputCollector collector;
    	
    	public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
    		this.collector = collector;
    	}
    
    	public void execute(Tuple tuple) {
    		String sentence = tuple.getStringByField("sentence");
    		// 分詞
    		String[] words = sentence.split(" ");
    		for (String word : words) {
    			this.collector.emit(new Values(word, 1));
    		}
    	}
    
    	public void declareOutputFields(OutputFieldsDeclarer declarer) {
    		declarer.declare(new Fields("word", "count"));
    	}
    }
    
  • 用於計數的 Bolt 任務:

    package storm;
    
    import java.util.HashMap;
    import java.util.Map;
    
    import org.apache.storm.task.OutputCollector;
    import org.apache.storm.task.TopologyContext;
    import org.apache.storm.topology.OutputFieldsDeclarer;
    import org.apache.storm.topology.base.BaseRichBolt;
    import org.apache.storm.tuple.Fields;
    import org.apache.storm.tuple.Tuple;
    import org.apache.storm.tuple.Values;
    
    public class WordCountBoltCount extends BaseRichBolt {
    
    	private static final long serialVersionUID = -3206516572376524950L;
    
    	private OutputCollector collector;
    	
    	private Map<String, Integer> result = new HashMap<String, Integer>();
    	
    	public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
    		this.collector = collector;
    	}
    
    	public void execute(Tuple tuple) {
    		String word = tuple.getStringByField("word");
    		int count = tuple.getIntegerByField("count");
    		
    		if (result.containsKey(word)) {
    			result.put(word, result.get(word) + count);
    		} else {
    			result.put(word, 1);
    		}
    		// 直接輸出到屏幕
    		System.out.println("輸出的結果是:" + result);
    		
    		// 將統計結果插入到數據庫中
    		this.collector.emit(new Values(word, result.get(word)));
    	}
    
    	public void declareOutputFields(OutputFieldsDeclarer declarer) {
    		declarer.declare(new Fields("word", "total"));
    	}
    }
    
  • 用於連接的 ConnectionProvider 的代碼如下:

    package jdbc;
    
    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.SQLException;
    
    import org.apache.storm.jdbc.common.ConnectionProvider;
    
    public class MyConnectionProvider implements ConnectionProvider {
    
    	private static final long serialVersionUID = -4784999115987415445L;
    
    	// NOTE(review): credentials are hard-coded for this demo; move them to
    	// external configuration before any real deployment.
    	private static String driver = "com.mysql.jdbc.Driver";
    	
    	private static String url = "jdbc:mysql://qujianlei:3306/storm";
    	
    	private static String user = "root";
    	
    	private static String password = "123";
    	
    	static {
    		try {
    			Class.forName(driver);
    		} catch (ClassNotFoundException e) {
    			// Fail fast: the original only printed the stack trace, leaving a
    			// half-initialized provider whose every getConnection() would fail.
    			throw new ExceptionInInitializerError(e);
    		}
    	}
    
    	/**
    	 * Opens a new JDBC connection. Throws on failure instead of returning
    	 * null (the original swallowed the SQLException and returned null,
    	 * which surfaced later as an unrelated NullPointerException in the bolt).
    	 */
    	public Connection getConnection() {
    		try {
    			return DriverManager.getConnection(url, user, password);
    		} catch (SQLException e) {
    			throw new RuntimeException("Failed to connect to " + url, e);
    		}
    	}
    
    	public void prepare() {
    		// No pooled resources to initialize; connections are created on demand.
    	}
    	
    	public void cleanup() {
    		// No pooled resources to release.
    	}
    }
    
  • 獲取 JdbcBolt 的工具類如下:

    package jdbc;
    
    import org.apache.storm.jdbc.bolt.JdbcInsertBolt;
    import org.apache.storm.jdbc.common.ConnectionProvider;
    import org.apache.storm.jdbc.mapper.JdbcMapper;
    import org.apache.storm.jdbc.mapper.SimpleJdbcMapper;
    import org.apache.storm.topology.IRichBolt;
    
    public class JdbcBoltUtils {
    
    	// Target table; must contain columns matching the emitted fields (word, total).
    	private static final String TABLE_NAME = "result";
    
    	// Utility class: not meant to be instantiated.
    	private JdbcBoltUtils() {
    	}
    
    	/**
    	 * Builds a JdbcInsertBolt that inserts each incoming (word, total)
    	 * tuple into the "result" table via MyConnectionProvider. The
    	 * SimpleJdbcMapper reads the table's column metadata to map tuple
    	 * fields to columns by name.
    	 */
    	public static IRichBolt createJDBCBolt() {
    		ConnectionProvider connectionProvider = new MyConnectionProvider();
    		JdbcMapper simpleJdbcMapper = new SimpleJdbcMapper(TABLE_NAME, connectionProvider);
    		return new JdbcInsertBolt(connectionProvider, simpleJdbcMapper)
    			.withTableName(TABLE_NAME)
    			.withQueryTimeoutSecs(30);
    	}
    }
    

    注:result 爲表的名字,共有兩個字段:word, total

  • Topology 的代碼如下:

    package storm;
    
    import org.apache.storm.Config;
    import org.apache.storm.LocalCluster;
    import org.apache.storm.generated.StormTopology;
    import org.apache.storm.topology.TopologyBuilder;
    import org.apache.storm.tuple.Fields;
    
    import jdbc.JdbcBoltUtils;
    
    public class WordCountTopology {
    
    	public static void main(String[] args) throws Exception {
    		TopologyBuilder builder = new TopologyBuilder();
    		
    		// 設置任務的spout組件
    		builder.setSpout("wordcount_spout", new WordCountSpout());
    		
    		// 設置任務的第一個bolt組件
    		builder.setBolt("wordcount_splitbolt", new WordCountSplitBolt()).
    			shuffleGrouping("wordcount_spout");
    		
    		// 設置任務的第二個bolt組件
    		builder.setBolt("wordcount_count", new WordCountBoltCount()).
    			fieldsGrouping("wordcount_splitbolt", new Fields("word"));
    		
    		// 設置任務的第三個bolt組件將數據持久化到mysql
    		builder.setBolt("wordcount_jdbcBolt", JdbcBoltUtils.createJDBCBolt()).
    			shuffleGrouping("wordcount_count");
    		
    		// 創建Topology任務
    		StormTopology wc = builder.createTopology();
    		
    		Config config = new Config();
    		
    		// 提交到本地運行
    		LocalCluster localCluster = new LocalCluster();
    		localCluster.submitTopology("mywordcount", config, wc);
    		
    		// 提交任務到Storm集羣運行
    //		StormSubmitter.submitTopology(args[0], config, wc);
    	}
    }
    
  • 右擊,運行即可(注:Eclipse 要以管理員身份啓動)。

關注我的微信公衆號(曲健磊的個人隨筆),觀看更多精彩內容:
在這裏插入圖片描述

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章