-
創建 Maven 工程,pom.xml 文件中的依賴配置如下:
<dependencies>
    <!-- storm-core is supplied by the Storm cluster at runtime, so "provided" is correct here. -->
    <dependency>
        <groupId>org.apache.storm</groupId>
        <artifactId>storm-core</artifactId>
        <version>1.0.3</version>
        <scope>provided</scope>
    </dependency>
    <!-- JDBC integration. NOTE: storm-jdbc is NOT provided by the cluster; it must be
         packaged into the topology jar, so it uses the default (compile) scope.
         With <scope>provided</scope> the topology would fail at runtime with
         ClassNotFoundException for the storm-jdbc classes. -->
    <dependency>
        <groupId>org.apache.storm</groupId>
        <artifactId>storm-jdbc</artifactId>
        <version>1.0.3</version>
    </dependency>
    <!-- MySQL driver must also ship with the topology jar (compile scope). -->
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>5.1.43</version>
    </dependency>
</dependencies>
-
Spout 任務代碼如下:
package storm;

import java.util.Map;
import java.util.Random;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.Utils;

/**
 * Spout that simulates an external data source: every three seconds it picks
 * one of three fixed sentences at random and emits it on the "sentence" field.
 */
public class WordCountSpout extends BaseRichSpout {

    private static final long serialVersionUID = 1571765705181254611L;

    // Simulated input data.
    private String[] sentences = {"I love Beijing", "I love China", "Beijing is the capital of China"};

    // Collector used to send tuples to the downstream component.
    private SpoutOutputCollector collector;

    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        this.collector = collector;
    }

    @Override
    public void nextTuple() {
        // Called repeatedly by the Storm framework; sleep to throttle emission to one tuple per 3s.
        Utils.sleep(3000);
        int index = new Random().nextInt(3);
        String sentence = sentences[index];
        System.out.println("發送數據:" + sentence);
        this.collector.emit(new Values(sentence));
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("sentence"));
    }
}
-
用於分詞的 Bolt 任務代碼如下:
package storm;

import java.util.Map;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

/**
 * Bolt that splits each incoming sentence on single spaces and emits one
 * ("word", 1) pair per word for the downstream counting bolt.
 */
public class WordCountSplitBolt extends BaseRichBolt {

    private static final long serialVersionUID = -7399165475264468561L;

    // Collector used to send (word, count) pairs downstream.
    private OutputCollector collector;

    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
    }

    @Override
    public void execute(Tuple tuple) {
        String sentence = tuple.getStringByField("sentence");
        // Tokenize on a single space; each token is emitted with an initial count of 1.
        for (String token : sentence.split(" ")) {
            this.collector.emit(new Values(token, 1));
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word", "count"));
    }
}
-
用於計數的 Bolt 任務:
package storm;

import java.util.HashMap;
import java.util.Map;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

/**
 * Bolt that keeps a running total per word and emits ("word", total) so the
 * downstream JDBC bolt can persist the latest count. State is per-task and
 * in-memory only; fieldsGrouping on "word" guarantees each word always lands
 * on the same task.
 */
public class WordCountBoltCount extends BaseRichBolt {

    private static final long serialVersionUID = -3206516572376524950L;

    // Collector used to forward the updated totals downstream.
    private OutputCollector collector;

    // Running word totals for this task (word -> cumulative count).
    private Map<String, Integer> result = new HashMap<String, Integer>();

    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
    }

    @Override
    public void execute(Tuple tuple) {
        String word = tuple.getStringByField("word");
        int count = tuple.getIntegerByField("count");
        // Bug fix: the original seeded new words with the literal 1 instead of the
        // incoming count, which is silently wrong if upstream ever emits count != 1.
        // Map.merge handles both the first occurrence and the accumulation case.
        int total = result.merge(word, count, Integer::sum);
        // Print the running state to the console for demo purposes.
        System.out.println("輸出的結果是:" + result);
        // Forward the updated total so the JDBC bolt can persist it.
        this.collector.emit(new Values(word, total));
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word", "total"));
    }
}
-
用於連接的 ConnectionProvider 的代碼如下:
package jdbc;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import org.apache.storm.jdbc.common.ConnectionProvider;

/**
 * ConnectionProvider that opens a new MySQL connection per call via
 * DriverManager. Credentials are hard-coded for this demo; in production
 * they should come from configuration, and a pooled provider (e.g. the
 * built-in HikariCPConnectionProvider) is preferable.
 */
public class MyConnectionProvider implements ConnectionProvider {

    private static final long serialVersionUID = -4784999115987415445L;

    private static String driver = "com.mysql.jdbc.Driver";
    private static String url = "jdbc:mysql://qujianlei:3306/storm";
    private static String user = "root";
    private static String password = "123";

    static {
        try {
            // Explicit registration kept for pre-JDBC-4 drivers.
            Class.forName(driver);
        } catch (ClassNotFoundException e) {
            // Fail fast (as ExceptionInInitializerError) instead of swallowing the
            // error and letting every later getConnection() mysteriously fail.
            throw new RuntimeException("MySQL JDBC driver not on classpath: " + driver, e);
        }
    }

    /**
     * Opens and returns a new connection.
     *
     * @throws RuntimeException wrapping the SQLException on failure — the
     *         original returned null here, which guaranteed a NullPointerException
     *         deep inside JdbcInsertBolt instead of a diagnosable error.
     */
    public Connection getConnection() {
        try {
            return DriverManager.getConnection(url, user, password);
        } catch (SQLException e) {
            throw new RuntimeException("Failed to obtain JDBC connection to " + url, e);
        }
    }

    public void prepare() {
        // No pooling: nothing to initialize.
    }

    public void cleanup() {
        // No pooling: nothing to release.
    }
}
-
獲取 JdbcBolt 的工具類如下:
package jdbc;

import org.apache.storm.jdbc.bolt.JdbcInsertBolt;
import org.apache.storm.jdbc.common.ConnectionProvider;
import org.apache.storm.jdbc.mapper.JdbcMapper;
import org.apache.storm.jdbc.mapper.SimpleJdbcMapper;
import org.apache.storm.topology.IRichBolt;

/**
 * Factory for the JDBC persistence bolt. SimpleJdbcMapper reads the "result"
 * table's column metadata (word, total) and maps tuple fields to columns by name,
 * so the upstream bolt must declare exactly those field names.
 */
public final class JdbcBoltUtils {

    // Utility class: prevent instantiation.
    private JdbcBoltUtils() {
    }

    /**
     * Builds a JdbcInsertBolt that inserts each incoming tuple into the
     * "result" table, with a 30-second query timeout.
     */
    public static IRichBolt createJDBCBolt() {
        ConnectionProvider connectionProvider = new MyConnectionProvider();
        JdbcMapper simpleJdbcMapper = new SimpleJdbcMapper("result", connectionProvider);
        return new JdbcInsertBolt(connectionProvider, simpleJdbcMapper)
                .withTableName("result")
                .withQueryTimeoutSecs(30);
    }
}
注:result 爲表的名字,共有兩個字段:word, total
-
Topology 的代碼如下:
package storm; import org.apache.storm.Config; import org.apache.storm.LocalCluster; import org.apache.storm.generated.StormTopology; import org.apache.storm.topology.TopologyBuilder; import org.apache.storm.tuple.Fields; import jdbc.JdbcBoltUtils; public class WordCountTopology { public static void main(String[] args) throws Exception { TopologyBuilder builder = new TopologyBuilder(); // 設置任務的spout組件 builder.setSpout("wordcount_spout", new WordCountSpout()); // 設置任務的第一個bolt組件 builder.setBolt("wordcount_splitbolt", new WordCountSplitBolt()). shuffleGrouping("wordcount_spout"); // 設置任務的第二個bolt組件 builder.setBolt("wordcount_count", new WordCountBoltCount()). fieldsGrouping("wordcount_splitbolt", new Fields("word")); // 設置任務的第三個bolt組件將數據持久化到mysql builder.setBolt("wordcount_jdbcBolt", JdbcBoltUtils.createJDBCBolt()). shuffleGrouping("wordcount_count"); // 創建Topology任務 StormTopology wc = builder.createTopology(); Config config = new Config(); // 提交到本地運行 LocalCluster localCluster = new LocalCluster(); localCluster.submitTopology("mywordcount", config, wc); // 提交任務到Storm集羣運行 // StormSubmitter.submitTopology(args[0], config, wc); } }
-
右擊,運行即可(注:Eclipse 要以管理員身份啓動)。
關注我的微信公衆號(曲健磊的個人隨筆),觀看更多精彩內容: