The code follows:
/**
* storm集成Kafka、Hive、JDBC、HBase、HDFS
* Created by sker on 17-11-13
* kafka集成storm,將數據發到JobBolt做中文分詞邏輯;
* 結果發到不同bolt,然後分別存入hive、hbase、mysql和hdfs
*/
public class SegGoGo {
public static void main(String[] args) {
//創建一個TopologyBuilder實例
TopologyBuilder topologyBuilder = new TopologyBuilder();
LocalCluster localCluster = new LocalCluster();
Config conf = new Config();
/**
* 以下是kafka到storm的邏輯
*/
//kafka與storm集成需要一個zkHost和一個SpoutConfig
ZkHosts zkHosts = new ZkHosts("localhost:2181");
SpoutConfig spoutConfig = new SpoutConfig(zkHosts, "hbase", "/storm", "kafka");
/**
* 以下代碼要做的是storm與HDFS集成
*/
//kafka與HDFS集成需要一個HDFSBolt,並進行相應參數的設定
HdfsBolt hdfsBolt = new HdfsBolt()
.withFsUrl("hdfs://localhost:9000/")//設置hdfs的url
.withRecordFormat(new DelimitedRecordFormat().withFieldDelimiter(","))//設置文件分割符
.withSyncPolicy(new CountSyncPolicy(10))//同步政策
.withFileNameFormat(new DefaultFileNameFormat().withPath("/test"))//文件命名格式,參數中設置了文件路徑
.withRotationPolicy(new FileSizeRotationPolicy(1.0f, FileSizeRotationPolicy.Units.KB));//設置滾動生成文件的參數,此處爲1k生成一個文件
/**
* 以下代碼要做的是storm與hbase集成
*/
//storm與hbase集成
Config config = new Config();
Map<String, Object> hbConf = new HashMap<String, Object>();
hbConf.put("hbase.rootdir","hdfs://localhost:9000/sbsbsbs/hbase/");
hbConf.put("hbase.zookeeper.quorum", "localhost:2181");
config.put("hbase.conf", hbConf);
SimpleHBaseMapper simpleHBaseMapper = new SimpleHBaseMapper()
.withColumnFamily("cf")
.withColumnFields(new Fields("word","count"))
.withRowKeyField("word");
HBaseBolt hBaseBolt = new HBaseBolt("demo",simpleHBaseMapper)
.withConfigKey("hbase.conf");
/**
* 以下代碼要做的是storm與JDBC集成
*/
Map hikariConfigMap = Maps.newHashMap();
hikariConfigMap.put("dataSourceClassName","com.mysql.jdbc.jdbc2.optional.MysqlDataSource");
hikariConfigMap.put("dataSource.url", "jdbc:mysql://localhost/test?useunicode=true&characterencoding=utf-8");
hikariConfigMap.put("dataSource.user","root");
hikariConfigMap.put("dataSource.password","1327");
ConnectionProvider connectionProvider = new HikariCPConnectionProvider(hikariConfigMap);
String tableName = "seg";
JdbcMapper simpleJdbcMapper = new SimpleJdbcMapper(tableName, connectionProvider);
JdbcInsertBolt insertBolt = new JdbcInsertBolt(connectionProvider, simpleJdbcMapper)
// .withTableName("seg") //沒卵用
.withInsertQuery("insert into seg values (?,?)")
.withQueryTimeoutSecs(30);
JdbcInsertBolt selectBolt = new JdbcInsertBolt(connectionProvider, simpleJdbcMapper)
.withInsertQuery("select word,count(word) from seg group by word")
.withQueryTimeoutSecs(30);
/**
* 構建拓撲
*/
//kafka到storm的spout,構建拓撲的第一步
topologyBuilder.setSpout("kafka", new KafkaSpout(spoutConfig));
//數據進入JobBolt做中文分詞處理
topologyBuilder.setBolt("document",new JobBolt.GetDocument()).shuffleGrouping("kafka");
topologyBuilder.setBolt("wordCount",new JobBolt.StringToWordCount()).shuffleGrouping("document");
//數據插入mysql
topologyBuilder.setBolt("jdbc_insert",insertBolt).shuffleGrouping("wordCount");
//查詢mysql
topologyBuilder.setBolt("jdbc_select",selectBolt).shuffleGrouping("jdbc_insert");
//數據存入HDFS
topologyBuilder.setBolt("hdfs",hdfsBolt).shuffleGrouping("jdbc_select");
//數據存入HBase
topologyBuilder.setBolt("hbase",hBaseBolt).shuffleGrouping("wordCount");
localCluster.submitTopology("SegGoGo",config,topologyBuilder.createTopology());
Storm–Hive integration
/**
* storm和hive集成比較麻煩,不適合word_segmentation包裏一起做聯合測試,
* 而且storm和HDFS集成很簡單,可以直接storm-hdfs然後load到hive表
*
*storm跟hive集成,需要修改hive配置,包括開啓自動分區、設置metadate的uris、設置jdbc以及開啓hive.in.test(參考文件爲同包下hive-site.xml);
* 確保實際環境的hive版本和代碼中的jar包版本一致;確保metadate和hiveserver2開啓
*
* 本測試的hive建表語句
* create table demo (id int,name string,sex string) partitioned by (age int) clustered by (id) into 3 buckets stored as orc tblproperties ("orc.compress"="NONE",'transactional'='true');
*
* storm-hive集成真的很煩,稍不注意就會失敗,而且調錯更煩,有興趣的可以自己測試,希望你能成功,哈哈
*/
public class Storm2Hive {
static class Storm_Hive_Spout extends BaseRichSpout {
SpoutOutputCollector spoutOutputCollector;
String[] name = {"aa","bb","cc","dd","ee","ff","gg","hh"};
String[] sex = {"man","woman"};
int[] id = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
Random random = new Random();
public void open(Map map, TopologyContext topologyContext, SpoutOutputCollector spoutOutputCollector) {
this.spoutOutputCollector=spoutOutputCollector;
}
public void nextTuple() {
Utils.sleep(1000);
String s = name[random.nextInt(name.length)];
String sex1 = sex[random.nextInt(sex.length)];
int id1 = id[random.nextInt(id.length)];
spoutOutputCollector.emit(new Values(id1,s,sex1,"18"));
System.out.println(""+id1+":"+s+":"+sex1);
}
public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
outputFieldsDeclarer.declare(new Fields("id","name","sex","age"));
}
}
public static void main(String[] args) {
DelimitedRecordHiveMapper delimitedRecordHiveMapper = new DelimitedRecordHiveMapper();//映射字段,spout那邊發來的
delimitedRecordHiveMapper.withColumnFields(new Fields("id","name","sex"))
.withPartitionFields(new Fields("age"));
HiveOptions hiveOptions = new HiveOptions("thrift://localhost:9083","default","demo",delimitedRecordHiveMapper);
hiveOptions.withTxnsPerBatch(10)
.withBatchSize(20)
.withIdleTimeout(10);
HiveBolt hiveBolt = new HiveBolt(hiveOptions);
TopologyBuilder topologyBuilder = new TopologyBuilder();
topologyBuilder.setSpout("spout",new Storm_Hive_Spout());
topologyBuilder.setBolt("bolt",hiveBolt).shuffleGrouping("spout");
LocalCluster localCluster = new LocalCluster();
localCluster.submitTopology("go",new Config(),topologyBuilder.createTopology());
Full source code on GitHub: https://github.com/ZzzzZzreal/StormGoGo/tree/master/src/main/java
Tags: Storm integration with Hive
Tags: Storm integration with HBase, JDBC, Kafka, HDFS