The code follows:
/**
* storm集成Kafka、Hive、JDBC、HBase、HDFS
* Created by sker on 17-11-13
* kafka集成storm,將數據發到JobBolt做中文分詞邏輯;
* 結果發到不同bolt,然後分別存入hive、hbase、mysql和hdfs
*/
public class SegGoGo {
public static void main(String[] args) {
//創建一個TopologyBuilder實例
TopologyBuilder topologyBuilder = new TopologyBuilder();
LocalCluster localCluster = new LocalCluster();
Config conf = new Config();
/**
* 以下是kafka到storm的邏輯
*/
//kafka與storm集成需要一個zkHost和一個SpoutConfig
ZkHosts zkHosts = new ZkHosts("localhost:2181");
SpoutConfig spoutConfig = new SpoutConfig(zkHosts, "hbase", "/storm", "kafka");
/**
* 以下代碼要做的是storm與HDFS集成
*/
//kafka與HDFS集成需要一個HDFSBolt,並進行相應參數的設定
HdfsBolt hdfsBolt = new HdfsBolt()
.withFsUrl("hdfs://localhost:9000/")//設置hdfs的url
.withRecordFormat(new DelimitedRecordFormat().withFieldDelimiter(","))//設置文件分割符
.withSyncPolicy(new CountSyncPolicy(10))//同步政策
.withFileNameFormat(new DefaultFileNameFormat().withPath("/test"))//文件命名格式,參數中設置了文件路徑
.withRotationPolicy(new FileSizeRotationPolicy(1.0f, FileSizeRotationPolicy.Units.KB));//設置滾動生成文件的參數,此處爲1k生成一個文件
/**
* 以下代碼要做的是storm與hbase集成
*/
//storm與hbase集成
Config config = new Config();
Map<String, Object> hbConf = new HashMap<String, Object>();
hbConf.put("hbase.rootdir","hdfs://localhost:9000/sbsbsbs/hbase/");
hbConf.put("hbase.zookeeper.quorum", "localhost:2181");
config.put("hbase.conf", hbConf);
SimpleHBaseMapper simpleHBaseMapper = new SimpleHBaseMapper()
.withColumnFamily("cf")
.withColumnFields(new Fields("word","count"))
.withRowKeyField("word");
HBaseBolt hBaseBolt = new HBaseBolt("demo",simpleHBaseMapper)
.withConfigKey("hbase.conf");
/**
* 以下代碼要做的是storm與JDBC集成
*/
Map hikariConfigMap = Maps.newHashMap();
hikariConfigMap.put("dataSourceClassName","com.mysql.jdbc.jdbc2.optional.MysqlDataSource");
hikariConfigMap.put("dataSource.url", "jdbc:mysql://localhost/test?useunicode=true&characterencoding=utf-8");
hikariConfigMap.put("dataSource.user","root");
hikariConfigMap.put("dataSource.password","1327");
ConnectionProvider connectionProvider = new HikariCPConnectionProvider(hikariConfigMap);
String tableName = "seg";
JdbcMapper simpleJdbcMapper = new SimpleJdbcMapper(tableName, connectionProvider);
JdbcInsertBolt insertBolt = new JdbcInsertBolt(connectionProvider, simpleJdbcMapper)
// .withTableName("seg") //沒卵用
.withInsertQuery("insert into seg values (?,?)")
.withQueryTimeoutSecs(30);
JdbcInsertBolt selectBolt = new JdbcInsertBolt(connectionProvider, simpleJdbcMapper)
.withInsertQuery("select word,count(word) from seg group by word")
.withQueryTimeoutSecs(30);
/**
* 構建拓撲
*/
//kafka到storm的spout,構建拓撲的第一步
topologyBuilder.setSpout("kafka", new KafkaSpout(spoutConfig));
//數據進入JobBolt做中文分詞處理
topologyBuilder.setBolt("document",new JobBolt.GetDocument()).shuffleGrouping("kafka");
topologyBuilder.setBolt("wordCount",new JobBolt.StringToWordCount()).shuffleGrouping("document");
//數據插入mysql
topologyBuilder.setBolt("jdbc_insert",insertBolt).shuffleGrouping("wordCount");
//查詢mysql
topologyBuilder.setBolt("jdbc_select",selectBolt).shuffleGrouping("jdbc_insert");
//數據存入HDFS
topologyBuilder.setBolt("hdfs",hdfsBolt).shuffleGrouping("jdbc_select");
//數據存入HBase
topologyBuilder.setBolt("hbase",hBaseBolt).shuffleGrouping("wordCount");
localCluster.submitTopology("SegGoGo",config,topologyBuilder.createTopology());
Storm–Hive integration
/**
* storm和hive集成比較麻煩,不適合word_segmentation包裏一起做聯合測試,
* 而且storm和HDFS集成很簡單,可以直接storm-hdfs然後load到hive表
*
*storm跟hive集成,需要修改hive配置,包括開啓自動分區、設置metadate的uris、設置jdbc以及開啓hive.in.test(參考文件爲同包下hive-site.xml);
* 確保實際環境的hive版本和代碼中的jar包版本一致;確保metadate和hiveserver2開啓
*
* 本測試的hive建表語句
* create table demo (id int,name string,sex string) partitioned by (age int) clustered by (id) into 3 buckets stored as orc tblproperties ("orc.compress"="NONE",'transactional'='true');
*
* storm-hive集成真的很煩,稍不注意就會失敗,而且調錯更煩,有興趣的可以自己測試,希望你能成功,哈哈
*/
public class Storm2Hive {
static class Storm_Hive_Spout extends BaseRichSpout {
SpoutOutputCollector spoutOutputCollector;
String[] name = {"aa","bb","cc","dd","ee","ff","gg","hh"};
String[] sex = {"man","woman"};
int[] id = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
Random random = new Random();
public void open(Map map, TopologyContext topologyContext, SpoutOutputCollector spoutOutputCollector) {
this.spoutOutputCollector=spoutOutputCollector;
}
public void nextTuple() {
Utils.sleep(1000);
String s = name[random.nextInt(name.length)];
String sex1 = sex[random.nextInt(sex.length)];
int id1 = id[random.nextInt(id.length)];
spoutOutputCollector.emit(new Values(id1,s,sex1,"18"));
System.out.println(""+id1+":"+s+":"+sex1);
}
public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
outputFieldsDeclarer.declare(new Fields("id","name","sex","age"));
}
}
public static void main(String[] args) {
DelimitedRecordHiveMapper delimitedRecordHiveMapper = new DelimitedRecordHiveMapper();//映射字段,spout那邊發來的
delimitedRecordHiveMapper.withColumnFields(new Fields("id","name","sex"))
.withPartitionFields(new Fields("age"));
HiveOptions hiveOptions = new HiveOptions("thrift://localhost:9083","default","demo",delimitedRecordHiveMapper);
hiveOptions.withTxnsPerBatch(10)
.withBatchSize(20)
.withIdleTimeout(10);
HiveBolt hiveBolt = new HiveBolt(hiveOptions);
TopologyBuilder topologyBuilder = new TopologyBuilder();
topologyBuilder.setSpout("spout",new Storm_Hive_Spout());
topologyBuilder.setBolt("bolt",hiveBolt).shuffleGrouping("spout");
LocalCluster localCluster = new LocalCluster();
localCluster.submitTopology("go",new Config(),topologyBuilder.createTopology());
Full source code on GitHub: https://github.com/ZzzzZzreal/StormGoGo/tree/master/src/main/java
Tags: Storm integration with Hive
Tags: Storm integration with HBase, JDBC, Kafka, HDFS