Hands-On Project: Flink E-Commerce Real-Time Analysis (Part 5)

Contents

HBase Setup

HBase Utility Class

Adding PV (Page Views) and UV (Unique Visitors) Logic

Modifying the Map Logic

Modifying the Reduce Logic

Summary


HBase Setup

Download hbase-1.0.0-cdh5.5.1.tar.gz, extract it, and modify the configuration files.

Modify the Java environment variable in conf/hbase-env.sh (point JAVA_HOME at the local JDK installation).

Modify conf/hbase-site.xml:

<configuration>
  <property>
    <name>hbase.rootdir</name>
    <value>hdfs://master:9000/hbase</value>
  </property>
  <property>
    <name>hbase.cluster.distributed</name>
    <value>true</value>
  </property>
  <property>
    <name>hbase.zookeeper.quorum</name>
    <value>master</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
</configuration>

 

Modify conf/regionservers

Change it to master.

Start HBase with bin/start-hbase.sh.

Open the HBase web UI in a browser to verify that the master and RegionServer are up.

With that, the storage layer is in place; the setup went fairly smoothly.

HBase Utility Class

Add the required dependencies:

        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.2.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>1.0.0-cdh5.5.1</version>
        </dependency>

 

Create the table in HBase.

Add the HbaseUtil utility class; the full version is in the GitHub repository linked at the end of this post.
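Purely as a rough sketch of what such a helper can look like with the HBase 1.x client API, it might be structured as below. The method names, the shared static connection, and the quorum value master (taken from hbase-site.xml above) are illustrative assumptions, not necessarily what the repository's HbaseUtil does.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;

//illustrative sketch of an HBase helper, not the repository's actual HbaseUtil
public class HbaseUtil {

    private static Connection connection;

    static {
        try {
            //"master" matches hbase.zookeeper.quorum in hbase-site.xml above
            Configuration conf = HBaseConfiguration.create();
            conf.set("hbase.zookeeper.quorum", "master");
            connection = ConnectionFactory.createConnection(conf);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    //create a table with a single column family if it does not exist yet
    public static void createTable(String tableName, String family) throws Exception {
        Admin admin = connection.getAdmin();
        TableName tn = TableName.valueOf(tableName);
        if (!admin.tableExists(tn)) {
            HTableDescriptor desc = new HTableDescriptor(tn);
            desc.addFamily(new HColumnDescriptor(family));
            admin.createTable(desc);
        }
        admin.close();
    }

    //write one cell
    public static void put(String tableName, String rowKey, String family, String qualifier, String value) throws Exception {
        Table table = connection.getTable(TableName.valueOf(tableName));
        Put put = new Put(Bytes.toBytes(rowKey));
        put.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier), Bytes.toBytes(value));
        table.put(put);
        table.close();
    }

    //read one cell, returning null when the row or column is absent
    public static String get(String tableName, String rowKey, String family, String qualifier) throws Exception {
        Table table = connection.getTable(TableName.valueOf(tableName));
        Result result = table.get(new Get(Bytes.toBytes(rowKey)));
        byte[] bytes = result.getValue(Bytes.toBytes(family), Bytes.toBytes(qualifier));
        table.close();
        return bytes == null ? null : Bytes.toString(bytes);
    }
}

With a helper like this, creating the result table is a single call such as HbaseUtil.createTable("pidaopvuv", "info"); the table and family names here are placeholders.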

 

 

Adding PV (Page Views) and UV (Unique Visitors) Logic

Modifying the Map Logic

public class PindaopvuvMap implements FlatMapFunction<KafkaMessage, PidaoPvUv> {

    @Override
    public void flatMap(KafkaMessage value, Collector<PidaoPvUv> out) throws Exception {
        String jsonstring = value.getJsonmessage();
        long timestamp = value.getTimestamp();
        String hourtimestamp = DateUtil.getDateby(timestamp, "yyyyMMddhh");  //hour bucket
        String daytimestamp = DateUtil.getDateby(timestamp, "yyyyMMdd");     //day bucket
        String monthtimestamp = DateUtil.getDateby(timestamp, "yyyyMM");     //month bucket

        UserscanLog userscanLog = JSON.parseObject(jsonstring, UserscanLog.class);
        long pingdaoid = userscanLog.getPingdaoid();
        long userid = userscanLog.getUserid();
        //fetch the user's visit state from HBase: first visit in the current hour/day/month?
        UserState userState = PdvisterDao.getUserSatebyvistertime(userid + "", timestamp);
        boolean isFirsthour = userState.isFisrthour();
        boolean isFisrtday = userState.isFisrtday();
        boolean isFisrtmonth = userState.isFisrtmonth();

        //emit one record per time granularity; UV counts 1 only for the first visit in that bucket
        PidaoPvUv pidaoPvUv = new PidaoPvUv();
        pidaoPvUv.setPingdaoid(pingdaoid);
        pidaoPvUv.setUserid(userid);
        pidaoPvUv.setPvcount(Long.valueOf(value.getCount() + ""));
        pidaoPvUv.setUvcount(isFirsthour ? 1L : 0L);
        pidaoPvUv.setTimestamp(timestamp);
        pidaoPvUv.setTimestring(hourtimestamp);
        pidaoPvUv.setGroupbyfield(hourtimestamp + pingdaoid);
        out.collect(pidaoPvUv);
        System.out.println("hour==" + pidaoPvUv);

        //day
        pidaoPvUv.setUvcount(isFisrtday ? 1L : 0L);
        pidaoPvUv.setGroupbyfield(daytimestamp + pingdaoid);
        pidaoPvUv.setTimestring(daytimestamp);
        out.collect(pidaoPvUv);
        System.out.println("day==" + pidaoPvUv);

        //month
        pidaoPvUv.setUvcount(isFisrtmonth ? 1L : 0L);
        pidaoPvUv.setGroupbyfield(monthtimestamp + pingdaoid);
        pidaoPvUv.setTimestring(monthtimestamp);
        out.collect(pidaoPvUv);
        System.out.println("month==" + pidaoPvUv);
    }
}
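The piece that matters most above is PdvisterDao.getUserSatebyvistertime, which decides whether the current event is the user's first visit in the current hour, day, and month; its real implementation lives in the GitHub repository. Purely to illustrate the idea, a sketch could keep each user's last visit timestamp in HBase and compare the time buckets of the previous and current visits. The table name userstate, the column names, the UserState setters, and the reuse of the HbaseUtil sketch above are all assumptions for illustration.

public class PdvisterDao {

    //hypothetical table and column family names, for illustration only
    private static final String TABLE = "userstate";
    private static final String FAMILY = "info";

    public static UserState getUserSatebyvistertime(String userid, long timestamp) throws Exception {
        UserState state = new UserState();
        String lastVisit = HbaseUtil.get(TABLE, userid, FAMILY, "lastvisit");
        if (lastVisit == null) {
            //user never seen before: first visit of the hour, day, and month
            state.setFisrthour(true);
            state.setFisrtday(true);
            state.setFisrtmonth(true);
        } else {
            long last = Long.parseLong(lastVisit);
            //first visit in a bucket when the previous visit falls into a different bucket
            state.setFisrthour(!DateUtil.getDateby(last, "yyyyMMddhh").equals(DateUtil.getDateby(timestamp, "yyyyMMddhh")));
            state.setFisrtday(!DateUtil.getDateby(last, "yyyyMMdd").equals(DateUtil.getDateby(timestamp, "yyyyMMdd")));
            state.setFisrtmonth(!DateUtil.getDateby(last, "yyyyMM").equals(DateUtil.getDateby(timestamp, "yyyyMM")));
        }
        //remember this visit for the next lookup
        HbaseUtil.put(TABLE, userid, FAMILY, "lastvisit", String.valueOf(timestamp));
        return state;
    }
}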

Modifying the Reduce Logic

public class PindaopvuvReduce implements ReduceFunction<PidaoPvUv> {

    @Override
    public PidaoPvUv reduce(PidaoPvUv value1, PidaoPvUv value2) throws Exception {
        System.out.println("value1==" + value1);
        System.out.println("value2==" + value2);
        long pingdaoid = value1.getPingdaoid();
        long timestampvalue = value1.getTimestamp();
        String timestring = value1.getTimestring();
        long pvcountvalue1 = value1.getPvcount();
        long uvcountvalue1 = value1.getUvcount();

        long pvcountvalue2 = value2.getPvcount();
        long uvcountvalue2 = value2.getUvcount();

        //accumulate PV and UV for the same channel and time bucket
        PidaoPvUv pidaoPvUv = new PidaoPvUv();
        pidaoPvUv.setPingdaoid(pingdaoid);
        pidaoPvUv.setTimestamp(timestampvalue);
        pidaoPvUv.setTimestring(timestring);
        pidaoPvUv.setPvcount(pvcountvalue1 + pvcountvalue2);
        pidaoPvUv.setUvcount(uvcountvalue1 + uvcountvalue2);
        System.out.println("reduce --pidaoPvUv==" + pidaoPvUv);
        return pidaoPvUv;
    }
}

Local test class:

public static void main(String[] args) {
        System.setProperty("hadoop.home.dir","D:\\soft\\hadoop-2.6.0-cdh5.5.1\\hadoop_dll2.6.0");
        args = new String[]{"--input-topic","test1","--bootstrap.servers","111.231.99.181:9092",
                "--zookeeper.connect","111.231.99.181:2181","--group.id","myconsumer1","--winsdows.size","50"};

        final ParameterTool parameterTool = ParameterTool.fromArgs(args);

        if (parameterTool.getNumberOfParameters() < 5) {
            System.out.println("Missing parameters!\n" +
                    "Usage: Kafka --input-topic <topic>" +
                    "--bootstrap.servers <kafka brokers> " +
                    "--zookeeper.connect <zk quorum> --group.id <some id>");
            return;
        }

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        env.getConfig().disableSysoutLogging();
        env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(4, 10000));
        env.enableCheckpointing(5000); // create a checkpoint every 5 seconds
        env.getConfig().setGlobalJobParameters(parameterTool); // make parameters available in the web interface
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);


        FlinkKafkaConsumer010<KafkaMessage> flinkKafkaConsumer = new FlinkKafkaConsumer010<>(
                parameterTool.getRequired("input-topic"), new KafkaMessageSchema(), parameterTool.getProperties());
        DataStream<KafkaMessage> input = env.addSource(flinkKafkaConsumer.assignTimestampsAndWatermarks(new KafkaMessageWatermarks()));
        DataStream<PidaoPvUv> map = input.flatMap(new PindaopvuvMap());
        DataStream<PidaoPvUv> reduce = map.keyBy("groupbyfield")
                .countWindow(Long.valueOf(parameterTool.getRequired("winsdows.size")))
                .reduce(new PindaopvuvReduce());
        reduce.print();
        try {
            env.execute("pindaossfx");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

 

The computed results will later be written to HBase; for now everything is tested locally, which requires the Hadoop support files pointed to by hadoop.home.dir in the code above (the hadoop_dll2.6.0 directory).
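For reference, when the results do get written to HBase later, one possible shape is a simple sink that reuses the HbaseUtil sketch from earlier. This is only a sketch under the assumption that a pidaopvuv table with column family info already exists; it is not the final implementation this series will use.

import org.apache.flink.streaming.api.functions.sink.SinkFunction;

//illustrative sketch only: writes each aggregated record to a hypothetical pidaopvuv table
public class PidaoPvUvHbaseSink implements SinkFunction<PidaoPvUv> {

    @Override
    public void invoke(PidaoPvUv value) throws Exception {
        //row key: time bucket plus channel id, mirroring the grouping field
        String rowKey = value.getTimestring() + value.getPingdaoid();
        HbaseUtil.put("pidaopvuv", rowKey, "info", "pvcount", String.valueOf(value.getPvcount()));
        HbaseUtil.put("pidaopvuv", rowKey, "info", "uvcount", String.valueOf(value.getUvcount()));
    }
}

It would be wired in with reduce.addSink(new PidaoPvUvHbaseSink()) in place of reduce.print().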

 

Summary

Yesterday's bug turned out to be caused by the changes to the hosts file.

Next, tests will run directly on the server. Because the server has limited memory, testing conditions are constrained for now, and the memory will be expanded later. Follow-up posts will add channel freshness analysis and channel-view regional distribution analysis.

The full code is in my Git repository below. All of the existing code has been tested and works; it will keep being updated until the project is finished. If any details are unclear, follow the official account and leave a message, and I will answer in detail.

Git repository: https://github.com/jyqjyq/filnkDS.git
