目錄
hive中建表
在hive中創建與業務數據一樣的表
CREATE TABLE cartinfo(userid string, productid string,num string,productamount string,createtime string,mechartid string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
...
sqoop
安裝sqoop來同步業務數據到hive
下載sqoop-1.4.6.bin__hadoop-2.0.4-alpha.tar.gz
解壓之後,將mysql驅動包放在sqoop的lib目錄下
執行如下命令同步數據
sqoop import --connect jdbc:mysql://master:3306/ds --username root --password 123456 --table cartinfo --fields-terminated-by '\t' --null-string '**' --target-dir /user/hive/warehouse/cartinfo/1 --hive-table cartinfo --m 1 --hive-import
...
可得如下結果:
同步數據完成。
flink batch實現產品成交分析
詳細代碼可參考文末github地址
map:
public class ProductanalyMap implements FlatMapFunction<String, ProductAnaly> {

    /**
     * Parses one JSON order line and emits a per-product, per-month record
     * marking the order as either completed (paid) or not completed (unpaid).
     *
     * @param value a JSON string deserializable into {@link OrderInfo}
     * @param out   collector receiving exactly one {@link ProductAnaly} per input line
     * @throws Exception if JSON parsing fails or a required field is missing
     */
    @Override
    public void flatMap(String value, Collector<ProductAnaly> out) throws Exception {
        OrderInfo orderInfo = JSONObject.parseObject(value, OrderInfo.class);
        long productid = orderInfo.getProductid();
        Date date = orderInfo.getCreatetime();
        // Bucket orders by creation month (yyyyMM).
        String timestring = DateUtil.getDateby(date.getTime(), "yyyyMM");
        Date paytime = orderInfo.getPaytime();
        long chengjiaocount = 0L; // completed (paid) order counter
        long weichegnjiao = 0L;   // not-completed (unpaid) order counter
        if (paytime != null) {
            // A non-null pay time means the order was completed.
            chengjiaocount = 1L;
        } else {
            // BUG FIX: original code assigned 0 here, so unpaid orders were
            // never counted; an unpaid order must contribute 1 to this counter.
            weichegnjiao = 1L;
        }
        ProductAnaly productAnaly = new ProductAnaly();
        productAnaly.setProductid(productid);
        productAnaly.setDateString(timestring);
        productAnaly.setChengjiaocount(chengjiaocount);
        productAnaly.setWeichegnjiao(weichegnjiao);
        // Composite grouping key: month + product id, used by groupBy("groupbyfield").
        productAnaly.setGroupbyfield(timestring + productid);
        out.collect(productAnaly);
    }
}
reduce:
public class ProductanalyReduce implements ReduceFunction<ProductAnaly> {

    /**
     * Merges two records belonging to the same (month, product) group by
     * summing their completed / not-completed counters.
     *
     * @param value1 first partial aggregate
     * @param value2 second partial aggregate
     * @return a new record carrying value1's key fields and the summed counters
     */
    @Override
    public ProductAnaly reduce(ProductAnaly value1, ProductAnaly value2) throws Exception {
        // Both inputs share the same group key, so the key fields can be
        // taken from either side.
        String datetime = value1.getDateString();
        long productid = value1.getProductid();
        long chengjiaovalue1 = value1.getChengjiaocount();
        long weichegnjiaovalue1 = value1.getWeichegnjiao();
        // BUG FIX: original code read these two values from value1 again,
        // which doubled the first element's counters and ignored value2.
        long chengjiaovalue2 = value2.getChengjiaocount();
        long weichegnjiaovalue2 = value2.getWeichegnjiao();
        ProductAnaly productAnaly = new ProductAnaly();
        productAnaly.setDateString(datetime);
        productAnaly.setProductid(productid);
        productAnaly.setChengjiaocount(chengjiaovalue1 + chengjiaovalue2);
        productAnaly.setWeichegnjiao(weichegnjiaovalue1 + weichegnjiaovalue2);
        return productAnaly;
    }
}
flink執行:
DataSet<String> text = env.readTextFile(params.get("input"));
// Parse each order line into a ProductAnaly record.
DataSet<ProductAnaly> map = text.flatMap(new ProductanalyMap());
// Sum the completed / not-completed counters per (month, product) key.
DataSet<ProductAnaly> reduce = map.groupBy("groupbyfield").reduce(new ProductanalyReduce());
try {
    // collect() triggers execution of the DataSet plan and pulls the
    // aggregated result back to the client.
    List<ProductAnaly> list = reduce.collect();
    for (ProductAnaly value : list) {
        long productid = value.getProductid();
        String datatime = value.getDateString();
        long chengjiaocount = value.getChengjiaocount();
        long weichengjiaocount = value.getWeichegnjiao();
        Map<String, String> datamap = new HashMap<String, String>();
        datamap.put("chengjiaocount", chengjiaocount + "");
        datamap.put("weichengjiaocount", weichengjiaocount + "");
        // Row key format: productId==yyyyMM, column family "info".
        HbaseUtil.put("pindaoinfo", productid + "==" + datatime, "info", datamap);
    }
    // BUG FIX: the original called env.execute("pindaossfx") here, but
    // collect() has already executed the job; a second execute() with no
    // new sinks throws "No new data sinks have been defined since the
    // last execution", so the call is removed.
} catch (Exception e) {
    // NOTE(review): printStackTrace keeps the tutorial simple; a real job
    // should log and fail instead of swallowing the error.
    e.printStackTrace();
}
總結
創建hive業務表。通過sqoop同步電商數據。flink batch實現產品成交分析。後續搭建接口服務,調用hive獲取數據;再搭建前端服務調用該接口。
具體代碼可參照我的git項目地址,現有代碼均已通過測試可以使用,後續會持續更新,直到項目結束,不懂的細節,可以關注公衆號:阿清的日常,後臺留言,會細緻解答。