第6章需求三:品牌復購率
6.2 DWS層
6.2.1 用戶購買商品明細表(寬表)
hive (gmall)>
-- DWS wide table: one row per (user_id, sku_id) per day, partitioned by dt.
-- Holds the daily purchase aggregates together with user and sku attributes.
-- NOTE(review): the location directory is named dws_user_sale_detail_daycount
-- while the table is dws_sale_detail_daycount - path kept unchanged, confirm it is intended.
-- NOTE(review): user_age, order_count and order_amount are declared as string
-- although the load query writes numeric values into them - types kept unchanged
-- so that existing readers are not affected.
drop table if exists dws_sale_detail_daycount;
create external table dws_sale_detail_daycount
(
    user_id             string         comment '用戶 id',
    sku_id              string         comment '商品 Id',
    user_gender         string         comment '用戶性別',
    user_age            string         comment '用戶年齡',
    user_level          string         comment '用戶等級',
    order_price         decimal(10,2)  comment '商品價格',
    sku_name            string         comment '商品名稱',
    sku_tm_id           string         comment '品牌id',
    sku_category3_id    string         comment '商品三級品類id',
    sku_category2_id    string         comment '商品二級品類id',
    sku_category1_id    string         comment '商品一級品類id',
    sku_category3_name  string         comment '商品三級品類名稱',
    sku_category2_name  string         comment '商品二級品類名稱',
    sku_category1_name  string         comment '商品一級品類名稱',
    spu_id              string         comment '商品 spu',
    sku_num             int            comment '購買個數',
    order_count         string         comment '當日下單單數',
    order_amount        string         comment '當日下單金額'
) COMMENT '用戶購買商品明細表'
PARTITIONED BY (dt string)
stored as parquet
location '/warehouse/gmall/dws/dws_user_sale_detail_daycount/'
tblproperties ("parquet.compression"="snappy");
6.2.2 數據導入
hive (gmall)>
-- Loads the 2019-02-10 partition of dws_sale_detail_daycount.
-- Step 1 (tmp_detail): aggregate dwd_order_detail per user and sku for the day -
--   total units bought, number of order-detail rows, and amount (price times units).
-- Step 2: attach user attributes from dwd_user_info and sku attributes from
--   dwd_sku_info for the same dt. Left joins keep purchases whose user or sku
--   row is missing from that day's snapshot.
-- The dt filters on u and s are in the ON clause on purpose: moving them to
-- WHERE would turn the left joins into inner joins.
-- age is months_between(load date, birthday) / 12, so it is a fractional number of years.
-- NOTE(review): price, sku_name, tm_id, category*_id/name and spu_id are unqualified -
-- presumably they come from dwd_sku_info (alias s), confirm against the table schemas.
with
tmp_detail as
(
    select
        od.user_id,
        od.sku_id,
        sum(od.sku_num) sku_num,
        count(*) order_count,
        sum(od.order_price * od.sku_num) order_amount
    from dwd_order_detail od
    where od.dt = '2019-02-10'
    group by od.user_id, od.sku_id
)
insert overwrite table dws_sale_detail_daycount partition(dt='2019-02-10')
select
    tmp_detail.user_id,
    tmp_detail.sku_id,
    u.gender,
    months_between('2019-02-10', u.birthday) / 12 age,
    u.user_level,
    price,
    sku_name,
    tm_id,
    category3_id,
    category2_id,
    category1_id,
    category3_name,
    category2_name,
    category1_name,
    spu_id,
    tmp_detail.sku_num,
    tmp_detail.order_count,
    tmp_detail.order_amount
from tmp_detail
left join dwd_user_info u
    on tmp_detail.user_id = u.id and u.dt = '2019-02-10'
left join dwd_sku_info s
    on tmp_detail.sku_id = s.id and s.dt = '2019-02-10'
;
6.2.3 數據導入腳本
1)在/home/atguigu/bin目錄下創建腳本dws_sale.sh
[atguigu@hadoop102 bin]$ vim dws_sale.sh
在腳本中填寫如下內容
#!/bin/bash
# Loads one daily partition of the DWS table dws_sale_detail_daycount.
# Usage: dws_sale.sh [yyyy-MM-dd]
#   With a date argument, that date is loaded; without one, yesterday is loaded.

# Variables defined up front so they are easy to change
APP=gmall
hive=/opt/module/hive/bin/hive

# Use the date given as the first argument; otherwise fall back to the day before today
if [ -n "$1" ]; then
    do_date=$1
else
    do_date=$(date -d "-1 day" +%F)
fi

# Same statement as the manual load, with the database name and the date substituted.
# No SQL comments are placed inside the string so the text passed to hive -e stays plain.
sql="
with
tmp_detail as
(
    select
        user_id,
        sku_id,
        sum(sku_num) sku_num,
        count(*) order_count,
        sum(od.order_price*sku_num) order_amount
    from ${APP}.dwd_order_detail od
    where od.dt='${do_date}'
    group by user_id, sku_id
)
insert overwrite table ${APP}.dws_sale_detail_daycount partition(dt='${do_date}')
select
    tmp_detail.user_id,
    tmp_detail.sku_id,
    u.gender,
    months_between('${do_date}', u.birthday)/12 age,
    u.user_level,
    price,
    sku_name,
    tm_id,
    category3_id,
    category2_id,
    category1_id,
    category3_name,
    category2_name,
    category1_name,
    spu_id,
    tmp_detail.sku_num,
    tmp_detail.order_count,
    tmp_detail.order_amount
from tmp_detail
left join ${APP}.dwd_user_info u on tmp_detail.user_id=u.id and u.dt='${do_date}'
left join ${APP}.dwd_sku_info s on tmp_detail.sku_id=s.id and s.dt='${do_date}';
"

# Run the assembled statement through the Hive CLI
$hive -e "$sql"
2)增加腳本執行權限
[atguigu@hadoop102 bin]$ chmod 777 dws_sale.sh
3)執行腳本導入數據
[atguigu@hadoop102 bin]$ dws_sale.sh 2019-02-11
4)查看導入數據
hive (gmall)>
-- Spot check: show two rows from the 2019-02-11 partition to confirm the script loaded data
select * from dws_sale_detail_daycount where dt='2019-02-11' limit 2;