-- User order profile report analysis
-- Order metrics table: ads_user_order_tag
-- Per-user order profile (tag) table, partitioned by dt.
-- The table is dropped and recreated every time this script runs.
-- Fix: the original had a trailing comma after the last column, which is a syntax error.
drop table if exists ads_user_order_tag;
create table ads_user_order_tag (
    user_id                  bigint, -- user id
    first_order_time         string, -- time of the first order placed by the user
    last_order_time          string, -- time of the most recent order
    first_order_ago          bigint, -- days from the first order to the calculation date
    last_order_ago           bigint, -- days from the most recent order to the calculation date
    month1_order_cnt         bigint, -- number of orders in the last 30 days
    month1_order_amt         double, -- order amount in the last 30 days
    month2_order_cnt         bigint, -- number of orders in the last 60 days
    month2_order_amt         double, -- order amount in the last 60 days
    month3_order_cnt         bigint, -- number of orders in the last 90 days
    month3_order_amt         double, -- order amount in the last 90 days
    max_order_amt            double, -- largest single order amount
    min_order_amt            double, -- smallest single order amount
    total_order_cnt          bigint, -- lifetime order count (excluding returned and rejected orders)
    total_order_amt          double, -- lifetime order amount (excluding returned and rejected orders)
    total_coupon_amt         double, -- lifetime coupon amount used
    user_avg_amt             double, -- average order value (including returned and rejected orders)
    month3_user_avg_amt      double, -- average order value over the last 90 days (including returned and rejected orders)
    common_address           string, -- most frequently used shipping address
    common_paytype           string, -- most frequently used payment type
    month1_cart_cnt          bigint, -- add-to-cart count in the last 30 days
    month1_cart_goods_cnt    bigint, -- total goods quantity added to cart in the last 30 days
    month1_cart_submit_cnt   bigint, -- goods quantity submitted from the cart in the last 30 days
    month1_cart_submit_rate  double, -- submitted quantity divided by added quantity, last 30 days
    month1_cart_cancel_cnt   bigint, -- goods quantity cancelled from the cart in the last 30 days
    month1_cart_cancel_rate  double, -- cancelled quantity divided by added quantity, last 30 days
    dw_date                  string  -- warehouse calculation date
)
partitioned by (dt string)
;
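-- Wide table for this analysis, built from the order table and the order-detail table.
-- The columns come from three tables: orders, order details, and the shopping cart.
-- Cart rows and order rows are related only through the user, and a user with order data does not necessarily have cart data, so use a FULL JOIN, or UNION the user ids to obtain every user.
-- UNION vs. UNION ALL: UNION removes duplicates (and sorts), UNION ALL keeps duplicates and does not sort.
-- Use EXPLAIN to inspect the execution plan.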
-- ETL: load the per-user order profile for calculation date 2019-06-16.
-- NOTE: the date literal is repeated in the window tests, the ago columns, dw_date and the
-- partition spec. Change every occurrence together.
--
-- Fixes relative to the previous version:
--   * month3_user_avg_amt divided the 90-day amount by the 60-day count. It now uses the 90-day count.
--   * total_order_cnt and total_order_amt are documented on the target table as excluding
--     returned and rejected orders, so they are now loaded from the excluding aggregates.
--   * the target table is partitioned by dt, so the insert now names the partition explicitly.
--   * users are kept when they have only orders or only cart activity (left joins from the
--     user union instead of inner joins).
with order_stats as (
    -- Per-user order aggregates over dws_b2c_orders.
    select
        user_id,
        min(create_time) as first_order_time, -- first order time
        max(create_time) as last_order_time,  -- most recent order time
        -- An order falls in a window when datediff(calculation date, create_time) is at most N days.
        count(if(datediff('2019-06-16', create_time) <= 30, 1, null))      as month1_order_cnt,
        sum(if(datediff('2019-06-16', create_time) <= 30, order_money, 0)) as month1_order_amt,
        count(if(datediff('2019-06-16', create_time) <= 60, 1, null))      as month2_order_cnt,
        sum(if(datediff('2019-06-16', create_time) <= 60, order_money, 0)) as month2_order_amt,
        count(if(datediff('2019-06-16', create_time) <= 90, 1, null))      as month3_order_cnt,
        sum(if(datediff('2019-06-16', create_time) <= 90, order_money, 0)) as month3_order_amt,
        max(order_money) as max_order_amt, -- largest single order amount
        min(order_money) as min_order_amt, -- smallest single order amount
        -- Lifetime totals excluding returned and rejected orders.
        count(if(order_status in ('退貨','拒收'), null, 1))         as p_total_order_cnt,
        sum(if(order_status in ('退貨','拒收'), 0, order_money))    as p_total_order_amt,
        sum(coupon_money) as total_coupon_amt, -- lifetime coupon amount used
        -- Lifetime totals including returned and rejected orders (used for average order value).
        count(1)         as total_order_cnt,
        sum(order_money) as total_order_amt
    from dws_b2c_orders
    group by user_id
),
top_address as (
    -- Most frequently used shipping address per user.
    select
        user_id,
        addr
    from (
        select
            user_id,
            addr,
            -- addr is a tie-breaker so the pick is deterministic when counts are equal
            row_number() over (partition by user_id order by cnt desc, addr) as rn
        from (
            select
                user_id,
                concat_ws(',', nvl(area_name, ''), nvl(address, '')) as addr, -- area and address joined by a comma
                count(1) as cnt                                               -- times the address was used
            from dws_b2c_orders
            group by user_id, concat_ws(',', nvl(area_name, ''), nvl(address, ''))
        ) addr_counts
    ) addr_ranked
    where rn = 1
),
top_paytype as (
    -- Most frequently used payment type per user.
    select
        user_id,
        pay_type
    from (
        select
            user_id,
            pay_type,
            -- pay_type is a tie-breaker so the pick is deterministic when counts are equal
            row_number() over (partition by user_id order by cnt desc, pay_type) as rn
        from (
            select
                user_id,
                pay_type,
                count(1) as cnt
            from dws_b2c_orders
            group by user_id, pay_type
        ) pay_counts
    ) pay_ranked
    where rn = 1
),
cart_stats as (
    -- Per-user cart activity for rows added at most 30 days before the calculation date.
    select
        user_id,
        count(distinct session_id)                    as month1_cart_cnt,        -- distinct sessions with a cart add
        sum(number)                                   as month1_cart_goods_cnt,  -- total quantity added
        sum(if(submit_time is not null, number, 0))   as month1_cart_submit_cnt, -- quantity that was submitted
        sum(if(cancel_time is not null, number, 0))   as month1_cart_cancel_cnt  -- quantity that was cancelled
    from ods_b2c_cart
    where datediff('2019-06-16', add_time) <= 30
    group by user_id
),
all_users as (
    -- Every user that appears in orders or in the cart. UNION is intentional: it de-duplicates.
    select user_id from dws_b2c_orders
    union
    select user_id from ods_b2c_cart
)
-- Final load. Overwrites only the partition for this calculation date, so a rerun does not
-- duplicate rows.
insert overwrite table ads_user_order_tag partition (dt = '2019-06-16')
select
    u.user_id,
    o.first_order_time,
    o.last_order_time,
    datediff('2019-06-16', o.first_order_time) as first_order_ago,
    datediff('2019-06-16', o.last_order_time)  as last_order_ago,
    o.month1_order_cnt,
    o.month1_order_amt,
    o.month2_order_cnt,
    o.month2_order_amt,
    o.month3_order_cnt,
    o.month3_order_amt,
    o.max_order_amt,
    o.min_order_amt,
    o.p_total_order_cnt                        as total_order_cnt,     -- excludes returned and rejected orders
    o.p_total_order_amt                        as total_order_amt,     -- excludes returned and rejected orders
    o.total_coupon_amt,
    o.total_order_amt / o.total_order_cnt      as user_avg_amt,        -- includes returned and rejected orders
    o.month3_order_amt / o.month3_order_cnt    as month3_user_avg_amt, -- 90-day amount over 90-day count
    a.addr                                     as common_address,
    p.pay_type                                 as common_paytype,
    c.month1_cart_cnt,
    c.month1_cart_goods_cnt,
    c.month1_cart_submit_cnt,
    c.month1_cart_submit_cnt / c.month1_cart_goods_cnt as month1_cart_submit_rate,
    c.month1_cart_cancel_cnt,
    c.month1_cart_cancel_cnt / c.month1_cart_goods_cnt as month1_cart_cancel_rate,
    '2019-06-16'                               as dw_date
from all_users u
-- Left joins: order columns are NULL for cart-only users and cart columns are NULL for
-- users without cart activity in the window.
left join order_stats o on u.user_id = o.user_id
left join top_address a on u.user_id = a.user_id
left join top_paytype p on u.user_id = p.user_id
left join cart_stats  c on u.user_id = c.user_id
;
-- Goods return and exchange analysis
-- Order and goods wide table: order header fields alongside order-line goods and category fields.
-- Dropped and recreated on every run. Stored as Parquet.
drop table if exists dws_orders_goods;

create table dws_orders_goods
(
      order_id      bigint  -- order id
    , goods_id      bigint  -- goods id
    , size_id       bigint  -- goods specification id
    , goods_price   double  -- goods selling price
    , goods_amount  bigint  -- goods quantity
    , cat1_id       bigint  -- level-1 category id
    , cat1_name     string  -- level-1 category name
    , cat2_id       bigint  -- level-2 category id
    , cat2_name     string  -- level-2 category name
    , cat3_id       bigint  -- level-3 category id
    , cat3_name     string  -- level-3 category name
    , order_no      string  -- order serial number
    , order_date    string  -- order creation date
    , user_id       bigint  -- user id
    , user_name     string  -- login name
    , order_money   double  -- order amount after promotions and discounts are deducted
    , order_type    string  -- order type
    , order_status  string  -- order status
    , pay_type      string  -- payment type
    , pay_status    string  -- payment status
    , order_source  string  -- order source
    , dw_date       string  -- warehouse calculation date
)
stored as parquet
;
-- Load the order and goods wide table with every column.
-- Each order line from ods_b2c_orders_goods is joined to its order header and to its goods
-- category attributes. Inner joins: lines without a matching order or goods row are not loaded.
-- Fix: the statement was missing its terminating semicolon, so it ran into the DDL that follows.
insert into table dws_orders_goods
select
    og.order_id,                   -- order id
    og.goods_id,                   -- goods id
    og.size_id,                    -- goods specification id
    og.goods_price,                -- goods price
    og.goods_amount,               -- quantity
    g.first_cat,                   -- level-1 category id
    g.first_cat_name,              -- level-1 category name
    g.second_cat,                  -- level-2 category id
    g.second_cat_name,             -- level-2 category name
    g.third_cat,                   -- level-3 category id
    g.third_cat_name,              -- level-3 category name
    o.order_no,                    -- order number
    o.order_date,                  -- order date
    o.user_id,                     -- user id
    o.user_name,                   -- login name
    o.order_money,                 -- order amount after promotions and discounts are deducted
    o.order_type,                  -- order type
    o.order_status,                -- order status
    o.pay_type,                    -- payment type
    o.pay_status,                  -- payment status
    o.order_source,                -- order source
    '2019-06-16' as dw_date        -- warehouse calculation date
from ods_b2c_orders o
inner join ods_b2c_orders_goods og on o.order_id = og.order_id
inner join ods_b2c_goods g on og.goods_id = g.goods_id
;
-- Per-user goods sales, return and rejection metrics.
-- Dropped and recreated on every run. Partitioned by dt, stored as Parquet.
-- Fix: the three common category columns hold category names and dw_date holds a date
-- string such as 2019-06-16, so they are typed string. As bigint, those values would not
-- convert and would be stored as NULL.
drop table if exists dm_user_goods_amt;
create table dm_user_goods_amt (
    user_id            bigint, -- user id
    p_sales_cnt        bigint, -- goods quantity sold, excluding returned and rejected
    p_sales_amt        double, -- goods sales amount, excluding returned and rejected
    p_sales_cut_amt    double, -- sales amount excluding returned and rejected, after promotions and discounts
    h_sales_cnt        bigint, -- goods quantity sold, including returned and rejected
    h_sales_amt        double, -- goods sales amount, including returned and rejected
    h_sales_cut_amt    double, -- sales amount including returned and rejected, after promotions and discounts
    return_cnt         bigint, -- returned goods quantity
    return_amt         double, -- returned goods amount
    reject_cnt         bigint, -- rejected goods quantity
    reject_amt         double, -- rejected goods amount
    common_first_cat   string, -- name of the most frequently purchased level-1 category
    common_second_cat  string, -- name of the most frequently purchased level-2 category
    common_third_cat   string, -- name of the most frequently purchased level-3 category
    dw_date            string  -- warehouse calculation date
)
partitioned by (dt string)
stored as parquet
;
-- Per-user goods sales, return and rejection metrics for calculation date 2019-06-16.
-- The output columns are in the same order as the columns of dm_user_goods_amt.
-- NOTE(review): this statement only selects. It does not write to dm_user_goods_amt.
-- To persist the result, prefix the final select with an insert into that table naming
-- the dt partition - confirm the intent before doing so.
--
-- Fixes relative to the previous version:
--   * return and reject aggregates were inverted: they zeroed the matching status and summed
--     everything else. They now sum only the matching status.
--   * the returned status literal is now the same one used by the order-level aggregates
--     in this query and elsewhere in this script.
--   * the calculation date column was aliased first_cat. It is now dw_date.
--   * the statement is terminated with a semicolon.
with goods_stats as (
    -- Goods-level quantities and gross amounts per user, from the order and goods wide table.
    select
        user_id,
        sum(if(order_status in ('退貨','拒收'), 0, goods_amount))               as p_sales_cnt, -- excluding returned and rejected
        sum(if(order_status in ('退貨','拒收'), 0, goods_amount * goods_price)) as p_sales_amt,
        sum(goods_amount)                                                        as h_sales_cnt, -- including returned and rejected
        sum(goods_amount * goods_price)                                          as h_sales_amt,
        sum(if(order_status = '退貨', goods_amount, 0))                          as return_cnt,  -- returned only
        sum(if(order_status = '退貨', goods_amount * goods_price, 0))            as return_amt,
        sum(if(order_status = '拒收', goods_amount, 0))                          as reject_cnt,  -- rejected only
        sum(if(order_status = '拒收', goods_amount * goods_price, 0))            as reject_amt
    from dws_orders_goods
    group by user_id
),
order_stats as (
    -- Net order amounts per user. Taken from the ODS order table because order_money is an
    -- order-level value: after the join to order lines it repeats on every line and cannot be
    -- summed from the wide table.
    select
        user_id,
        sum(if(order_status in ('退貨','拒收'), 0, order_money)) as p_sales_cut_amt, -- excluding returned and rejected, after discounts
        sum(order_money)                                         as h_sales_cut_amt  -- including returned and rejected, after discounts
    from ods_b2c_orders
    group by user_id
),
top_cat1 as (
    -- Level-1 category with the largest purchased quantity per user (top 1 within each group).
    select
        user_id,
        cat1_name
    from (
        select
            user_id,
            cat1_name,
            -- cat1_name is a tie-breaker so the pick is deterministic when quantities are equal
            row_number() over (partition by user_id order by qty desc, cat1_name) as rn
        from (
            select
                user_id,
                cat1_name,
                sum(goods_amount) as qty
            from dws_orders_goods
            group by user_id, cat1_name
        ) cat1_qty
    ) cat1_ranked
    where rn = 1
),
top_cat2 as (
    -- Level-2 category with the largest purchased quantity per user.
    select
        user_id,
        cat2_name
    from (
        select
            user_id,
            cat2_name,
            row_number() over (partition by user_id order by qty desc, cat2_name) as rn
        from (
            select
                user_id,
                cat2_name,
                sum(goods_amount) as qty
            from dws_orders_goods
            group by user_id, cat2_name
        ) cat2_qty
    ) cat2_ranked
    where rn = 1
),
top_cat3 as (
    -- Level-3 category with the largest purchased quantity per user.
    select
        user_id,
        cat3_name
    from (
        select
            user_id,
            cat3_name,
            row_number() over (partition by user_id order by qty desc, cat3_name) as rn
        from (
            select
                user_id,
                cat3_name,
                sum(goods_amount) as qty
            from dws_orders_goods
            group by user_id, cat3_name
        ) cat3_qty
    ) cat3_ranked
    where rn = 1
)
select
    gs.user_id,
    gs.p_sales_cnt,
    gs.p_sales_amt,
    os.p_sales_cut_amt,
    gs.h_sales_cnt,
    gs.h_sales_amt,
    os.h_sales_cut_amt,
    gs.return_cnt,
    gs.return_amt,
    gs.reject_cnt,
    gs.reject_amt,
    c1.cat1_name,
    c2.cat2_name,
    c3.cat3_name,
    '2019-06-16' as dw_date
from goods_stats gs
inner join order_stats os on gs.user_id = os.user_id
inner join top_cat1 c1 on gs.user_id = c1.user_id
inner join top_cat2 c2 on gs.user_id = c2.user_id
inner join top_cat3 c3 on gs.user_id = c3.user_id
;