數據倉庫項目筆記9

用戶訂單畫像報表分析

-- 訂單指標表 ads_user_order_tag

-- User order profile tags: order history, spend, preferences and cart
-- indicators per user_id. Partitioned by dt.
drop table if exists ads_user_order_tag;
create table ads_user_order_tag (
    user_id                  bigint,  -- user id
    first_order_time         string,  -- time of the user's first order
    last_order_time          string,  -- time of the user's most recent order
    first_order_ago          bigint,  -- time elapsed since the first order
    last_order_ago           bigint,  -- time elapsed since the most recent order
    month1_order_cnt         bigint,  -- number of orders in the last 30 days
    month1_order_amt         double,  -- order amount in the last 30 days
    month2_order_cnt         bigint,  -- number of orders in the last 60 days
    month2_order_amt         double,  -- order amount in the last 60 days
    month3_order_cnt         bigint,  -- number of orders in the last 90 days
    month3_order_amt         double,  -- order amount in the last 90 days
    max_order_amt            double,  -- largest single order amount
    min_order_amt            double,  -- smallest single order amount
    total_order_cnt          bigint,  -- cumulative order count (excluding returned/rejected)
    total_order_amt          double,  -- cumulative order amount (excluding returned/rejected)
    total_coupon_amt         double,  -- cumulative coupon amount used
    user_avg_amt             double,  -- average order value (including returned/rejected)
    month3_user_avg_amt      double,  -- average order value over the last 90 days (including returned/rejected)
    common_address           string,  -- most frequently used delivery address
    common_paytype           string,  -- most frequently used payment type
    month1_cart_cnt          bigint,  -- add-to-cart count in the last 30 days
    month1_cart_goods_cnt    bigint,  -- total items added to cart in the last 30 days
    month1_cart_submit_cnt   bigint,  -- items submitted from the cart in the last 30 days
    month1_cart_submit_rate  double,  -- share of carted items that were submitted (last 30 days)
    month1_cart_cancel_cnt   bigint,  -- items cancelled from the cart in the last 30 days
    month1_cart_cancel_rate  double,  -- share of carted items that were cancelled (last 30 days)
    dw_date                  string   -- warehouse calculation date (last column: no trailing comma)
)
partitioned by (dt string)
;
  • 分析業務組合寬表(由訂單和詳細表組成)
    • 字段由三個表組成訂單表、訂單詳細表、購物車
    • 但購物車和訂單通過用戶關聯,所求字段有訂單數據未必有購物車數據,用full join 實現 或用union獲取所有用戶
    • union 和 union all 區別: union 會去重(結果順序不保證,需要排序請另加 order by); union all 不去重、不排序,直接合併
    • explain 查看執行計劃
-- etl 計算

-- Loads ads_user_order_tag for calculation date 2019-06-16.
-- t5 is the driving set (every user seen in orders or in the cart); each
-- metric CTE is LEFT JOINed to it so that users with orders but no cart
-- activity (or the reverse) are kept, with NULLs for the missing metrics.
with t1 as (
    -- Per-user order aggregates.
    select
        user_id,
        min(create_time) as first_order_time,  -- first order time
        max(create_time) as last_order_time,   -- most recent order time

        -- orders / amount within 30, 60 and 90 days of the calculation date
        count(if(datediff('2019-06-16', create_time) <= 30, 1, null))      as month1_order_cnt,
        sum(if(datediff('2019-06-16', create_time) <= 30, order_money, 0)) as month1_order_amt,
        count(if(datediff('2019-06-16', create_time) <= 60, 1, null))      as month2_order_cnt,
        sum(if(datediff('2019-06-16', create_time) <= 60, order_money, 0)) as month2_order_amt,
        count(if(datediff('2019-06-16', create_time) <= 90, 1, null))      as month3_order_cnt,
        sum(if(datediff('2019-06-16', create_time) <= 90, order_money, 0)) as month3_order_amt,

        max(order_money) as max_order_amt,  -- largest order amount
        min(order_money) as min_order_amt,  -- smallest order amount

        -- cumulative totals EXCLUDING returned / rejected orders
        count(if(order_status in ('退貨','拒收'), null, 1))         as p_total_order_cnt,
        sum(if(order_status in ('退貨','拒收'), 0, order_money))    as p_total_order_amt,

        sum(coupon_money) as total_coupon_amt,  -- cumulative coupon amount

        -- cumulative totals INCLUDING returned / rejected orders
        count(1)         as total_order_cnt,
        sum(order_money) as total_order_amt
    from dws_b2c_orders
    group by user_id
),

t2 as (
    -- Most frequently used delivery address per user.
    select
        user_id,
        addr
    from (
        select
            user_id,
            addr,
            -- addr is a tie-breaker so equal counts resolve the same way on every run
            row_number() over (partition by user_id order by cnt desc, addr) as rn
        from (
            select
                user_id,
                concat_ws(',', nvl(area_name, ''), nvl(address, '')) as addr,  -- "area,address"
                count(1) as cnt                                                -- times used
            from dws_b2c_orders
            group by user_id, concat_ws(',', nvl(area_name, ''), nvl(address, ''))
        ) addr_cnt
    ) addr_rank
    where rn = 1
),

t3 as (
    -- Most frequently used payment type per user.
    select
        user_id,
        pay_type
    from (
        select
            user_id,
            pay_type,
            -- pay_type is a tie-breaker so equal counts resolve the same way on every run
            row_number() over (partition by user_id order by cnt desc, pay_type) as rn
        from (
            select
                user_id,
                pay_type,
                count(1) as cnt
            from dws_b2c_orders
            group by user_id, pay_type
        ) pay_cnt
    ) pay_rank
    where rn = 1
),

t4 as (
    -- Cart activity within 30 days of the calculation date.
    select
        user_id,
        count(distinct session_id)                    as month1_cart_cnt,         -- add-to-cart count
        sum(number)                                   as month1_cart_goods_cnt,   -- total items added
        sum(if(submit_time is not null, number, 0))   as month1_cart_submit_cnt,  -- items submitted
        sum(if(cancel_time is not null, number, 0))   as month1_cart_cancel_cnt   -- items cancelled
    from ods_b2c_cart
    where datediff('2019-06-16', add_time) <= 30
    group by user_id
),

t5 as (
    -- Every user that appears in orders or in the cart (union de-duplicates).
    select user_id from dws_b2c_orders
    union
    select user_id from ods_b2c_cart
)

-- Final load. The target is partitioned by dt, so the partition is named explicitly.
insert into table ads_user_order_tag partition (dt = '2019-06-16')
select
    t5.user_id,
    t1.first_order_time,
    t1.last_order_time,
    datediff('2019-06-16', t1.first_order_time)        as first_order_ago,
    datediff('2019-06-16', t1.last_order_time)         as last_order_ago,
    t1.month1_order_cnt,
    t1.month1_order_amt,
    t1.month2_order_cnt,
    t1.month2_order_amt,
    t1.month3_order_cnt,
    t1.month3_order_amt,
    t1.max_order_amt,
    t1.min_order_amt,
    -- the target columns are documented as excluding returned/rejected orders
    t1.p_total_order_cnt                               as total_order_cnt,
    t1.p_total_order_amt                               as total_order_amt,
    t1.total_coupon_amt,
    -- average order values are documented as including returned/rejected orders
    t1.total_order_amt / t1.total_order_cnt            as user_avg_amt,
    t1.month3_order_amt / t1.month3_order_cnt          as month3_user_avg_amt,
    t2.addr                                            as common_address,
    t3.pay_type                                        as common_paytype,
    t4.month1_cart_cnt,
    t4.month1_cart_goods_cnt,
    t4.month1_cart_submit_cnt,
    t4.month1_cart_submit_cnt / t4.month1_cart_goods_cnt as month1_cart_submit_rate,
    t4.month1_cart_cancel_cnt,
    t4.month1_cart_cancel_cnt / t4.month1_cart_goods_cnt as month1_cart_cancel_rate,
    '2019-06-16'                                       as dw_date
from t5
left join t1 on t5.user_id = t1.user_id
left join t2 on t5.user_id = t2.user_id
left join t3 on t5.user_id = t3.user_id
left join t4 on t5.user_id = t4.user_id
;
商品退換貨分析
-- Wide table combining order headers with their goods rows and categories.
drop table if exists dws_orders_goods;
create table dws_orders_goods (
    order_id      bigint,  -- order id
    goods_id      bigint,  -- goods id
    size_id       bigint,  -- goods specification id
    goods_price   double,  -- goods selling price
    goods_amount  bigint,  -- goods quantity
    cat1_id       bigint,  -- level-1 category id
    cat1_name     string,  -- level-1 category name
    cat2_id       bigint,  -- level-2 category id
    cat2_name     string,  -- level-2 category name
    cat3_id       bigint,  -- level-3 category id
    cat3_name     string,  -- level-3 category name
    order_no      string,  -- order serial number
    order_date    string,  -- order creation date
    user_id       bigint,  -- user id
    user_name     string,  -- login name
    order_money   double,  -- order amount after promotions and reductions
    order_type    string,  -- order type
    order_status  string,  -- order status
    pay_type      string,  -- payment type
    pay_status    string,  -- payment status
    order_source  string,  -- order source
    dw_date       string   -- warehouse calculation date
)
stored as parquet
;
-- Loads the order/goods wide table: one output row per order goods row,
-- enriched with the order header (a) and the goods category hierarchy (c).
-- Columns are matched to the target by position.
-- NOTE: order_money comes from the order header, so it is repeated on every
-- goods row of the same order; summing it from this table over-counts.
insert into table dws_orders_goods
select
    b.order_id,                   -- order id
    b.goods_id,                   -- goods id
    b.size_id,                    -- goods specification id
    b.goods_price,                -- goods price
    b.goods_amount,               -- quantity
    c.first_cat,                  -- level-1 category id
    c.first_cat_name,             -- level-1 category name
    c.second_cat,                 -- level-2 category id
    c.second_cat_name,            -- level-2 category name
    c.third_cat,                  -- level-3 category id
    c.third_cat_name,             -- level-3 category name
    a.order_no,                   -- order number
    a.order_date,                 -- order date
    a.user_id,                    -- user id
    a.user_name,                  -- login name
    a.order_money,                -- order amount after promotions and reductions
    a.order_type,                 -- order type
    a.order_status,               -- order status
    a.pay_type,                   -- payment type
    a.pay_status,                 -- payment status
    a.order_source,               -- order source
    '2019-06-16' as dw_date       -- warehouse calculation date
from ods_b2c_orders a
inner join ods_b2c_orders_goods b on a.order_id = b.order_id
inner join ods_b2c_goods c on b.goods_id = c.goods_id
;

-- Per-user goods sales metrics with and without returned/rejected orders.
-- The common_*_cat columns hold category NAMES and dw_date holds a
-- 'yyyy-MM-dd' date, so all four are declared string rather than bigint.
drop table if exists dm_user_goods_amt;
create table dm_user_goods_amt (
    user_id            bigint,  -- user id
    p_sales_cnt        bigint,  -- goods quantity sold, excluding returned/rejected
    p_sales_amt        double,  -- goods sales amount, excluding returned/rejected
    p_sales_cut_amt    double,  -- sales amount after promotions, excluding returned/rejected
    h_sales_cnt        bigint,  -- goods quantity sold, including returned/rejected
    h_sales_amt        double,  -- goods sales amount, including returned/rejected
    h_sales_cut_amt    double,  -- sales amount after promotions, including returned/rejected
    return_cnt         bigint,  -- returned goods quantity
    return_amt         double,  -- returned goods amount
    reject_cnt         bigint,  -- rejected goods quantity
    reject_amt         double,  -- rejected goods amount
    common_first_cat   string,  -- name of the most purchased level-1 category
    common_second_cat  string,  -- name of the most purchased level-2 category
    common_third_cat   string,  -- name of the most purchased level-3 category
    dw_date            string   -- warehouse calculation date
)
partitioned by (dt string)
stored as parquet
;
-- Per-user goods sales / return / reject metrics plus the most purchased
-- category at each level, as of 2019-06-16. Output columns follow the column
-- order of dm_user_goods_amt.
-- NOTE(review): this statement only selects; loading dm_user_goods_amt
-- presumably needs an `insert ... partition (dt = ...)` in front of the
-- final select -- confirm the intended load.
-- NOTE(review): the returned-order status is written '退貨' here to agree
-- with t2; confirm against the actual order_status values.
with t1 as (
    -- Goods-level quantities and amounts from the order/goods wide table.
    select
        user_id,
        -- excluding returned / rejected orders
        sum(if(order_status in ('退貨', '拒收'), 0, goods_amount))               as p_sales_cnt,
        sum(if(order_status in ('退貨', '拒收'), 0, goods_amount * goods_price)) as p_sales_amt,

        -- including returned / rejected orders
        sum(goods_amount)               as h_sales_cnt,
        sum(goods_amount * goods_price) as h_sales_amt,

        -- returned goods only
        sum(if(order_status = '退貨', goods_amount, 0))               as return_cnt,
        sum(if(order_status = '退貨', goods_amount * goods_price, 0)) as return_amt,

        -- rejected goods only
        sum(if(order_status = '拒收', goods_amount, 0))               as reject_cnt,
        sum(if(order_status = '拒收', goods_amount * goods_price, 0)) as reject_amt
    from dws_orders_goods
    group by user_id
),

-- Order-level amounts are read from the ODS order table: after the join to
-- goods rows an order appears on several rows, so order_money cannot be
-- summed from the wide table.
t2 as (
    select
        user_id,
        sum(if(order_status in ('退貨','拒收'), 0, order_money)) as p_sales_cut_amt,  -- excl. returned/rejected, after promotions
        sum(order_money)                                         as h_sales_cut_amt   -- incl. returned/rejected, after promotions
    from ods_b2c_orders
    group by user_id
),

-- Top-1 per group: most purchased level-1 category per user.
t3 as (
    select
        user_id,
        cat1_name
    from (
        select
            user_id,
            cat1_name,
            -- category name breaks ties so reruns pick the same row
            row_number() over (partition by user_id order by cat1_cnt desc, cat1_name) as rn
        from (
            select
                user_id,
                cat1_name,
                sum(goods_amount) as cat1_cnt
            from dws_orders_goods
            group by user_id, cat1_name
        ) cat1_qty
    ) cat1_rank
    where rn = 1
),

-- Most purchased level-2 category per user.
t4 as (
    select
        user_id,
        cat2_name
    from (
        select
            user_id,
            cat2_name,
            row_number() over (partition by user_id order by cat2_cnt desc, cat2_name) as rn
        from (
            select
                user_id,
                cat2_name,
                sum(goods_amount) as cat2_cnt
            from dws_orders_goods
            group by user_id, cat2_name
        ) cat2_qty
    ) cat2_rank
    where rn = 1
),

-- Most purchased level-3 category per user.
t5 as (
    select
        user_id,
        cat3_name
    from (
        select
            user_id,
            cat3_name,
            row_number() over (partition by user_id order by cat3_cnt desc, cat3_name) as rn
        from (
            select
                user_id,
                cat3_name,
                sum(goods_amount) as cat3_cnt
            from dws_orders_goods
            group by user_id, cat3_name
        ) cat3_qty
    ) cat3_rank
    where rn = 1
)

select
    t1.user_id,
    t1.p_sales_cnt,
    t1.p_sales_amt,
    t2.p_sales_cut_amt,
    t1.h_sales_cnt,
    t1.h_sales_amt,
    t2.h_sales_cut_amt,
    t1.return_cnt,
    t1.return_amt,
    t1.reject_cnt,
    t1.reject_amt,
    t3.cat1_name   as common_first_cat,
    t4.cat2_name   as common_second_cat,
    t5.cat3_name   as common_third_cat,
    '2019-06-16'   as dw_date
from t1
inner join t2 on t1.user_id = t2.user_id
inner join t3 on t1.user_id = t3.user_id
inner join t4 on t1.user_id = t4.user_id
inner join t5 on t1.user_id = t5.user_id
;
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章