數據倉庫項目筆記7

用戶活躍度
用戶連續活躍情況
所需字段 : 日期 當日活躍總數 連續活躍天數  連續活躍人數
-- Number of users per streak length, as of 2019-06-09.
-- Only rows with continue_end = '9999-12-31' are counted (presumably the marker
-- for a streak that is still open -- confirm against the table definition).
-- NOTE(review): the day count is measured from first_login; confirm that this
-- column holds the start of the current streak and not the user's first-ever login.
select
    '2019-06-09' as dt,
    s.continue_days,
    count(1) as act_cnts
from (
    select
        datediff('2019-06-09', first_login) + 1 as continue_days
    from demo_user_continue_act
    where continue_end = '9999-12-31'
) s
group by s.continue_days
用戶訪問間隔分佈
簡單需求,求出不活躍間隔?天的?人數
不活躍間隔: 同一用戶的活躍區間按開始時間排序後,下一行的開始時間減上一行的結束時間再減1,即爲不活躍間隔天數(例如6號結束、8號再次活躍,不活躍間隔爲1天)

所需字段

範圍開始時間 範圍結束時間 間隔天數 人數
  • 需要用 lead over / lag over 函數,或者用自連接實現
測試數據
使用上一篇dws_user_continue_act數據
-- Result table: how often each inactivity gap length occurs within a date range.
create table ads_user_acc_interval (
    range_start   string,  -- start of the analysed date range
    range_end     string,  -- end of the analysed date range
    interval_days int,     -- gap length in days
    interval_cnts int      -- number of occurrences of that gap length
)
stored as parquet;

-- etl計算
-- ETL: count the inactivity gaps between consecutive activity streaks of the
-- same user, for streaks that end on or after 2019-06-16 minus 30 days.
with streaks as (
    -- attach to every streak the start date of the same user's next streak
    select
        uid,
        continue_end,
        lead(continue_start, 1) over (partition by uid order by continue_start) as next_start
    from dws_user_continue_act
    where continue_end >= date_sub('2019-06-16', 30)
)
insert into table ads_user_acc_interval
select
    date_sub('2019-06-16', 30) as range_start,
    '2019-06-16' as range_end,
    g.interval_days,
    count(1) as interval_cnts
from (
    -- days strictly between the end of one streak and the start of the next
    select
        datediff(next_start, continue_end) - 1 as interval_days
    from streaks
    -- a user's last streak has no successor and therefore contributes no gap
    where next_start is not null
) g
group by g.interval_days
;

自連接實現:

-- Same gap statistic computed with a self-join instead of lead():
-- number each user's streaks by start date, then join streak n to streak n + 1.
with numbered as (
    select
        uid,
        continue_start,
        continue_end,
        row_number() over (partition by uid order by continue_start) as rn
    from dws_user_continue_act
    where continue_end >= date_sub('2019-06-16', 30)
)
select
    date_sub('2019-06-16', 30) as range_start,
    '2019-06-16' as range_end,
    o.interval_days,
    count(1) as interval_cnts
from (
    select
        prev.uid,
        -- days strictly between the end of one streak and the start of the next
        datediff(nxt.continue_start, prev.continue_end) - 1 as interval_days
    from numbered prev
    inner join numbered nxt
        on prev.uid = nxt.uid
        and nxt.rn = prev.rn + 1
) o
where o.interval_days is not null
group by o.interval_days

用戶訪問間隔增強需求
計算邏輯:
1.限定一個時間範圍
2.同一個人的兩個相鄰活躍區間之間的天數間隔(包括間隔0天),就是訪問間隔
3.統計各種間隔的出現次數,就得到結果
4.求兩個相鄰活躍區間之間的天數差,需要跨行計算,得用窗口分析函數 lead over(把下一行的開始時間提到當前行;用 lag over 取上一行的結束時間亦可)
-- 假設數據如下
uid,start,end
a,     3,  6
a,     8,  9
a,     13, 16
-- 把下一行的continue_start提到上一行
uid,start,end,next_start
a,    3,   6,    8
a,    8,   9,    13
a,    13,  16,   null
--然後將end-start,就是隔0天的次數,  next_start-end 就是間隔天數,發生次數爲1
a,map(0,6-3,8-6,1)
a,map(0,9-8,13-9,1)
a,map(0,16-13,null-16,1)

-- 然後把map字段炸裂
0,    6-3
8-6,  1
0,    9-8
13-9, 1
0,    16-13
null,1

-- Enhanced interval statistic: besides the gaps between streaks, the day
-- difference inside each streak is accumulated under interval 0.
-- NOTE(review): the gap key below is datediff(next_start, end) without the "- 1"
-- used by the query that loads ads_user_acc_interval -- confirm which definition
-- of "interval" is intended.
with spans as (
    select
        uid,
        continue_start,
        -- a streak marked with '9999-12-31' is closed at the report date 2019-06-16
        if(continue_end = '9999-12-31', '2019-06-16', continue_end) as span_end,
        lead(continue_start, 1) over (partition by uid order by continue_start) as next_start
    from dws_user_continue_act
    where continue_end >= date_sub('2019-06-16', 30)
),
tmp as (
    select
        uid,
        -- two entries per streak, keyed by interval length:
        --   0                  -> day difference between streak end and streak start
        --   gap to next streak -> 1 occurrence
        map(
            0, datediff(span_end, continue_start),
            datediff(next_start, span_end), 1
        ) as m
    from spans
)
select
    interval_days,
    sum(interval_cnts) as interval_cnts
from (
    select
        uid,
        t.interval_days,
        t.interval_cnts
    from tmp
    lateral view explode(m) t as interval_days, interval_cnts
) o
-- a user's last streak has no next streak, which produces a null key; drop it
where o.interval_days is not null
group by interval_days
;
事件概況分析

所需字段: 消息數即session數 用戶數即uid數
在這裏插入圖片描述

事件id 用戶id 會話id 事件發生頁面 事件標的 事件value 時間戳
event_type uid sid url event_dest event_value commit_time
  • 事件的標的:事件發生在誰(點贊商品, 搜索url, 評價的電影)身上
  • 事件的 value: 商品的點贊次數 ,評價電影的分數
/*
	Event analysis: DWD-layer detail table
	@src : ods_eventlog   raw log table
	@dst : dwd_event_dtl  event detail table
	-- logic: field extraction
*/
-- Table definition
drop table if exists dwd_event_dtl;
create table dwd_event_dtl (
    event_type  string comment '事件名稱',
    uid         string,
    sessionid   string,
    location    string comment '事件所發生的頁面',
    event_dest  string comment '事件的標的',
    event_value double comment '事件的值(比如一個商品的價格,一個評分分值)',
    commit_time bigint comment '事件發生的時間戳'
)
partitioned by (dt string)
stored as parquet;

-- ETL: extract the event fields of one day from ods_eventlog into dwd_event_dtl.
-- "insert overwrite" keeps the load idempotent: re-running the job for the same
-- dt replaces the partition instead of appending duplicate rows.
insert overwrite table dwd_event_dtl partition (dt = '2019-06-16')
select
    logtype as event_type,
    -- uid: first identifier that is not blank, in priority order
    coalesce(
        if(trim(account)   = '', null, account),
        if(trim(imei)      = '', null, imei),
        if(trim(androidid) = '', null, androidid),
        if(trim(deviceid)  = '', null, deviceid),
        if(trim(cookieid)  = '', null, cookieid)
    ) as uid,
    sessionid,
    event['url'] as location,
    -- event target: what the event acted on
    case logtype
        when 'pg_view'     then event['url']
        when 'ad_show'     then event['ad_id']
        when 'ad_click'    then event['ad_id']
        when 'commit_cart' then event['order_id']
        -- thumbup, favor, add_cart, rate and every other type target a sku
        else event['skuid']
    end as event_dest,
    -- event value; cast explicitly so the result matches the double column
    -- (event values are presumably strings -- confirm against ods_eventlog)
    cast(
        case logtype
            when 'add_cart'    then event['price']      -- add to cart: price of the item
            when 'rate'        then event['score']      -- rating: the score given
            when 'commit_cart' then event['order_amt']  -- cart checkout: order total
            else null
        end as double
    ) as event_value,
    commit_time
from ods_eventlog
where dt = '2019-06-16'
;

/*
	事件分析:事件概況報表
	@src :dwd_event_dtl事件明細表
	@dst :ads_event_overall
+---------------+---------+---------------+------------------------+---------------+----------------+----------------+-------------+
| t.event_type  |  t.uid  |  t.sessionid  |       t.location       | t.event_dest  | t.event_value  | t.commit_time  |    t.dt     |
+---------------+---------+---------------+------------------------+---------------+----------------+----------------+-------------+
| ad_show       | LXOGIR  | 3fkDqxogxMWE  | http://www.51doit.com  | 1             | NULL           | 1560672636603  | 2019-06-16  |
| ad_show       | LXOGIR  | 3fkDqxogxMWE  | http://www.51doit.com  | 5             | NULL           | 1560672636604  | 2019-06-16  |
| ad_show       | LXOGIR  | 3fkDqxogxMWE  | http://www.51doit.com  | 6             | NULL           | 1560672636605  | 2019-06-16  |
| ad_show       | 05289   | aNHKnDe44hS0  | http://www.51doit.com  | 1             | NULL           | 1560672636615  | 2019-06-16  |
| ad_show       | 05289   | aNHKnDe44hS0  | http://www.51doit.com  | 4             | NULL           | 1560672636616  | 2019-06-16  |
| ad_show       | 05289   | aNHKnDe44hS0  | http://www.51doit.com  | 5             | NULL           | 1560672636616  | 2019-06-16  |
| ad_show       | YFG6OV  | K3XBY77W6NkO  | http://www.51doit.com  | 1             | NULL           | 1560672636675  | 2019-06-16  |
| ad_show       | YFG6OV  | K3XBY77W6NkO  | http://www.51doit.com  | 2             | NULL           | 1560672636676  | 2019-06-16  |
| ad_show       | EK2MNA  | doJ9ymYGYXtU  | http://www.51doit.com  | 1             | NULL           | 1560672636679  | 2019-06-16  |
| ad_show       | EK2MNA  | doJ9ymYGYXtU  | http://www.51doit.com  | 4             | NULL           | 1560672636680  | 2019-06-16  |
| ad_show       | EK2MNA  | doJ9ymYGYXtU  | http://www.51doit.com  | 5             | NULL           | 1560672636680  | 2019-06-16  |
| ad_show       | 73129   | 7EpctWPksQIv  | http://www.51doit.com  | 1             | NULL           | 1560672636683  | 2019-06-16  |
| search        | 73129   | 7EpctWPksQIv  |                        | NULL          | NULL           | 1560672636683  | 2019-06-16  |
+---------------+---------+---------------+------------------------+---------------+----------------+----------------+-------------+
	
	
*/
-- Report table: per-day totals for every event type.
create table ads_event_overall (
    dt           string,  -- statistics date
    event_type   string,  -- event name
    event_cishu  int,     -- number of times the event occurred
    event_renshu int      -- number of users for whom the event occurred
)
stored as parquet
;


-- ETL: event overview for 2019-06-16.
-- Step 1: collapse the detail rows to one row per (event_type, uid) holding
-- that user's number of events of that type.
with per_user_events as (
    select
        event_type,
        uid,
        count(1) as event_count
    from dwd_event_dtl
    where dt = '2019-06-16'
    group by event_type, uid
)
/*
Example rows of per_user_events (event_type, uid, event_count):
ad_show  LXOGIR  3
ad_show  05289   3
ad_show  YFG6OV  2
ad_show  EK2MNA  3
ad_show  73129   1
*/
-- Step 2: per event type, add up the counts and count the per-user rows.
-- The write into the report table is left disabled, as in the original:
--insert into table ads_event_overall
select
    '2019-06-16' as dt,
    event_type,
    sum(event_count) as event_cishu,
    count(1) as event_renshu
from per_user_events
group by event_type
;
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章