【項目】數倉項目(十)

在這裏插入圖片描述在這裏插入圖片描述在這裏插入圖片描述在這裏插入圖片描述在這裏插入圖片描述

(圖片來源於網絡,侵刪)


一、數倉搭建 - ADS 層

1.1 設備主題

1.1.1 活躍設備數(日、周、月)

需求定義:

日活:當日活躍的設備數

周活:當週活躍的設備數

月活:當月活躍的設備數

1)建表語句

-- Active-device counts per statistics date (day / week / month).
-- External table stored as tab-delimited text at the location below.
drop table if exists ads_uv_count;
create external table ads_uv_count (
    `dt`          string comment '統計日期',
    `day_count`   bigint comment '當日用戶數量',
    `wk_count`    bigint comment '當週用戶數量',
    `mn_count`    bigint comment '當月用戶數量',
    `is_weekend`  string comment 'Y,N 是否是週末,用於得到本週最終結果',
    `is_monthend` string comment 'Y,N 是否是月末,用於得到本月最終結果'
)
comment '活躍設備數'
row format delimited
    fields terminated by '\t'
location '/warehouse/gmall/ads/ads_uv_count/';

2)導入數據

-- Appends one row for 2020-03-10 to ads_uv_count.
-- Each subquery counts dwt_uv_topic rows by login_date_last:
--   day_uv   : last login equals the statistics date
--   week_uv  : last login falls between the Monday and the Sunday of that week
--   month_uv : last login falls in the same yyyy-MM as the statistics date
-- The three one-row results are joined on their constant dt column.
-- is_weekend is Y when the day before the next Monday equals the statistics date.
-- is_monthend is Y when last_day of the statistics date equals the statistics date.
insert into table ads_uv_count
select
    '2020-03-10' as dt,
    day_uv.ct as day_count,
    week_uv.ct as wk_count,
    month_uv.ct as mn_count,
    if(date_add(next_day('2020-03-10', 'MO'), -1) = '2020-03-10', 'Y', 'N') as is_weekend,
    if(last_day('2020-03-10') = '2020-03-10', 'Y', 'N') as is_monthend
from (
    select
        '2020-03-10' as dt,
        count(*) as ct
    from dwt_uv_topic
    where login_date_last = '2020-03-10'
) day_uv
join (
    select
        '2020-03-10' as dt,
        count(*) as ct
    from dwt_uv_topic
    where login_date_last >= date_add(next_day('2020-03-10', 'MO'), -7)
      and login_date_last <= date_add(next_day('2020-03-10', 'MO'), -1)
) week_uv
    on day_uv.dt = week_uv.dt
join (
    select
        '2020-03-10' as dt,
        count(*) as ct
    from dwt_uv_topic
    where date_format(login_date_last, 'yyyy-MM') = date_format('2020-03-10', 'yyyy-MM')
) month_uv
    on day_uv.dt = month_uv.dt;

3)查詢導入結果

-- Inspect the rows loaded into ads_uv_count (all columns, in table order).
select
    dt,
    day_count,
    wk_count,
    mn_count,
    is_weekend,
    is_monthend
from ads_uv_count;
1.1.2 每日新增設備

1)建表語句

-- Number of newly added devices per creation date.
-- External table stored as tab-delimited text at the location below.
drop table if exists ads_new_mid_count;
create external table ads_new_mid_count (
    `create_date`   string comment '創建時間',
    `new_mid_count` bigint comment '新增設備數量'
)
comment '每日新增設備信息數量'
row format delimited
    fields terminated by '\t'
location '/warehouse/gmall/ads/ads_new_mid_count/';

2)導入數據

-- Appends the count of dwt_uv_topic rows whose first login date is 2020-03-10.
-- Because of the group by, no row is written when no device matches.
insert into table ads_new_mid_count
select
    login_date_first as create_date,
    count(*) as new_mid_count
from dwt_uv_topic
where login_date_first = '2020-03-10'
group by login_date_first;

3)查詢導入數據

-- Inspect the rows loaded into ads_new_mid_count (all columns, in table order).
select
    create_date,
    new_mid_count
from ads_new_mid_count;
1.1.3 沉默用戶數

需求定義:

沉默用戶:只在安裝當天啓動過,且啓動時間是在 7 天前

1)建表語句

-- Silent-device count per statistics date.
-- External table stored as tab-delimited text at the location below.
drop table if exists ads_silent_count;
create external table ads_silent_count (
    `dt`           string comment '統計日期',
    `silent_count` bigint comment '沉默設備數'
)
row format delimited
    fields terminated by '\t'
location '/warehouse/gmall/ads/ads_silent_count';

2)導入 2020-03-15 數據

-- Appends the silent-device count for 2020-03-15.
-- A device is counted when its first and last login dates are equal and
-- that date is on or before the statistics date minus 7 days.
insert into table ads_silent_count
select
    '2020-03-15' as dt,
    count(*) as silent_count
from dwt_uv_topic
where login_date_first = login_date_last
  and login_date_last <= date_add('2020-03-15', -7);

3)查詢導入數據

-- Inspect the rows loaded into ads_silent_count (all columns, in table order).
select
    dt,
    silent_count
from ads_silent_count;

1.1.4 本週迴流用戶數

需求定義:

本週迴流用戶:上週未活躍,本週活躍的設備,且不是本週新增設備

1)建表語句

-- Returning-device count per statistics date and week.
-- External table stored as tab-delimited text at the location below.
-- NOTE(review): the third column is named wastage_count although its comment
-- describes the returning-device count. The name looks copied from
-- ads_wastage_count. It is kept as is because readers of the table may depend on it.
drop table if exists ads_back_count;
create external table ads_back_count (
    `dt`            string comment '統計日期',
    `wk_dt`         string comment '統計日期所在周',
    `wastage_count` bigint comment '迴流設備數'
)
row format delimited
    fields terminated by '\t'
location '/warehouse/gmall/ads/ads_back_count';

2)導入數據:

-- Appends the returning-device count for the week containing 2020-03-15.
-- current_wk : devices whose last login falls in this week (Monday to Sunday)
--              and whose first login is before this week's Monday.
-- last_wk    : devices with any dws_uv_detail_daycount row in the previous week.
-- The left join plus the is-null filter keeps devices in current_wk that have
-- no row in last_wk.
-- The select list supplies all three target columns (dt, wk_dt, wastage_count).
-- wk_dt is written as Monday_Sunday of the statistics week.
insert into table ads_back_count
select
    '2020-03-15' as dt,
    concat(
        date_add(next_day('2020-03-15', 'MO'), -7),
        '_',
        date_add(next_day('2020-03-15', 'MO'), -1)
    ) as wk_dt,
    count(*) as wastage_count
from (
    select
        mid_id
    from dwt_uv_topic
    where login_date_last >= date_add(next_day('2020-03-15', 'MO'), -7)
      and login_date_last <= date_add(next_day('2020-03-15', 'MO'), -1)
      and login_date_first < date_add(next_day('2020-03-15', 'MO'), -7)
) current_wk
left join (
    select
        mid_id
    from dws_uv_detail_daycount
    where dt >= date_add(next_day('2020-03-15', 'MO'), -7 * 2)
      and dt <= date_add(next_day('2020-03-15', 'MO'), -7 - 1)
    group by mid_id
) last_wk
    on current_wk.mid_id = last_wk.mid_id
where last_wk.mid_id is null;

3)查詢結果

-- Inspect the rows loaded into ads_back_count (all columns, in table order).
select
    dt,
    wk_dt,
    wastage_count
from ads_back_count;
1.1.5 流失用戶數

需求定義:

流失用戶:最近 7 天未活躍的設備

1)建表語句

-- Lost-device count per statistics date.
-- External table stored as tab-delimited text at the location below.
drop table if exists ads_wastage_count;
create external table ads_wastage_count (
    `dt`            string comment '統計日期',
    `wastage_count` bigint comment '流失設備數'
)
row format delimited
    fields terminated by '\t'
location '/warehouse/gmall/ads/ads_wastage_count';

2)導入 2020-03-20 數據

-- Appends the lost-device count for 2020-03-20.
-- The subquery keeps one row per mid_id whose last login date is on or before
-- the statistics date minus 7 days. The outer query counts those rows.
insert into table ads_wastage_count
select
    '2020-03-20' as dt,
    count(*) as wastage_count
from (
    select
        mid_id
    from dwt_uv_topic
    where login_date_last <= date_add('2020-03-20', -7)
    group by mid_id
) lost_devices;

3)查詢結果

-- Inspect the rows loaded into ads_wastage_count (all columns, in table order).
select
    dt,
    wastage_count
from ads_wastage_count;
1.1.6 留存率

在這裏插入圖片描述
1)建表語句

-- Daily retention figures: one row per statistics date and device creation date.
-- External table stored as tab-delimited text at the location below.
drop table if exists ads_user_retention_day_rate;
create external table ads_user_retention_day_rate (
    `stat_date`       string        comment '統計日期',
    `create_date`     string        comment '設備新增日期',
    `retention_day`   int           comment '截止當前日期留存天數',
    `retention_count` bigint        comment '留存數量',
    `new_mid_count`   bigint        comment '設備新增數量',
    `retention_ratio` decimal(10,2) comment '留存率'
)
comment '每日用戶留存情況'
row format delimited
    fields terminated by '\t'
location '/warehouse/gmall/ads/ads_user_retention_day_rate/';

2)導入數據

-- Appends three rows for statistics date 2020-03-10: the 1-day, 2-day and 3-day
-- retention of devices first seen 1, 2 and 3 days earlier.
-- Each branch scans dwt_uv_topic once and produces:
--   retention_count : devices first seen on the creation date whose last login
--                     is the statistics date
--   new_mid_count   : devices first seen on the creation date
--   retention_ratio : retention_count / new_mid_count * 100
insert into table ads_user_retention_day_rate
-- 1-day retention of devices added on 2020-03-09
select
    '2020-03-10' as stat_date,
    date_add('2020-03-10', -1) as create_date,
    1 as retention_day,
    sum(if(login_date_first = date_add('2020-03-10', -1) and login_date_last = '2020-03-10', 1, 0)) as retention_count,
    sum(if(login_date_first = date_add('2020-03-10', -1), 1, 0)) as new_mid_count,
    sum(if(login_date_first = date_add('2020-03-10', -1) and login_date_last = '2020-03-10', 1, 0))
        / sum(if(login_date_first = date_add('2020-03-10', -1), 1, 0)) * 100 as retention_ratio
from dwt_uv_topic
union all
-- 2-day retention of devices added on 2020-03-08
select
    '2020-03-10' as stat_date,
    date_add('2020-03-10', -2) as create_date,
    2 as retention_day,
    sum(if(login_date_first = date_add('2020-03-10', -2) and login_date_last = '2020-03-10', 1, 0)) as retention_count,
    sum(if(login_date_first = date_add('2020-03-10', -2), 1, 0)) as new_mid_count,
    sum(if(login_date_first = date_add('2020-03-10', -2) and login_date_last = '2020-03-10', 1, 0))
        / sum(if(login_date_first = date_add('2020-03-10', -2), 1, 0)) * 100 as retention_ratio
from dwt_uv_topic
union all
-- 3-day retention of devices added on 2020-03-07
select
    '2020-03-10' as stat_date,
    date_add('2020-03-10', -3) as create_date,
    3 as retention_day,
    sum(if(login_date_first = date_add('2020-03-10', -3) and login_date_last = '2020-03-10', 1, 0)) as retention_count,
    sum(if(login_date_first = date_add('2020-03-10', -3), 1, 0)) as new_mid_count,
    sum(if(login_date_first = date_add('2020-03-10', -3) and login_date_last = '2020-03-10', 1, 0))
        / sum(if(login_date_first = date_add('2020-03-10', -3), 1, 0)) * 100 as retention_ratio
from dwt_uv_topic;

3)查詢導入數據

-- Inspect the rows loaded into ads_user_retention_day_rate (all columns, in table order).
select
    stat_date,
    create_date,
    retention_day,
    retention_count,
    new_mid_count,
    retention_ratio
from ads_user_retention_day_rate;
1.1.7 最近連續三週活躍用戶數

1)建表語句

-- Count of devices active in each of the last three weeks, per statistics date.
-- External table stored as tab-delimited text at the location below.
-- The comment literal for dt is kept on one line so that no line break ends up
-- inside the stored column comment.
drop table if exists ads_continuity_wk_count;
create external table ads_continuity_wk_count (
    `dt`               string comment '統計日期,一般用結束週週日日期,如果每天計算一次,可用當天日期',
    `wk_dt`            string comment '持續時間',
    `continuity_count` bigint comment '活躍次數'
)
row format delimited
    fields terminated by '\t'
location '/warehouse/gmall/ads/ads_continuity_wk_count';

2)導入 2020-03-15 所在周的數據

-- Appends the number of devices active in each of the three most recent weeks,
-- anchored on statistics date 2020-03-15.
-- weekly_active lists each mid_id once per week in which it has a
-- dws_uv_detail_daycount row (this week, last week, the week before).
-- A device appearing three times was therefore active in all three weeks.
-- wk_dt is written as first-Monday_last-Sunday of the three-week window.
-- Every date expression uses the same anchor date and the same day abbreviation,
-- and the statement ends with a semicolon so it does not run into the next one.
insert into table ads_continuity_wk_count
select
    '2020-03-15' as dt,
    concat(
        date_add(next_day('2020-03-15', 'MO'), -7 * 3),
        '_',
        date_add(next_day('2020-03-15', 'MO'), -1)
    ) as wk_dt,
    count(*) as continuity_count
from (
    select
        mid_id
    from (
        select
            mid_id
        from dws_uv_detail_daycount
        where dt >= date_add(next_day('2020-03-15', 'MO'), -7)
          and dt <= date_add(next_day('2020-03-15', 'MO'), -1)
        group by mid_id
        union all
        select
            mid_id
        from dws_uv_detail_daycount
        where dt >= date_add(next_day('2020-03-15', 'MO'), -7 * 2)
          and dt <= date_add(next_day('2020-03-15', 'MO'), -7 - 1)
        group by mid_id
        union all
        select
            mid_id
        from dws_uv_detail_daycount
        where dt >= date_add(next_day('2020-03-15', 'MO'), -7 * 3)
          and dt <= date_add(next_day('2020-03-15', 'MO'), -7 * 2 - 1)
        group by mid_id
    ) weekly_active
    group by mid_id
    having count(*) = 3
) three_week_active;

3)查詢

-- Inspect the rows loaded into ads_continuity_wk_count (all columns, in table order).
select
    dt,
    wk_dt,
    continuity_count
from ads_continuity_wk_count;
1.1.8 最近七天內連續三天活躍用戶數

1)建表語句

-- Count of devices active on three or more consecutive days within a 7-day window.
-- External table stored as tab-delimited text at the location below.
drop table if exists ads_continuity_uv_count;
create external table ads_continuity_uv_count (
    `dt`               string comment '統計日期',
    `wk_dt`            string comment '最近 7 天日期',
    `continuity_count` bigint
)
comment '連續活躍設備數'
row format delimited
    fields terminated by '\t'
location '/warehouse/gmall/ads/ads_continuity_uv_count';

2)寫出導入數據的 SQL 語句

-- Appends, for statistics date 2020-03-12, the number of devices with at least
-- three consecutive active days in the 7-day window ending on that date.
-- Steps, innermost first:
--   ranked     : rank each device's rows by dt inside the window
--   anchored   : subtract the rank from dt, so consecutive days share one date_dif
--   streaks    : keep (mid_id, date_dif) groups with three or more rows
--   streak_ids : collapse to one row per mid_id so a device is counted once
insert into table ads_continuity_uv_count
select
    '2020-03-12' as dt,
    concat(date_add('2020-03-12', -6), '_', '2020-03-12') as wk_dt,
    count(*) as continuity_count
from (
    select
        mid_id
    from (
        select
            mid_id
        from (
            select
                mid_id,
                date_sub(dt, rk) as date_dif
            from (
                select
                    mid_id,
                    dt,
                    rank() over (partition by mid_id order by dt) as rk
                from dws_uv_detail_daycount
                where dt >= date_add('2020-03-12', -6)
                  and dt <= '2020-03-12'
            ) ranked
        ) anchored
        group by mid_id, date_dif
        having count(*) >= 3
    ) streaks
    group by mid_id
) streak_ids;

3)查詢

-- Inspect the rows loaded into ads_continuity_uv_count (all columns, in table order).
select
    dt,
    wk_dt,
    continuity_count
from ads_continuity_uv_count;

結束語


都看到這裏了,點贊評論一下吧!!!

在這裏插入圖片描述

點擊查看👇

敬請期待!

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章