6.1 DWS層
目標:統計當日、當週、當月活躍的每個設備明細
6.1.1 每日活躍設備明細
1)建表語句
hive (gmall)>
-- Daily active-device detail table (DWS layer), partitioned by dt.
-- All attribute columns are strings, and the table is stored as Parquet.
-- String literals use plain ASCII single quotes. Typographic quotes are not
-- valid HiveQL string delimiters and make the statement fail to parse.
drop table if exists dws_uv_detail_day;
create external table dws_uv_detail_day
(
    mid_id       string COMMENT '設備唯一標識',
    user_id      string COMMENT '用戶標識',
    version_code string COMMENT '程序版本號',
    version_name string COMMENT '程序版本名',
    lang         string COMMENT '系統語言',
    source       string COMMENT '渠道號',
    os           string COMMENT '安卓系統版本',
    area         string COMMENT '區域',
    model        string COMMENT '手機型號',
    brand        string COMMENT '手機品牌',
    sdk_version  string COMMENT 'sdkVersion',
    gmail        string COMMENT 'gmail',
    height_width string COMMENT '屏幕寬高',
    app_time     string COMMENT '客戶端日誌產生時的時間',
    network      string COMMENT '網絡模式',
    lng          string COMMENT '經度',
    lat          string COMMENT '緯度'
)
COMMENT '活躍用戶按天明細'
partitioned by (dt string)
stored as parquet
location '/warehouse/gmall/dws/dws_uv_detail_day'
;
2)數據導入
以用戶單日訪問爲key(按設備 mid_id 分組)進行聚合,如果某個用戶在一天中使用了兩種操作系統、兩個系統版本、多個地區,登錄不同賬號,則各字段會將當天出現過的不同取值去重後以“|”拼接保存
hive (gmall)>
-- Load one day of dwd_start_log into the matching dt partition of
-- dws_uv_detail_day, producing one row per mid_id.
-- Every attribute keeps all distinct values seen for the device that day,
-- joined with a pipe character (collect_set then concat_ws).
-- The run date is defined once as a Hive variable so it cannot drift between
-- the partition spec and the source filter.
-- The target partition is named statically, so the dynamic partition mode
-- setting is not exercised here. It is retained from the original session setup.
set hivevar:do_date=2019-02-10;
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table dws_uv_detail_day
partition (dt = '${hivevar:do_date}')
select
    mid_id,
    concat_ws('|', collect_set(user_id))      as user_id,
    concat_ws('|', collect_set(version_code)) as version_code,
    concat_ws('|', collect_set(version_name)) as version_name,
    concat_ws('|', collect_set(lang))         as lang,
    concat_ws('|', collect_set(source))       as source,
    concat_ws('|', collect_set(os))           as os,
    concat_ws('|', collect_set(area))         as area,
    concat_ws('|', collect_set(model))        as model,
    concat_ws('|', collect_set(brand))        as brand,
    concat_ws('|', collect_set(sdk_version))  as sdk_version,
    concat_ws('|', collect_set(gmail))        as gmail,
    concat_ws('|', collect_set(height_width)) as height_width,
    concat_ws('|', collect_set(app_time))     as app_time,
    concat_ws('|', collect_set(network))      as network,
    concat_ws('|', collect_set(lng))          as lng,
    concat_ws('|', collect_set(lat))          as lat
from dwd_start_log
where dt = '${hivevar:do_date}'
group by mid_id;
3)查詢導入結果
-- Sanity check after the load: display a single row of dws_uv_detail_day.
hive (gmall)> select * from dws_uv_detail_day limit 1;
-- Row count of dws_uv_detail_day. No dt filter is given, so every partition is counted.
hive (gmall)> select count(*) from dws_uv_detail_day;
4)思考:不同渠道來源的每日活躍數統計怎麼計算?
6.1.2 每週活躍設備明細
根據日用戶訪問明細,獲得周用戶訪問明細。
1)建表語句
hive (gmall)>
-- Weekly active-device detail table (DWS layer), partitioned by wk_dt.
-- It has the same attribute columns as the daily table plus the Monday and
-- Sunday dates of the week.
-- String literals use plain ASCII single quotes. Typographic quotes are not
-- valid HiveQL string delimiters and make the statement fail to parse.
drop table if exists dws_uv_detail_wk;
create external table dws_uv_detail_wk
(
    mid_id       string COMMENT '設備唯一標識',
    user_id      string COMMENT '用戶標識',
    version_code string COMMENT '程序版本號',
    version_name string COMMENT '程序版本名',
    lang         string COMMENT '系統語言',
    source       string COMMENT '渠道號',
    os           string COMMENT '安卓系統版本',
    area         string COMMENT '區域',
    model        string COMMENT '手機型號',
    brand        string COMMENT '手機品牌',
    sdk_version  string COMMENT 'sdkVersion',
    gmail        string COMMENT 'gmail',
    height_width string COMMENT '屏幕寬高',
    app_time     string COMMENT '客戶端日誌產生時的時間',
    network      string COMMENT '網絡模式',
    lng          string COMMENT '經度',
    lat          string COMMENT '緯度',
    monday_date  string COMMENT '週一日期',
    sunday_date  string COMMENT '週日日期'
)
COMMENT '活躍用戶按周明細'
partitioned by (wk_dt string)
stored as parquet
location '/warehouse/gmall/dws/dws_uv_detail_wk/'
;
2)數據導入
hive (gmall)>
-- Roll dws_uv_detail_day up to one row per mid_id for the Monday-to-Sunday
-- week that contains the run date, and write it to a dynamic wk_dt partition.
-- next_day(d, 'MO') is the first Monday after d, so subtracting 7 days gives
-- the Monday of the week containing d and subtracting 1 day gives its Sunday.
-- The partition value is built as <monday>_<sunday> and must be the last
-- select expression because wk_dt is a dynamic partition column.
-- The run date is defined once as a Hive variable instead of being repeated
-- in every date expression.
set hivevar:do_date=2019-02-10;
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table dws_uv_detail_wk partition (wk_dt)
select
    mid_id,
    concat_ws('|', collect_set(user_id))      as user_id,
    concat_ws('|', collect_set(version_code)) as version_code,
    concat_ws('|', collect_set(version_name)) as version_name,
    concat_ws('|', collect_set(lang))         as lang,
    concat_ws('|', collect_set(source))       as source,
    concat_ws('|', collect_set(os))           as os,
    concat_ws('|', collect_set(area))         as area,
    concat_ws('|', collect_set(model))        as model,
    concat_ws('|', collect_set(brand))        as brand,
    concat_ws('|', collect_set(sdk_version))  as sdk_version,
    concat_ws('|', collect_set(gmail))        as gmail,
    concat_ws('|', collect_set(height_width)) as height_width,
    concat_ws('|', collect_set(app_time))     as app_time,
    concat_ws('|', collect_set(network))      as network,
    concat_ws('|', collect_set(lng))          as lng,
    concat_ws('|', collect_set(lat))          as lat,
    date_add(next_day('${hivevar:do_date}', 'MO'), -7) as monday_date,
    date_add(next_day('${hivevar:do_date}', 'MO'), -1) as sunday_date,
    concat(
        date_add(next_day('${hivevar:do_date}', 'MO'), -7),
        '_',
        date_add(next_day('${hivevar:do_date}', 'MO'), -1)
    ) as wk_dt
from dws_uv_detail_day
where dt >= date_add(next_day('${hivevar:do_date}', 'MO'), -7)
  and dt <= date_add(next_day('${hivevar:do_date}', 'MO'), -1)
group by mid_id;
3)查詢導入結果
-- Sanity check after the load: display a single row of dws_uv_detail_wk.
hive (gmall)> select * from dws_uv_detail_wk limit 1;
-- Row count of dws_uv_detail_wk. No wk_dt filter is given, so every partition is counted.
hive (gmall)> select count(*) from dws_uv_detail_wk;
6.1.3 每月活躍設備明細
1)建表語句
hive (gmall)>
-- Monthly active-device detail table (DWS layer), partitioned by mn.
-- It has the same attribute columns as the daily table.
-- String literals use plain ASCII single quotes. Typographic quotes are not
-- valid HiveQL string delimiters and make the statement fail to parse.
drop table if exists dws_uv_detail_mn;
create external table dws_uv_detail_mn
(
    mid_id       string COMMENT '設備唯一標識',
    user_id      string COMMENT '用戶標識',
    version_code string COMMENT '程序版本號',
    version_name string COMMENT '程序版本名',
    lang         string COMMENT '系統語言',
    source       string COMMENT '渠道號',
    os           string COMMENT '安卓系統版本',
    area         string COMMENT '區域',
    model        string COMMENT '手機型號',
    brand        string COMMENT '手機品牌',
    sdk_version  string COMMENT 'sdkVersion',
    gmail        string COMMENT 'gmail',
    height_width string COMMENT '屏幕寬高',
    app_time     string COMMENT '客戶端日誌產生時的時間',
    network      string COMMENT '網絡模式',
    lng          string COMMENT '經度',
    lat          string COMMENT '緯度'
)
COMMENT '活躍用戶按月明細'
partitioned by (mn string)
stored as parquet
location '/warehouse/gmall/dws/dws_uv_detail_mn/'
;
2)數據導入
hive (gmall)>
-- Roll dws_uv_detail_day up to one row per mid_id for the calendar month that
-- contains the run date, and write it to a dynamic mn partition.
-- The partition value is the run date formatted as yyyy-MM and must be the
-- last select expression because mn is a dynamic partition column.
-- The run date is defined once as a Hive variable instead of being repeated
-- in the partition expression and the source filter.
set hivevar:do_date=2019-02-10;
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table dws_uv_detail_mn partition (mn)
select
    mid_id,
    concat_ws('|', collect_set(user_id))      as user_id,
    concat_ws('|', collect_set(version_code)) as version_code,
    concat_ws('|', collect_set(version_name)) as version_name,
    concat_ws('|', collect_set(lang))         as lang,
    concat_ws('|', collect_set(source))       as source,
    concat_ws('|', collect_set(os))           as os,
    concat_ws('|', collect_set(area))         as area,
    concat_ws('|', collect_set(model))        as model,
    concat_ws('|', collect_set(brand))        as brand,
    concat_ws('|', collect_set(sdk_version))  as sdk_version,
    concat_ws('|', collect_set(gmail))        as gmail,
    concat_ws('|', collect_set(height_width)) as height_width,
    concat_ws('|', collect_set(app_time))     as app_time,
    concat_ws('|', collect_set(network))      as network,
    concat_ws('|', collect_set(lng))          as lng,
    concat_ws('|', collect_set(lat))          as lat,
    date_format('${hivevar:do_date}', 'yyyy-MM') as mn
from dws_uv_detail_day
where date_format(dt, 'yyyy-MM') = date_format('${hivevar:do_date}', 'yyyy-MM')
group by mid_id;
3)查詢導入結果
-- Sanity check after the load: display a single row of dws_uv_detail_mn.
hive (gmall)> select * from dws_uv_detail_mn limit 1;
-- Row count of dws_uv_detail_mn. No mn filter is given, so every partition is counted.
hive (gmall)> select count(*) from dws_uv_detail_mn ;