4.3.12 DWD層事件表加載數據腳本
1)在hadoop102的/home/atguigu/bin目錄下創建腳本
[atguigu@hadoop102 bin]$ vim dwd_event_log.sh
在腳本中編寫如下內容
#!/bin/bash
# Loads one day of data from the DWD base event table into the per-event
# DWD tables (display, newsdetail, loading, ad, notification,
# active_foreground, active_background, comment, favorites, praise, error).
#
# Usage: dwd_event_log.sh [yyyy-MM-dd]
#   $1  optional partition date; when omitted, yesterday's date is used.
#
# NOTE(review): the listing this script was recovered from had lost
# underscores in several places. The JSON keys news_staytime, loading_way,
# ap_time, push_id, active_source, comment_id, p_comment_id, praise_count,
# target_id and add_time (praise table) were reconstructed -- confirm them
# against the event JSON schema and the target table DDL before relying on
# this script, since a wrong key silently yields NULL.

# Variables defined up front so they are easy to change
APP=gmall
hive=/opt/module/hive/bin/hive

# Use the date given as the first argument if there is one;
# otherwise fall back to the day before the current date
if [ -n "$1" ]; then
    do_date=$1
else
    do_date=$(date -d "-1 day" +%F)
fi

# Common columns selected, in this order, by every statement below
common_fields="
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat"

# The JSONPath '$' is written as '\$' so the shell never tries to expand it
# inside this double-quoted string; Hive still receives '$.kv.<key>'.
sql="
set hive.exec.dynamic.partition.mode=nonstrict;

insert overwrite table ${APP}.dwd_display_log
PARTITION (dt='$do_date')
select
${common_fields},
    get_json_object(event_json,'\$.kv.action') action,
    get_json_object(event_json,'\$.kv.goodsid') goodsid,
    get_json_object(event_json,'\$.kv.place') place,
    get_json_object(event_json,'\$.kv.extend1') extend1,
    get_json_object(event_json,'\$.kv.category') category,
    server_time
from ${APP}.dwd_base_event_log
where dt='$do_date' and event_name='display';

insert overwrite table ${APP}.dwd_newsdetail_log
PARTITION (dt='$do_date')
select
${common_fields},
    get_json_object(event_json,'\$.kv.entry') entry,
    get_json_object(event_json,'\$.kv.action') action,
    get_json_object(event_json,'\$.kv.goodsid') goodsid,
    get_json_object(event_json,'\$.kv.showtype') showtype,
    get_json_object(event_json,'\$.kv.news_staytime') news_staytime,
    get_json_object(event_json,'\$.kv.loading_time') loading_time,
    get_json_object(event_json,'\$.kv.type1') type1,
    get_json_object(event_json,'\$.kv.category') category,
    server_time
from ${APP}.dwd_base_event_log
where dt='$do_date' and event_name='newsdetail';

insert overwrite table ${APP}.dwd_loading_log
PARTITION (dt='$do_date')
select
${common_fields},
    get_json_object(event_json,'\$.kv.action') action,
    get_json_object(event_json,'\$.kv.loading_time') loading_time,
    get_json_object(event_json,'\$.kv.loading_way') loading_way,
    get_json_object(event_json,'\$.kv.extend1') extend1,
    get_json_object(event_json,'\$.kv.extend2') extend2,
    get_json_object(event_json,'\$.kv.type') type,
    get_json_object(event_json,'\$.kv.type1') type1,
    server_time
from ${APP}.dwd_base_event_log
where dt='$do_date' and event_name='loading';

insert overwrite table ${APP}.dwd_ad_log
PARTITION (dt='$do_date')
select
${common_fields},
    get_json_object(event_json,'\$.kv.entry') entry,
    get_json_object(event_json,'\$.kv.action') action,
    get_json_object(event_json,'\$.kv.content') content,
    get_json_object(event_json,'\$.kv.detail') detail,
    get_json_object(event_json,'\$.kv.source') ad_source,
    get_json_object(event_json,'\$.kv.behavior') behavior,
    get_json_object(event_json,'\$.kv.newstype') newstype,
    get_json_object(event_json,'\$.kv.show_style') show_style,
    server_time
from ${APP}.dwd_base_event_log
where dt='$do_date' and event_name='ad';

insert overwrite table ${APP}.dwd_notification_log
PARTITION (dt='$do_date')
select
${common_fields},
    get_json_object(event_json,'\$.kv.action') action,
    get_json_object(event_json,'\$.kv.noti_type') noti_type,
    get_json_object(event_json,'\$.kv.ap_time') ap_time,
    get_json_object(event_json,'\$.kv.content') content,
    server_time
from ${APP}.dwd_base_event_log
where dt='$do_date' and event_name='notification';

insert overwrite table ${APP}.dwd_active_foreground_log
PARTITION (dt='$do_date')
select
${common_fields},
    get_json_object(event_json,'\$.kv.push_id') push_id,
    get_json_object(event_json,'\$.kv.access') access,
    server_time
from ${APP}.dwd_base_event_log
where dt='$do_date' and event_name='active_foreground';

insert overwrite table ${APP}.dwd_active_background_log
PARTITION (dt='$do_date')
select
${common_fields},
    get_json_object(event_json,'\$.kv.active_source') active_source,
    server_time
from ${APP}.dwd_base_event_log
where dt='$do_date' and event_name='active_background';

insert overwrite table ${APP}.dwd_comment_log
PARTITION (dt='$do_date')
select
${common_fields},
    get_json_object(event_json,'\$.kv.comment_id') comment_id,
    get_json_object(event_json,'\$.kv.userid') userid,
    get_json_object(event_json,'\$.kv.p_comment_id') p_comment_id,
    get_json_object(event_json,'\$.kv.content') content,
    get_json_object(event_json,'\$.kv.addtime') addtime,
    get_json_object(event_json,'\$.kv.other_id') other_id,
    get_json_object(event_json,'\$.kv.praise_count') praise_count,
    get_json_object(event_json,'\$.kv.reply_count') reply_count,
    server_time
from ${APP}.dwd_base_event_log
where dt='$do_date' and event_name='comment';

insert overwrite table ${APP}.dwd_favorites_log
PARTITION (dt='$do_date')
select
${common_fields},
    get_json_object(event_json,'\$.kv.id') id,
    get_json_object(event_json,'\$.kv.course_id') course_id,
    get_json_object(event_json,'\$.kv.userid') userid,
    get_json_object(event_json,'\$.kv.add_time') add_time,
    server_time
from ${APP}.dwd_base_event_log
where dt='$do_date' and event_name='favorites';

insert overwrite table ${APP}.dwd_praise_log
PARTITION (dt='$do_date')
select
${common_fields},
    get_json_object(event_json,'\$.kv.id') id,
    get_json_object(event_json,'\$.kv.userid') userid,
    get_json_object(event_json,'\$.kv.target_id') target_id,
    get_json_object(event_json,'\$.kv.type') type,
    get_json_object(event_json,'\$.kv.add_time') add_time,
    server_time
from ${APP}.dwd_base_event_log
where dt='$do_date' and event_name='praise';

insert overwrite table ${APP}.dwd_error_log
PARTITION (dt='$do_date')
select
${common_fields},
    get_json_object(event_json,'\$.kv.errorBrief') errorBrief,
    get_json_object(event_json,'\$.kv.errorDetail') errorDetail,
    server_time
from ${APP}.dwd_base_event_log
where dt='$do_date' and event_name='error';
"

# Run all statements in a single Hive CLI invocation
"$hive" -e "$sql"
2)增加腳本執行權限
[atguigu@hadoop102 bin]$ chmod 777 dwd_event_log.sh
3)腳本使用
[atguigu@hadoop102 module]$ dwd_event_log.sh 2019-02-11
4)查詢導入結果
hive (gmall)>
select * from dwd_comment_log where dt='2019-02-11' limit 2;
5)腳本執行時間
企業開發中一般在每日凌晨0點30分~1點執行