4.3.12 DWD层事件表加载数据脚本
1)在hadoop102的/home/atguigu/bin目录下创建脚本
[atguigu@hadoop102 bin]$ vim dwd_event_log.sh
在脚本中编写如下内容
#!/bin/bash
# Loads one day's partition of every per-event DWD table from
# dwd_base_event_log. For each event type, the rows of the requested dt whose
# event_name matches are selected, the event-specific fields are extracted
# from the event_json column with get_json_object, and the target table's
# partition dt=<date> is overwritten.
#
# Usage: dwd_event_log.sh [yyyy-MM-dd]
#   $1  optional load date; when omitted, yesterday's date is used.
#
# NOTE(review): the published listing had lost underscores in several
# identifiers (e.g. news_staytime, loading_way, ad_source, ap_time, push_id,
# active_source, comment_id, p_comment_id, praise_count, target_id). They are
# restored here by analogy with the intact names; confirm the JSON keys and
# column order against the table DDL and the log schema before running.

# Variables kept at the top so they are easy to change.
APP=gmall
hive=/opt/module/hive/bin/hive

# Use the date passed as the first argument if there is one; otherwise fall
# back to the day before the current date.
if [ -n "$1" ]; then
    do_date=$1
else
    do_date=$(date -d "-1 day" +%F)
fi

# Inside this double-quoted string the shell expands ${APP} and ${do_date};
# the JSONPath root is written as \$ so it reaches Hive as a literal '$'.
sql="
set hive.exec.dynamic.partition.mode=nonstrict;

insert overwrite table ${APP}.dwd_display_log
PARTITION (dt='${do_date}')
select
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'\$.kv.action') action,
    get_json_object(event_json,'\$.kv.goodsid') goodsid,
    get_json_object(event_json,'\$.kv.place') place,
    get_json_object(event_json,'\$.kv.extend1') extend1,
    get_json_object(event_json,'\$.kv.category') category,
    server_time
from ${APP}.dwd_base_event_log
where dt='${do_date}' and event_name='display';

insert overwrite table ${APP}.dwd_newsdetail_log
PARTITION (dt='${do_date}')
select
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'\$.kv.entry') entry,
    get_json_object(event_json,'\$.kv.action') action,
    get_json_object(event_json,'\$.kv.goodsid') goodsid,
    get_json_object(event_json,'\$.kv.showtype') showtype,
    get_json_object(event_json,'\$.kv.news_staytime') news_staytime,
    get_json_object(event_json,'\$.kv.loading_time') loading_time,
    get_json_object(event_json,'\$.kv.type1') type1,
    get_json_object(event_json,'\$.kv.category') category,
    server_time
from ${APP}.dwd_base_event_log
where dt='${do_date}' and event_name='newsdetail';

insert overwrite table ${APP}.dwd_loading_log
PARTITION (dt='${do_date}')
select
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'\$.kv.action') action,
    get_json_object(event_json,'\$.kv.loading_time') loading_time,
    get_json_object(event_json,'\$.kv.loading_way') loading_way,
    get_json_object(event_json,'\$.kv.extend1') extend1,
    get_json_object(event_json,'\$.kv.extend2') extend2,
    get_json_object(event_json,'\$.kv.type') type,
    get_json_object(event_json,'\$.kv.type1') type1,
    server_time
from ${APP}.dwd_base_event_log
where dt='${do_date}' and event_name='loading';

insert overwrite table ${APP}.dwd_ad_log
PARTITION (dt='${do_date}')
select
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'\$.kv.entry') entry,
    get_json_object(event_json,'\$.kv.action') action,
    get_json_object(event_json,'\$.kv.content') content,
    get_json_object(event_json,'\$.kv.detail') detail,
    get_json_object(event_json,'\$.kv.source') ad_source,
    get_json_object(event_json,'\$.kv.behavior') behavior,
    get_json_object(event_json,'\$.kv.newstype') newstype,
    get_json_object(event_json,'\$.kv.show_style') show_style,
    server_time
from ${APP}.dwd_base_event_log
where dt='${do_date}' and event_name='ad';

insert overwrite table ${APP}.dwd_notification_log
PARTITION (dt='${do_date}')
select
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'\$.kv.action') action,
    get_json_object(event_json,'\$.kv.noti_type') noti_type,
    get_json_object(event_json,'\$.kv.ap_time') ap_time,
    get_json_object(event_json,'\$.kv.content') content,
    server_time
from ${APP}.dwd_base_event_log
where dt='${do_date}' and event_name='notification';

insert overwrite table ${APP}.dwd_active_foreground_log
PARTITION (dt='${do_date}')
select
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'\$.kv.push_id') push_id,
    get_json_object(event_json,'\$.kv.access') access,
    server_time
from ${APP}.dwd_base_event_log
where dt='${do_date}' and event_name='active_foreground';

insert overwrite table ${APP}.dwd_active_background_log
PARTITION (dt='${do_date}')
select
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'\$.kv.active_source') active_source,
    server_time
from ${APP}.dwd_base_event_log
where dt='${do_date}' and event_name='active_background';

insert overwrite table ${APP}.dwd_comment_log
PARTITION (dt='${do_date}')
select
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'\$.kv.comment_id') comment_id,
    get_json_object(event_json,'\$.kv.userid') userid,
    get_json_object(event_json,'\$.kv.p_comment_id') p_comment_id,
    get_json_object(event_json,'\$.kv.content') content,
    get_json_object(event_json,'\$.kv.addtime') addtime,
    get_json_object(event_json,'\$.kv.other_id') other_id,
    get_json_object(event_json,'\$.kv.praise_count') praise_count,
    get_json_object(event_json,'\$.kv.reply_count') reply_count,
    server_time
from ${APP}.dwd_base_event_log
where dt='${do_date}' and event_name='comment';

insert overwrite table ${APP}.dwd_favorites_log
PARTITION (dt='${do_date}')
select
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'\$.kv.id') id,
    get_json_object(event_json,'\$.kv.course_id') course_id,
    get_json_object(event_json,'\$.kv.userid') userid,
    get_json_object(event_json,'\$.kv.add_time') add_time,
    server_time
from ${APP}.dwd_base_event_log
where dt='${do_date}' and event_name='favorites';

insert overwrite table ${APP}.dwd_praise_log
PARTITION (dt='${do_date}')
select
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'\$.kv.id') id,
    get_json_object(event_json,'\$.kv.userid') userid,
    get_json_object(event_json,'\$.kv.target_id') target_id,
    get_json_object(event_json,'\$.kv.type') type,
    get_json_object(event_json,'\$.kv.add_time') add_time,
    server_time
from ${APP}.dwd_base_event_log
where dt='${do_date}' and event_name='praise';

insert overwrite table ${APP}.dwd_error_log
PARTITION (dt='${do_date}')
select
    mid_id,
    user_id,
    version_code,
    version_name,
    lang,
    source,
    os,
    area,
    model,
    brand,
    sdk_version,
    gmail,
    height_width,
    app_time,
    network,
    lng,
    lat,
    get_json_object(event_json,'\$.kv.errorBrief') errorBrief,
    get_json_object(event_json,'\$.kv.errorDetail') errorDetail,
    server_time
from ${APP}.dwd_base_event_log
where dt='${do_date}' and event_name='error';
"

# Run all statements in a single Hive CLI invocation.
$hive -e "$sql"
2)增加脚本执行权限
[atguigu@hadoop102 bin]$ chmod 777 dwd_event_log.sh
3)脚本使用
[atguigu@hadoop102 module]$ dwd_event_log.sh 2019-02-11
4)查询导入结果
hive (gmall)>
select * from dwd_comment_log where dt='2019-02-11' limit 2;
5)脚本执行时间
企业开发中一般在每日凌晨0点30分~1点执行该脚本