(6)数仓搭建之DWD层启动表6

数仓搭建之DWD层
4.1 DWD层启动表数据解析
4.1.1 创建启动表

1)建表语句
hive (gmall)>
-- DWD-layer start-up log table. Every field is stored as a string and the
-- table is partitioned by dt.
-- The table is EXTERNAL, so the DROP below removes only the metastore entry
-- and the files under LOCATION are left in place.
drop table if exists dwd_start_log;
CREATE EXTERNAL TABLE dwd_start_log(
    mid_id string,
    user_id string,
    version_code string,
    version_name string,
    lang string,
    source string,
    os string,
    area string,
    model string,
    brand string,
    sdk_version string,
    gmail string,
    height_width string,
    app_time string,
    network string,
    lng string,
    lat string,
    entry string,
    open_ad_type string,
    action string,
    loading_time string,
    detail string,
    extend1 string
)
PARTITIONED BY (dt string)
location '/warehouse/gmall/dwd/dwd_start_log/';
4.1.2 向启动表导入数据

hive (gmall)>
-- Rebuild the 2019-02-10 partition of dwd_start_log from the same partition
-- of ods_start_log. Each output column is extracted from the JSON text held
-- in the `line` column with get_json_object and a '$.<key>' path.
-- The select list must stay in the same order as the columns of
-- dwd_start_log, because the insert maps columns by position.
insert overwrite table dwd_start_log
PARTITION (dt='2019-02-10')
select
    get_json_object(line,'$.mid') mid_id,
    get_json_object(line,'$.uid') user_id,
    get_json_object(line,'$.vc') version_code,
    get_json_object(line,'$.vn') version_name,
    get_json_object(line,'$.l') lang,
    get_json_object(line,'$.sr') source,
    get_json_object(line,'$.os') os,
    get_json_object(line,'$.ar') area,
    get_json_object(line,'$.md') model,
    get_json_object(line,'$.ba') brand,
    get_json_object(line,'$.sv') sdk_version,
    get_json_object(line,'$.g') gmail,
    get_json_object(line,'$.hw') height_width,
    get_json_object(line,'$.t') app_time,
    get_json_object(line,'$.nw') network,
    get_json_object(line,'$.ln') lng,
    get_json_object(line,'$.la') lat,
    get_json_object(line,'$.entry') entry,
    get_json_object(line,'$.open_ad_type') open_ad_type,
    get_json_object(line,'$.action') action,
    get_json_object(line,'$.loading_time') loading_time,
    get_json_object(line,'$.detail') detail,
    get_json_object(line,'$.extend1') extend1
from ods_start_log
where dt='2019-02-10';
3)测试
hive (gmall)> select * from dwd_start_log limit 2;
4.1.3 DWD层启动表加载数据脚本

1)在hadoop102的/home/atguigu/bin目录下创建脚本
[atguigu@hadoop102 bin]$ vim dwd_start_log.sh
在脚本中编写如下内容
#!/bin/bash
#
# Loads one day's partition of <APP>.dwd_start_log from the same partition of
# <APP>.ods_start_log by running a HiveQL statement through `hive -e`.
#
# Usage: dwd_start_log.sh [date]
#   date  partition value to load; defaults to the day before today,
#         formatted with date's %F (YYYY-MM-DD).

# Define variables up front so they are easy to change.
APP=gmall
hive=/opt/module/hive/bin/hive

# Use the date given as the first argument. When no argument is given,
# fall back to the day before the current date.
if [ -n "$1" ]; then
    do_date=$1
else
    do_date=$(date -d "-1 day" +%F)
fi

# The statement is built as a double-quoted shell string, so $APP and
# $do_date are expanded by the shell before Hive sees the text. The inner
# "$APP" briefly closes and reopens the surrounding quotes. The '$.key' JSON
# paths reach Hive unchanged because "$." is not a shell parameter expansion.
sql="
set hive.exec.dynamic.partition.mode=nonstrict;

insert overwrite table "$APP".dwd_start_log
PARTITION (dt='$do_date')
select
    get_json_object(line,'$.mid') mid_id,
    get_json_object(line,'$.uid') user_id,
    get_json_object(line,'$.vc') version_code,
    get_json_object(line,'$.vn') version_name,
    get_json_object(line,'$.l') lang,
    get_json_object(line,'$.sr') source,
    get_json_object(line,'$.os') os,
    get_json_object(line,'$.ar') area,
    get_json_object(line,'$.md') model,
    get_json_object(line,'$.ba') brand,
    get_json_object(line,'$.sv') sdk_version,
    get_json_object(line,'$.g') gmail,
    get_json_object(line,'$.hw') height_width,
    get_json_object(line,'$.t') app_time,
    get_json_object(line,'$.nw') network,
    get_json_object(line,'$.ln') lng,
    get_json_object(line,'$.la') lat,
    get_json_object(line,'$.entry') entry,
    get_json_object(line,'$.open_ad_type') open_ad_type,
    get_json_object(line,'$.action') action,
    get_json_object(line,'$.loading_time') loading_time,
    get_json_object(line,'$.detail') detail,
    get_json_object(line,'$.extend1') extend1
from "$APP".ods_start_log
where dt='$do_date';
"

# Run the assembled statement with the Hive CLI configured above.
$hive -e "$sql"
2)增加脚本执行权限
[atguigu@hadoop102 bin]$ chmod 777 dwd_start_log.sh
3)脚本使用
[atguigu@hadoop102 module]$ dwd_start_log.sh 2019-02-11
4)查询导入结果
hive (gmall)>
-- Spot-check: show two rows from the partition the script just loaded.
select * from dwd_start_log where dt='2019-02-11' limit 2;
5)脚本执行时间
企业开发中一般在每日凌晨0点30分~1点执行

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章