6.1.4 DWS Layer Data Loading Script

1) Create the script in the /home/atguigu/bin directory on hadoop102
[atguigu@hadoop102 bin]$ vim dws_uv_log.sh
Add the following content to the script:
#!/bin/bash

# Define variables to make future changes easier

APP=gmall
hive=/opt/module/hive/bin/hive

# If a date argument is supplied, use it; otherwise default to the day before the current date

if [ -n "$1" ] ;then
do_date=$1
else
do_date=`date -d "-1 day" +%F`
fi

sql="
set hive.exec.dynamic.partition.mode=nonstrict;

insert overwrite table "APP".dwsuvdetaildaypartition(dt=APP".dws_uv_detail_day partition(dt='do_date’)
select
mid_id,
concat_ws('|', collect_set(user_id)) user_id,
concat_ws('|', collect_set(version_code)) version_code,
concat_ws('|', collect_set(version_name)) version_name,
concat_ws('|', collect_set(lang)) lang,
concat_ws('|', collect_set(source)) source,
concat_ws('|', collect_set(os)) os,
concat_ws('|', collect_set(area)) area,
concat_ws('|', collect_set(model)) model,
concat_ws('|', collect_set(brand)) brand,
concat_ws('|', collect_set(sdk_version)) sdk_version,
concat_ws('|', collect_set(gmail)) gmail,
concat_ws('|', collect_set(height_width)) height_width,
concat_ws('|', collect_set(app_time)) app_time,
concat_ws('|', collect_set(network)) network,
concat_ws('|', collect_set(lng)) lng,
concat_ws('|', collect_set(lat)) lat
from "APP".dwdstartlogwheredt=APP".dwd_start_log where dt='do_date’
group by mid_id;

insert overwrite table "$APP".dws_uv_detail_wk partition(wk_dt)
select
mid_id,
concat_ws('|', collect_set(user_id)) user_id,
concat_ws('|', collect_set(version_code)) version_code,
concat_ws('|', collect_set(version_name)) version_name,
concat_ws('|', collect_set(lang)) lang,
concat_ws('|', collect_set(source)) source,
concat_ws('|', collect_set(os)) os,
concat_ws('|', collect_set(area)) area,
concat_ws('|', collect_set(model)) model,
concat_ws('|', collect_set(brand)) brand,
concat_ws('|', collect_set(sdk_version)) sdk_version,
concat_ws('|', collect_set(gmail)) gmail,
concat_ws('|', collect_set(height_width)) height_width,
concat_ws('|', collect_set(app_time)) app_time,
concat_ws('|', collect_set(network)) network,
concat_ws('|', collect_set(lng)) lng,
concat_ws('|', collect_set(lat)) lat,
date_add(next_day('$do_date','MO'),-7),
date_add(next_day('$do_date','MO'),-1),
concat(date_add(next_day('$do_date','MO'),-7), '_', date_add(next_day('$do_date','MO'),-1))
from "$APP".dws_uv_detail_day
where dt>=date_add(next_day('$do_date','MO'),-7) and dt<=date_add(next_day('$do_date','MO'),-1)
group by mid_id;

insert overwrite table "APP".dwsuvdetailmnpartition(mn)selectmidid,concatws(,collectset(userid))userid,concatws(,collectset(versioncode))versioncode,concatws(,collectset(versionname))versionname,concatws(,collectset(lang))lang,concatws(,collectset(source))source,concatws(,collectset(os))os,concatws(,collectset(area))area,concatws(,collectset(model))model,concatws(,collectset(brand))brand,concatws(,collectset(sdkversion))sdkversion,concatws(,collectset(gmail))gmail,concatws(,collectset(heightwidth))heightwidth,concatws(,collectset(apptime))apptime,concatws(,collectset(network))network,concatws(,collectset(lng))lng,concatws(,collectset(lat))lat,dateformat(APP".dws_uv_detail_mn partition(mn) select mid_id, concat_ws('|', collect_set(user_id)) user_id, concat_ws('|', collect_set(version_code)) version_code, concat_ws('|', collect_set(version_name)) version_name, concat_ws('|', collect_set(lang))lang, concat_ws('|', collect_set(source)) source, concat_ws('|', collect_set(os)) os, concat_ws('|', collect_set(area)) area, concat_ws('|', collect_set(model)) model, concat_ws('|', collect_set(brand)) brand, concat_ws('|', collect_set(sdk_version)) sdk_version, concat_ws('|', collect_set(gmail)) gmail, concat_ws('|', collect_set(height_width)) height_width, concat_ws('|', collect_set(app_time)) app_time, concat_ws('|', collect_set(network)) network, concat_ws('|', collect_set(lng)) lng, concat_ws('|', collect_set(lat)) lat, date_format('do_date’,‘yyyy-MM’)
from "APP".dwsuvdetaildaywheredateformat(dt,yyyyMM)=dateformat(APP".dws_uv_detail_day where date_format(dt,'yyyy-MM') = date_format('do_date’,‘yyyy-MM’)
group by mid_id;
"

hivee"hive -e "sql"
2) Grant execute permission to the script
[atguigu@hadoop102 bin]$ chmod 777 dws_uv_log.sh
3) Run the script
[atguigu@hadoop102 module]$ dws_uv_log.sh 2019-02-11
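The date argument is optional; per the fallback branch in the script, running it with no argument loads the previous day's partition:
[atguigu@hadoop102 module]$ dws_uv_log.sh     # no argument: defaults to `date -d "-1 day" +%F`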
4) Check the results
hive (gmall)> select count(*) from dws_uv_detail_day where dt='2019-02-11';
hive (gmall)> select count(*) from dws_uv_detail_wk;
hive (gmall)> select count(*) from dws_uv_detail_mn ;
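To verify a single partition rather than scanning the whole table, you can also filter on the partition keys the script writes; for do_date=2019-02-11 the weekly key works out to 2019-02-11_2019-02-17 and the monthly key to 2019-02 (illustrative values derived from the date expressions above):
hive (gmall)> select count(*) from dws_uv_detail_wk where wk_dt='2019-02-11_2019-02-17';
hive (gmall)> select count(*) from dws_uv_detail_mn where mn='2019-02';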
5) Script execution time
In enterprise development, this script is generally run daily between 00:30 and 1:00 a.m.
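The section does not name a scheduler; as a minimal sketch (an assumption, not the tutorial's setup), a cron entry on hadoop102 could trigger the script at 00:30 and let it default to the previous day's date:
# Hypothetical crontab entry for user atguigu (crontab -e); the log path is illustrative.
30 0 * * * /home/atguigu/bin/dws_uv_log.sh >> /home/atguigu/dws_uv_log.cron.log 2>&1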
