比如,計算首次發生時間、累計總量等指標時,正常情況下需要用到所有歷史數據進行計算。但有些表的數據量特別大,用全量歷史數據計算比較費力,甚至可能算不出來。
這時,可以考慮用增量累計的方式(用當天的增量數據,加上前一天的全量結果)。其優點是,每次計算涉及到的數據量偏小;其缺點也很明顯,需要從歷史數據起始之日起,一天天刷到當前,中間一天都不能少。如果中間有一天出現問題,則後面的數據也會全部出錯。
可以在實踐中權衡利弊,根據實際情況,選擇合適的方案。
1、樣例表結構
-- Per-shop snapshot of publish (listing) aggregates, one partition per day (dt).
-- The daily load statement inserts positionally, so the column order below
-- must stay in sync with its SELECT list.
-- All counters are bigint. The daily and 30/60/90-day counters were previously
-- declared as string although the load writes numeric sums into them.
-- IF NOT EXISTS keeps the DDL re-runnable and leaves an existing table untouched.
CREATE TABLE IF NOT EXISTS dwf_seller_publish_agr_full_1d (
    shop_id             bigint COMMENT '用戶id',
    -- Publish timestamps are kept as strings, as in the original schema.
    first_publish_time  string COMMENT '第一次發拍時間',
    last_publish_time   string COMMENT '最後一次發拍時間',
    -- Windowed counters, recomputed on every run.
    publish_cnt_d       bigint COMMENT '當天發拍數',
    publish_cnt_30d     bigint COMMENT '近30天發拍數',
    publish_cnt_60d     bigint COMMENT '近60天發拍數',
    publish_cnt_90d     bigint COMMENT '近90天發拍數',
    -- Cumulative counters, rolled forward from the previous partition.
    publish_cnt_all     bigint COMMENT '發拍總量',
    publish_cnt_live    bigint COMMENT '發拍直播拍品量',
    publish_cnt_no_live bigint COMMENT '發拍非直播拍品量',
    publish_cnt_ykj     bigint COMMENT '一口價總發拍量(所有的)',
    publish_cnt_ykj_zb  bigint COMMENT '一口價直播發拍量',
    publish_cnt_yyp     bigint COMMENT '一元拍總發拍量',
    -- Counts of items still on display, recomputed on every run.
    no_ykj_sales        bigint COMMENT '展示拍品數,排除一口價',
    ykj_sales           bigint COMMENT '展示拍品數,一口價拍品',
    yyp_sales           bigint COMMENT '展示拍品數,一元拍拍品'
)
COMMENT '用戶寬表-發拍聚合'
PARTITIONED BY (dt string)
STORED AS ORC
TBLPROPERTIES ("orc.compress" = "SNAPPY");
2、計算sql
-- Daily incremental roll-up of per-shop publish aggregates.
--
-- Inputs
--   a: the last 90 daily partitions of dwf_seller_publish_incr_1d (pid = 0 rows
--      only), aggregated per shop.
--   b: the previous-day partition of this same snapshot table, which carries
--      the cumulative totals and the first publish time.
-- Output
--   Partition dt of dwf_seller_publish_agr_full_1d, fully overwritten.
--
-- NOTE: cumulative columns are built as today plus the previous snapshot, so
-- the job must run for every day in order. If the previous-day partition is
-- missing, b is empty and every cumulative total restarts from the values of
-- the current day.
-- NOTE(review): the date comparisons assume dt is a yyyy-MM-dd string - confirm.
--
-- Rows with a NULL shop_id are excluded on both sides. NULL never satisfies
-- a.shop_id = b.shop_id, so under a FULL JOIN such rows would be emitted
-- unmatched from both inputs and pile up by one extra row per day.
INSERT OVERWRITE TABLE ${c.to.dwf}.dwf_seller_publish_agr_full_1d PARTITION (dt = '${dt}')
SELECT
    COALESCE(a.shop_id, b.shop_id)                        AS shop_id,
    -- First publish time: the carried-forward value wins. The 90-day minimum
    -- is used only for shops with no value in the previous snapshot.
    COALESCE(b.first_publish_time, a.first_publish_time)  AS first_publish_time,
    -- Last publish time: the recent window wins, otherwise carry forward.
    COALESCE(a.last_publish_time, b.last_publish_time)    AS last_publish_time,
    -- Windowed counters come from the recent data only. Shops with no rows
    -- in the 90-day window get 0.
    COALESCE(a.publish_cnt_d, 0)                          AS publish_cnt_d,
    COALESCE(a.publish_cnt_30d, 0)                        AS publish_cnt_30d,
    COALESCE(a.publish_cnt_60d, 0)                        AS publish_cnt_60d,
    COALESCE(a.publish_cnt_90d, 0)                        AS publish_cnt_90d,
    -- Cumulative counters: count for the current day plus the previous total.
    COALESCE(a.publish_cnt_d, 0) + COALESCE(b.publish_cnt_all, 0)
                                                          AS publish_cnt_all,
    COALESCE(a.publish_cnt_live, 0) + COALESCE(b.publish_cnt_live, 0)
                                                          AS publish_cnt_live,
    -- Non-live count for the day is derived as the daily total minus the
    -- daily live count.
    COALESCE(a.publish_cnt_d, 0) - COALESCE(a.publish_cnt_live, 0)
        + COALESCE(b.publish_cnt_no_live, 0)              AS publish_cnt_no_live,
    COALESCE(a.publish_cnt_ykj_day, 0) + COALESCE(b.publish_cnt_ykj, 0)
                                                          AS publish_cnt_ykj,
    COALESCE(a.publish_cnt_ykj_zb_day, 0) + COALESCE(b.publish_cnt_ykj_zb, 0)
                                                          AS publish_cnt_ykj_zb,
    COALESCE(a.publish_cnt_yyp_day, 0) + COALESCE(b.publish_cnt_yyp, 0)
                                                          AS publish_cnt_yyp,
    -- Display counters are recomputed on every run and never carried forward.
    COALESCE(a.no_ykj_sales, 0)                           AS no_ykj_sales,
    COALESCE(a.ykj_sales, 0)                              AS ykj_sales,
    COALESCE(a.yyp_sales, 0)                              AS yyp_sales
FROM (
    SELECT
        shop_id,
        MIN(publish_time) AS first_publish_time,
        MAX(publish_time) AS last_publish_time,
        -- Current day only.
        SUM(IF(dt = '${dt}', multi_wins, 0)) AS publish_cnt_d,
        -- dt strictly after (dt - N days) and, via the WHERE clause, not
        -- after dt: the N calendar days ending on dt.
        SUM(IF(dt > date_sub('${dt}', 30), multi_wins, 0)) AS publish_cnt_30d,
        SUM(IF(dt > date_sub('${dt}', 60), multi_wins, 0)) AS publish_cnt_60d,
        SUM(IF(dt > date_sub('${dt}', 90), multi_wins, 0)) AS publish_cnt_90d,
        -- Current-day breakdowns feeding the cumulative columns.
        SUM(IF(dt = '${dt}' AND is_live = 1, multi_wins, 0)) AS publish_cnt_live,
        -- sale_type 7 or 12 is counted as ykj, 12 alone as ykj_zb, 11 as yyp.
        SUM(IF(dt = '${dt}' AND (sale_type = 7 OR sale_type = 12), multi_wins, 0))
            AS publish_cnt_ykj_day,
        SUM(IF(dt = '${dt}' AND sale_type = 12, multi_wins, 0)) AS publish_cnt_ykj_zb_day,
        SUM(IF(dt = '${dt}' AND sale_type = 11, multi_wins, 0)) AS publish_cnt_yyp_day,
        -- Items published within the 90-day window whose end_date is later
        -- than the day before dt, split by sale type.
        SUM(IF(sale_type != 7 AND sale_type != 12 AND end_date > date_sub('${dt}', 1),
               multi_wins, 0)) AS no_ykj_sales,
        SUM(IF((sale_type = 7 OR sale_type = 12) AND end_date > date_sub('${dt}', 1),
               multi_wins, 0)) AS ykj_sales,
        SUM(IF(sale_type = 11 AND end_date > date_sub('${dt}', 1),
               multi_wins, 0)) AS yyp_sales
    FROM ${c.from.dwf}.dwf_seller_publish_incr_1d
    WHERE dt > date_sub('${dt}', 90)
      AND dt <= '${dt}'
      AND pid = '0'
      AND shop_id IS NOT NULL
    GROUP BY shop_id
) a
FULL JOIN (
    -- Only the columns that are rolled forward are read from the snapshot.
    SELECT
        shop_id,
        first_publish_time,
        last_publish_time,
        publish_cnt_all,
        publish_cnt_live,
        publish_cnt_no_live,
        publish_cnt_ykj,
        publish_cnt_ykj_zb,
        publish_cnt_yyp
    FROM ${c.from.dwf}.dwf_seller_publish_agr_full_1d
    WHERE dt = date_sub('${dt}', 1)
      AND shop_id IS NOT NULL
) b
    ON a.shop_id = b.shop_id;