拉鍊表實現(demo)

創建庫

-- ODS-layer database that holds the raw daily order increments.
-- IF NOT EXISTS lets the demo be re-run without failing on an existing database.
create database if not exists shop_ods;

創建訂單增量分區表(每天一個分區)

-- Daily incremental extract of changed orders; one partition per day.
-- The table is stored as ORC, so no ROW FORMAT DELIMITED clause is declared:
-- a text field delimiter has no effect on ORC files and only misleads readers.
CREATE TABLE shop_ods.ods_orders_inc (
    orderid      INT,
    createtime   STRING,
    modifiedtime STRING,
    status       STRING
)
PARTITIONED BY (day STRING)
STORED AS ORC
;

在數據庫中創建業務表

-- Source-side business table; this is the table the sqoop jobs query.
CREATE TABLE orders (
    orderid      INT,
    createtime   TIMESTAMP,
    modifiedtime TIMESTAMP,
    status       VARCHAR(5),
    PRIMARY KEY (orderid)
);

加載第1天數據

-- Day-1 source data: three newly created orders.
-- Single-quoted literals are standard SQL strings (double quotes are read as
-- identifiers when MySQL runs in ANSI_QUOTES mode), and the explicit column
-- list keeps the statement valid if the table later gains columns.
INSERT INTO orders (orderid, createtime, modifiedtime, status) VALUES
    (1, '2019-10-01 00:00:00', '2019-10-01 00:00:00', '創建'),
    (2, '2019-10-01 00:00:00', '2019-10-01 00:00:00', '創建'),
    (3, '2019-10-01 00:00:00', '2019-10-01 00:00:00', '創建');

使用sqoop腳本導入數據

sqoop導入hive分區數據,數據格式爲ORC(版本需要1.4.7支持,需要配置hcatalog)

#!/bin/bash
# Import the orders modified since yesterday 00:00 from MySQL into yesterday's
# partition of the ORC table shop_ods.ods_orders_inc via Sqoop's HCatalog
# integration.
#
# NOTE(review): the password is passed on the command line as in the original
# demo; consider Sqoop's -P / --password-file options outside of a demo.
# NOTE(review): the filter has no upper bound, so rows modified after today's
# midnight are also picked up when the job runs later in the day — confirm
# whether that is intended.

export HCAT_HOME=/apps/hive-1.2.1/hcatalog
export PATH=$PATH:$HCAT_HOME/bin:$HCAT_HOME/sbin

# Epoch seconds for "now minus one day".
dt=$(date -d "1 days ago" +%s)

# Shift by 8 hours, round down to a whole day (86400 s), shift back:
# this yields 00:00 of that day on a UTC+8 wall clock, as epoch seconds.
zero_dt=$(( (dt + 3600*8) / 86400 * 86400 - 3600*8 ))
echo "$dt"
echo "$zero_dt"

# Partition value (yyyy-mm-dd) for the day being loaded.
dt_part=$(date -d "1 days ago" +%Y-%m-%d)
echo "$dt_part"

# The column list matches the columns of shop_ods.ods_orders_inc.
/apps/sqoop-1.4.7/bin/sqoop import \
--connect jdbc:mysql://mini1:3306/shop \
--username hive \
--password pwd123456 \
--query "select orderid, createtime, modifiedtime, status from orders where UNIX_TIMESTAMP(modifiedtime)>=${zero_dt} and \$CONDITIONS" \
--null-string '\\N' \
--null-non-string '\\N' \
--hcatalog-database shop_ods \
--hcatalog-table ods_orders_inc \
--hcatalog-partition-keys day \
--hcatalog-partition-values "${dt_part}" \
-m 1

sqoop導入hive分區數據(版本需要1.4.7支持)。注意:此示例不屬於拉鍊表的實現內容。

# Stand-alone example of a plain --hive-import: loads the orders whose
# modifiedtime date is later than 2016-08-20 into partition day=20190918 of
# shop_ods.ods_orders_inc, as tab-delimited text.
sqoop import \
    --connect jdbc:mysql://mini1:3306/shop \
    --username hive \
    --password pwd123456 \
    --query "select * from orders where substring(modifiedtime,1,10)>'2016-08-20' and \$CONDITIONS" \
    --target-dir day=20190918 \
    --fields-terminated-by '\t' \
    --lines-terminated-by '\n' \
    --null-string '\\N' \
    --null-non-string '\\N' \
    --hive-import \
    --hive-database shop_ods \
    --hive-table ods_orders_inc \
    --hive-partition-key day \
    --hive-partition-value 20190918 \
    -m 1

=======================================================

創建shop_dw庫

-- DW-layer database that holds the zipper (history) table.
-- IF NOT EXISTS lets the demo be re-run without failing on an existing database.
create database if not exists shop_dw;

創建拉鍊表

-- Zipper (history) table: one row per version of an order.
-- dw_start_date / dw_end_date bound the validity of each version; the loads
-- below use '9999-12-31' as the end date of a still-open version.
-- The table is stored as ORC, so no ROW FORMAT DELIMITED clause is declared:
-- a text field delimiter has no effect on ORC files.
CREATE TABLE shop_dw.dw_orders_his (
    orderid       INT,
    createtime    STRING,
    modifiedtime  STRING,
    status        STRING,
    dw_start_date STRING,
    dw_end_date   STRING
)
STORED AS ORC
;

初始裝載

-- Initial load of the zipper table from the 2019-10-01 partition: every row
-- starts at its createtime and is left open with the 9999-12-31 end date.
INSERT OVERWRITE TABLE shop_dw.dw_orders_his
SELECT
    inc.orderid,
    inc.createtime,
    inc.modifiedtime,
    inc.status,
    inc.createtime AS dw_start_date,
    '9999-12-31'   AS dw_end_date
FROM shop_ods.ods_orders_inc inc
WHERE inc.day = '2019-10-01'
;

如下結果

-- Inspect the current contents of the zipper table.
SELECT
    orderid,
    createtime,
    modifiedtime,
    status,
    dw_start_date,
    dw_end_date
FROM shop_dw.dw_orders_his;

OK
1 2019-10-01 2019-10-01 創建 2019-10-01 9999-12-31
2 2019-10-01 2019-10-01 創建 2019-10-01 9999-12-31
3 2019-10-01 2019-10-01 創建 2019-10-01 9999-12-31

加載第2天數據

-- Day-2 source data: orders 1 and 2 change status, order 4 is new.
-- orderid is the primary key and ids 1 and 2 already exist, so a plain INSERT
-- would fail with a duplicate-key error; REPLACE (MySQL) overwrites the
-- existing row for a key and inserts the row when the key is new.
REPLACE INTO orders (orderid, createtime, modifiedtime, status) VALUES
    (1, '2019-10-01 01:00:05', '2019-10-02 10:00:03', '支付'),
    (2, '2019-10-01 01:00:05', '2019-10-02 10:00:03', '完成'),
    (4, '2019-10-02 01:00:05', '2019-10-02 10:00:03', '創建');

使用sqoop腳本導入數據

-- Check the day-2 increment partition. The literal uses plain ASCII single
-- quotes; typographic quotes are not valid string delimiters.
select
    orderid,
    createtime,
    modifiedtime,
    status,
    day
from shop_ods.ods_orders_inc
where day = '2019-10-02';
OK
1 2019-10-01 2019-10-02 支付 2019-10-02
2 2019-10-01 2019-10-02 完成 2019-10-02
4 2019-10-02 2019-10-02 創建 2019-10-02

創建臨時層
-- Scratch database for the intermediate result of the zipper rebuild.
CREATE DATABASE IF NOT EXISTS shop_tmp;

語句實現

-- Rebuild the zipper-table contents for one load day into a temporary table.
-- The current history and the day's increment are unioned; then, per order,
-- with versions ordered by modifiedtime:
--   * the earliest version keeps its own dw_start_date, and every later
--     version starts at its modifiedtime;
--   * a version ends at the next version's modifiedtime, and the latest
--     version keeps its own dw_end_date ('9999-12-31' for open rows).
-- Any leftover table from a previous run is dropped first so the CTAS does
-- not fail on rerun.
DROP TABLE IF EXISTS shop_tmp.tmp_order_his;

CREATE TABLE shop_tmp.tmp_order_his
AS
SELECT
    orderid,
    createtime,
    modifiedtime,
    status,
    CASE
        WHEN LAG(modifiedtime) OVER (PARTITION BY orderid ORDER BY modifiedtime) IS NULL
            THEN dw_start_date
        ELSE modifiedtime
    END AS dw_start_date,
    COALESCE(
        LEAD(modifiedtime) OVER (PARTITION BY orderid ORDER BY modifiedtime),
        dw_end_date
    ) AS dw_end_date
FROM (
    -- versions already recorded in the zipper table
    SELECT
        orderid,
        createtime,
        modifiedtime,
        status,
        dw_start_date,
        dw_end_date
    FROM shop_dw.dw_orders_his
    UNION ALL
    -- the load day's increment, entered as new open-ended versions
    SELECT
        orderid,
        createtime,
        modifiedtime,
        status,
        createtime   AS dw_start_date,
        '9999-12-31' AS dw_end_date
    FROM shop_ods.ods_orders_inc
    WHERE day = '2019-10-02'  -- load day; the shell script version substitutes its dt variable here
) tmp
;

shell腳本實現

#!/bin/bash
# Daily zipper-table refresh for shop_dw.dw_orders_his.
#
# Steps run by the single hive invocation below:
#   1. Union the current history with yesterday's increment partition and
#      recompute dw_start_date / dw_end_date per order into
#      shop_tmp.tmp_order_his (versions ordered by modifiedtime: the first
#      keeps its dw_start_date, later ones start at modifiedtime; each ends at
#      the next version's modifiedtime, the last keeps its dw_end_date).
#   2. Overwrite shop_dw.dw_orders_his with that result.
#   3. Drop the temporary table.
# The temporary table is also dropped up front so a previously interrupted
# run does not make the CREATE TABLE fail.
#
# No SQL comments are placed inside the hive -e string on purpose; all notes
# live here in the shell script.

# Partition to load: yesterday, formatted yyyy-mm-dd.
dt=$(date -d "1 days ago" +%Y-%m-%d)

hive -e "
create database if not exists shop_tmp;
set hive.exec.mode.local.auto=true;
set hive.groupby.skewindata=true;
drop table if exists shop_tmp.tmp_order_his;
create table shop_tmp.tmp_order_his
as
select
orderid,
createtime,
modifiedtime,
status,
case when (lag(modifiedtime) over(partition by orderid order by modifiedtime)) is null then dw_start_date else modifiedtime end as dw_start_date,
coalesce(lead(modifiedtime) over(partition by orderid order by modifiedtime), dw_end_date) as dw_end_date
from
(select
orderid,
createtime,
modifiedtime,
status,
dw_start_date,
dw_end_date
from shop_dw.dw_orders_his
union all
select
orderid,
createtime,
modifiedtime,
status,
createtime as dw_start_date,
'9999-12-31' as dw_end_date
from shop_ods.ods_orders_inc
where day = '${dt}'
) tmp
;
insert overwrite table shop_dw.dw_orders_his
select
orderid,
createtime,
modifiedtime,
status,
dw_start_date,
dw_end_date
from shop_tmp.tmp_order_his
;
drop table shop_tmp.tmp_order_his;
"

1 2019-10-01 00:00:00.0 2019-10-01 00:00:00.0 創建 2019-10-01 00:00:00.0 9999-12-31
1 2019-10-01 01:00:05.0 2019-10-02 10:00:03.0 支付 2019-10-01 01:00:05.0 9999-12-31
2 2019-10-01 00:00:00.0 2019-10-01 00:00:00.0 創建 2019-10-01 00:00:00.0 9999-12-31
2 2019-10-01 01:00:05.0 2019-10-02 10:00:03.0 完成 2019-10-01 01:00:05.0 9999-12-31
3 2019-10-01 00:00:00.0 2019-10-01 00:00:00.0 創建 2019-10-01 00:00:00.0 9999-12-31
4 2019-10-02 01:00:05.0 2019-10-02 10:00:03.0 創建 2019-10-02 01:00:05.0 9999-12-31

加載第3天數據
-- Day-3 source data: orders 1, 3 and 4 change status, order 5 is new.
-- String literals use plain ASCII single quotes (typographic quotes are not
-- valid delimiters). orderid is the primary key and ids 1, 3 and 4 already
-- exist, so REPLACE (MySQL) is used: it overwrites the existing row for a key
-- and inserts the row when the key is new.
REPLACE INTO orders (orderid, createtime, modifiedtime, status) VALUES
    (1, '2019-10-01 10:10:10', '2019-10-03 09:09:09', '完成'),
    (3, '2019-10-01 10:10:10', '2019-10-03 09:09:09', '支付'),
    (4, '2019-10-02 10:10:10', '2019-10-03 09:09:09', '支付'),
    (5, '2019-10-03 10:10:10', '2019-10-03 09:09:09', '創建');
使用sqoop腳本導入數據

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章