#!/bin/bash
# NOTE: the scheduling server's system clock must be correct.
# Recommended: run this script only after the source table's partition data has landed.
echo 'job start...'
nowtime=$(date -d today "+%Y-%m-%d-%H")
echo "nowtime=$nowtime"
#grepCondition="2020-05-14-22-25"
grepCondition=$nowtime
# FIX: was "flase" (typo) — the header-suppression setting never took effect.
partition="set hive.cli.print.header=false;show partitions test.hn_csgcjs_spgl_xmjbxxb;"
# Latest partition whose data_dt value starts with the current hour.
v_partition=$(hive -e "$partition" | grep "data_dt=${grepCondition}" | sort | tail -n 1)
echo "$v_partition"
# Extract the partition value by stripping the "data_dt=" prefix
# (more robust than the previous fixed-width slice ${v_partition:8:16}).
partition_nums=${v_partition#data_dt=}
# Abort early when no matching partition exists; the downstream HQL would
# otherwise insert into (and sqoop would export) an empty-string partition.
if [ -z "$partition_nums" ]; then
  echo "ERROR: no partition found matching data_dt=${grepCondition}" >&2
  exit 1
fi
echo "partition_nums=$partition_nums"
#=================== Valid-data logic: begin ==========================================
echo 'correct data job start....'
# DDL for the sqoop staging table holding the valid rows. Partitioned by
# data_dt; fields are \001-delimited to match --fields-terminated-by of the
# sqoop export that reads this table's HDFS directory further below.
HIVE_SQL_CREATE_TABLE="CREATE TABLE IF NOT EXISTS test.hn_csgcjs_spgl_xmjbxxb_transfer_sqoop_p2 (
dfsjzj string,
xzqhdm string,
xmdm string,
xmmc string,
gcdm string,
gcfw string,
qjdgcdm string,
xmtzly INT,
tdhqfs INT,
tdsfdsjfa INT,
sfwcqypg INT,
splclx INT,
lxlx INT,
gcfl INT,
jsxz INT,
xmzjsx INT,
gbhydmfbnd string,
gbhy string,
nkgsj date,
njcsj date,
xmsfwqbj INT,
xmwqbjsj TIMESTAMP,
ztze DOUBLE,
jsddxzqh string,
jsdd string,
xmjsddx DOUBLE,
xmjsddy DOUBLE,
jsgmjnr string,
ydmj DOUBLE,
jzmj DOUBLE,
sbsj TIMESTAMP,
splcbm string,
splbbh DOUBLE,
sjyxbs INT,
sjwxyy string,
SJSCZT INT,
SBYY string,
ctime TIMESTAMP,
utime TIMESTAMP,
SYNC_TIME TIMESTAMP,
SystemSource string,
First_Sync_time TIMESTAMP
) partitioned by(data_dt string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001';"
# Insert the partition's data into the staging table.
# Transformation notes:
#  - XZQHDM is remapped via hn_zwfw_xzqhdm_map (city -> country code) when a
#    mapping row exists, otherwise kept as-is;
#  - GBHY is rebuilt from the industry-classification hierarchy (hn_zwfw_hyfl):
#    subquery b matches on old_code, subquery c on hy_code; c's code wins when
#    present. 'X' characters are stripped before slicing out 6 digits;
#  - only rows with GBHYDMFBND='2017' are treated as valid in this branch.
HIVE_SQL_INSERT_TABLE="insert overwrite TABLE test.hn_csgcjs_spgl_xmjbxxb_transfer_sqoop_p2 partition(data_dt='$partition_nums')
SELECT
DFSJZJ,
CASE
WHEN m.country_xzqhdm IS NOT NULL THEN
m.country_xzqhdm
ELSE
a.XZQHDM
END AS XZQHDM,
XMDM,
XMMC,
GCDM,
GCFW,
QJDGCDM,
XMTZLY,
TDHQFS,
TDSFDSJFA,
SFWCQYPG,
SPLCLX,
LXLX,
GCFL,
JSXZ,
XMZJSX,
GBHYDMFBND,
CONCAT (
CASE
WHEN c.priv_code IS NULL THEN
b.priv_code
ELSE
c.priv_code
END,
CAST (
SUBSTRING (
regexp_replace (
CASE
WHEN c.priv_code IS NULL THEN
b.priv_code
ELSE
c.priv_code
END,
'X',
''
),
2,
6
) AS VARCHAR (6)
)
) AS GBHY,
NKGSJ,
NJCSJ,
XMSFWQBJ,
XMWQBJSJ,
ZTZE,
JSDDXZQH,
JSDD,
XMJSDDX,
XMJSDDY,
JSGMJNR,
YDMJ,
JZMJ,
SBSJ,
SPLCBM,
SPLBBH,
SJYXBS,
SJWXYY,
SJSCZT,
'' AS SBYY,
CTIME,
UTIME,
from_unixtime(unix_timestamp()) AS SYNC_TIME,
'' AS SystemSource,
from_unixtime(unix_timestamp()) AS First_Sync_time
FROM
test.HN_CSGCJS_SPGL_XMJBXXB a
LEFT JOIN test.hn_zwfw_xzqhdm_map m ON a.XZQHDM = m.city_xzqhdm
LEFT JOIN (
SELECT
a.hy_name,
a.hy_code,
a.old_code,
regexp_replace (c.p_code, 'X', '') AS priv_code
FROM
test.hn_zwfw_hyfl a
LEFT JOIN test.hn_zwfw_hyfl b ON a.p_code = b.hy_code
LEFT JOIN test.hn_zwfw_hyfl c ON b.p_code = c.hy_code
) b ON a.GBHY = b.old_code
LEFT JOIN (
SELECT
a.hy_name,
a.hy_code,
a.old_code,
regexp_replace (c.p_code, 'X', '') AS priv_code
FROM
test.hn_zwfw_hyfl a
LEFT JOIN test.hn_zwfw_hyfl b ON a.p_code = b.hy_code
LEFT JOIN test.hn_zwfw_hyfl c ON b.p_code = c.hy_code
) c ON a.GBHY = c.hy_code
WHERE GBHYDMFBND='2017' and a.data_dt='$partition_nums';"
# Run the HQL: create the staging table (idempotent), then load the partition.
# Abort the job when either hive step fails instead of exporting stale/partial data.
hive -e "$HIVE_SQL_CREATE_TABLE" || { echo 'ERROR: create table failed' >&2; exit 1; }
echo 'insert correct data start....'
hive -e "$HIVE_SQL_INSERT_TABLE" || { echo 'ERROR: insert correct data failed' >&2; exit 1; }
echo 'insert correct data end...'
echo 'correct data job sqoop start...'
# SECURITY NOTE: credentials are hard-coded and visible via `ps`/shell history;
# prefer sqoop's --password-file (or --password-alias) instead.
sqoop export --connect 'jdbc:sqlserver://59.212.146.201:1433;DatabaseName=Exchange_GCJSXM_HNS_TEST' --username 'GCJSXM_GGFW' --password 'GCJSXM_GGFW!@#$20200511' --table 'SPGL_XMJBXXB' --columns 'DFSJZJ,XZQHDM,XMDM,XMMC,GCDM,GCFW,QJDGCDM,XMTZLY,TDHQFS,TDSFDSJFA,SFWCQYPG,SPLCLX,LXLX,GCFL,JSXZ,XMZJSX,GBHYDMFBND,GBHY,NKGSJ,NJCSJ,XMSFWQBJ,XMWQBJSJ,ZTZE,JSDDXZQH,JSDD,XMJSDDX,XMJSDDY,JSGMJNR,YDMJ,JZMJ,SBSJ,SPLCBM,SPLBBH,SJYXBS,SJWXYY,SJSCZT,SBYY,CTIME,UTIME,SYNC_TIME,SystemSource,First_Sync_time' --fields-terminated-by '\001' --export-dir "hdfs://hainan/apps/hive/warehouse/test.db/hn_csgcjs_spgl_xmjbxxb_transfer_sqoop_p2/data_dt=${partition_nums}" --input-null-string '\\N' --input-null-non-string '\\N' --hive-partition-key data_dt --hive-partition-value "$partition_nums" || { echo 'ERROR: sqoop export failed' >&2; exit 1; }
echo 'correct data job sqoop end...'
echo 'correct data job end....'
#=================== Invalid-data logic: begin ==========================================
# FIX: the log message previously said 'correct data' — this is the invalid-data phase.
echo 'invalid data job start....'
# DDL for the sqoop staging table holding the rows that fail validation
# (same schema as the valid-data table, partitioned by data_dt).
HIVE_SQL_CREATE_TABLE_INVALID="CREATE TABLE IF NOT EXISTS test.hn_csgcjs_spgl_xmjbxxb_invalid_sqoop_p2 (
dfsjzj string,
xzqhdm string,
xmdm string,
xmmc string,
gcdm string,
gcfw string,
qjdgcdm string,
xmtzly INT,
tdhqfs INT,
tdsfdsjfa INT,
sfwcqypg INT,
splclx INT,
lxlx INT,
gcfl INT,
jsxz INT,
xmzjsx INT,
gbhydmfbnd string,
gbhy string,
nkgsj date,
njcsj date,
xmsfwqbj INT,
xmwqbjsj TIMESTAMP,
ztze DOUBLE,
jsddxzqh string,
jsdd string,
xmjsddx DOUBLE,
xmjsddy DOUBLE,
jsgmjnr string,
ydmj DOUBLE,
jzmj DOUBLE,
sbsj TIMESTAMP,
splcbm string,
splbbh DOUBLE,
sjyxbs INT,
sjwxyy string,
SJSCZT INT,
SBYY string,
ctime TIMESTAMP,
utime TIMESTAMP,
SYNC_TIME TIMESTAMP,
SystemSource string,
First_Sync_time TIMESTAMP
) partitioned by(data_dt string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001';"
# Insert the partition's data into the invalid-data staging table.
# Same remapping logic as the valid-data insert (XZQHDM via hn_zwfw_xzqhdm_map,
# GBHY rebuilt from the hn_zwfw_hyfl hierarchy), but the WHERE clause selects
# the complement: rows whose GBHYDMFBND is not '2017' (or is NULL).
HIVE_SQL_INSERT_TABLE_INVALID="insert overwrite TABLE test.hn_csgcjs_spgl_xmjbxxb_invalid_sqoop_p2 partition(data_dt='$partition_nums')
SELECT
DFSJZJ,
CASE
WHEN m.country_xzqhdm IS NOT NULL THEN
m.country_xzqhdm
ELSE
a.XZQHDM
END AS XZQHDM,
XMDM,
XMMC,
GCDM,
GCFW,
QJDGCDM,
XMTZLY,
TDHQFS,
TDSFDSJFA,
SFWCQYPG,
SPLCLX,
LXLX,
GCFL,
JSXZ,
XMZJSX,
GBHYDMFBND,
CONCAT (
CASE
WHEN c.priv_code IS NULL THEN
b.priv_code
ELSE
c.priv_code
END,
CAST (
SUBSTRING (
regexp_replace (
CASE
WHEN c.priv_code IS NULL THEN
b.priv_code
ELSE
c.priv_code
END,
'X',
''
),
2,
6
) AS VARCHAR (6)
)
) AS GBHY,
NKGSJ,
NJCSJ,
XMSFWQBJ,
XMWQBJSJ,
ZTZE,
JSDDXZQH,
JSDD,
XMJSDDX,
XMJSDDY,
JSGMJNR,
YDMJ,
JZMJ,
SBSJ,
SPLCBM,
SPLBBH,
SJYXBS,
SJWXYY,
SJSCZT,
'' AS SBYY,
CTIME,
UTIME,
from_unixtime(unix_timestamp()) AS SYNC_TIME,
'' AS SystemSource,
from_unixtime(unix_timestamp()) AS First_Sync_time
FROM
test.HN_CSGCJS_SPGL_XMJBXXB a
LEFT JOIN test.hn_zwfw_xzqhdm_map m ON a.XZQHDM = m.city_xzqhdm
LEFT JOIN (
SELECT
a.hy_name,
a.hy_code,
a.old_code,
regexp_replace (c.p_code, 'X', '') AS priv_code
FROM
test.hn_zwfw_hyfl a
LEFT JOIN test.hn_zwfw_hyfl b ON a.p_code = b.hy_code
LEFT JOIN test.hn_zwfw_hyfl c ON b.p_code = c.hy_code
) b ON a.GBHY = b.old_code
LEFT JOIN (
SELECT
a.hy_name,
a.hy_code,
a.old_code,
regexp_replace (c.p_code, 'X', '') AS priv_code
FROM
test.hn_zwfw_hyfl a
LEFT JOIN test.hn_zwfw_hyfl b ON a.p_code = b.hy_code
LEFT JOIN test.hn_zwfw_hyfl c ON b.p_code = c.hy_code
) c ON a.GBHY = c.hy_code
WHERE (GBHYDMFBND<>'2017' or GBHYDMFBND is null) and a.data_dt='$partition_nums';"
# Run the HQL for the invalid-data staging table; abort the job on failure.
hive -e "$HIVE_SQL_CREATE_TABLE_INVALID" || { echo 'ERROR: create invalid table failed' >&2; exit 1; }
echo 'insert invalid data start....'
hive -e "$HIVE_SQL_INSERT_TABLE_INVALID" || { echo 'ERROR: insert invalid data failed' >&2; exit 1; }
echo 'insert invalid data end...'
echo 'invalid data job sqoop start...'
# The destination for the invalid data has not been decided yet,
# so the sqoop export below stays disabled.
#sqoop export --connect 'jdbc:sqlserver://59.212.146.201:1433;DatabaseName=Exchange_GCJSXM_HNS_TEST' --username 'GCJSXM_GGFW' --password 'GCJSXM_GGFW!@#$20200511' --table 'SPGL_XMJBXXB' --columns 'DFSJZJ,XZQHDM,XMDM,XMMC,GCDM,GCFW,QJDGCDM,XMTZLY,TDHQFS,TDSFDSJFA,SFWCQYPG,SPLCLX,LXLX,GCFL,JSXZ,XMZJSX,GBHYDMFBND,GBHY,NKGSJ,NJCSJ,XMSFWQBJ,XMWQBJSJ,ZTZE,JSDDXZQH,JSDD,XMJSDDX,XMJSDDY,JSGMJNR,YDMJ,JZMJ,SBSJ,SPLCBM,SPLBBH,SJYXBS,SJWXYY,SJSCZT,SBYY,CTIME,UTIME,SYNC_TIME,SystemSource,First_Sync_time' --fields-terminated-by '\001' --export-dir 'hdfs://hainan/apps/hive/warehouse/test.db/hn_csgcjs_spgl_xmjbxxb_invalid_sqoop_p2/data_dt='$partition_nums'' --input-null-string '\\N' --input-null-non-string '\\N' --hive-partition-key data_dt --hive-partition-value $partition_nums
echo 'invalid data job sqoop end...'
echo 'invalid data job end....'
echo 'job end...'