項目環境初始化
3.1 Hive分層說明
分庫存放
ods層
dw層
ads層
命名規則
ods層表與原始數據庫表名稱相同
dw層表
fact_前綴表示事實表
dim_前綴表示維度表
創建分層數據庫:
-- Create one Hive database per warehouse layer (ods, dw, ads).
-- "if not exists" keeps this initialization step re-runnable: without it the
-- statement fails once the database has already been created.
create database if not exists itcast_ods;
create database if not exists itcast_dw;
create database if not exists itcast_ads;
3.2 創建ods層數據表
hive 分爲外部表與內部表,兩者的區別在於刪除表的時候真正的數據是否會被刪除:刪除內部表會連同數據一起刪除,刪除外部表只刪除元數據、數據文件仍然保留。ods層的表是所有部門共用的,不能輕易刪除數據,所以ods層一般使用外部表;本項目的ods層建表語句也均使用外部表(create EXTERNAL table)。
執行 “ods層建表語句業務數據.sql”
3.3 ods層全量數據抽取
步驟:
1、拖拽組件構建Kettle作業結構圖
2、轉換結構圖–》配置命名參數
3、配置Hive SQL腳本
-- Hive SQL script for the Kettle job: re-sync partition metadata of every
-- ODS table so that partition directories present on the file system but
-- missing from the metastore are registered.
MSCK REPAIR TABLE itcast_ods.itcast_orders;
MSCK REPAIR TABLE itcast_ods.itcast_goods;
MSCK REPAIR TABLE itcast_ods.itcast_order_goods;
MSCK REPAIR TABLE itcast_ods.itcast_shops;
MSCK REPAIR TABLE itcast_ods.itcast_goods_cats;
MSCK REPAIR TABLE itcast_ods.itcast_org;
MSCK REPAIR TABLE itcast_ods.itcast_order_refunds;
MSCK REPAIR TABLE itcast_ods.itcast_users;
MSCK REPAIR TABLE itcast_ods.itcast_user_address;
MSCK REPAIR TABLE itcast_ods.itcast_payments;
4、配置表輸入
-- Kettle table-input query against the source database: selects every order
-- created on or before the dt parameter (yyyyMMdd), i.e. a sync of all
-- historical data. String literals use plain ASCII single quotes;
-- typographic quotes are not valid SQL.
SELECT * FROM itcast_orders
WHERE DATE_FORMAT(createtime, '%Y%m%d') <= '${dt}';
5、配置字段選擇指定日期格式,配置parquet格式並設置snappy壓縮輸出
配置文件位置
配置文件輸出內容格式
測試數據是否都正確被加載!
-- Smoke test: fetch two rows from each ODS table to confirm data was loaded.
SELECT * FROM itcast_ods.itcast_orders LIMIT 2;
SELECT * FROM itcast_ods.itcast_goods LIMIT 2;
SELECT * FROM itcast_ods.itcast_order_goods LIMIT 2;
SELECT * FROM itcast_ods.itcast_shops LIMIT 2;
SELECT * FROM itcast_ods.itcast_goods_cats LIMIT 2;
SELECT * FROM itcast_ods.itcast_org LIMIT 2;
SELECT * FROM itcast_ods.itcast_order_refunds LIMIT 2;
SELECT * FROM itcast_ods.itcast_users LIMIT 2;
SELECT * FROM itcast_ods.itcast_user_address LIMIT 2;
SELECT * FROM itcast_ods.itcast_payments LIMIT 2;
注意:
1:其中itcast_orders,itcast_order_goods,itcast_order_refunds表是根據時間抽取,其餘表進行全量抽取!!
2:使用 Hadoop file output 組件時,要注意把日期字段的格式修改爲UTF8!!parquet 的 fields 中 date 類型也要改爲UTF8類型!!
3.4 ods層增量數據抽取
測試SQL語句:
-- Spot-check the incremental load: sample two rows from the dt='20190910'
-- partition of each ODS table. The dt literals use plain ASCII single quotes;
-- typographic quotes are not valid SQL.
select * from itcast_ods.itcast_orders where dt='20190910' limit 2;
select * from itcast_ods.itcast_goods where dt='20190910' limit 2;
select * from itcast_ods.itcast_order_goods where dt='20190910' limit 2;
select * from itcast_ods.itcast_shops where dt='20190910' limit 2;
select * from itcast_ods.itcast_goods_cats where dt='20190910' limit 2;
select * from itcast_ods.itcast_org where dt='20190910' limit 2;
select * from itcast_ods.itcast_order_refunds where dt='20190910' limit 2;
select * from itcast_ods.itcast_users where dt='20190910' limit 2;
select * from itcast_ods.itcast_user_address where dt='20190910' limit 2;
select * from itcast_ods.itcast_payments where dt='20190910' limit 2;
hive建表語句鏈接:
hive建表語句和kettle操作
https://pan.baidu.com/s/1TPlx2qd0m4vDyT7ATWI8oQ
提取碼: y5tv
複製這段內容後打開百度網盤手機App,操作更方便哦
ods層建表語句業務數據
-- ODS layer: orders table.
-- External Parquet table partitioned by the string column dt. Because the
-- table is external, dropping it removes only the metastore entry and leaves
-- the data files in place.
DROP TABLE IF EXISTS `itcast_ods`.`itcast_orders`;
CREATE EXTERNAL TABLE `itcast_ods`.`itcast_orders` (
    orderId BIGINT,
    orderNo STRING,
    shopId BIGINT,
    userId BIGINT,
    orderStatus BIGINT,
    goodsMoney DOUBLE,
    deliverType BIGINT,
    deliverMoney DOUBLE,
    totalMoney DOUBLE,
    realTotalMoney DOUBLE,
    payType BIGINT,
    isPay BIGINT,
    areaId BIGINT,
    userAddressId BIGINT,
    areaIdPath STRING,
    userName STRING,
    userAddress STRING,
    userPhone STRING,
    orderScore BIGINT,
    isInvoice BIGINT,
    invoiceClient STRING,
    orderRemarks STRING,
    orderSrc BIGINT,
    needPay DOUBLE,
    payRand BIGINT,
    orderType BIGINT,
    isRefund BIGINT,
    isAppraise BIGINT,
    cancelReason BIGINT,
    rejectReason BIGINT,
    rejectOtherReason STRING,
    isClosed BIGINT,
    goodsSearchKeys STRING,
    orderunique STRING,
    receiveTime STRING,
    deliveryTime STRING,
    tradeNo STRING,
    dataFlag BIGINT,
    createTime STRING,
    settlementId BIGINT,
    commissionFee DOUBLE,
    scoreMoney DOUBLE,
    useScore BIGINT,
    orderCode STRING,
    extraJson STRING,
    orderCodeTargetId BIGINT,
    noticeDeliver BIGINT,
    invoiceJson STRING,
    lockCashMoney DOUBLE,
    payTime STRING,
    isBatch BIGINT,
    totalPayFee BIGINT
)
PARTITIONED BY (dt STRING)
STORED AS PARQUET;
-- ODS layer: order detail (order line items) table.
-- External Parquet table partitioned by the string column dt. Because the
-- table is external, dropping it removes only the metastore entry and leaves
-- the data files in place.
DROP TABLE IF EXISTS `itcast_ods`.`itcast_order_goods`;
CREATE EXTERNAL TABLE `itcast_ods`.`itcast_order_goods` (
    ogId BIGINT,
    orderId BIGINT,
    goodsId BIGINT,
    goodsNum BIGINT,
    goodsPrice DOUBLE,
    payPrice DOUBLE,
    goodsSpecId BIGINT,
    goodsSpecNames STRING,
    goodsName STRING,
    goodsImg STRING,
    extraJson STRING,
    goodsType BIGINT,
    commissionRate DOUBLE,
    goodsCode STRING,
    promotionJson STRING,
    createtime STRING
)
PARTITIONED BY (dt STRING)
STORED AS PARQUET;
-- ODS layer: shops table.
-- External Parquet table partitioned by the string column dt. Because the
-- table is external, dropping it removes only the metastore entry and leaves
-- the data files in place.
-- NOTE(review): column spellings such as shopAtive are kept exactly as
-- written; presumably they mirror the source table - confirm before renaming.
DROP TABLE IF EXISTS `itcast_ods`.`itcast_shops`;
CREATE EXTERNAL TABLE `itcast_ods`.`itcast_shops` (
    shopId BIGINT,
    shopSn STRING,
    userId BIGINT,
    areaIdPath STRING,
    areaId BIGINT,
    isSelf BIGINT,
    shopName STRING,
    shopkeeper STRING,
    telephone STRING,
    shopCompany STRING,
    shopImg STRING,
    shopTel STRING,
    shopQQ STRING,
    shopWangWang STRING,
    shopAddress STRING,
    bankId BIGINT,
    bankNo STRING,
    bankUserName STRING,
    isInvoice BIGINT,
    invoiceRemarks STRING,
    serviceStartTime BIGINT,
    serviceEndTime BIGINT,
    freight BIGINT,
    shopAtive BIGINT,
    shopStatus BIGINT,
    statusDesc STRING,
    dataFlag BIGINT,
    createTime STRING,
    shopMoney DOUBLE,
    lockMoney DOUBLE,
    noSettledOrderNum BIGINT,
    noSettledOrderFee DOUBLE,
    paymentMoney DOUBLE,
    bankAreaId BIGINT,
    bankAreaIdPath STRING,
    applyStatus BIGINT,
    applyDesc STRING,
    applyTime STRING,
    applyStep BIGINT,
    shopNotice STRING,
    rechargeMoney DOUBLE,
    longitude DOUBLE,
    latitude DOUBLE,
    mapLevel BIGINT,
    BDcode STRING,
    modifyTime STRING
)
PARTITIONED BY (dt STRING)
STORED AS PARQUET;
-- ODS layer: goods table.
-- External Parquet table partitioned by the string column dt. Because the
-- table is external, dropping it removes only the metastore entry and leaves
-- the data files in place.
-- NOTE(review): column spellings such as goodsSerachKeywords are kept exactly
-- as written; presumably they mirror the source table - confirm before renaming.
DROP TABLE IF EXISTS `itcast_ods`.`itcast_goods`;
CREATE EXTERNAL TABLE `itcast_ods`.`itcast_goods` (
    goodsId BIGINT,
    goodsSn STRING,
    productNo STRING,
    goodsName STRING,
    goodsImg STRING,
    shopId BIGINT,
    goodsType BIGINT,
    marketPrice DOUBLE,
    shopPrice DOUBLE,
    warnStock BIGINT,
    goodsStock BIGINT,
    goodsUnit STRING,
    goodsTips STRING,
    isSale BIGINT,
    isBest BIGINT,
    isHot BIGINT,
    isNew BIGINT,
    isRecom BIGINT,
    goodsCatIdPath STRING,
    goodsCatId BIGINT,
    shopCatId1 BIGINT,
    shopCatId2 BIGINT,
    brandId BIGINT,
    goodsDesc STRING,
    goodsStatus BIGINT,
    saleNum BIGINT,
    saleTime STRING,
    visitNum BIGINT,
    appraiseNum BIGINT,
    isSpec BIGINT,
    gallery STRING,
    goodsSeoKeywords STRING,
    illegalRemarks STRING,
    dataFlag BIGINT,
    createTime STRING,
    isFreeShipping BIGINT,
    goodsSerachKeywords STRING,
    modifyTime STRING
)
PARTITIONED BY (dt STRING)
STORED AS PARQUET;
-- ODS layer: organization table.
-- External Parquet table partitioned by the string column dt. Because the
-- table is external, dropping it removes only the metastore entry and leaves
-- the data files in place.
-- IF EXISTS makes the drop safe on a fresh database without relying on the
-- hive.exec.drop.ignorenonexistent setting, and matches the other ODS tables.
DROP TABLE IF EXISTS `itcast_ods`.`itcast_org`;
CREATE EXTERNAL TABLE `itcast_ods`.`itcast_org` (
    orgId BIGINT,
    parentId BIGINT,
    orgName STRING,
    orgLevel BIGINT,
    managerCode STRING,
    isdelete BIGINT,
    createTime STRING,
    updateTime STRING,
    isShow BIGINT,
    orgType BIGINT
)
PARTITIONED BY (dt STRING)
STORED AS PARQUET;
-- ODS layer: goods category table.
-- External Parquet table partitioned by the string column dt. Because the
-- table is external, dropping it removes only the metastore entry and leaves
-- the data files in place.
DROP TABLE IF EXISTS `itcast_ods`.`itcast_goods_cats`;
CREATE EXTERNAL TABLE `itcast_ods`.`itcast_goods_cats` (
    catId BIGINT,
    parentId BIGINT,
    catName STRING,
    isShow BIGINT,
    isFloor BIGINT,
    catSort BIGINT,
    dataFlag BIGINT,
    createTime STRING,
    commissionRate DOUBLE,
    catImg STRING,
    subTitle STRING,
    simpleName STRING,
    seoTitle STRING,
    seoKeywords STRING,
    seoDes STRING,
    catListTheme STRING,
    detailTheme STRING,
    mobileCatListTheme STRING,
    mobileDetailTheme STRING,
    wechatCatListTheme STRING,
    wechatDetailTheme STRING,
    cat_level BIGINT
)
PARTITIONED BY (dt STRING)
STORED AS PARQUET;
-- ODS layer: users table.
-- External Parquet table partitioned by the string column dt. Because the
-- table is external, dropping it removes only the metastore entry and leaves
-- the data files in place.
-- NOTE(review): column spellings such as brithday are kept exactly as
-- written; presumably they mirror the source table - confirm before renaming.
DROP TABLE IF EXISTS `itcast_ods`.`itcast_users`;
CREATE EXTERNAL TABLE `itcast_ods`.`itcast_users` (
    userId BIGINT,
    loginName STRING,
    loginSecret BIGINT,
    loginPwd STRING,
    userType BIGINT,
    userSex BIGINT,
    userName STRING,
    trueName STRING,
    brithday STRING,
    userPhoto STRING,
    userQQ STRING,
    userPhone STRING,
    userEmail STRING,
    userScore BIGINT,
    userTotalScore BIGINT,
    lastIP STRING,
    lastTime STRING,
    userFrom BIGINT,
    userMoney DOUBLE,
    lockMoney DOUBLE,
    userStatus BIGINT,
    dataFlag BIGINT,
    createTime STRING,
    payPwd STRING,
    rechargeMoney DOUBLE,
    isInform BIGINT
)
PARTITIONED BY (dt STRING)
STORED AS PARQUET;
-- ODS layer: order refunds (returns) table.
-- External Parquet table partitioned by the string column dt. Because the
-- table is external, dropping it removes only the metastore entry and leaves
-- the data files in place.
-- NOTE(review): column spellings such as refundReson are kept exactly as
-- written; presumably they mirror the source table - confirm before renaming.
DROP TABLE IF EXISTS `itcast_ods`.`itcast_order_refunds`;
CREATE EXTERNAL TABLE `itcast_ods`.`itcast_order_refunds` (
    id BIGINT,
    orderId BIGINT,
    goodsId BIGINT,
    refundTo BIGINT,
    refundReson BIGINT,
    refundOtherReson STRING,
    backMoney DOUBLE,
    refundTradeNo STRING,
    refundRemark STRING,
    refundTime STRING,
    shopRejectReason STRING,
    refundStatus BIGINT,
    createTime STRING
)
PARTITIONED BY (dt STRING)
STORED AS PARQUET;
-- ODS layer: user address table.
-- External Parquet table partitioned by the string column dt. Because the
-- table is external, dropping it removes only the metastore entry and leaves
-- the data files in place.
DROP TABLE IF EXISTS `itcast_ods`.`itcast_user_address`;
CREATE EXTERNAL TABLE `itcast_ods`.`itcast_user_address` (
    addressId BIGINT,
    userId BIGINT,
    userName STRING,
    otherName STRING,
    userPhone STRING,
    areaIdPath STRING,
    areaId BIGINT,
    userAddress STRING,
    isDefault BIGINT,
    dataFlag BIGINT,
    createTime STRING
)
PARTITIONED BY (dt STRING)
STORED AS PARQUET;
-- ODS layer: payment methods table.
-- External Parquet table partitioned by the string column dt. Because the
-- table is external, dropping it removes only the metastore entry and leaves
-- the data files in place.
DROP TABLE IF EXISTS `itcast_ods`.`itcast_payments`;
CREATE EXTERNAL TABLE `itcast_ods`.`itcast_payments` (
    id BIGINT,
    payCode STRING,
    payName STRING,
    payDesc STRING,
    payOrder BIGINT,
    payConfig STRING,
    enabled BIGINT,
    isOnline BIGINT,
    payFor STRING
)
PARTITIONED BY (dt STRING)
STORED AS PARQUET;