Hive 內外表轉換、表結構複製、動態分區等實操

內部表和外部表的轉換

-- Convert a managed (internal) table to an external one.
-- NOTE: HiveQL line comments use '--'; the original '//' suffix is not
-- valid Hive comment syntax and breaks the statement.
-- 'EXTERNAL'='TRUE' must be uppercase TRUE.
alter table tablePartition set TBLPROPERTIES ('EXTERNAL'='TRUE');
-- Convert an external table back to a managed (internal) one.
alter table tablePartition set TBLPROPERTIES ('EXTERNAL'='FALSE');

複製非分區表表結構和數據

-- CTAS copies both the schema and the data in one step, but it does NOT
-- carry over partitioning, so it only suits non-partitioned sources.
CREATE TABLE bigdata17_new AS SELECT * FROM bigdata17_old;

複製分區表表結構和數據

-- LIKE copies the full definition (columns, partition spec, storage format)
-- but no data.
CREATE TABLE bigdata17_new like bigdata17_old;
-- Load the data with dynamic partitioning; requires the
-- hive.exec.dynamic.partition* settings. The bare select * assumes the
-- partition column dt is the last column of the source -- TODO confirm.
insert overwrite table bigdata17_new partition(dt) select * from bigdata17_old;

動態分區開啓

-- 開啓動態分區,默認開啓
-- Enable dynamic partitioning (enabled by default).
set hive.exec.dynamic.partition=true;
-- Strict mode requires at least one partition column to be given a static
-- value; nonstrict lets every partition column be resolved per-row.
set hive.exec.dynamic.partition.mode=nonstrict;
-- Maximum dynamic partitions one DML statement may create (default 1000).
set hive.exec.max.dynamic.partitions=100000;
-- Maximum dynamic partitions each mapper/reducer may create (default 100).
set hive.exec.max.dynamic.partitions.pernode=100000;

實操案例

-- Create an empty copy of the production table's schema in the tmp database.
create table tmp.xx_external_transfer like dwt.dwt_user_visit_pv_incr_1d;
-- Inspect the generated DDL: the output shows a managed table
-- (no EXTERNAL keyword yet).
show create table tmp.xx_external_transfer;
CREATE TABLE `tmp.xx_external_transfer`(
  `userinfo_uri` string COMMENT '用戶uri', 
  `web_type` string COMMENT '平臺類型', 
  `start_time` bigint COMMENT '訪問開始時間', 
  `end_time` bigint COMMENT '訪問結束時間', 
  `pv` bigint COMMENT '訪問pv')
PARTITIONED BY ( 
  `dt` string)
ROW FORMAT SERDE 
  'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
STORED AS INPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
OUTPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION
  'hdfs://HDFS41368/usr/hive/warehouse/tmp.db/xx_external_transfer'
TBLPROPERTIES (
  'orc.compress'='SNAPPY', 
  'transient_lastDdlTime'='1576494038');

-- Flip the managed table to external ('EXTERNAL'='TRUE' must be uppercase).
alter table tmp.xx_external_transfer set TBLPROPERTIES ('EXTERNAL'='TRUE');

-- Verify: the generated DDL now starts with CREATE EXTERNAL TABLE.
show create table tmp.xx_external_transfer;
CREATE EXTERNAL TABLE `tmp.xx_external_transfer`(
  `userinfo_uri` string COMMENT '用戶uri', 
  `web_type` string COMMENT '平臺類型', 
  `start_time` bigint COMMENT '訪問開始時間', 
  `end_time` bigint COMMENT '訪問結束時間', 
  `pv` bigint COMMENT '訪問pv')
PARTITIONED BY ( 
  `dt` string)
ROW FORMAT SERDE 
  'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
STORED AS INPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
OUTPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION
  'hdfs://HDFS41368/usr/hive/warehouse/tmp.db/xx_external_transfer'
TBLPROPERTIES (
  'last_modified_by'='hadoop', 
  'last_modified_time'='1576494088', 
  'orc.compress'='SNAPPY', 
  'transient_lastDdlTime'='1576494088');

-- Dynamic-partition settings required before the partition(dt) insert below.
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;


-- Copy half a month of data; dt is resolved per-row. The bare select *
-- assumes dt is the last column of the source table -- TODO confirm.
insert overwrite table tmp.xx_external_transfer partition(dt)
select * 
from dwt.dwt_user_visit_pv_incr_1d
where dt>='2019-12-01';

-- Confirm the dynamic partitions were created.
show partitions tmp.xx_external_transfer;

# Inspect the table's warehouse directory and one partition on HDFS.
hadoop dfs -ls /usr/hive/warehouse/tmp.db/xx_external_transfer/
hadoop dfs -ls /usr/hive/warehouse/tmp.db/xx_external_transfer/dt=2019-12-13
# -du -h: human-readable total size of the partition directory.
hadoop dfs -du -h /usr/hive/warehouse/tmp.db/xx_external_transfer/dt=2019-12-13

-- Drop the EXTERNAL table (removes metadata only; the HDFS files survive)
-- and recreate it as a MANAGED table over the same LOCATION, to demonstrate
-- that dropping a managed table later removes the data as well.
DROP TABLE IF EXISTS tmp.xx_external_transfer;

CREATE TABLE `tmp.xx_external_transfer` (
    `userinfo_uri` string COMMENT '用戶uri',
    `web_type`     string COMMENT '平臺類型',
    `start_time`   bigint COMMENT '訪問開始時間',
    `end_time`     bigint COMMENT '訪問結束時間',
    `pv`           bigint COMMENT '訪問pv'
)
PARTITIONED BY (`dt` string)
ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
STORED AS
    INPUTFORMAT  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
-- Point at the directory left behind by the dropped external table.
LOCATION 'hdfs://HDFS41368/usr/hive/warehouse/tmp.db/xx_external_transfer'
TBLPROPERTIES (
    'orc.compress' = 'SNAPPY',
    'transient_lastDdlTime' = '1576494038'
);

-- The recreated table's metastore has no partitions even though the data
-- directories still exist on HDFS, so re-register them. A single
-- ALTER TABLE ... ADD PARTITION statement accepts multiple partition specs,
-- making this one metastore call instead of fifteen.
-- (Alternative: MSCK REPAIR TABLE tmp.xx_external_transfer; discovers every
-- partition directory under the table location automatically.)
alter table tmp.xx_external_transfer add
  partition(dt='2019-12-01')
  partition(dt='2019-12-02')
  partition(dt='2019-12-03')
  partition(dt='2019-12-04')
  partition(dt='2019-12-05')
  partition(dt='2019-12-06')
  partition(dt='2019-12-07')
  partition(dt='2019-12-08')
  partition(dt='2019-12-09')
  partition(dt='2019-12-10')
  partition(dt='2019-12-11')
  partition(dt='2019-12-12')
  partition(dt='2019-12-13')
  partition(dt='2019-12-14')
  partition(dt='2019-12-15');

-- Spot-check that a re-attached partition is readable.
SELECT *
FROM tmp.xx_external_transfer
WHERE dt = '2019-12-01'
LIMIT 10;

-- Drop the now-MANAGED table: this time Hive deletes the HDFS data too
-- (contrast with dropping the external table earlier, which kept the files).
drop table if exists tmp.xx_external_transfer;

# Verify the data is gone: the directory no longer exists, so every command
# fails with "No such file or directory" (see the output that follows).
hadoop dfs -ls /usr/hive/warehouse/tmp.db/xx_external_transfer/
hadoop dfs -ls /usr/hive/warehouse/tmp.db/xx_external_transfer/dt=2019-12-13
hadoop dfs -du -h /usr/hive/warehouse/tmp.db/xx_external_transfer/dt=2019-12-13

ls: `/usr/hive/warehouse/tmp.db/xx_external_transfer/': No such file or directory

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章