Hands-on with Hive: internal/external table conversion, copying table structures, dynamic partitions, and more

Converting between internal and external tables

A table can be flipped between managed (internal) and external by toggling the EXTERNAL table property. Note that in older Hive versions the value is matched case-sensitively, so use the uppercase strings 'TRUE' and 'FALSE':

alter table tablePartition set TBLPROPERTIES ('EXTERNAL'='TRUE');   -- internal table -> external table
alter table tablePartition set TBLPROPERTIES ('EXTERNAL'='FALSE');  -- external table -> internal table
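To confirm that the conversion took effect, you can inspect the table's metadata (a quick check, reusing the tablePartition name from above):

-- Look for the "Table Type:" row in the output:
-- MANAGED_TABLE for an internal table, EXTERNAL_TABLE for an external one.
desc formatted tablePartition;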

Copying a non-partitioned table's structure and data

CREATE TABLE bigdata17_new AS SELECT * FROM bigdata17_old;
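One caveat: CTAS creates the new table in the session's default file format (TEXTFILE unless configured otherwise) rather than inheriting the source table's format. A sketch of pinning the format explicitly, assuming ORC is wanted as in the case study below:

-- The same copy, with the storage format stated instead of left to defaults.
CREATE TABLE bigdata17_new STORED AS ORC AS SELECT * FROM bigdata17_old;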

Copying a partitioned table's structure and data

CREATE TABLE bigdata17_new like bigdata17_old;
-- The insert relies on dynamic partitioning (enabled in the next section);
-- with SELECT *, the partition column dt comes last and feeds PARTITION(dt).
insert overwrite table bigdata17_new partition(dt) select * from bigdata17_old;

Enabling dynamic partitioning

-- Enable dynamic partitioning (on by default)
set hive.exec.dynamic.partition=true;
-- Strict mode requires at least one partition column to be given a static value;
-- nonstrict lifts that restriction
set hive.exec.dynamic.partition.mode=nonstrict;
-- Maximum number of dynamic partitions a single DML statement may create (default 1000)
set hive.exec.max.dynamic.partitions=100000;
-- Maximum number of partitions each mapper or reducer may create (default 100)
set hive.exec.max.dynamic.partitions.pernode=100000;
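For contrast, a statement like the following is legal even under the default strict mode, because the partition column is given a static value (a sketch; col1 and col2 are hypothetical stand-ins for the table's non-partition columns):

-- dt is supplied statically, so strict mode is satisfied; the SELECT list
-- must then exclude dt.
insert overwrite table bigdata17_new partition(dt='2019-12-01')
select col1, col2 from bigdata17_old where dt='2019-12-01';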

Hands-on example

The walkthrough below clones a partitioned ORC table, converts it to external, loads it via dynamic partitioning, and then demonstrates the key behavioral difference: dropping an external table leaves its HDFS data in place, while dropping an internal table deletes it.

First clone the structure of an existing partitioned table, then inspect the generated DDL:

create table tmp.xx_external_transfer like dwt.dwt_user_visit_pv_incr_1d;
show create table tmp.xx_external_transfer;
CREATE TABLE `tmp.xx_external_transfer`(
  `userinfo_uri` string COMMENT 'user URI', 
  `web_type` string COMMENT 'platform type', 
  `start_time` bigint COMMENT 'visit start time', 
  `end_time` bigint COMMENT 'visit end time', 
  `pv` bigint COMMENT 'visit PV')
PARTITIONED BY ( 
  `dt` string)
ROW FORMAT SERDE 
  'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
STORED AS INPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
OUTPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION
  'hdfs://HDFS41368/usr/hive/warehouse/tmp.db/xx_external_transfer'
TBLPROPERTIES (
  'orc.compress'='SNAPPY', 
  'transient_lastDdlTime'='1576494038');

Convert it to an external table and check the DDL again; the statement now reads CREATE EXTERNAL TABLE, and the last_modified_* properties record the change:

alter table tmp.xx_external_transfer set TBLPROPERTIES ('EXTERNAL'='TRUE');

show create table tmp.xx_external_transfer;
CREATE EXTERNAL TABLE `tmp.xx_external_transfer`(
  `userinfo_uri` string COMMENT 'user URI', 
  `web_type` string COMMENT 'platform type', 
  `start_time` bigint COMMENT 'visit start time', 
  `end_time` bigint COMMENT 'visit end time', 
  `pv` bigint COMMENT 'visit PV')
PARTITIONED BY ( 
  `dt` string)
ROW FORMAT SERDE 
  'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
STORED AS INPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
OUTPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION
  'hdfs://HDFS41368/usr/hive/warehouse/tmp.db/xx_external_transfer'
TBLPROPERTIES (
  'last_modified_by'='hadoop', 
  'last_modified_time'='1576494088', 
  'orc.compress'='SNAPPY', 
  'transient_lastDdlTime'='1576494088');

Enable dynamic partitioning and load the recent data from the source table:

set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;


insert overwrite table tmp.xx_external_transfer partition(dt)
select * 
from dwt.dwt_user_visit_pv_incr_1d
where dt>='2019-12-01';

Verify the partitions in the metastore and the files behind them on HDFS:

show partitions tmp.xx_external_transfer;

hadoop dfs -ls /usr/hive/warehouse/tmp.db/xx_external_transfer/
hadoop dfs -ls /usr/hive/warehouse/tmp.db/xx_external_transfer/dt=2019-12-13
hadoop dfs -du -h /usr/hive/warehouse/tmp.db/xx_external_transfer/dt=2019-12-13
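The same checks can also be run without leaving the Hive CLI, which accepts dfs commands directly (same paths as above):

dfs -ls /usr/hive/warehouse/tmp.db/xx_external_transfer/;
dfs -du -h /usr/hive/warehouse/tmp.db/xx_external_transfer/dt=2019-12-13;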

Because the table is now external, the DROP removes only the metadata; the data directory on HDFS survives. Recreate the table as an internal (managed) one over the same LOCATION:

drop table if exists tmp.xx_external_transfer;
CREATE TABLE `tmp.xx_external_transfer`(
  `userinfo_uri` string COMMENT 'user URI', 
  `web_type` string COMMENT 'platform type', 
  `start_time` bigint COMMENT 'visit start time', 
  `end_time` bigint COMMENT 'visit end time', 
  `pv` bigint COMMENT 'visit PV')
PARTITIONED BY ( 
  `dt` string)
ROW FORMAT SERDE 
  'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
STORED AS INPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
OUTPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION
  'hdfs://HDFS41368/usr/hive/warehouse/tmp.db/xx_external_transfer'
TBLPROPERTIES (
  'orc.compress'='SNAPPY', 
  'transient_lastDdlTime'='1576494038');

The recreated table has no partitions registered in the metastore, even though the data directories still exist, so add them back (a one-shot alternative follows this list):

alter table tmp.xx_external_transfer add partition(dt='2019-12-01');
alter table tmp.xx_external_transfer add partition(dt='2019-12-02');
alter table tmp.xx_external_transfer add partition(dt='2019-12-03');
alter table tmp.xx_external_transfer add partition(dt='2019-12-04');
alter table tmp.xx_external_transfer add partition(dt='2019-12-05');
alter table tmp.xx_external_transfer add partition(dt='2019-12-06');
alter table tmp.xx_external_transfer add partition(dt='2019-12-07');
alter table tmp.xx_external_transfer add partition(dt='2019-12-08');
alter table tmp.xx_external_transfer add partition(dt='2019-12-09');
alter table tmp.xx_external_transfer add partition(dt='2019-12-10');
alter table tmp.xx_external_transfer add partition(dt='2019-12-11');
alter table tmp.xx_external_transfer add partition(dt='2019-12-12');
alter table tmp.xx_external_transfer add partition(dt='2019-12-13');
alter table tmp.xx_external_transfer add partition(dt='2019-12-14');
alter table tmp.xx_external_transfer add partition(dt='2019-12-15');
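Instead of fifteen individual ALTER statements, Hive can discover the partitions from the table's location, since the directories follow the dt=... naming convention:

-- Scans the table's location and registers any partition directories
-- that are missing from the metastore.
msck repair table tmp.xx_external_transfer;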

The old data is queryable again through the re-registered partitions:

select *
from tmp.xx_external_transfer
where dt='2019-12-01'
limit 10;

Now drop the table again. This time it is an internal table, so Hive deletes the HDFS data along with the metadata:

drop table if exists tmp.xx_external_transfer;

hadoop dfs -ls /usr/hive/warehouse/tmp.db/xx_external_transfer/
hadoop dfs -ls /usr/hive/warehouse/tmp.db/xx_external_transfer/dt=2019-12-13
hadoop dfs -du -h /usr/hive/warehouse/tmp.db/xx_external_transfer/dt=2019-12-13

ls: `/usr/hive/warehouse/tmp.db/xx_external_transfer/': No such file or directory

The directory is gone: dropping the internal table removed its data, whereas the earlier drop of the external table left the data untouched.
