-- 内部表和外部表的转换 (managed ↔ external table conversion)
-- Managed -> external: flip the EXTERNAL table property.
-- (HiveQL comments use "--"; the original "//" trailing comments are not
-- valid Hive comment syntax and would cause a parse error.)
alter table tablePartition set TBLPROPERTIES ('EXTERNAL'='TRUE');
-- External -> managed.
alter table tablePartition set TBLPROPERTIES ('EXTERNAL'='FALSE');
-- 复制非分区表表结构和数据 (copy a non-partitioned table's schema and data)
-- CTAS: copies the column schema and the data of a non-partitioned table
-- in one statement. Per the sections below, partitioned tables are copied
-- with CREATE TABLE ... LIKE plus INSERT OVERWRITE instead.
CREATE TABLE bigdata17_new AS SELECT * FROM bigdata17_old;
-- 复制分区表表结构和数据 (copy a partitioned table's schema and data)
-- LIKE clones the full table definition, including the partition spec
-- (no data is copied).
CREATE TABLE bigdata17_new like bigdata17_old;
-- Dynamic-partition insert copies the data; requires
-- hive.exec.dynamic.partition=true and mode=nonstrict (settings below).
insert overwrite table bigdata17_new partition(dt) select * from bigdata17_old;
-- 动态分区开启 (enable dynamic partitioning)
-- Enable dynamic partitioning (on by default).
set hive.exec.dynamic.partition=true;
-- strict mode requires at least one partition column to be given a static
-- value; nonstrict allows every partition column to be dynamic.
set hive.exec.dynamic.partition.mode=nonstrict;
-- Maximum number of dynamic partitions one DML statement may create
-- (default 1000).
set hive.exec.max.dynamic.partitions=100000;
-- Maximum number of partitions each mapper/reducer may create (default 100).
set hive.exec.max.dynamic.partitions.pernode=100000;
-- 实操案例 (hands-on walkthrough)
-- Clone only the schema of the source table, then inspect the generated DDL.
create table tmp.xx_external_transfer like dwt.dwt_user_visit_pv_incr_1d;
show create table tmp.xx_external_transfer;
-- Captured output of the SHOW CREATE TABLE above: note there is no
-- EXTERNAL keyword and no 'EXTERNAL' table property — this is a MANAGED
-- table, so DROP TABLE would delete its HDFS data.
CREATE TABLE `tmp.xx_external_transfer`(
`userinfo_uri` string COMMENT '用户uri',
`web_type` string COMMENT '平台类型',
`start_time` bigint COMMENT '访问开始时间',
`end_time` bigint COMMENT '访问结束时间',
`pv` bigint COMMENT '访问pv')
PARTITIONED BY (
`dt` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION
'hdfs://HDFS41368/usr/hive/warehouse/tmp.db/xx_external_transfer'
TBLPROPERTIES (
'orc.compress'='SNAPPY',
'transient_lastDdlTime'='1576494038');
-- Convert the managed table to EXTERNAL via the table property, then
-- re-check the DDL to confirm the change took effect.
alter table tmp.xx_external_transfer set TBLPROPERTIES ('EXTERNAL'='TRUE');
show create table tmp.xx_external_transfer;
-- Captured output after the ALTER: the DDL now reads CREATE EXTERNAL TABLE,
-- so DROP TABLE will remove only the metadata, not the HDFS files.
CREATE EXTERNAL TABLE `tmp.xx_external_transfer`(
`userinfo_uri` string COMMENT '用户uri',
`web_type` string COMMENT '平台类型',
`start_time` bigint COMMENT '访问开始时间',
`end_time` bigint COMMENT '访问结束时间',
`pv` bigint COMMENT '访问pv')
PARTITIONED BY (
`dt` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION
'hdfs://HDFS41368/usr/hive/warehouse/tmp.db/xx_external_transfer'
TBLPROPERTIES (
'last_modified_by'='hadoop',
'last_modified_time'='1576494088',
'orc.compress'='SNAPPY',
'transient_lastDdlTime'='1576494088');
-- Load data with a fully dynamic partition column (dt comes from the
-- SELECT output), so both settings below are required.
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table tmp.xx_external_transfer partition(dt)
select *
from dwt.dwt_user_visit_pv_incr_1d
where dt>='2019-12-01';
-- Verify which partitions were created.
show partitions tmp.xx_external_transfer;
# Inspect the table's warehouse directory on HDFS.
# "hadoop dfs" has been deprecated for years; "hdfs dfs" is the
# supported equivalent and avoids the deprecation warning.
hdfs dfs -ls /usr/hive/warehouse/tmp.db/xx_external_transfer/
hdfs dfs -ls /usr/hive/warehouse/tmp.db/xx_external_transfer/dt=2019-12-13
hdfs dfs -du -h /usr/hive/warehouse/tmp.db/xx_external_transfer/dt=2019-12-13
-- Drop the (now EXTERNAL) table: only the metadata goes away; the HDFS
-- data written above survives (confirmed by the ls commands above).
drop table if exists tmp.xx_external_transfer;
-- Recreate the table as MANAGED (no EXTERNAL keyword) over the same
-- LOCATION, so the surviving files can be re-attached via ADD PARTITION.
CREATE TABLE `tmp.xx_external_transfer`(
`userinfo_uri` string COMMENT '用户uri',
`web_type` string COMMENT '平台类型',
`start_time` bigint COMMENT '访问开始时间',
`end_time` bigint COMMENT '访问结束时间',
`pv` bigint COMMENT '访问pv')
PARTITIONED BY (
`dt` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION
'hdfs://HDFS41368/usr/hive/warehouse/tmp.db/xx_external_transfer'
TBLPROPERTIES (
'orc.compress'='SNAPPY',
'transient_lastDdlTime'='1576494038');
-- Re-register the surviving HDFS partition directories with the metastore.
-- Hive's ALTER TABLE ... ADD accepts multiple PARTITION specs in a single
-- statement (one metastore call instead of fifteen), and IF NOT EXISTS
-- makes the script safely re-runnable. (MSCK REPAIR TABLE is an
-- alternative that discovers all partitions automatically.)
alter table tmp.xx_external_transfer add if not exists
    partition(dt='2019-12-01')
    partition(dt='2019-12-02')
    partition(dt='2019-12-03')
    partition(dt='2019-12-04')
    partition(dt='2019-12-05')
    partition(dt='2019-12-06')
    partition(dt='2019-12-07')
    partition(dt='2019-12-08')
    partition(dt='2019-12-09')
    partition(dt='2019-12-10')
    partition(dt='2019-12-11')
    partition(dt='2019-12-12')
    partition(dt='2019-12-13')
    partition(dt='2019-12-14')
    partition(dt='2019-12-15');
-- Spot-check that the re-attached partition is readable.
select *
from tmp.xx_external_transfer
where dt='2019-12-01'
limit 10;
-- Drop the MANAGED table: this time Hive deletes the HDFS data as well
-- (demonstrated by the failing ls at the end of the file).
drop table if exists tmp.xx_external_transfer;
# Check the warehouse directory again after dropping the managed table.
# "hadoop dfs" is deprecated; "hdfs dfs" is the supported equivalent.
hdfs dfs -ls /usr/hive/warehouse/tmp.db/xx_external_transfer/
hdfs dfs -ls /usr/hive/warehouse/tmp.db/xx_external_transfer/dt=2019-12-13
hdfs dfs -du -h /usr/hive/warehouse/tmp.db/xx_external_transfer/dt=2019-12-13
-- (captured output) ls: `/usr/hive/warehouse/tmp.db/xx_external_transfer/': No such file or directory
-- => dropping the MANAGED table removed the data directory from HDFS.