新增字段&跑歷史分區
1、現象
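Hive(筆者使用的 Hive 版本爲 1.1.0)表新增列之後,重跑歷史分區數據,發現新增的列在老分區中取值都爲 NULL。原因是 `alter table ... add columns` 默認(RESTRICT)只修改表級別的元數據,已存在分區的元數據仍保留舊的列定義,所以即使重寫了數據,讀取老分區時新增列依然返回 NULL。示例如下: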
1)初始化表
-- Initialise the demo table: one string column `id`, partitioned by `dt`.
-- NOTE: HiveQL line comments start with `--`; `//` is not comment syntax
-- and makes the script fail to parse when pasted into hive/beeline.
drop table if exists tmp.test;

create table tmp.test
(
    id string
)
partitioned by (dt string);

-- Write a single row into partition dt='20190509'.
insert overwrite table tmp.test
partition (dt = '20190509')
select
    '0001' as id;

-- Show the partition. `select *` is deliberate here: the point of the demo
-- is to display every column the table currently exposes.
select
    *
from
    tmp.test
where
    dt = '20190509';
+-------+-----------+
| id | dt |
+-------+-----------+
| 0001 | 20190509 |
+-------+-----------+
1 row selected (0.234 seconds)
2)添加字段後重寫數據,新增字段仍爲空
-- Reproduce the problem: add a column, then rewrite an already-existing partition.
-- (HiveQL line comments use `--`; `//` is not valid comment syntax.)

-- Add the new column `name`. Without the `cascade` keyword the default mode is
-- RESTRICT, which changes the table-level column list only; metadata of
-- partitions that already exist is left untouched.
alter table tmp.test add columns (name string);

-- Rewrite the existing partition, this time supplying a value for `name`.
insert overwrite table tmp.test
partition (dt = '20190509')
select
    '0001'   as id,
    '光頭強' as name;

-- Show the partition. `name` comes back as NULL even though it was just
-- written, because the partition is still read with its old column list.
select
    *
from
    tmp.test
where
    dt = '20190509';
+-------+-------+-----------+
| id | name | dt |
+-------+-------+-----------+
| 0001 | NULL | 20190509 |
+-------+-------+-----------+
1 row selected (0.956 seconds)
2、解決辦法
添加字段後,重跑已有分區前,先刪除已有分區。這樣重新寫入時分區會被重新創建,並按照表當前(包含新增列)的結構記錄元數據,新增列即可正常讀出。
-- Workaround: drop the stale partition before rewriting it, so that it is
-- recreated with the table's current column list.
-- (HiveQL line comments use `--`; `//` is not valid comment syntax.)

-- Add the new column `name`. Skip this statement if the column has already
-- been added to tmp.test; adding the same column a second time is expected
-- to fail with a duplicate-column error.
-- Alternative (Hive 1.1.0+): `alter table tmp.test add columns (name string) cascade;`
-- also updates the metadata of existing partitions, which should make the
-- drop below unnecessary -- verify on your Hive version.
alter table tmp.test add columns (name string);

-- Drop the old partition so its outdated metadata goes away.
-- For a managed table such as this one, the partition's data is deleted as
-- well, which is fine because it is rewritten immediately below.
alter table tmp.test drop if exists partition (dt = '20190509');

-- Rewrite the partition; it is now created fresh against the new schema.
insert overwrite table tmp.test
partition (dt = '20190509')
select
    '0001'   as id,
    '光頭強' as name;

-- Show the partition: `name` is now populated.
select
    *
from
    tmp.test
where
    dt = '20190509';
+-------+-------+-----------+
| id | name | dt |
+-------+-------+-----------+
| 0001 | 光頭強 | 20190509 |
+-------+-------+-----------+
1 row selected (0.184 seconds)