hive 分區
分區表是 Hive 的優化手段之一:每個分區對應表目錄下的一個子目錄,查詢時按分區過濾,即可從目錄的層面縮小搜索數據的範圍。
單分區
創建分區表
//不能有空格,否則報錯
-- Single-level partitioned table: pt_d is the partition column, so it is
-- declared in "partitioned by" and not in the regular column list.
-- Text layout of each row: ',' separates fields, '-' separates array items
-- and map entries, ':' separates a map key from its value.
create table t1 (
id int,
name string,
hobby array<string>,
add map<string,string>
)
partitioned by (pt_d string)
row format delimited
fields terminated by ','
collection items terminated by '-'
map keys terminated by ':';
裝載數據
//準備數據 hive_data
1,yk,book-TV-code,henan:zhengzhou-guizhou:guiyang
2,xb,book-code,henan:zhengzhou-gansu:lanzhou
//裝載數據
hive> load data local inpath '/home/hadoop/hive_data' overwrite into table t1 partition(pt_d = "201906");
Loading data to table mydb2.t1 partition (pt_d=201906)
OK
Time taken: 0.627 seconds
hive>
查看數據與分區
hive> select * from t1;
OK
1 yk ["book","TV","code"] {"henan":"zhengzhou","guizhou":"guiyang"} 201906
2 xb ["book","code"] {"henan":"zhengzhou","gansu":"lanzhou"} 201906
Time taken: 0.146 seconds, Fetched: 2 row(s)
hive> show partitions t1;
OK
pt_d=201906
Time taken: 0.104 seconds, Fetched: 1 row(s)
hive>
插入另一個分區
hive> load data local inpath '/home/hadoop/hive_data' overwrite into table t1 partition(pt_d = "201905");
Loading data to table mydb2.t1 partition (pt_d=201905)
OK
Time taken: 0.481 seconds
hive> select * from t1;
OK
1 yk ["book","TV","code"] {"henan":"zhengzhou","guizhou":"guiyang"} 201905
2 xb ["book","code"] {"henan":"zhengzhou","gansu":"lanzhou"} 201905
1 yk ["book","TV","code"] {"henan":"zhengzhou","guizhou":"guiyang"} 201906
2 xb ["book","code"] {"henan":"zhengzhou","gansu":"lanzhou"} 201906
Time taken: 0.138 seconds, Fetched: 4 row(s)
觀察hdfs上的文件
[hadoop@slave108 ~]$ hdfs dfs -lsr /user/hive/warehouse/mydb2.db/t1
lsr: DEPRECATED: Please use 'ls -R' instead.
drwxr-xr-x - hadoop supergroup 0 2019-08-06 22:34 /user/hive/warehouse/mydb2.db/t1/pt_d=201905
-rwxr-xr-x 2 hadoop supergroup 95 2019-08-06 22:34 /user/hive/warehouse/mydb2.db/t1/pt_d=201905/hive_data
drwxr-xr-x - hadoop supergroup 0 2019-08-06 22:32 /user/hive/warehouse/mydb2.db/t1/pt_d=201906
-rwxr-xr-x 2 hadoop supergroup 95 2019-08-06 22:32 /user/hive/warehouse/mydb2.db/t1/pt_d=201906/hive_data
查看相應分區的數據
hive> select * from t1 where pt_d = "201905";
OK
1 yk ["book","TV","code"] {"henan":"zhengzhou","guizhou":"guiyang"} 201905
2 xb ["book","code"] {"henan":"zhengzhou","gansu":"lanzhou"} 201905
Time taken: 0.154 seconds, Fetched: 2 row(s)
hive>
添加分區
hive> alter table t1 add partition (pt_d = "201904");
OK
Time taken: 0.126 seconds
hive> show partitions t1;
OK
pt_d=201904
pt_d=201905
pt_d=201906
Time taken: 0.102 seconds, Fetched: 3 row(s)
hive>
重命名分區
hive> alter table t1 partition (pt_d = "201904") rename to partition (pt_d = "201804");
OK
Time taken: 0.296 seconds
hive> show partitions t1;
OK
pt_d=201804
pt_d=201905
pt_d=201906
Time taken: 0.098 seconds, Fetched: 3 row(s)
hive>
刪除分區
hive> alter table t1 drop if exists partition (pt_d = "201804");
Dropped the partition pt_d=201804
OK
Time taken: 0.176 seconds
hive> show partitions t1;
OK
pt_d=201905
pt_d=201906
Time taken: 0.105 seconds, Fetched: 2 row(s)
hive>
多分區
創建分區表
-- Two-level partitioned table: rows are partitioned first by pt_d and then
-- by sex; both are partition columns, not regular data columns.
-- Text layout of each row: ',' separates fields, '-' separates array items
-- and map entries, ':' separates a map key from its value.
create table t2 (
id int,
name string,
hobby array<string>,
add map<string,string>
)
partitioned by (pt_d string, sex string)
row format delimited
fields terminated by ','
collection items terminated by '-'
map keys terminated by ':';
裝載數據
//準備數據 hive_data
1,yk,book-TV-code,henan:zhengzhou-guizhou:guiyang
2,xb,book-code,henan:zhengzhou-gansu:lanzhou
//裝載數據
hive> load data local inpath '/home/hadoop/hive_data' overwrite into table t2 partition(pt_d = "201906",sex = "male");
Loading data to table mydb2.t2 partition (pt_d=201906, sex=male)
OK
Time taken: 0.427 seconds
hive> load data local inpath '/home/hadoop/hive_data' overwrite into table t2 partition(pt_d = "201906",sex = "female");
Loading data to table mydb2.t2 partition (pt_d=201906, sex=female)
OK
Time taken: 0.494 seconds
查看數據與分區
hive> select * from t2;
OK
1 yk ["book","TV","code"] {"henan":"zhengzhou","guizhou":"guiyang"} 201906 female
2 xb ["book","code"] {"henan":"zhengzhou","gansu":"lanzhou"} 201906 female
1 yk ["book","TV","code"] {"henan":"zhengzhou","guizhou":"guiyang"} 201906 male
2 xb ["book","code"] {"henan":"zhengzhou","gansu":"lanzhou"} 201906 male
Time taken: 0.113 seconds, Fetched: 4 row(s)
hive> show partitions t2;
OK
pt_d=201906/sex=female
pt_d=201906/sex=male
Time taken: 0.099 seconds, Fetched: 2 row(s)
hive>
觀察hdfs上的數據
[hadoop@slave108 ~]$ hdfs dfs -lsr /user/hive/warehouse/mydb2.db/t2
lsr: DEPRECATED: Please use 'ls -R' instead.
drwxr-xr-x - hadoop supergroup 0 2019-08-06 22:45 /user/hive/warehouse/mydb2.db/t2/pt_d=201906
drwxr-xr-x - hadoop supergroup 0 2019-08-06 22:45 /user/hive/warehouse/mydb2.db/t2/pt_d=201906/sex=female
-rwxr-xr-x 2 hadoop supergroup 95 2019-08-06 22:45 /user/hive/warehouse/mydb2.db/t2/pt_d=201906/sex=female/hive_data
drwxr-xr-x - hadoop supergroup 0 2019-08-06 22:45 /user/hive/warehouse/mydb2.db/t2/pt_d=201906/sex=male
-rwxr-xr-x 2 hadoop supergroup 95 2019-08-06 22:45 /user/hive/warehouse/mydb2.db/t2/pt_d=201906/sex=male/hive_data
外部表分區操作
創建分區外部表
hive> create external table t3(id int) partitioned by (age int);
OK
Time taken: 0.066 seconds
hive>
一次操作添加一個分區
hive> alter table t3 add partition (age = 10);
OK
Time taken: 0.099 seconds
hive> show partitions t3;
OK
age=10
Time taken: 0.085 seconds, Fetched: 1 row(s)
一次操作添加兩個分區(注意語法格式)
//正確
hive> alter table t3 add partition (age = 11) partition(age = 12);
OK
Time taken: 0.391 seconds
hive> show partitions t3;
OK
age=10
age=11
age=12
Time taken: 0.095 seconds, Fetched: 3 row(s)
//錯誤:多個分區不能寫在同一個 partition(...) 裡;語句不會報錯,但只新增了 age=13,age=14 被忽略
hive> alter table t3 add partition(age=13,age=14);
OK
Time taken: 0.099 seconds
hive> show partitions t3;
OK
age=10
age=11
age=12
age=13
Time taken: 0.094 seconds, Fetched: 4 row(s)
hive>
刪除分區
-- t3 is an external table: after the partition is dropped, its data on HDFS
-- still exists.
-- "if exists" keeps the statement safe to re-run when age=13 is already gone.
alter table t3 drop if exists partition (age = 13);