hive - 分區


分區表是 hive 的優化手段之一:每個分區對應表目錄下的一個子目錄,查詢時通過分區條件只掃描相關的目錄,從目錄的層面控制搜索數據的範圍。

單分區

創建分區表

-- t1: table partitioned by a single column (pt_d).
-- Each distinct pt_d value is stored in its own sub-directory under the table directory.
-- Text layout of a data row: columns are separated by a comma, array elements and
-- map entries by a dash, and a map key from its value by a colon.
-- Original note: the statement must not contain stray whitespace, otherwise the CLI
-- reports an error. NOTE(review): presumably this refers to tab characters, which
-- trigger auto-completion when pasted into the interactive hive shell - confirm.
-- Trailing spaces and tabs have therefore been removed from every line below.
create table t1(
    id      int
   ,name    string
   ,hobby   array<string>
   ,add     map<string,string>
)
partitioned by (pt_d string)
row format delimited
fields terminated by ','
collection items terminated by '-'
map keys terminated by ':'
;

裝載數據

//準備數據 hive_data
1,yk,book-TV-code,henan:zhengzhou-guizhou:guiyang
2,xb,book-code,henan:zhengzhou-gansu:lanzhou

//裝載數據
hive> load data local inpath '/home/hadoop/hive_data' overwrite into table t1 partition(pt_d = "201906");
Loading data to table mydb2.t1 partition (pt_d=201906)
OK
Time taken: 0.627 seconds
hive> 

查看數據與分區

hive> select * from t1;
OK
1	yk	["book","TV","code"]	{"henan":"zhengzhou","guizhou":"guiyang"}	201906
2	xb	["book","code"]	{"henan":"zhengzhou","gansu":"lanzhou"}	201906
Time taken: 0.146 seconds, Fetched: 2 row(s)
hive> show partitions t1;
OK
pt_d=201906
Time taken: 0.104 seconds, Fetched: 1 row(s)
hive> 

插入另一個分區

hive> load data local inpath '/home/hadoop/hive_data' overwrite into table t1 partition(pt_d = "201905");
Loading data to table mydb2.t1 partition (pt_d=201905)
OK
Time taken: 0.481 seconds
hive> select * from t1;
OK
1	yk	["book","TV","code"]	{"henan":"zhengzhou","guizhou":"guiyang"}	201905
2	xb	["book","code"]	{"henan":"zhengzhou","gansu":"lanzhou"}	201905
1	yk	["book","TV","code"]	{"henan":"zhengzhou","guizhou":"guiyang"}	201906
2	xb	["book","code"]	{"henan":"zhengzhou","gansu":"lanzhou"}	201906
Time taken: 0.138 seconds, Fetched: 4 row(s)

觀察hdfs上的文件

[hadoop@slave108 ~]$ hdfs dfs -lsr /user/hive/warehouse/mydb2.db/t1
lsr: DEPRECATED: Please use 'ls -R' instead.
drwxr-xr-x   - hadoop supergroup          0 2019-08-06 22:34 /user/hive/warehouse/mydb2.db/t1/pt_d=201905
-rwxr-xr-x   2 hadoop supergroup         95 2019-08-06 22:34 /user/hive/warehouse/mydb2.db/t1/pt_d=201905/hive_data
drwxr-xr-x   - hadoop supergroup          0 2019-08-06 22:32 /user/hive/warehouse/mydb2.db/t1/pt_d=201906
-rwxr-xr-x   2 hadoop supergroup         95 2019-08-06 22:32 /user/hive/warehouse/mydb2.db/t1/pt_d=201906/hive_data

查看相應分區的數據

hive> select * from t1 where pt_d = "201905";
OK
1	yk	["book","TV","code"]	{"henan":"zhengzhou","guizhou":"guiyang"}	201905
2	xb	["book","code"]	{"henan":"zhengzhou","gansu":"lanzhou"}	201905
Time taken: 0.154 seconds, Fetched: 2 row(s)
hive> 

添加分區

hive> alter table t1 add partition (pt_d = "201904");
OK
Time taken: 0.126 seconds
hive> show partitions t1;
OK
pt_d=201904
pt_d=201905
pt_d=201906
Time taken: 0.102 seconds, Fetched: 3 row(s)
hive>

重命名分區

hive> alter table t1 partition (pt_d = "201904") rename to partition (pt_d = "201804");
OK
Time taken: 0.296 seconds
hive> show partitions t1;
OK
pt_d=201804
pt_d=201905
pt_d=201906
Time taken: 0.098 seconds, Fetched: 3 row(s)
hive>

刪除分區

hive> alter table t1 drop if exists partition (pt_d = "201804");
Dropped the partition pt_d=201804
OK
Time taken: 0.176 seconds
hive> show partitions t1;
OK
pt_d=201905
pt_d=201906
Time taken: 0.105 seconds, Fetched: 2 row(s)
hive>

多分區

創建分區表

-- t2: table partitioned by two columns, pt_d first and then sex.
-- The data columns and the text delimiters are the same as a plain delimited table:
-- comma between columns, dash between array elements and map entries,
-- colon between a map key and its value.
CREATE TABLE t2 (
    id    INT,
    name  STRING,
    hobby ARRAY<STRING>,
    add   MAP<STRING, STRING>
)
PARTITIONED BY (pt_d STRING, sex STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    COLLECTION ITEMS TERMINATED BY '-'
    MAP KEYS TERMINATED BY ':';

裝載數據

//準備數據 hive_data
1,yk,book-TV-code,henan:zhengzhou-guizhou:guiyang
2,xb,book-code,henan:zhengzhou-gansu:lanzhou

//裝載數據
hive> load data local inpath '/home/hadoop/hive_data' overwrite into table t2 partition(pt_d = "201906",sex = "male");
Loading data to table mydb2.t2 partition (pt_d=201906, sex=male)
OK
Time taken: 0.427 seconds
hive> load data local inpath '/home/hadoop/hive_data' overwrite into table t2 partition(pt_d = "201906",sex = "female");
Loading data to table mydb2.t2 partition (pt_d=201906, sex=female)
OK
Time taken: 0.494 seconds

查看數據與分區

hive> select * from t2;
OK
1	yk	["book","TV","code"]	{"henan":"zhengzhou","guizhou":"guiyang"}	201906	female
2	xb	["book","code"]	{"henan":"zhengzhou","gansu":"lanzhou"}	201906	female
1	yk	["book","TV","code"]	{"henan":"zhengzhou","guizhou":"guiyang"}	201906	male
2	xb	["book","code"]	{"henan":"zhengzhou","gansu":"lanzhou"}	201906	male
Time taken: 0.113 seconds, Fetched: 4 row(s)
hive> show partitions t2;
OK
pt_d=201906/sex=female
pt_d=201906/sex=male
Time taken: 0.099 seconds, Fetched: 2 row(s)
hive>

觀察hdfs上的數據

[hadoop@slave108 ~]$ hdfs dfs -lsr /user/hive/warehouse/mydb2.db/t2
lsr: DEPRECATED: Please use 'ls -R' instead.
drwxr-xr-x   - hadoop supergroup          0 2019-08-06 22:45 /user/hive/warehouse/mydb2.db/t2/pt_d=201906
drwxr-xr-x   - hadoop supergroup          0 2019-08-06 22:45 /user/hive/warehouse/mydb2.db/t2/pt_d=201906/sex=female
-rwxr-xr-x   2 hadoop supergroup         95 2019-08-06 22:45 /user/hive/warehouse/mydb2.db/t2/pt_d=201906/sex=female/hive_data
drwxr-xr-x   - hadoop supergroup          0 2019-08-06 22:45 /user/hive/warehouse/mydb2.db/t2/pt_d=201906/sex=male
-rwxr-xr-x   2 hadoop supergroup         95 2019-08-06 22:45 /user/hive/warehouse/mydb2.db/t2/pt_d=201906/sex=male/hive_data

外部表分區操作

創建分區外部表

hive> create external table t3(id int) partitioned by (age int);
OK
Time taken: 0.066 seconds
hive> 

一次操作添加一個分區

hive> alter table t3 add partition (age = 10);
OK
Time taken: 0.099 seconds
hive> show partitions t3;
OK
age=10
Time taken: 0.085 seconds, Fetched: 1 row(s)

一次操作添加兩個分區(注意語法格式)

//正確
hive> alter table t3 add partition (age = 11) partition(age = 12);
OK
Time taken: 0.391 seconds
hive> show partitions t3;
OK
age=10
age=11
age=12
Time taken: 0.095 seconds, Fetched: 3 row(s)


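//錯誤:同一個 partition(...) 內用逗號分隔的是多級分區的各個分區列(如上面 t2 的 pt_d、sex),不能用來一次指定多個分區;該語句不會報錯,但只添加了一個分區(age=13),age=14 並未添加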
hive> alter table t3 add partition(age=13,age=14);
OK
Time taken: 0.099 seconds
hive> show partitions t3;
OK
age=10
age=11
age=12
age=13
Time taken: 0.094 seconds, Fetched: 4 row(s)
hive>

刪除分區

//刪除分區後,hdfs上的數據仍然存在(外部表)
 alter table t3 drop partition(age=13);
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章