hive - 分区


分区表是 Hive 的优化手段之一:每个分区对应表目录下的一个子目录,查询时通过分区条件只扫描相应的目录,从目录的层面控制搜索数据的范围。

单分区

创建分区表

//注意:粘贴到 hive 命令行的语句中不能有 Tab 字符(会触发自动补全),否则报错
create table t1(
    id      int
   ,name    string
   ,hobby   array<string>
   ,add     map<String,string>
)
partitioned by (pt_d string)
row format delimited                          
fields terminated by ',' 					  
collection items terminated by '-'             
map keys terminated by ':'					  
;

装载数据

//准备数据 hive_data
1,yk,book-TV-code,henan:zhengzhou-guizhou:guiyang
2,xb,book-code,henan:zhengzhou-gansu:lanzhou

//装载数据
hive> load data local inpath '/home/hadoop/hive_data' overwrite into table t1 partition(pt_d = "201906");
Loading data to table mydb2.t1 partition (pt_d=201906)
OK
Time taken: 0.627 seconds
hive> 

查看数据与分区

hive> select * from t1;
OK
1	yk	["book","TV","code"]	{"henan":"zhengzhou","guizhou":"guiyang"}	201906
2	xb	["book","code"]	{"henan":"zhengzhou","gansu":"lanzhou"}	201906
Time taken: 0.146 seconds, Fetched: 2 row(s)
hive> show partitions t1;
OK
pt_d=201906
Time taken: 0.104 seconds, Fetched: 1 row(s)
hive> 

向另一个分区装载数据

hive> load data local inpath '/home/hadoop/hive_data' overwrite into table t1 partition(pt_d = "201905");
Loading data to table mydb2.t1 partition (pt_d=201905)
OK
Time taken: 0.481 seconds
hive> select * from t1;
OK
1	yk	["book","TV","code"]	{"henan":"zhengzhou","guizhou":"guiyang"}	201905
2	xb	["book","code"]	{"henan":"zhengzhou","gansu":"lanzhou"}	201905
1	yk	["book","TV","code"]	{"henan":"zhengzhou","guizhou":"guiyang"}	201906
2	xb	["book","code"]	{"henan":"zhengzhou","gansu":"lanzhou"}	201906
Time taken: 0.138 seconds, Fetched: 4 row(s)

观察hdfs上的文件

[hadoop@slave108 ~]$ hdfs dfs -lsr /user/hive/warehouse/mydb2.db/t1
lsr: DEPRECATED: Please use 'ls -R' instead.
drwxr-xr-x   - hadoop supergroup          0 2019-08-06 22:34 /user/hive/warehouse/mydb2.db/t1/pt_d=201905
-rwxr-xr-x   2 hadoop supergroup         95 2019-08-06 22:34 /user/hive/warehouse/mydb2.db/t1/pt_d=201905/hive_data
drwxr-xr-x   - hadoop supergroup          0 2019-08-06 22:32 /user/hive/warehouse/mydb2.db/t1/pt_d=201906
-rwxr-xr-x   2 hadoop supergroup         95 2019-08-06 22:32 /user/hive/warehouse/mydb2.db/t1/pt_d=201906/hive_data

查看相应分区的数据

hive> select * from t1 where pt_d = "201905";
OK
1	yk	["book","TV","code"]	{"henan":"zhengzhou","guizhou":"guiyang"}	201905
2	xb	["book","code"]	{"henan":"zhengzhou","gansu":"lanzhou"}	201905
Time taken: 0.154 seconds, Fetched: 2 row(s)
hive> 

添加分区

hive> alter table t1 add partition (pt_d = "201904");
OK
Time taken: 0.126 seconds
hive> show partitions t1;
OK
pt_d=201904
pt_d=201905
pt_d=201906
Time taken: 0.102 seconds, Fetched: 3 row(s)
hive>

重命名分区

hive> alter table t1 partition (pt_d = "201904") rename to partition (pt_d = "201804");
OK
Time taken: 0.296 seconds
hive> show partitions t1;
OK
pt_d=201804
pt_d=201905
pt_d=201906
Time taken: 0.098 seconds, Fetched: 3 row(s)
hive>

删除分区

hive> alter table t1 drop if exists partition (pt_d = "201804");
Dropped the partition pt_d=201804
OK
Time taken: 0.176 seconds
hive> show partitions t1;
OK
pt_d=201905
pt_d=201906
Time taken: 0.105 seconds, Fetched: 2 row(s)
hive>

多分区

创建分区表

create table t2(
    id      int
   ,name    string
   ,hobby   array<string>
   ,add     map<string,string>
)
partitioned by (pt_d string,sex string)
row format delimited
fields terminated by ','
collection items terminated by '-'
map keys terminated by ':'
;

装载数据

//准备数据 hive_data
1,yk,book-TV-code,henan:zhengzhou-guizhou:guiyang
2,xb,book-code,henan:zhengzhou-gansu:lanzhou

//装载数据
hive> load data local inpath '/home/hadoop/hive_data' overwrite into table t2 partition(pt_d = "201906",sex = "male");
Loading data to table mydb2.t2 partition (pt_d=201906, sex=male)
OK
Time taken: 0.427 seconds
hive> load data local inpath '/home/hadoop/hive_data' overwrite into table t2 partition(pt_d = "201906",sex = "female");
Loading data to table mydb2.t2 partition (pt_d=201906, sex=female)
OK
Time taken: 0.494 seconds

查看数据与分区

hive> select * from t2;
OK
1	yk	["book","TV","code"]	{"henan":"zhengzhou","guizhou":"guiyang"}	201906	female
2	xb	["book","code"]	{"henan":"zhengzhou","gansu":"lanzhou"}	201906	female
1	yk	["book","TV","code"]	{"henan":"zhengzhou","guizhou":"guiyang"}	201906	male
2	xb	["book","code"]	{"henan":"zhengzhou","gansu":"lanzhou"}	201906	male
Time taken: 0.113 seconds, Fetched: 4 row(s)
hive> show partitions t2;
OK
pt_d=201906/sex=female
pt_d=201906/sex=male
Time taken: 0.099 seconds, Fetched: 2 row(s)
hive>

观察hdfs上的数据

[hadoop@slave108 ~]$ hdfs dfs -lsr /user/hive/warehouse/mydb2.db/t2
lsr: DEPRECATED: Please use 'ls -R' instead.
drwxr-xr-x   - hadoop supergroup          0 2019-08-06 22:45 /user/hive/warehouse/mydb2.db/t2/pt_d=201906
drwxr-xr-x   - hadoop supergroup          0 2019-08-06 22:45 /user/hive/warehouse/mydb2.db/t2/pt_d=201906/sex=female
-rwxr-xr-x   2 hadoop supergroup         95 2019-08-06 22:45 /user/hive/warehouse/mydb2.db/t2/pt_d=201906/sex=female/hive_data
drwxr-xr-x   - hadoop supergroup          0 2019-08-06 22:45 /user/hive/warehouse/mydb2.db/t2/pt_d=201906/sex=male
-rwxr-xr-x   2 hadoop supergroup         95 2019-08-06 22:45 /user/hive/warehouse/mydb2.db/t2/pt_d=201906/sex=male/hive_data

外部表分区操作

创建分区外部表

hive> create external table t3(id int) partitioned by (age int);
OK
Time taken: 0.066 seconds
hive> 

一次操作添加一个分区

hive> alter table t3 add partition (age = 10);
OK
Time taken: 0.099 seconds
hive> show partitions t3;
OK
age=10
Time taken: 0.085 seconds, Fetched: 1 row(s)

一次操作添加两个分区(注意语法格式)

//正确:每个分区单独写一个 partition(...) 子句
hive> alter table t3 add partition (age = 11) partition(age = 12);
OK
Time taken: 0.391 seconds
hive> show partitions t3;
OK
age=10
age=11
age=12
Time taken: 0.095 seconds, Fetched: 3 row(s)


//错误:在同一个 partition(...) 中写多个取值,语句不会报错,但只添加了 age=13,age=14 被忽略
hive> alter table t3 add partition(age=13,age=14);
OK
Time taken: 0.099 seconds
hive> show partitions t3;
OK
age=10
age=11
age=12
age=13
Time taken: 0.094 seconds, Fetched: 4 row(s)
hive>

删除分区

//外部表删除分区时只删除元数据中的分区信息,hdfs上的数据仍然存在
 alter table t3 drop partition(age=13);
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章