先準備一個基礎表student
-- Show every row of the base table that the partition loads read from.
select *
from student;
id name age sex
-----------------------------
1 zhangsan 19 boy
2 lisi 18 girl
3 wangwu 17 boy
4 lucy 20 girl
6 lilei 20 boy
靜態分區
-- Create the table used for the static-partition example.
-- id and name are regular columns; sex and age are partition columns,
-- so their values are supplied by the partition spec rather than the data rows.
create table student_p_1 (
    id   int,
    name string
)
partitioned by (sex string, age int)
row format delimited fields terminated by ',';
-- Load the static-partition table.
-- Every statement names its target partition explicitly and filters the
-- source table down to the rows that belong to that partition.

-- Partitions for sex = 'boy'
insert overwrite table student_p_1
partition (sex = 'boy', age = 17)
select
    id,
    name
from student
where sex = 'boy'
  and age = 17;

insert overwrite table student_p_1
partition (sex = 'boy', age = 18)
select
    id,
    name
from student
where sex = 'boy'
  and age = 18;

insert overwrite table student_p_1
partition (sex = 'boy', age = 19)
select
    id,
    name
from student
where sex = 'boy'
  and age = 19;

insert overwrite table student_p_1
partition (sex = 'boy', age = 20)
select
    id,
    name
from student
where sex = 'boy'
  and age = 20;

-- Partitions for sex = 'girl'
insert overwrite table student_p_1
partition (sex = 'girl', age = 17)
select
    id,
    name
from student
where sex = 'girl'
  and age = 17;

insert overwrite table student_p_1
partition (sex = 'girl', age = 18)
select
    id,
    name
from student
where sex = 'girl'
  and age = 18;

insert overwrite table student_p_1
partition (sex = 'girl', age = 19)
select
    id,
    name
from student
where sex = 'girl'
  and age = 19;

insert overwrite table student_p_1
partition (sex = 'girl', age = 20)
select
    id,
    name
from student
where sex = 'girl'
  and age = 20;
#再看一下hdfs中的文件
-> hadoop dfs -lsr /user/hive/warehouse/student_p_1
/user/hive/warehouse/student_p_1/sex=boy
/user/hive/warehouse/student_p_1/sex=boy/age=17
/user/hive/warehouse/student_p_1/sex=boy/age=17/000000_0
/user/hive/warehouse/student_p_1/sex=boy/age=18
/user/hive/warehouse/student_p_1/sex=boy/age=18/000000_0
/user/hive/warehouse/student_p_1/sex=boy/age=19
/user/hive/warehouse/student_p_1/sex=boy/age=19/000000_0
/user/hive/warehouse/student_p_1/sex=boy/age=20
/user/hive/warehouse/student_p_1/sex=boy/age=20/000000_0
/user/hive/warehouse/student_p_1/sex=girl
/user/hive/warehouse/student_p_1/sex=girl/age=17
/user/hive/warehouse/student_p_1/sex=girl/age=17/000000_0
/user/hive/warehouse/student_p_1/sex=girl/age=18
/user/hive/warehouse/student_p_1/sex=girl/age=18/000000_0
/user/hive/warehouse/student_p_1/sex=girl/age=19
/user/hive/warehouse/student_p_1/sex=girl/age=19/000000_0
/user/hive/warehouse/student_p_1/sex=girl/age=20
/user/hive/warehouse/student_p_1/sex=girl/age=20/000000_0
#查看student_p_1的分區
hive > show partitions student_p_1;
sex=boy/age=17
sex=boy/age=18
sex=boy/age=19
sex=boy/age=20
sex=girl/age=17
sex=girl/age=18
sex=girl/age=19
sex=girl/age=20
可以看到,靜態分區表每次導入數據的時候,都要手動爲分區字段賦值。
動態分區
-- Create student_p_2, the table used for the dynamic-partition example.
-- The DDL is the same as for a statically partitioned table: id and name are
-- regular columns, sex and age are partition columns.
create table student_p_2 (
    id   int,
    name string
)
partitioned by (sex string, age int)
row format delimited fields terminated by ',';
-- Session settings for dynamic partitioning.
-- Enable dynamic partitioning (true = on, false = off; on by default since Hive 0.9.0).
set hive.exec.dynamic.partition=true;
-- Partition mode: nonstrict allows every partition column to be dynamic;
-- strict requires at least one static partition column.
-- The value must be spelled exactly "nonstrict".
set hive.exec.dynamic.partition.mode=nonstrict;
-- Load student_p_2 with dynamic partitioning: no partition values are given in
-- the partition spec, so the trailing select columns (sex, age) supply them.
insert overwrite table student_p_2
partition (sex, age)
select
    id,
    name,
    sex,
    age
from student;
-- Inspect the rows that the dynamic-partition insert wrote to student_p_2.
-- Columns are listed explicitly: regular columns first, then the partition columns.
select
    id,
    name,
    sex,
    age
from student_p_2;
3 wangwu boy 17
1 zhangsan boy 19
6 lilei boy 20
2 lisi girl 18
4 lucy girl 20
查看student_p_2分區
hive > show partitions student_p_2;
OK
sex=boy/age=17
sex=boy/age=19
sex=boy/age=20
sex=girl/age=18
sex=girl/age=20
#hdfs 中的目錄
./hadoop dfs -lsr /user/hive/warehouse/student_p_2
/user/hive/warehouse/student_p_2/sex=boy
/user/hive/warehouse/student_p_2/sex=boy/age=17
/user/hive/warehouse/student_p_2/sex=boy/age=17/000000_0
/user/hive/warehouse/student_p_2/sex=boy/age=19
/user/hive/warehouse/student_p_2/sex=boy/age=19/000000_0
/user/hive/warehouse/student_p_2/sex=boy/age=20
/user/hive/warehouse/student_p_2/sex=boy/age=20/000000_0
/user/hive/warehouse/student_p_2/sex=girl
/user/hive/warehouse/student_p_2/sex=girl/age=18
/user/hive/warehouse/student_p_2/sex=girl/age=18/000000_0
/user/hive/warehouse/student_p_2/sex=girl/age=20
/user/hive/warehouse/student_p_2/sex=girl/age=20/000000_0
動態分區在導入數據的時候不用指定分區字段,hive會自動映射到分區
優缺點
動態分區會自動映射,性能可能會差點。
靜態分區需要手動指定,性能會好點。
動態分區其他參數
- hive.exec.max.dynamic.partitions.pernode=100 (默認100)表示每個mapper或reducer可以允許創建的最大動態分區個數,超出則會報錯。
- hive.exec.max.dynamic.partitions =1000(默認值) 表示一個動態分區語句可以創建的最大動態分區個數,超出報錯
- hive.exec.max.created.files =100000(默認) 全局可以創建的最大文件個數,超出報錯。