文章目录
基本操作
建表
show databases;
show tables;
create database mydb;
create table if not exists
mydb.employee(
eid int,
name String,
salary String,
destination String)
COMMENT 'Employee details'
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n' STORED AS TEXTFILE;
描述表,查看schema
DESCRIBE mydb.employee;
从文件导入数据
load data local inpath '/root/text.csv' into table employee;
hive 中执行 shell 命令
!clear; //hive中执行shell命令, 清空屏幕
hive 中执行 hdfs 命令
dfs -ls -R /; //hive中执行hdfs命令
拷贝表
create table mydb.employee2 as select * from myhive.employee; // 复制表
内部表和外部表
内部表
托管表(内部表),hive默认创建的表都是托管表,hive控制其数据的生命周期。删除托管表时,元数据和数据均被删除。
create table if not exists
mydb.employee_inner(
eid int,
name String,
salary String,
destination String)
COMMENT 'Employee details'
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
drop table mydb.employee_inner;
外部表
hive 控制元数据,删除外部表时,数据不被删除
create external table if not exists
myhive.employee_outer(
eid int,
name String,
salary String,
destination String)
COMMENT 'Employee details'
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
分区表
建表
partitioned by
create table if not exists
test2(
eid int,
name String,
salary String,
destination String)
partitioned by(country string,state string)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
描述分区表
载入数据到指定分区
load data local inpath '/root/text.csv'
into table mydb.test2
partition(country='china',state='shanxi');
查询数据
select * from mydb.test2 where country='china' and state='shanxi';
查看 hdfs
分区表的查询模式: strict/nostrict
默认是非严格,严格模式要求查询的时候必须带分区
set hive.mapred.mode=strict
查看有哪些分区
show partitions mydb.test2;
show partitions mydb.test2 partition(state='shanxi');
增加分区
注意不能增加不存在的分区列
alter table mydb.test2 add partition(country='china',state='henan');
alter table mydb.test2 add
partition(country='china',state='hubei')
partition(country='china',state='hebei')
partition(country='china',state='shandong');
修改表名
alter table mydb.test2 rename to mydb.test;
移动分区的存储位置
alter table mydb.test partition(country='china',state='hubei')
set location '/user/hive/warehouse/myhive.db/test/country=china/state=hubei1';
验证一下:
load data local inpath '/root/text.csv'
into table mydb.test
partition(country='china',state='shanxi');
dfs -ls /user/hive/warehouse/mydb.db/test/country=china/state=hubei;
dfs -ls /user/hive/warehouse/myhive.db/test/country=china;
复制表
create table if not exists
mydb.test3(
eid int,
name String,
salary String,
destination String)
partitioned by(country string,state string)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
insert into mydb.test3
partition(country='china',state='hubei')
select eid,name,salary,destination
from mydb.test
where country='china' and state='henan';