建表
-- Text-file table with two string columns; fields are tab-separated and
-- each row ends with a newline.
CREATE TABLE mydb.userinfo (
    name     STRING,
    addressi STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
創建分區表
-- Partitioned text-file table; fields are comma-separated.
-- NOTE(review): this reuses the name mydb.userinfo from the non-partitioned
-- example in this file; only one of the two can exist at a time — confirm
-- which one is intended or rename one of them.
CREATE TABLE mydb.userinfo (
    col1 STRING,
    col2 DATE,
    col3 DOUBLE
)
-- Several columns may be listed here to form a composite partition key.
PARTITIONED BY (datekey DATE)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
STORED AS TEXTFILE;
數據導入到表mydb.userinfo中
-- Load a file from the client's local filesystem into mydb.userinfo.
-- OVERWRITE replaces whatever the table currently holds.
LOAD DATA LOCAL INPATH "/home/dahaizi/data/userinfo.txt"
    OVERWRITE INTO TABLE mydb.userinfo;
向表中插入數據
-- Insert one literal row into the listed columns.
-- A target table name is required after INSERT INTO TABLE; table_name is a
-- placeholder for the real table.
INSERT INTO TABLE table_name (col1, col2, col3)
VALUES ('a', 'b', 'c');
將查詢的數據插入到已有的表中
-- Insert query results into an existing partitioned table.
-- PARTITION (DateKey) names the partition column without a value, so the
-- partition of each row is taken from the last SELECT column (dynamic
-- partitioning; it must be enabled in the session).
INSERT INTO TABLE table_Name
PARTITION (DateKey)
SELECT
    col1,
    col2,
    col3,
    DateKey
FROM otherTable
WHERE DateKey IN ('2017-02-26', '2013-06-12', '2013-09-24')
GROUP BY col1, col2, col3, DateKey
-- Route all rows of the same DateKey to the same reducer.
DISTRIBUTE BY DateKey;
將查詢的數據存儲到hdfs目錄中
-- Write every row of zqs_gs_g60_0730_list into the target directory as
-- comma-separated text, overwriting the directory's existing contents.
INSERT OVERWRITE DIRECTORY '/jc_bdcqs/qsy'
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
SELECT *
FROM zqs_gs_g60_0730_list;
-- Exit the interactive client. NOTE(review): the "!" prefix looks like a
-- Beeline client command rather than HiveQL — confirm which shell is used.
!quit
HQL查詢常用設置項
1)設置計算容錯率(防止因計算過程出錯而異常退出程序):set mapred.max.map.failures.percent=100;
2)限制查詢輸出文件的個數:set mapred.reduce.tasks=1;
3)控制最大reduce的數量,不會影響mapred.reduce.tasks的設置:set hive.exec.reducers.max = 100;
4)每個reducer處理的數據量(決定一個job會有多少個reducer來處理),默認爲1G:set hive.exec.reducers.bytes.per.reducer = 1000000000;
設置動態分區
-- Enable dynamic partitioning.
-- (Running `set hive.exec.dynamic.partition;` without a value prints the
-- current setting.)
SET hive.exec.dynamic.partition=true;
-- nonstrict mode lets every partition column be determined dynamically.
SET hive.exec.dynamic.partition.mode=nonstrict;
-- Upper bound on dynamically created partitions; a statement that would
-- create more than this fails with an error.
SET hive.exec.max.dynamic.partitions=100000;
-- Same kind of limit, applied per node.
SET hive.exec.max.dynamic.partitions.pernode=100000;
刪除表
-- Drop the table tb_name.
DROP TABLE tb_name;
或清空表:truncate table table_name;
刪除分區
-- Remove the single partition whose Datekey equals '20190606'.
ALTER TABLE table_Name
    DROP PARTITION (Datekey = '20190606');
新增分區
-- Add a new partition for Datekey '20190606'.
-- The partition value must be wrapped in plain ASCII single quotes;
-- typographic quotes are not valid string delimiters.
ALTER TABLE tb_name
    ADD PARTITION (Datekey = '20190606');