Hive查詢實例

數據查詢:
SELECT [ALL | DISTINCT] select_expr, select_expr, ...
  FROM table_reference
  [WHERE where_condition]
  [GROUP BY col_list]
  [ORDER BY col_list]
  [CLUSTER BY col_list
    | [DISTRIBUTE BY col_list] [SORT BY col_list]
  ]
 [LIMIT [offset,] rows]

全表查詢:
select * from emp;

指定字段查詢:
select t.empno,t.ename,t.job from emp t;

限制查詢:
select * from emp limit 5;

= >= <= between and

select t.empno,t.ename,t.job from emp t where t.sal between 800 and 1500;

is null/is not null /in /not in
select t.empno,t.ename,t.job from emp t where t.comm is null;

max/min/count/sum/avg
select count(*) cnt from emp;
select max(sal) max_sal from emp;
select sum(sal) sum_sal from emp;
select avg(sal) avg_sal from emp;


group by /having

每個部門的平均工資
select t.deptno,avg(sal) avg_sal from emp t group by t.deptno;

每個部門中每個崗位的最高薪水
select t.deptno,t.job,max(t.sal) max_sal from emp t group by t.deptno,t.job;

having

where是針對單條記錄進行篩選過濾的
having是對分組結果進行篩選過濾的

求平均薪水大於2000的部門及其平均薪水
select t.deptno,avg(t.sal) avg_sal from emp t group by t.deptno having avg_sal > 2000;

join
兩個表進行連接,m表中的記錄和n表中的記錄組成一條記錄

等值join:join ... on ...
select e.empno,e.ename,e.deptno from emp e join dept d on e.deptno = d.deptno ;

右連接:right join(以右表爲準)
select e.empno,e.ename,e.deptno from emp e right join dept d on e.deptno = d.deptno ;



左連接:left join(以左表爲準)
select e.empno,e.ename,e.deptno from emp e left join dept d on e.deptno = d.deptno ;

全連接
select e.empno,e.ename,e.deptno from emp e full join dept d on e.deptno = d.deptno ;

order by:對全局數據進行排序,只使用一個reduce
select * from emp order by empno desc;

sort by:對每一個reduce內部數據進行排序,對全局的結果集來說沒有排序
設置reduce個數:
set mapreduce.job.reduces= 3;
查看設置:
set mapreduce.job.reduces;
insert overwrite local directory '/home/wql/app/hData/sortby-res' select * from emp sort by empno asc;

distribute by:類似於MapReduce中分區partition,對數據進行分區,結合sort by進行使用
insert overwrite local directory '/home/wql/app/hData/distby-res' select * from emp distribute by deptno sort by empno asc;

cluster by:當distribute by和sort by字段相同時,就可以使用cluster by
insert overwrite local directory '/home/wql/app/hData/clusby-res' select * from emp cluster by empno;


查看所有函數:
show functions;
desc function split;
desc function extended  split;

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章