hive之練習題一

練習1

1.求單月訪問次數和總訪問次數

表字段:用戶名,月份,訪問次數
現要求出:
每個用戶截止到每月爲止的最大單月訪問次數和累計到該月的總訪問次數,結果數據格式如下
輸出:用戶、月份、最大訪問次數、總訪問次數、當月訪問次數

-- Access log for exercise 1: one row per recorded visit count of a user in a month.
-- External table backed by comma-delimited text files at the LOCATION path below.
-- The statement is terminated explicitly so it can be run as part of a script.
CREATE EXTERNAL TABLE IF NOT EXISTS t_access (
    uname  STRING COMMENT '用戶名',
    umonth STRING COMMENT '月份',
    ucount INT    COMMENT '訪問次數'
)
COMMENT '用戶訪問表'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ","
STORED AS TEXTFILE
LOCATION "/hive/warehouse/taodandan.db/t_access";

-- Sample data for t_access: (uname, umonth, ucount).
-- ucount is declared INT, so the counts are written as integer literals
-- instead of quoted strings that depend on implicit string-to-int coercion.
INSERT INTO t_access VALUES
    ('A', '2015-01', 5),
    ('A', '2015-01', 15),
    ('B', '2015-01', 5),
    ('A', '2015-01', 8),
    ('B', '2015-01', 25),
    ('A', '2015-01', 5),
    ('A', '2015-02', 4),
    ('A', '2015-02', 6),
    ('B', '2015-02', 10),
    ('B', '2015-02', 5),
    ('A', '2015-03', 16),
    ('A', '2015-03', 22),
    ('B', '2015-03', 23),
    ('B', '2015-03', 10),
    ('B', '2015-03', 11);

先按照用戶、月份分組,求出每個用戶每個月的總訪問次數作爲一個臨時表
再對用戶進行分區,對月份進行升序排序

-- Exercise 1: per user and month, report
--   c1 = the largest monthly visit total seen up to and including that month,
--   c2 = the running total of visits up to and including that month,
--   c3 = that month's own visit total.
-- monthly_access first collapses t_access to one row per (uname, umonth).
-- The window frame is spelled out as ROWS ... CURRENT ROW so the running
-- aggregates do not depend on the default frame of an ORDER BY window.
-- umonth is a 'YYYY-MM' string in the sample data, so sorting it as text
-- follows calendar order.
WITH monthly_access AS (
    SELECT
        uname,
        umonth,
        SUM(ucount) AS c3
    FROM t_access
    GROUP BY uname, umonth
)
SELECT
    m.uname,
    m.umonth,
    MAX(m.c3) OVER (
        PARTITION BY m.uname
        ORDER BY m.umonth
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS c1,
    SUM(m.c3) OVER (
        PARTITION BY m.uname
        ORDER BY m.umonth
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS c2,
    m.c3
FROM monthly_access m;

在這裏插入圖片描述

2.學生課程成績

字段:序號、學號、課程、分數
求所有數學課程成績大於語文課程成績的學生的學號
輸出:
學號 數學成績 語文成績


-- Score table for exercise 2: one row per (student, course) result.
--   id     : row sequence number
--   sid    : student number
--   course : course name ('yuwen' / 'shuxue' in the sample data)
--   score  : score obtained in that course
CREATE TABLE IF NOT EXISTS courses_scores (
    id     INT,
    sid    INT,
    course STRING,
    score  INT
);

-- Sample data for courses_scores: (id, sid, course, score).
-- Each of the three students has one 'yuwen' row and one 'shuxue' row.
INSERT INTO courses_scores VALUES
    (1, 1, 'yuwen',  43), (2, 1, 'shuxue', 55),
    (3, 2, 'yuwen',  77), (4, 2, 'shuxue', 88),
    (5, 3, 'yuwen',  98), (6, 3, 'shuxue', 65);

使用case when將不同課程行轉換成列,再根據學號分組,取成績最大值

-- Exercise 2: students whose shuxue (math) score is higher than their yuwen
-- (Chinese) score. Output columns follow the required layout:
-- student number, math score, Chinese score.
-- The CASE expressions pivot course rows into columns; MAX collapses them to
-- one row per student. A missing course yields NULL (no ELSE branch) rather
-- than 0, so a student lacking either score is not compared and is excluded
-- instead of being treated as having scored 0.
SELECT
    s.sid,
    s.shuxue,
    s.yuwen
FROM (
    SELECT
        sid,
        MAX(CASE WHEN course = 'shuxue' THEN score END) AS shuxue,
        MAX(CASE WHEN course = 'yuwen'  THEN score END) AS yuwen
    FROM courses_scores
    GROUP BY sid
) s
WHERE s.shuxue > s.yuwen;

在這裏插入圖片描述

3.求每一年最大氣溫的那一天 + 溫度

數據格式
2010012325
表示2010年1月23日的氣溫是25度

-- Readings table for exercise 3. timetemp packs the date and the temperature
-- into one string: YYYYMMDD followed by the temperature, e.g. '2010012325'
-- means 25 degrees on 2010-01-23.
-- The terminating semicolon keeps this statement separate from whatever
-- statement follows it in a script.
CREATE TABLE IF NOT EXISTS temperature (
    timetemp STRING
);
-- Sample readings for the temperature table, five per year, one year per line
-- (2014, 2012, 2001, 2013, 2008, 2007, 2010, 2015 in that order).
INSERT INTO temperature VALUES
    ('2014010114'), ('2014010216'), ('2014010317'), ('2014010410'), ('2014010506'),
    ('2012010609'), ('2012010732'), ('2012010812'), ('2012010919'), ('2012011023'),
    ('2001010116'), ('2001010212'), ('2001010310'), ('2001010411'), ('2001010529'),
    ('2013010619'), ('2013010722'), ('2013010812'), ('2013010929'), ('2013011023'),
    ('2008010105'), ('2008010216'), ('2008010337'), ('2008010414'), ('2008010516'),
    ('2007010619'), ('2007010712'), ('2007010812'), ('2007010999'), ('2007011023'),
    ('2010010114'), ('2010010216'), ('2010010317'), ('2010010410'), ('2010010506'),
    ('2015010649'), ('2015010722'), ('2015010812'), ('2015010999'), ('2015011023');

利用substr(字符串,起始位置,長度)函數分割成幾列
然後按照年分類,找出每一年的最高溫度

-- Exercise 3: for every year, the day(s) with the highest temperature.
-- readings   : splits timetemp into year / month / day / temperature strings.
-- yearly_max : highest temperature per year, compared as a number.
-- The temperature part is cast to INT before MAX and before the join
-- comparison: substr returns a string, and string comparison orders values
-- character by character (e.g. '9' sorts above '10'), which gives a wrong
-- maximum as soon as readings differ in digit count.
-- The temperature column in the output is still the original text slice.
-- Days that tie for a year's maximum are all returned.
WITH readings AS (
    SELECT
        substr(timetemp, 1, 4) AS year,
        substr(timetemp, 5, 2) AS month,
        substr(timetemp, 7, 2) AS day,
        substr(timetemp, 9)    AS temperature
    FROM temperature
),
yearly_max AS (
    SELECT
        r.year,
        MAX(CAST(r.temperature AS INT)) AS t1
    FROM readings r
    GROUP BY r.year
)
SELECT
    d.year,
    d.month,
    d.day,
    d.temperature
FROM yearly_max y
INNER JOIN readings d
    ON y.year = d.year
   AND y.t1 = CAST(d.temperature AS INT);

在這裏插入圖片描述

4.求學生選課情況

字段:學生id,課程名
實現以下結果:表中的1表示選修,表中的0表示未選修
id a b c d e f
1 1 1 1 0 1 0
2 1 0 1 1 0 1
3 1 1 1 0 1 0

-- Enrolment table for exercise 4: one row per (student id, course name) pair.
CREATE TABLE IF NOT EXISTS sid_course (
    id     INT,
    course STRING
);
-- Sample enrolments for sid_course: (id, course), one student per line.
INSERT INTO sid_course VALUES
    (1, 'a'), (1, 'b'), (1, 'c'), (1, 'e'),
    (2, 'a'), (2, 'c'), (2, 'd'), (2, 'f'),
    (3, 'a'), (3, 'c'), (3, 'b'), (3, 'e');

collect_list()、collect_set() 將分組中的某列轉爲一個數組返回,不同的是collect_list不去重而collect_set去重;注意兩者返回數組的元素順序都不保證固定,不應依賴下標位置對應某個具體的值
Hive中在group by查詢的時候要求出現在select後面的列都必須是出現在group by後面的,可以利用collect來突破group by的限制

-- Exercise 4: one row per student with a 1/0 flag for each of the courses
-- a..f (1 = enrolled, 0 = not enrolled).
-- Each flag compares course against the course name literal for its column.
-- The earlier form indexed into collect_set(course), whose element order is
-- not guaranteed, so column "a" could end up flagging a different course; it
-- also needed a join with no ON condition (a cartesian product).
SELECT
    id,
    MAX(CASE WHEN course = 'a' THEN 1 ELSE 0 END) AS a,
    MAX(CASE WHEN course = 'b' THEN 1 ELSE 0 END) AS b,
    MAX(CASE WHEN course = 'c' THEN 1 ELSE 0 END) AS c,
    MAX(CASE WHEN course = 'd' THEN 1 ELSE 0 END) AS d,
    MAX(CASE WHEN course = 'e' THEN 1 ELSE 0 END) AS e,
    MAX(CASE WHEN course = 'f' THEN 1 ELSE 0 END) AS f
FROM sid_course
GROUP BY id;

在這裏插入圖片描述

5、求月銷售額和總銷售額

字段:店鋪,月份,金額
求出每個店鋪的當月銷售額和累計到當月的總銷售額

-- Sales table for exercise 5: one row per sale amount of a store in a month.
--   name   : store name
--   months : month number
--   money  : sale amount
-- IF NOT EXISTS matches the other CREATE statements in this file and lets the
-- script be re-run; the semicolon terminates the statement.
CREATE TABLE IF NOT EXISTS t_store (
    name   STRING,
    months INT,
    money  INT
);

-- Sample data for t_store: (name, months, money).
-- Month numbers are written as plain integers (1, 2, 3) to match the INT
-- column, and the statement is terminated so it does not run into the query
-- that follows it.
INSERT INTO t_store VALUES
    ('a', 1, 150),
    ('a', 1, 200),
    ('b', 1, 1000),
    ('b', 1, 800),
    ('c', 1, 250),
    ('c', 1, 220),
    ('b', 1, 6000),
    ('a', 2, 2000),
    ('a', 2, 3000),
    ('b', 2, 1000),
    ('b', 2, 1500),
    ('c', 2, 350),
    ('c', 2, 280),
    ('a', 3, 350),
    ('a', 3, 250);
-- Exercise 5: per store and month, report
--   c1 = that month's sales total,
--   c2 = the running sales total up to and including that month.
-- The inner query collapses t_store to one row per (name, months); the window
-- then accumulates c1 month by month within each store. The running total is
-- given an explicit alias (c2) and an explicit ROWS frame.
SELECT
    s.name,
    s.months,
    s.c1,
    SUM(s.c1) OVER (
        PARTITION BY s.name
        ORDER BY s.months
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS c2
FROM (
    SELECT
        name,
        months,
        SUM(money) AS c1
    FROM t_store
    GROUP BY name, months
) s;

在這裏插入圖片描述

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章