hive之练习题一

练习1

1.求单月访问次数和总访问次数

表字段:用户名,月份,访问次数
现要求出:
每个用户截止到每月为止的最大单月访问次数和累计到该月的总访问次数,结果数据格式如下
输出:用户、月份、最大访问次数、总访问次数、当月访问次数

-- Exercise 1 source table: visit counts per user and month.
-- EXTERNAL table backed by comma-delimited text files at the given LOCATION.
CREATE EXTERNAL TABLE IF NOT EXISTS t_access (
    uname  STRING COMMENT '用户名',
    umonth STRING COMMENT '月份',
    ucount INT    COMMENT '访问次数'
)
COMMENT '用户访问表'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ","
STORED AS TEXTFILE
LOCATION "/hive/warehouse/taodandan.db/t_access";

-- Sample rows for t_access: (uname, umonth, ucount).
-- ucount is declared INT, so the counts are written as integer literals
-- rather than quoted strings that rely on implicit conversion.
INSERT INTO t_access VALUES
    ('A', '2015-01', 5),
    ('A', '2015-01', 15),
    ('B', '2015-01', 5),
    ('A', '2015-01', 8),
    ('B', '2015-01', 25),
    ('A', '2015-01', 5),
    ('A', '2015-02', 4),
    ('A', '2015-02', 6),
    ('B', '2015-02', 10),
    ('B', '2015-02', 5),
    ('A', '2015-03', 16),
    ('A', '2015-03', 22),
    ('B', '2015-03', 23),
    ('B', '2015-03', 10),
    ('B', '2015-03', 11);

先按照用户、月份分组,求出每个用户每个月的总访问次数,作为一个临时表;
再用窗口函数按用户分区、按月份升序排序,求出截止到当月的最大单月访问次数和累计访问次数

-- Per user and month:
--   c1 = largest monthly total seen up to and including this month
--   c2 = sum of the monthly totals up to and including this month
--   c3 = this month's own total
WITH monthly_visits AS (
    -- collapse the raw rows to one total per (user, month)
    SELECT
        uname,
        umonth,
        SUM(ucount) AS c3
    FROM t_access
    GROUP BY uname, umonth
)
SELECT
    mv.uname,
    mv.umonth,
    MAX(mv.c3) OVER (PARTITION BY mv.uname ORDER BY mv.umonth) AS c1,
    SUM(mv.c3) OVER (PARTITION BY mv.uname ORDER BY mv.umonth) AS c2,
    mv.c3
FROM monthly_visits AS mv;

在这里插入图片描述

2.学生课程成绩

字段:序号、学号、课程、分数
求出数学成绩大于语文成绩的所有学生的学号
输出:
学号 数学成绩 语文成绩


-- Exercise 2 source table: one score per (student, course) row.
CREATE TABLE IF NOT EXISTS courses_scores (
    id     INT,
    sid    INT,
    course STRING,
    score  INT
);

-- Sample rows for courses_scores: (id, sid, course, score).
INSERT INTO courses_scores
VALUES
    (1, 1, 'yuwen',  43),
    (2, 1, 'shuxue', 55),
    (3, 2, 'yuwen',  77),
    (4, 2, 'shuxue', 88),
    (5, 3, 'yuwen',  98),
    (6, 3, 'shuxue', 65);

使用case when将不同课程行转换成列,再根据学号分组,取成绩最大值

-- Students whose shuxue (maths) score is higher than their yuwen (Chinese) score.
-- Output columns follow the stated format: sid, shuxue, yuwen.
SELECT
    pivoted.sid,
    pivoted.shuxue,
    pivoted.yuwen
FROM (
    -- Pivot the course rows into one row per student.
    -- CASE without ELSE yields NULL for the other course and MAX skips NULLs,
    -- so a student lacking one of the two courses keeps NULL there instead of
    -- being compared against a made-up score of 0.
    SELECT
        sid,
        MAX(CASE WHEN course = 'shuxue' THEN score END) AS shuxue,
        MAX(CASE WHEN course = 'yuwen' THEN score END) AS yuwen
    FROM courses_scores
    GROUP BY sid
) pivoted
-- a NULL on either side makes the comparison non-true, which drops the row
WHERE pivoted.shuxue > pivoted.yuwen;

在这里插入图片描述

3.求每一年气温最高的那一天及其温度

数据格式
2010012325
表示2010年1月23日的气温是25度

-- Exercise 3 source table: each row is one packed reading,
-- e.g. '2010012325' = year 2010, month 01, day 23, temperature 25.
CREATE TABLE IF NOT EXISTS temperature (
    timetemp STRING
);
-- Sample readings for temperature, five per year, in the packed
-- year/month/day/temperature string format.
INSERT INTO temperature
VALUES
    -- 2014
    ('2014010114'), ('2014010216'), ('2014010317'), ('2014010410'), ('2014010506'),
    -- 2012
    ('2012010609'), ('2012010732'), ('2012010812'), ('2012010919'), ('2012011023'),
    -- 2001
    ('2001010116'), ('2001010212'), ('2001010310'), ('2001010411'), ('2001010529'),
    -- 2013
    ('2013010619'), ('2013010722'), ('2013010812'), ('2013010929'), ('2013011023'),
    -- 2008
    ('2008010105'), ('2008010216'), ('2008010337'), ('2008010414'), ('2008010516'),
    -- 2007
    ('2007010619'), ('2007010712'), ('2007010812'), ('2007010999'), ('2007011023'),
    -- 2010
    ('2010010114'), ('2010010216'), ('2010010317'), ('2010010410'), ('2010010506'),
    -- 2015
    ('2015010649'), ('2015010722'), ('2015010812'), ('2015010999'), ('2015011023');

利用substr(字符串,起始位置,长度)函数分割成几列
然后按照年分类,找出每一年的最高温度

-- For every year, the day(s) with the highest temperature and that temperature.
-- timetemp layout: chars 1-4 year, 5-6 month, 7-8 day, 9 onwards temperature.
SELECT
    ranked.year,
    ranked.month,
    ranked.day,
    ranked.temperature
FROM (
    SELECT
        parsed.year,
        parsed.month,
        parsed.day,
        parsed.temperature,
        -- Order by the numeric value: the substring is a STRING, and comparing
        -- strings is lexicographic (e.g. '9' would sort above '10').
        -- RANK keeps every day that ties for the yearly maximum.
        RANK() OVER (
            PARTITION BY parsed.year
            ORDER BY CAST(parsed.temperature AS INT) DESC
        ) AS temp_rank
    FROM (
        SELECT
            substr(timetemp, 1, 4) AS year,
            substr(timetemp, 5, 2) AS month,
            substr(timetemp, 7, 2) AS day,
            substr(timetemp, 9)    AS temperature
        FROM temperature
    ) parsed
) ranked
WHERE ranked.temp_rank = 1;

在这里插入图片描述

4.求学生选课情况

字段:学生id,课程名
实现以下结果:表中的1表示选修,表中的0表示未选修
id a b c d e f
1 1 1 1 0 1 0
2 1 0 1 1 0 1
3 1 1 1 0 1 0

-- Exercise 4 source table: one row per (student id, selected course).
CREATE TABLE IF NOT EXISTS sid_course (
    id     INT,
    course STRING
);
-- Sample course selections for sid_course: (id, course).
INSERT INTO sid_course
VALUES
    (1, 'a'), (1, 'b'), (1, 'c'), (1, 'e'),
    (2, 'a'), (2, 'c'), (2, 'd'), (2, 'f'),
    (3, 'a'), (3, 'c'), (3, 'b'), (3, 'e');

collect_list()、collect_set() 将分组中的某列转为一个数组返回,不同的是 collect_list 不去重而 collect_set 去重;两者返回的数组元素顺序都不保证固定
Hive 在 group by 查询时,要求 select 后面出现的非聚合列都必须出现在 group by 后面;可以利用 collect 把其余列聚合成数组,来突破 group by 的这一限制

-- One row per student with a 1/0 flag per course: 1 = selected, 0 = not selected.
-- Output columns a..f correspond to the distinct course names in ascending order.
SELECT
    flags.id,
    MAX(flags.a) AS a,
    MAX(flags.b) AS b,
    MAX(flags.c) AS c,
    MAX(flags.d) AS d,
    MAX(flags.e) AS e,
    MAX(flags.f) AS f
FROM (
    SELECT
        sc.id,
        CASE WHEN sc.course = all_courses.course_names[0] THEN 1 ELSE 0 END AS a,
        CASE WHEN sc.course = all_courses.course_names[1] THEN 1 ELSE 0 END AS b,
        CASE WHEN sc.course = all_courses.course_names[2] THEN 1 ELSE 0 END AS c,
        CASE WHEN sc.course = all_courses.course_names[3] THEN 1 ELSE 0 END AS d,
        CASE WHEN sc.course = all_courses.course_names[4] THEN 1 ELSE 0 END AS e,
        CASE WHEN sc.course = all_courses.course_names[5] THEN 1 ELSE 0 END AS f
    FROM sid_course sc
    -- single-row subquery, so the cross join just attaches the course list to every row
    CROSS JOIN (
        -- sort_array pins the element order; collect_set alone does not promise one,
        -- so the position-to-column mapping could otherwise shift between runs
        SELECT sort_array(collect_set(course)) AS course_names
        FROM sid_course
    ) all_courses
) flags
GROUP BY flags.id;

在这里插入图片描述

5、求月销售额和总销售额

字段:店铺,月份,金额
求出每个店铺的当月销售额和累计到当月的总销售额

-- Exercise 5 source table: one row per sale amount of a shop in a month.
CREATE TABLE t_store (
    name   STRING,
    months INT,
    money  INT
);

-- Sample rows for t_store: (name, months, money).
-- months is an INT column, so the month numbers are written without the
-- leading zero; the statement is terminated so the following query is not
-- parsed as part of this INSERT.
INSERT INTO t_store VALUES
    ('a', 1, 150),
    ('a', 1, 200),
    ('b', 1, 1000),
    ('b', 1, 800),
    ('c', 1, 250),
    ('c', 1, 220),
    ('b', 1, 6000),
    ('a', 2, 2000),
    ('a', 2, 3000),
    ('b', 2, 1000),
    ('b', 2, 1500),
    ('c', 2, 350),
    ('c', 2, 280),
    ('a', 3, 350),
    ('a', 3, 250);
-- Per shop and month: the month's sales (c1) and the cumulative sales up to
-- and including that month (running_total).
SELECT
    monthly.name,
    monthly.months,
    monthly.c1,
    -- named explicitly so the column does not get an engine-generated name
    SUM(monthly.c1) OVER (
        PARTITION BY monthly.name
        ORDER BY monthly.months
    ) AS running_total
FROM (
    -- one total per (shop, month)
    SELECT
        name,
        months,
        SUM(money) AS c1
    FROM t_store
    GROUP BY name, months
) monthly;

在这里插入图片描述

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章