問題一:
(1) 需求
找出所有科目成績都大於某一學科平均成績的學生。
(2) 建表
-- Rebuild the score table from scratch: one row per (student, subject) score.
drop table if exists score;

create table score (
    uid        string,
    subject_id string,
    score      int
)
row format delimited
    fields terminated by '\t';
(3) 數據
1001 01 90
1001 02 90
1001 03 90
1002 01 85
1002 02 85
1002 03 70
1003 01 70
1003 02 70
1003 03 85
(4) 加載數據
-- Replace the contents of score with the local sample file.
load data local inpath '/opt/hivesqltopic/data/data1.txt'
overwrite into table score;
(5) 查看表數據
-- Sanity check: show every row that was just loaded.
select
    *
from
    score;
(6) 寫 hql
A、思路
a、求出每個學科的平均成績
-- Step a: attach the per-subject average score to every score row.
-- The result set is referred to as t1 by the next step.
select
    uid,
    score,
    avg(score) over (partition by subject_id) as avg_score
from
    score;
b、根據成績是否大於平均成績打上標記 flag,大於記爲 0,否則記爲 1
-- Step b: flag each row of t1 (the result of step a):
-- 0 when the score is above the subject average, 1 otherwise.
-- The result set is referred to as t2 by the next step.
select
    uid,
    if(score > avg_score, 0, 1) as flag
from
    t1;
c、根據學生 id 進行分組統計 flag 的和,和爲 0 則是所有學科成績都大於平均成績
-- Step c: keep the students whose flags in t2 (the result of step b) add up to 0,
-- i.e. students that have no row flagged 1.
select uid
from t2
group by uid
having sum(flag) = 0;
B、最終 hql
-- Students whose score is above the subject average in every subject they have a row for.
with scored as (
    -- every score row together with the average score of its subject
    select
        uid,
        score,
        avg(score) over (partition by subject_id) as avg_score
    from
        score
),
flagged as (
    -- 0 = above the subject average, 1 = not above
    select
        uid,
        if(score > avg_score, 0, 1) as flag
    from
        scored
)
select
    uid
from
    flagged
group by
    uid
having
    sum(flag) = 0;
問題二:
(1) 需求
統計每個用戶每個月的訪問次數,以及截至當月的累計訪問次數。
(2) 建表
-- Rebuild the action table from scratch: one row per (user, visit date) with a visit count.
drop table if exists action;

create table action (
    userId     string,
    visitDate  string,
    visitCount int
)
row format delimited
    fields terminated by "\t";
(3) 數據
u01 2017/1/21 5
u02 2017/1/23 6
u03 2017/1/22 8
u04 2017/1/20 3
u01 2017/1/23 6
u01 2017/2/21 8
u02 2017/1/23 6
u01 2017/2/22 4
(4) 加載數據
-- Replace the contents of action with the local sample file.
load data local inpath '/opt/hivesqltopic/data/data2.txt'
overwrite into table action;
(5) 查看表數據
-- Sanity check: show every row that was just loaded.
select
    *
from
    action;
(6) 寫 hql
A、思路
a、修改數據格式
-- Step a: turn visitDate (slash-separated, e.g. 2017/1/21) into a yyyy-MM month key.
-- The result set is referred to as t1 by the next step.
select
    userId,
    date_format(regexp_replace(visitDate, '/', '-'), 'yyyy-MM') as mn,
    visitCount
from
    action;
b、計算每人單月訪問量
-- Step b: total visits per user and month, computed over t1 (the result of step a).
-- The result set is referred to as t2 by the next step.
select
    userId,
    mn,
    sum(visitCount) as mn_count
from
    t1
group by
    userId,
    mn;
c、按月累計訪問量
-- Step c: running total of the monthly counts per user, computed over t2
-- (the result of step b). The window column is named explicitly so that it
-- does not end up with an engine-generated name.
select
    userId,
    mn,
    mn_count,
    sum(mn_count) over (partition by userId order by mn) as sum_count
from
    t2;
B、最終 hql
-- Per user and month: visits in that month and the running total up to that month.
with monthly_rows as (
    -- visit rows keyed by a yyyy-MM month string
    select
        userid,
        date_format(regexp_replace(visitDate,'/','-'),'yyyy-MM') as mn,
        visitCount
    from
        action
),
monthly_totals as (
    -- one row per (user, month)
    select
        userid,
        mn,
        sum(visitCount) as mn_Count
    from
        monthly_rows
    group by
        userid,
        mn
)
select
    userid,
    mn,
    mn_Count,
    sum(mn_Count) over (partition by userid order by mn) as sum_Count
from
    monthly_totals;
問題三:
(1) 需求
有 50W 個京東店鋪,每個訪客訪問任何一個店鋪的任何一個商品時都會產生一條
訪問日誌,訪問日誌存儲的表名爲 visit,訪客的用戶 id 爲 user_id,被訪問的店
鋪名稱爲shop,請統計:
A、每個店鋪的 UV (訪客數)。
B、每個店鋪訪問次數 top3 的訪客信息。輸出店鋪名稱、訪客 id、訪問次數。
(2) 建表
-- Rebuild the visit log table from scratch: one row per visit of a user to a shop.
drop table if exists visit;

create table visit (
    user_id string,
    shop    string
)
row format delimited
    fields terminated by '\t';
(3) 數據
u1 a
u2 b
u1 b
u1 a
u3 c
u4 b
u1 a
u2 c
u5 b
u4 b
u6 c
u2 c
u1 b
u2 a
u2 a
u3 a
u5 a
u5 a
u5 a
(4) 加載數據
-- Replace the contents of visit with the local sample file.
load data local inpath '/opt/hivesqltopic/data/data3.txt'
overwrite into table visit;
(5) 查看表數據
-- Sanity check: show every row that was just loaded.
select
    *
from
    visit;
(6) 寫 hql
A、統計每個店鋪的 UV(訪客數)
-- UV per shop: number of distinct visitors seen in the visit log.
select
    shop,
    count(distinct user_id) as UV
from visit
group by shop;
B、每個店鋪訪問次數 top3 的訪客信息,輸出店鋪名稱、訪客 id、訪問次數。
a、思路
查詢每個店鋪被每個用戶訪問次數
-- Step 1: number of visits per (shop, visitor).
-- The result set is referred to as t1 by the next step.
select
    shop,
    user_id,
    count(*) as ct
from
    visit
group by
    shop,
    user_id;
計算每個店鋪被用戶訪問次數排名
-- Step 2: rank the visitors of each shop by visit count, computed over t1
-- (the result of step 1). The order is descending so that rank 1 is the
-- visitor with the most visits. Tied counts share the same rank.
-- The result set is referred to as t2 by the next step.
select
    shop,
    user_id,
    ct,
    rank() over (partition by shop order by ct desc) as rk
from
    t1;
每個店鋪取排名前三的
-- Step 3: keep the rows of t2 (the result of step 2) whose rank is at most 3.
select
    shop,
    user_id,
    ct
from t2
where rk <= 3;
b、最終 hql
-- Top-3 visitors of every shop by number of visits: shop, visitor id, visit count.
-- rank() gives tied visitors the same rank, so a shop can return more than
-- three rows when visit counts tie around the cut-off.
select
    shop,
    user_id,
    num
from
(
    select
        shop,
        user_id,
        num,
        -- descending order: rank 1 must be the visitor with the MOST visits
        rank() over (partition by shop order by num desc) as rk
    from
    (
        -- visits per (shop, visitor)
        select
            shop,
            user_id,
            count(*) as num
        from
            visit
        group by
            shop,
            user_id
    ) visit_counts
) ranked
where
    rk <= 3;
問題四:
(1) 需求
已知一個表 order_tab,有如下字段:Date,Order_id,User_id,amount。
請給出 hql 進行統計:數據樣例:2017-01-01,10029028,1000003251,33.57。
A、給出 2017 年每個月的訂單數、用戶數、總成交金額。
B、給出 2017 年 11 月的新客數(指在 11 月纔有第一筆訂單)。
(2) 建表
-- Rebuild the order table from scratch: one row per order, comma-separated input.
drop table if exists order_tab;

create table order_tab (
    dt       string,
    order_id string,
    user_id  string,
    amount   decimal(10,2)
)
row format delimited
    fields terminated by ',';
(3) 數據
2017-01-01,1,1,33.5
2017-05-20,1,3,45.6
2017-11-05,2,4,22
2017-02-06,2,1,43.2
(4) 加載數據
-- Replace the contents of order_tab with the local sample file.
load data local inpath '/opt/hivesqltopic/data/data4.txt'
overwrite into table order_tab;
(5) 查看表數據
-- Sanity check: show every row that was just loaded.
select
    *
from
    order_tab;
A、給出 2017 年每個月的訂單數、用戶數、總成交金額。
-- Per month of 2017: number of orders, number of distinct users, total amount.
select
    date_format(dt, 'yyyy-MM') as mn,
    count(order_id)            as order_num,
    count(distinct user_id)    as user_id,
    sum(amount)                as amount
from order_tab
where date_format(dt, 'yyyy') = '2017'
group by date_format(dt, 'yyyy-MM');
B、給出 2017 年 11 月的新客數(指在 11 月纔有第一筆訂單)。
-- Number of new customers in 2017-11: users whose earliest order falls in that month.
-- The inner query yields one row per such user. The outer query counts those rows,
-- so the result is a single number instead of one order count per user.
select
    count(user_id) as num
from
(
    select
        user_id
    from
        order_tab
    group by
        user_id
    having
        date_format(min(dt), 'yyyy-MM') = '2017-11'
) first_time_buyers;
問題五(hql 中的行列轉換):
1、行轉列(多行合併爲一行多列)
(1) 建表
-- Rebuild the tall course table from scratch: one row per (name, course) grade.
drop table if exists tb_course;

create table tb_course (
    name   string,
    course string,
    grade  int
)
row format delimited
    fields terminated by ',';
(2) 數據
tom,JDBC,20
tom,Hibernate,50
tom,Spring,80
marry,JDBC,30
marry,Hibernate,60
marry,Spring,70
(3) 加載數據
-- Replace the contents of tb_course with the local sample file.
load data local inpath '/opt/hivesqltopic/data/data9.txt'
overwrite into table tb_course;
(4) 查詢數據
-- Sanity check: show every row that was just loaded.
select
    *
from
    tb_course;
(5) 編寫 hql
-- One row per name with one grade column per course.
-- Each sum picks the grade of its own course and adds 0 for every other row.
select
    name,
    sum(if(course = 'JDBC', grade, 0))      as JDBC,
    sum(if(course = 'Hibernate', grade, 0)) as Hibernate,
    sum(if(course = 'Spring', grade, 0))    as Spring
from tb_course
group by name;
2、列轉行(多列拆分爲多行)
(1) 建表
-- Rebuild the wide course table from scratch: one row per name, one column per course.
drop table if exists tb_courses;

create table tb_courses (
    name      string,
    JDBC      int,
    Hibernate int,
    Spring    int
)
row format delimited
    fields terminated by ',';
(2) 數據
tom,20,50,80
marry,30,60,70
(3) 加載數據
-- Replace the contents of tb_courses with the local sample file.
load data local inpath '/opt/hivesqltopic/data/data10.txt'
overwrite into table tb_courses;
(4) 查詢數據
-- Sanity check: show every row that was just loaded.
select
    *
from
    tb_courses;
(5) 編寫 hql
-- One output row per (name, course): each branch of the union turns one
-- course column into rows labelled with the course name.
select name, 'JDBC' as course, jdbc as grade
from tb_courses
union all
select name, 'Hibernate' as course, Hibernate as grade
from tb_courses
union all
select name, 'Spring' as course, Spring as grade
from tb_courses
order by name;
問題六: