hive练习:行列转换相关

1

数据:

id userid subject score

1 001 语文 90
2 001 数学 92
3 001 英语 80
4 002 语文 88
5 002 数学 90
6 002 英语 75.5
7 003 语文 70
8 003 数学 85
9 003 英语 90
10 003 政治 82

目标输出

userid 语文 数学 英语 政治 total
001 90 92 80 0 262
002 88 90 75.5 0 253.5
003 70 85 90 82 327
total 248 267 245.5 82 842.5

建表:

-- Exercise 1 source table: one row per (user, subject) score.
-- uid is declared as string, so ids written as '001' are stored as written.
-- Fields in the data file are separated by a single space.
create table if not exists score (
    id      int,
    uid     string,
    subject string,
    score   double
)
row format delimited
    fields terminated by ' '
;

-- Load the local text file into the table (no overwrite, so rows are appended).
load data local inpath '/root/hivedata/score.txt'
into table score;

执行

-- Pivot the per-subject rows of `score` into one row per user with one column
-- per subject (c = 语文, m = 数学, e = 英语, p = 政治) plus a row-wise `total`.
-- Fix: the target output also needs a final 'total' row holding the column sums,
-- which the earlier query never produced; it is appended here with union all.
-- Fix: `select *` is replaced by an explicit column list.
-- The order by keeps the synthetic 'total' row last and the users sorted by uid.
with per_user as (
    select
        uid,
        sum(case when subject = "语文" then score else 0 end) as c,
        sum(case when subject = "数学" then score else 0 end) as m,
        sum(case when subject = "英语" then score else 0 end) as e,
        sum(case when subject = "政治" then score else 0 end) as p
    from score
    group by uid
),
with_total as (
    select
        uid,
        c,
        m,
        e,
        p
    from per_user
    union all
    select
        'total' as uid,
        sum(c)  as c,
        sum(m)  as m,
        sum(e)  as e,
        sum(p)  as p
    from per_user
)
select
    uid,
    c,
    m,
    e,
    p,
    c + m + e + p as total
from with_total
order by
    case when uid = 'total' then 1 else 0 end,
    uid
;

2

数据:
t1表
uid tags
1 1,2,3
2 2,3
3 1,2
编写sql实现如下结果:
uid tag
1 1
1 2
1 3
2 2
2 3
3 1
3 2

建表

-- Exercise 2 source table: a user id plus a comma-separated tag list.
-- The two fields in the data file are separated by a single space.
create table if not exists table1 (
    uid  int,
    tags string
)
row format delimited
    fields terminated by ' '
;

-- Load the local text file into the table.
load data local inpath '/root/hivedata/table1.txt'
into table table1;

执行:

-- Emit one output row per element of the comma-separated `tags` value,
-- repeating the owning uid on every row.
select
    src.uid,
    exploded.tag
from table1 src
lateral view explode(split(src.tags, ",")) exploded as tag
;

3

数据:
T1表:
Tags
1,2,3
1,2
2,3
T2表:
Id lab
1 A
2 B
3 C
根据T1和T2表的数据,编写sql实现如下结果:
ids tags
1,2,3 A,B,C
1,2 A,B
2,3 B,C

建表:

-- Exercise 3 source tables.
-- tag1 holds one comma-separated id list per row.
create table if not exists tag1 (
    tags string
)
row format delimited
    fields terminated by ' '
;

-- tag2 maps a numeric id to its label.
create table if not exists tag2 (
    id  int,
    tag string
)
row format delimited
    fields terminated by ' '
;

-- Load both local text files into their tables.
load data local inpath '/root/hivedata/tag1.txt'
into table tag1;
load data local inpath '/root/hivedata/tag2.txt'
into table tag2;

执行:

-- Translate each comma-separated id list in tag1 into the matching label list from tag2.
-- Fix: both output columns used to be aliased `tags`; the first is now `ids`,
-- matching the required "ids tags" header.
-- Fix: collect_list gives no ordering guarantee once rows have gone through the join,
-- so labels could come back in a different order than the ids. Each label is prefixed
-- with its zero-padded position from posexplode, the array is sorted, and the prefix is
-- stripped again afterwards (5 digits, i.e. lists of up to 100000 ids).
-- Fix: split() yields strings while tag2.id is int, so the join key is cast explicitly.
-- Ids without a label still yield NULL and are skipped by collect_list, as before.
select
    exploded.tags as ids,
    regexp_replace(
        concat_ws(
            ',',
            sort_array(
                collect_list(
                    concat(lpad(cast(exploded.pos as string), 5, '0'), ':', lab.tag)
                )
            )
        ),
        '(^|,)[0-9]{5}:',
        '$1'
    ) as tags
from (
    select
        src.tags,
        tmp.pos,
        tmp.id
    from tag1 src
    lateral view posexplode(split(src.tags, ",")) tmp as pos, id
) exploded
left join tag2 lab
    on cast(exploded.id as int) = lab.id
group by exploded.tags
;

4用户标签组合

数据:
t1表:
id tag flag
a b 2
a b 1
a b 3
c d 6
c d 8
c d 8
编写sql实现如下结果:
id tag flag
a b 1|2|3
c d 6|8

建表:

-- Exercise 4 source table: (id, tag) pairs, each carrying a flag value.
-- Fields in the data file are separated by a single space.
create table if not exists utag (
    id   string,
    tag  string,
    flag string
)
row format delimited
    fields terminated by ' '
;

-- Load the local text file into the table.
load data local inpath '/root/hivedata/utag.txt'
into table utag;

执行:

-- Merge the distinct flags of every (id, tag) pair into one '|'-separated string.
-- Fix: collect_set returns elements in no guaranteed order, so the input 2,1,3 did not
-- reliably give the required "1|2|3"; sort_array makes the order deterministic.
-- Fix: the aggregated column is now named `flag`, as in the target output.
-- Note: flag is a string column, so the sort is lexicographic ('10' sorts before '2').
select
    id,
    tag,
    concat_ws('|', sort_array(collect_set(flag))) as flag
from utag
group by
    id,
    tag
;

5用户标签行列互换

数据:
t1表
uid name tags
1 goudan chihuo,huaci
2 mazi sleep
3 laotie paly
编写sql实现如下结果: 
uid name tag
1 goudan chihuo
1 goudan huaci
2 mazi sleep
3 laotie paly

建表

-- Exercise 5 source table: a user, the user's name and a comma-separated tag list.
-- Fields in the data file are separated by a single space.
create table if not exists utag2 (
    uid  string,
    name string,
    tags string
)
row format delimited
    fields terminated by ' '
;

-- Load the local text file into the table.
load data local inpath '/root/hivedata/utag2.txt'
into table utag2;

执行:

-- Emit one row per tag of each user, repeating uid and name on every row.
select
    src.uid,
    src.name,
    exploded.tag
from utag2 src
lateral view explode(split(src.tags, ",")) exploded as tag
;

6 hive实现词频统计

数据:
t1表:
uid contents 
1 i|love|china
2 china|is|good|i|i|like

统计结果如下,如果出现次数一样,则按照content名称排序:
content cnt
i 3
china 2
good 1
is 1
like 1
love 1

建表:

-- Exercise 6 source table: a user id plus a '|'-separated list of words.
-- The two fields in the data file are separated by a single space.
create table if not exists content (
    uid      string,
    contents string
)
row format delimited
    fields terminated by ' '
;

-- Load the local text file into the table.
load data local inpath '/root/hivedata/content.txt'
into table content;

执行:

-- Word frequency over all rows of `content`.
-- split() takes a regular expression, so the '|' separator is escaped.
-- Most frequent words come first; ties are ordered by the word itself.
select
    con,
    count(con) as cnt
from content
lateral view explode(split(contents, "\\|")) words as con
group by con
order by
    cnt desc,
    con
;

7课程行转列

数据:
t1表
id course
1,a
1,b
1,c
1,e
2,a
2,c
2,d
2,f
3,a
3,b
3,c
3,e
根据编写sql,得到结果如下(表中的1表示选修,表中的0表示未选修): 
id a b c d e f
1 1 1 1 0 1 0
2 1 0 1 1 0 1
3 1 1 1 0 1 0

建表

-- Exercise 7 source table: one row per (student id, selected course).
-- Unlike the other exercises, this data file is comma-delimited.
create table if not exists course1 (
    id     string,
    course string
)
row format delimited
    fields terminated by ','
;

-- Load the local text file into the table.
load data local inpath '/root/hivedata/course1.txt'
into table course1;

执行:

-- Pivot course selections into one 0/1 indicator column per course (a to f).
-- Fix: sum() returned 2 or more when the same (id, course) row appeared twice, breaking
-- the "1 = selected, 0 = not selected" contract; max() keeps the value in {0, 1}.
-- Fix: the indicator columns are now named after their course, as in the target output.
-- The table alias is `src` rather than `c`, so it cannot be confused with column `c`.
select
    src.id,
    max(case when src.course = "a" then 1 else 0 end) as a,
    max(case when src.course = "b" then 1 else 0 end) as b,
    max(case when src.course = "c" then 1 else 0 end) as c,
    max(case when src.course = "d" then 1 else 0 end) as d,
    max(case when src.course = "e" then 1 else 0 end) as e,
    max(case when src.course = "f" then 1 else 0 end) as f
from course1 src
group by src.id
;

8 兴趣行转列

t1表
name sex hobby
janson 男 打乒乓球、游泳、看电影
tom 男 打乒乓球、看电影

hobby最多3个值,使用hql实现结果如下:
name sex hobby1 hobby2 hobby3
janson 男 打乒乓球 游泳 看电影
tom 男 打乒乓球 看电影

建表

-- Exercise 8 source table: a person, their sex and a '、'-separated hobby list.
-- Fields in the data file are separated by a single space.
create table if not exists hobby (
    name    string,
    sex     string,
    hobbies string
)
row format delimited
    fields terminated by ' '
;

-- Load the local text file into the table.
load data local inpath '/root/hivedata/hobby.txt'
into table hobby;

执行:

-- Spread the '、'-separated hobby list over three fixed columns.
-- Indexing past the end of the split array yields NULL, which is replaced by a single
-- space so that people with fewer than three hobbies still get three columns.
-- Fix: the three columns are now named hobby1..hobby3 as required by the target output
-- (they were previously unnamed expressions).
select
    parts.name,
    parts.sex,
    coalesce(parts.arr[0], " ") as hobby1,
    coalesce(parts.arr[1], " ") as hobby2,
    coalesce(parts.arr[2], " ") as hobby3
from (
    select
        name,
        sex,
        split(hobbies, "、") as arr
    from hobby
) parts
;

9 用户商品行列互换

t1表:
用户 商品
A P1
B P1
A P2
B P3

请你使用hql变成如下结果:
用户 P1 P2 P3
A 1 1 0
B 1 0 1

建表

-- Exercise 9 source table: one row per (user, commodity) pair.
-- Fields in the data file are separated by a single space.
create table if not exists comm (
    uid       string,
    commodity string
)
row format delimited
    fields terminated by ' '
;

-- Load the local text file into the table.
load data local inpath '/root/hivedata/comm.txt'
into table comm;

执行

-- Pivot user/commodity pairs into one 0/1 indicator column per commodity.
-- Fix: sum() returned 2 or more when a user had the same commodity on several rows,
-- while the target output is a 0/1 flag; max() keeps the value in {0, 1}.
-- Fix: the indicator columns are now named p1..p3 after their commodity.
select
    src.uid,
    max(case when src.commodity = 'P1' then 1 else 0 end) as p1,
    max(case when src.commodity = 'P2' then 1 else 0 end) as p2,
    max(case when src.commodity = 'P3' then 1 else 0 end) as p3
from comm src
group by src.uid
;

10成绩课程行列互换

t1表:
name course score
aa English 75
bb math 85
aa math 90

使用hql输出以下结果
name English math
aa 75 90
bb 0 85

建表

-- Exercise 10 source table: one row per (student, course) score.
-- Note that score is declared as string here.
-- Fields in the data file are separated by a single space.
create table if not exists score1 (
    sname  string,
    course string,
    score  string
)
row format delimited
    fields terminated by ' '
;

-- Load the local text file into the table.
load data local inpath '/root/hivedata/score1.txt'
into table score1;

执行:

-- Pivot course scores into one column per course; a missing course counts as 0.
-- Fix: score1.score is a string column, so the earlier query mixed a string THEN branch
-- with an int ELSE branch and summed through implicit conversion. The cast to double is
-- now explicit, so the numeric intent does not depend on Hive's coercion rules.
-- Fix: the two score columns are now named after their course.
select
    src.sname,
    sum(case when src.course = 'English' then cast(src.score as double) else 0 end) as english,
    sum(case when src.course = 'math' then cast(src.score as double) else 0 end) as math
from score1 src
group by src.sname
;
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章