推薦:
題目部分
22 、使用hive 求出兩個數據集的差集?
數據
t1表:
id name
1 zs
2 ls
t2表:
id name
1 zs
3 ww
結果如下:
id name
2 ls
3 ww
23
25 、每個用戶連續登錄的最大天數?
數據:
login表
uid,date
1,2019-08-01
1,2019-08-02
1,2019-08-03
2,2019-08-01
2,2019-08-02
3,2019-08-01
3,2019-08-03
4,2019-07-28
4,2019-07-29
4,2019-08-01
4,2019-08-02
4,2019-08-03
結果如下:
uid cnt_days
1 3
2 2
3 1
4 3
32 、有如下三張表:
表A(登錄表):
ds user_id
2019-08-06 1
2019-08-06 2
2019-08-06 3
2019-08-06 4
表B(閱讀表):
ds user_id read_num
2019-08-06 1 2
2019-08-06 2 3
2019-08-06 3 6
表C(付費表):
ds user_id price
2019-08-06 1 55.6
2019-08-06 2 55.8
基於上述三張表,請使用hive的hql語句實現如下需求:
(1)、用戶登錄並且當天有閱讀的用戶數,以及閱讀書籍數量
(2)、用戶登錄並且閱讀,但是沒有付費的用戶數
(3)、用戶登錄並且付費,付費用戶數量和金額總數
37 數據如下:
1,zhangsan,數學,80,2015
2,lisi,語文,90,2016
3,wangwu,化學,70,2017
4,zhangsan,語文,80,2015
5,zhangsan,化學,90,2015
6,lisi,語文,70,2015
答案部分
22
-- Question 22 fixtures: two tables sharing one (id, name) layout,
-- stored as tab-separated text.
create table sql022t1 (
    id   string,
    name string
)
row format delimited fields terminated by '\t';

-- sql022t2 reuses the definition of sql022t1.
create table sql022t2 like sql022t1;

-- Replace the contents of each table with its local input file.
load data local inpath '/root/in/sql022t1' overwrite into table sql022t1;
load data local inpath '/root/in/sql022t2' overwrite into table sql022t2;
交集
-- Intersection step: ids present in both tables, with the name from each side.
-- This result set is the one aliased tt1 when it is embedded as a subquery.
-- (The alias is kept in this comment; a bare identifier after the terminating
-- semicolon is not valid HiveQL.)
select
    t1.id,
    t1.name,
    t2.name
from sql022t1 t1
inner join sql022t2 t2
    on t1.id = t2.id;
並集
-- Union step: every distinct (id, name) row found in either table.
-- `union` rather than `union all` is deliberate: duplicates must collapse
-- so that the result behaves like a set.
-- This result set is the one aliased tt2 when it is embedded as a subquery.
-- (The alias is kept in this comment; a bare identifier after the terminating
-- semicolon is not valid HiveQL.)
select id, name from sql022t1
union
select id, name from sql022t2;
差集=並集-交集
-- Symmetric difference = union minus intersection.
-- Rows are compared on the whole row (id AND name): two rows that share an id
-- but carry different names are different rows and both belong in the result.
-- The union side drives a left join (anti-join): a union row with no partner
-- in the intersection is kept.
-- Note: rows whose id or name is NULL never compare equal, so they are always
-- reported as part of the difference.
select
    tt2.id,
    tt2.name
from
(
    select id, name from sql022t1
    union
    select id, name from sql022t2
) tt2
left join
(
    select
        t1.id   id,
        t1.name name
    from sql022t1 t1
    inner join sql022t2 t2
        on  t1.id = t2.id
        and t1.name = t2.name
) tt1
    on  tt2.id = tt1.id
    and tt2.name = tt1.name
where
    tt1.id is null;
23
123,dasfdasas,3,200,1535945356,2018-08-08
124,dasfadass,1,200,1535945356,2018-08-08
125,dadassfas,3,200,1535945356,2018-08-09
126,dadassfas,2,200,1535945356,2018-08-09
127,dasfdasas,5,200,1535945356,2018-08-09
-- Question 23 fixture: comma-separated order records.
-- `timestamp` is back-quoted because the column name matches a type keyword.
create table sql023 (
    orderid     int,
    userid      string,
    productid   int,
    price       int,
    `timestamp` int,
    dt          string
)
row format delimited fields terminated by ',';

-- Append the local sample file to the table.
load data local inpath '/root/in/sql023' into table sql023;
得到每個客戶今天和昨天每天購買的商品列表(單個商品只出現一次)
-- One row per (user, product, day) for purchases made on 2018-08-08 or
-- 2018-08-09. The group by exists only to collapse repeat purchases of the
-- same product by the same user on the same day.
-- This result set is the one aliased t1 when it is embedded as a subquery.
-- (The alias is kept in this comment; a bare identifier after the terminating
-- semicolon is not valid HiveQL.)
select
    userid,
    productid,
    dt
from sql023
where dt in ('2018-08-08', '2018-08-09')
group by
    userid,
    productid,
    dt;
篩選出昨天和今天購買過3號商品的用戶
-- Users who bought product 3 on both 2018-08-08 and 2018-08-09.
-- The inner query keeps one row per (user, product, day), so count(dt) is the
-- number of distinct days on which the user bought the product. Only two days
-- are in scope, so "> 1" means "bought on both days".
-- The productid = 3 filter restricts the result to the product this step is
-- about; without it every (user, product) pair bought on both days is returned.
select
    userid,
    productid,
    count(dt) buy_days
from
(
    select
        userid,
        productid,
        dt
    from sql023
    where dt in ('2018-08-08', '2018-08-09')
    group by
        userid,
        productid,
        dt
) t1
where productid = 3
group by
    userid,
    productid
having count(dt) > 1;
25
1,2019-08-01
1,2019-08-02
1,2019-08-03
2,2019-08-01
2,2019-08-02
3,2019-08-01
3,2019-08-03
4,2019-07-28
4,2019-07-29
4,2019-08-01
4,2019-08-02
4,2019-08-03
-- Question 25 fixture: one comma-separated (uid, login date) pair per line.
create table sql025 (
    uid int,
    dt  string
)
row format delimited fields terminated by ',';

-- Append the local sample file to the table.
load data local inpath '/root/in/sql025' into table sql025;
按用戶分組加一列等差數列
-- Step 1: number each user's login dates 1, 2, 3, ... in date order.
-- Logins are reduced to one row per (uid, dt) first: a repeated login on the
-- same day would otherwise consume an extra sequence number and break the
-- date-minus-sequence trick used by the following steps.
-- This result set is the one aliased t1 when it is embedded as a subquery.
-- (The alias is kept in this comment; a bare identifier after the terminating
-- semicolon is not valid HiveQL.)
select
    uid,
    dt,
    row_number() over (partition by uid order by dt) `rank`
from
(
    select uid, dt
    from sql025
    group by uid, dt
) d;
日期與等差數列做差
-- Step 2: subtract the sequence number from the login date.
-- Within one user, consecutive dates all map to the same `sub` value, so `sub`
-- identifies a streak.
-- The step-1 query is written out in full as subquery t1 (an empty `()`
-- placeholder is not valid HiveQL). This result set is the one aliased t2 when
-- it is embedded as a subquery.
select
    uid,
    dt,
    `rank`,
    date_sub(dt, `rank`) sub
from
(
    select
        uid,
        dt,
        row_number() over (partition by uid order by dt) `rank`
    from
    (
        select uid, dt
        from sql025
        group by uid, dt
    ) d
) t1;
按差值匯聚,通過count來計算每次連續登錄的天數
-- Step 3: group by (uid, sub) and count rows to get the length in days of each
-- consecutive-login streak.
-- The earlier steps are written out in full as subqueries t1 and t2 (an empty
-- `()` placeholder is not valid HiveQL). This result set is the one aliased t3
-- when it is embedded as a subquery.
select
    uid,
    sub,
    count(uid) amount
from
(
    select
        uid,
        dt,
        `rank`,
        date_sub(dt, `rank`) sub
    from
    (
        select
            uid,
            dt,
            row_number() over (partition by uid order by dt) `rank`
        from
        (
            select uid, dt
            from sql025
            group by uid, dt
        ) d
    ) t1
) t2
group by
    uid,
    sub;
取最大值得到每個用戶連續登錄的最大天數
-- Longest run of consecutive login days per user.
--   d  : one row per (uid, dt), so a repeated login on the same day cannot
--        split a streak
--   t1 : login dates numbered 1..n per user in date order
--   t2 : date minus sequence number; constant within a run of consecutive days
--   t3 : length of each run
-- The outer query keeps the longest run per user and names it cnt_days, the
-- column name used in the expected result.
select
    uid,
    max(amount) cnt_days
from
(
    select
        uid,
        sub,
        count(uid) amount
    from
    (
        select
            uid,
            dt,
            `rank`,
            date_sub(dt, `rank`) sub
        from
        (
            select
                uid,
                dt,
                row_number() over (partition by uid order by dt) `rank`
            from
            (
                select uid, dt
                from sql025
                group by uid, dt
            ) d
        ) t1
    ) t2
    group by
        uid,
        sub
) t3
group by
    uid;
32
2019-08-06,1
2019-08-06,2
2019-08-06,3
2019-08-06,4
2019-08-06,1,2
2019-08-06,2,3
2019-08-06,3,6
2019-08-06,1,55.6
2019-08-06,2,55.8
-- Question 32 fixtures, all comma-separated text.

-- Table A: login records.
create table sql032t1 (
    ds      string,
    user_id int
)
row format delimited fields terminated by ',';

-- Table B: reading records.
create table sql032t2 (
    ds       string,
    user_id  int,
    read_num int
)
row format delimited fields terminated by ',';

-- Table C: payment records.
-- price is decimal(10,2): the sample rows carry fractional amounts (55.6, 55.8),
-- which an int column cannot hold (such fields are expected to read back as
-- NULL), and an exact decimal is preferable to a floating type for money.
create table sql032t3 (
    ds      string,
    user_id int,
    price   decimal(10,2)
)
row format delimited fields terminated by ',';

-- Append each local sample file to its table.
load data local inpath '/root/in/sql032t1' into table sql032t1;
load data local inpath '/root/in/sql032t2' into table sql032t2;
load data local inpath '/root/in/sql032t3' into table sql032t3;
(1)
-- Requirement (1): per day, the number of users who logged in and also read on
-- that same day, together with the total number of books they read.
-- The join matches on ds as well as user_id so that a login only pairs with
-- reading done on the same day.
-- The login table is reduced to one row per (ds, user_id) first, so a user with
-- several login rows on one day does not have read_num summed more than once.
select
    a.ds,
    count(distinct a.user_id) user_cnt,
    sum(b.read_num) read_total
from
(
    select ds, user_id
    from sql032t1
    group by ds, user_id
) a
inner join sql032t2 b
    on  a.user_id = b.user_id
    and a.ds = b.ds
group by
    a.ds;
37
1,zhangsan,數學,80,2015
2,lisi,語文,90,2016
3,wangwu,化學,70,2017
4,zhangsan,語文,80,2015
5,zhangsan,化學,90,2015
6,lisi,語文,70,2015
-- Question 37 fixture: comma-separated score records
-- (id, userid, course, score, term).
create table sql037 (
    id     int,
    userid string,
    course string,
    score  int,
    term   int
)
row format delimited fields terminated by ',';

-- Append the local sample file to the table.
load data local inpath '/root/in/sql037' into table sql037;
-- Rank step: within each (course, term), rank rows by score, highest first.
-- rank() gives tied scores the same rank.
-- This result set is the one aliased t1 when it is embedded as a subquery.
-- (The alias is kept in this comment; a bare identifier after the terminating
-- semicolon is not valid HiveQL.)
select
    id,
    userid,
    course,
    score,
    term,
    rank() over (partition by course, term order by score desc) `rank`
from sql037;
-- Top-scoring row(s) for every (course, term) pair.
-- The subquery ranks rows by score within each pair, highest first; the outer
-- query keeps rank 1. rank() gives tied scores the same rank, so every row
-- sharing the best score is returned.
select
    ranked.id,
    ranked.userid,
    ranked.course,
    ranked.score,
    ranked.term
from (
    select
        id,
        userid,
        course,
        score,
        term,
        rank() over (partition by course, term order by score desc) rk
    from sql037
) ranked
where ranked.rk = 1;