[hive] 經典sql題及答案(三)

推薦:

經典sql題及答案(一)
經典sql題及答案(二)

題目部分

22 、使用hive 求出兩個數據集的差集?
數據
t1表:
id name
1 zs
2 ls
t2表:
id name
1 zs
3 ww
結果如下:
id name
2 ls
3 ww

23
在這裏插入圖片描述

25 、每個用戶連續登錄的最大天數?
數據:
login表
uid,date
1,2019-08-01
1,2019-08-02
1,2019-08-03
2,2019-08-01
2,2019-08-02
3,2019-08-01
3,2019-08-03
4,2019-07-28
4,2019-07-29
4,2019-08-01
4,2019-08-02
4,2019-08-03
結果如下:
uid cnt_days
1 3
2 2
3 1
4 3

32 、有如下三張表:
表A(登錄表):
ds user_id
2019-08-06 1
2019-08-06 2
2019-08-06 3
2019-08-06 4
表B(閱讀表):
ds user_id read_num
2019-08-06 1 2
2019-08-06 2 3
2019-08-06 3 6
表C(付費表):
ds user_id price
2019-08-06 1 55.6
2019-08-06 2 55.8
基於上述三張表,請使用hive的hql語句實現如下需求:
(1)、用戶登錄並且當天有閱讀的用戶數,以及閱讀書籍數量
(2)、用戶登錄並且閱讀,但是沒有付費的用戶數
(3)、用戶登錄並且付費,付費用戶數量和金額總數

37 數據如下:
1,zhangsan,數學,80,2015
2,lisi,語文,90,2016
3,wangwu,化學,70,2017
4,zhangsan,語文,80,2015
5,zhangsan,化學,90,2015
6,lisi,語文,70,2015
在這裏插入圖片描述

答案部分

22
-- Question 22 fixtures: two text tables with the same (id, name) layout,
-- stored as tab-delimited rows.
CREATE TABLE sql022t1 (
    id   STRING,
    name STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t';

-- sql022t2 reuses the column layout and row format of sql022t1.
CREATE TABLE sql022t2 LIKE sql022t1;

-- OVERWRITE replaces whatever rows the target table already holds.
LOAD DATA LOCAL INPATH '/root/in/sql022t1' OVERWRITE INTO TABLE sql022t1;
LOAD DATA LOCAL INPATH '/root/in/sql022t2' OVERWRITE INTO TABLE sql022t2;

交集
-- Intersection by id: one row for every pair of rows whose id occurs in both
-- sql022t1 and sql022t2. The difference-set query uses this result under the
-- alias "tt1"; the alias is only a label here, so it lives in this comment
-- instead of trailing the statement terminator (where it is a syntax error).
SELECT
    t1.id,
    t1.name,
    t2.name AS name1   -- aliased so the two name columns stay distinguishable
FROM
    sql022t1 t1
INNER JOIN
    sql022t2 t2
ON
    t1.id = t2.id;
並集
-- Union of both tables. UNION (not UNION ALL) is intentional: a row that is
-- present in both tables must appear only once in the result.
-- The difference-set query uses this result under the alias "tt2"; the alias
-- is kept in this comment because a bare token after ';' is not valid HQL.
SELECT
    id,
    name
FROM
    sql022t1
UNION
SELECT
    id,
    name
FROM
    sql022t2;
差集=並集-交集
-- Difference set = union minus intersection, matched on id:
-- keep every row of the union whose id does not occur in both tables.
WITH shared_ids AS (
    -- ids that exist in both sql022t1 and sql022t2
    SELECT
        a.id
    FROM sql022t1 a
    INNER JOIN sql022t2 b
        ON a.id = b.id
),
all_rows AS (
    -- UNION removes duplicate (id, name) rows across the two tables
    SELECT id, name FROM sql022t1
    UNION
    SELECT id, name FROM sql022t2
)
SELECT
    u.id,
    u.name
FROM all_rows u
LEFT JOIN shared_ids s
    ON s.id = u.id
WHERE
    -- anti-join: only rows without a partner in the intersection survive
    s.id IS NULL;
    
23
123,dasfdasas,3,200,1535945356,2018-08-08
124,dasfadass,1,200,1535945356,2018-08-08
125,dadassfas,3,200,1535945356,2018-08-09
126,dadassfas,2,200,1535945356,2018-08-09
127,dasfdasas,5,200,1535945356,2018-08-09

-- Question 23 fixture: one row per order, stored as comma-delimited text.
CREATE TABLE sql023 (
    orderid     INT,
    userid      STRING,
    productid   INT,
    price       INT,
    `timestamp` INT,      -- back-quoted because timestamp is also a type name
    dt          STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';

LOAD DATA LOCAL INPATH '/root/in/sql023' INTO TABLE sql023;

得到每個客戶今天和昨天每天購買的商品列表(單個商品只出現一次)
-- Step t1: distinct (user, product, day) combinations for 2018-08-08 and
-- 2018-08-09. GROUP BY is used purely for de-duplication, so a product bought
-- several times by one user on one day appears once.
-- The label "t1" is kept in this comment; a bare token after ';' is not valid
-- HQL.
SELECT
    userid,
    productid,
    dt
FROM
    sql023
WHERE
    dt IN ('2018-08-08', '2018-08-09')
GROUP BY
    userid,
    productid,
    dt;
篩選出昨天和今天購買過3號商品的用戶
-- (user, product) pairs bought on BOTH 2018-08-08 and 2018-08-09.
-- NOTE(review): the heading above this query mentions product 3, but no
-- productid filter is applied here; confirm against the original question
-- whether "productid = 3" is required.
WITH daily_purchase AS (
    -- one row per (user, product, day) within the two-day window
    SELECT
        userid,
        productid,
        dt
    FROM sql023
    WHERE dt IN ('2018-08-08', '2018-08-09')
    GROUP BY
        userid,
        productid,
        dt
)
SELECT
    userid,
    productid,
    COUNT(dt)
FROM daily_purchase
GROUP BY
    userid,
    productid
HAVING
    -- rows are unique per day, so more than one row means both days
    COUNT(dt) > 1;
    
25
1,2019-08-01
1,2019-08-02
1,2019-08-03
2,2019-08-01
2,2019-08-02
3,2019-08-01
3,2019-08-03
4,2019-07-28
4,2019-07-29
4,2019-08-01
4,2019-08-02
4,2019-08-03
-- Question 25 fixture: one row per (user, login date), comma-delimited text.
CREATE TABLE sql025 (
    uid INT,
    dt  STRING   -- login date as a yyyy-MM-dd string
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';

LOAD DATA LOCAL INPATH '/root/in/sql025' INTO TABLE sql025;
按用戶分組加一列等差數列
-- Step t1: number each user's login days in ascending date order.
-- Rows are first collapsed to one per (uid, dt): if a user had two rows for
-- the same day they would receive different row numbers and a single streak
-- would be split in the later steps.
-- The label "t1" is kept in this comment; a bare token after ';' is not valid
-- HQL.
SELECT
    uid,
    dt,
    ROW_NUMBER() OVER (PARTITION BY uid ORDER BY dt) AS `rank`
FROM (
    SELECT
        uid,
        dt
    FROM sql025
    GROUP BY
        uid,
        dt
) d;
日期與等差數列做差
-- Step t2: subtract the row number from the login date.
-- Within one run of consecutive days both the date and the row number grow by
-- one per row, so the difference ("sub") is constant for the whole run and
-- changes whenever there is a gap.
-- The original left the inner query as an empty "()" placeholder, which does
-- not parse; step t1 is spelled out here so the statement runs on its own.
WITH t1 AS (
    SELECT
        uid,
        dt,
        ROW_NUMBER() OVER (PARTITION BY uid ORDER BY dt) AS `rank`
    FROM (
        -- one row per user per day, see step t1
        SELECT
            uid,
            dt
        FROM sql025
        GROUP BY
            uid,
            dt
    ) d
)
SELECT
    uid,
    dt,
    `rank`,
    DATE_SUB(dt, `rank`) AS sub
FROM t1;
按差值匯聚,通過count來計算每次連續登錄的天數
-- Step t3: group by (uid, sub); each group is one run of consecutive login
-- days and its row count is the length of that run.
-- The original left the inner query as an empty "()" placeholder, which does
-- not parse; steps t1 and t2 are spelled out here so the statement runs on
-- its own.
WITH t1 AS (
    SELECT
        uid,
        dt,
        ROW_NUMBER() OVER (PARTITION BY uid ORDER BY dt) AS `rank`
    FROM (
        -- one row per user per day so duplicates cannot split a run
        SELECT
            uid,
            dt
        FROM sql025
        GROUP BY
            uid,
            dt
    ) d
),
t2 AS (
    SELECT
        uid,
        dt,
        `rank`,
        DATE_SUB(dt, `rank`) AS sub
    FROM t1
)
SELECT
    uid,
    sub,
    COUNT(uid) AS amount
FROM t2
GROUP BY
    uid,
    sub;
取最大值得到每個用戶連續登錄的最大天數
-- Longest run of consecutive login days per user.
-- Technique: date minus row number is constant inside a run of consecutive
-- days, so grouping by that difference isolates each run.
WITH t1 AS (
    -- number each user's login days in date order
    SELECT
        uid,
        dt,
        ROW_NUMBER() OVER (PARTITION BY uid ORDER BY dt) AS `rank`
    FROM (
        -- one row per user per day: repeated rows for the same day would get
        -- different row numbers and break a run apart
        SELECT
            uid,
            dt
        FROM sql025
        GROUP BY
            uid,
            dt
    ) d
),
t2 AS (
    -- run key: identical for all days belonging to the same run
    SELECT
        uid,
        dt,
        `rank`,
        DATE_SUB(dt, `rank`) AS sub
    FROM t1
),
t3 AS (
    -- length of every run
    SELECT
        uid,
        sub,
        COUNT(uid) AS amount
    FROM t2
    GROUP BY
        uid,
        sub
)
SELECT
    uid,
    MAX(amount) AS cnt_days   -- named to match the expected result header
FROM t3
GROUP BY
    uid;
    
32
2019-08-06,1
2019-08-06,2
2019-08-06,3
2019-08-06,4

2019-08-06,1,2
2019-08-06,2,3
2019-08-06,3,6

2019-08-06,1,55.6
2019-08-06,2,55.8

-- Question 32 fixtures, all comma-delimited text tables.

-- Login table: one row per user per login day.
CREATE TABLE sql032t1 (
    ds      STRING,
    user_id INT
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';

-- Reading table: number of books a user read on a day.
CREATE TABLE sql032t2 (
    ds       STRING,
    user_id  INT,
    read_num INT
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';

-- Payment table: amount a user paid on a day.
-- price is DECIMAL because the sample data contains fractional amounts
-- (55.6, 55.8); an INT column cannot hold those values.
CREATE TABLE sql032t3 (
    ds      STRING,
    user_id INT,
    price   DECIMAL(10,2)
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';

LOAD DATA LOCAL INPATH '/root/in/sql032t1' INTO TABLE sql032t1;
LOAD DATA LOCAL INPATH '/root/in/sql032t2' INTO TABLE sql032t2;
LOAD DATA LOCAL INPATH '/root/in/sql032t3' INTO TABLE sql032t3;
(1)
-- Requirement (1): for each day, the number of users who logged in AND read
-- on that same day, together with the total number of books they read.
-- The join includes ds so that a login is only matched with reading done on
-- the same day, and the result is aggregated because the requirement asks
-- for counts rather than a per-user listing.
SELECT
    l.ds,
    COUNT(DISTINCT l.user_id) AS user_cnt,
    SUM(r.read_num)           AS read_num_total
FROM (
    -- one row per user per day so repeated login rows cannot inflate the sum
    SELECT
        ds,
        user_id
    FROM sql032t1
    GROUP BY
        ds,
        user_id
) l
INNER JOIN sql032t2 r
    ON  l.ds = r.ds
    AND l.user_id = r.user_id
GROUP BY
    l.ds;

37
1,zhangsan,數學,80,2015
2,lisi,語文,90,2016
3,wangwu,化學,70,2017
4,zhangsan,語文,80,2015
5,zhangsan,化學,90,2015
6,lisi,語文,70,2015
-- Question 37 fixture: one row per (student, course, term) score,
-- comma-delimited text.
CREATE TABLE sql037 (
    id     INT,
    userid STRING,
    course STRING,
    score  INT,
    term   INT
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';

LOAD DATA LOCAL INPATH '/root/in/sql037' INTO TABLE sql037;
-- Step t1: rank the scores inside every (course, term) group, highest first.
-- RANK() gives tied scores the same rank, so several rows can hold rank 1.
-- The label "t1" is kept in this comment; a bare token after ';' is not valid
-- HQL.
SELECT
    id,
    userid,
    course,
    score,
    term,
    RANK() OVER (PARTITION BY course, term ORDER BY score DESC) AS `rank`
FROM
    sql037;
-- Highest-scoring row(s) for every (course, term) combination.
WITH ranked_scores AS (
    -- RANK() assigns 1 to every row tied for the top score in its group
    SELECT
        id,
        userid,
        course,
        score,
        term,
        RANK() OVER (PARTITION BY course, term ORDER BY score DESC) AS `rank`
    FROM sql037
)
SELECT
    id,
    userid,
    course,
    score,
    term
FROM ranked_scores
WHERE
    `rank` = 1;
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章