Hive練習題

#選出城市在北京,性別爲女的十個用戶

-- Names of up to 10 users whose city is 'beijing' and whose sex is 'female'.
-- There is no ORDER BY, so which 10 matching rows come back is not deterministic.
SELECT
    user_name
FROM user_info
WHERE sex = 'female'
  AND city = 'beijing'
LIMIT 10;

#選出在2019年4月9號,購買的商品品類是food的用戶名、購買數量、支付金額

-- User name, piece and pay_amount of every 'food' trade stored under
-- dt = '2019-04-09'. One output row per matching trade row (no aggregation).
SELECT
    user_name,
    piece,
    pay_amount
FROM user_trade
WHERE goods_category = 'food'
  AND dt = '2019-04-09';


#2019年一月到四月,每個品類有多少人購買,累計金額是多少

-- Per goods category: number of distinct buyers (by user_name) and the summed
-- pay_amount, for trades with dt from 2019-01-01 through 2019-04-30 inclusive.
SELECT
    goods_category,
    COUNT(DISTINCT user_name) AS num,
    SUM(pay_amount)           AS total_amount
FROM user_trade
WHERE dt >= '2019-01-01'
  AND dt <= '2019-04-30'
GROUP BY goods_category;


#group by的作用:分類彙總
常用聚合函數:
1.count():計數count(distinct...)去重計數
2.sum():求和
3.avg():平均值
4.max():最大值
5.min():最小值

group by .....having

#2019年4月,支付金額超過5萬元的用戶

-- Users whose summed pay_amount over dt 2019-04-01 .. 2019-04-30 (inclusive)
-- is greater than 50000. The threshold sits in HAVING because it tests the
-- per-user aggregate, not individual trade rows.
SELECT
    user_name,
    SUM(pay_amount) AS total_amount
FROM user_trade
WHERE dt >= '2019-04-01'
  AND dt <= '2019-04-30'
GROUP BY user_name
HAVING SUM(pay_amount) > 50000;

#having :對group by 的對象進行篩選


#2019年4月,支付金額最多的top5用戶

-- The five users with the largest summed pay_amount over
-- dt 2019-04-01 .. 2019-04-30 (inclusive), highest total first.
-- Ties on total_amount are not broken by any secondary sort key.
SELECT
    user_name,
    SUM(pay_amount) AS total_amount
FROM user_trade
WHERE dt >= '2019-04-01'
  AND dt <= '2019-04-30'
GROUP BY user_name
ORDER BY total_amount DESC
LIMIT 5;

#常用函數
1、如何把時間戳轉化爲日期?

-- Shows each trade's raw pay_time next to its formatted date-time string
-- for trades stored under dt = '2019-04-09'.
-- Fix: the pattern used 'hh' (12-hour clock) without an AM/PM marker, so
-- 15:30 and 03:30 both rendered as "03:30". 'HH' is the 24-hour field.
-- NOTE(review): assumes pay_time is a Unix timestamp in seconds, as
-- from_unixtime expects — confirm against the table definition.
SELECT
    pay_time,
    from_unixtime(pay_time, 'yyyy-MM-dd HH:mm:ss')
FROM user_trade
WHERE dt = '2019-04-09';


#2、如何計算日期間隔?

用戶的首次激活時間,與2019年5月1日的日期間隔

-- For up to 10 users: the number of days between the date part of
-- firstactivetime and 2019-05-01 (datediff returns end date minus start date).
SELECT
    user_name,
    DATEDIFF('2019-05-01', TO_DATE(firstactivetime))
FROM user_info
LIMIT 10;

3、條件函數
case when

#統計以下四個年齡段的用戶數:20歲以下、20-30歲、30-40歲、40歲以上

         
-- Distinct-user count per age band: under 20, 20-30, 30-40, and 40 and over.
-- Fix: two band labels were misspelled in the output ('2osui' with a letter
-- "o" instead of a zero, and '40suiyihang'); they now read '20suiyixia'
-- (under 20) and '40suiyishang' (40 and over).
-- The same CASE expression is repeated in GROUP BY so that the grouping key
-- is exactly the selected age_type value.
-- NOTE(review): a NULL age matches none of the WHEN branches and therefore
-- lands in the ELSE ('40suiyishang') band — confirm whether age can be NULL.
SELECT
    COUNT(DISTINCT user_id) user_num,
    CASE
        WHEN age < 20 THEN '20suiyixia'
        WHEN age >= 20 AND age < 30 THEN '20-30sui'
        WHEN age >= 30 AND age < 40 THEN '30-40sui'
        ELSE '40suiyishang'
    END AS age_type
FROM user_info
GROUP BY
    CASE
        WHEN age < 20 THEN '20suiyixia'
        WHEN age >= 20 AND age < 30 THEN '20-30sui'
        WHEN age >= 30 AND age < 40 THEN '30-40sui'
        ELSE '40suiyishang'
    END;

#if函數
#統計每個性別用戶等級高低的分佈情況(level大於5爲高級)

-- Distinct-user count per sex and level tier: '高' when level > 5, otherwise '低'.
-- Fix: a dangling "from user_info" line followed the terminating semicolon,
-- leaving an incomplete statement that cannot be parsed; it has been removed.
-- NOTE(review): when level is NULL the IF condition is not true, so such
-- users are counted under '低' — confirm that is intended.
SELECT
    sex,
    IF(level > 5, '高', '低') AS level_type,
    COUNT(DISTINCT user_id) user_num
FROM user_info
GROUP BY
    sex,
    IF(level > 5, '高', '低');


4、字符串函數
#每個月新激活的用戶數

-- Distinct-user count per activation month, where the month key is the first
-- seven characters of firstactivetime.
-- NOTE(review): presumably firstactivetime starts with 'yyyy-MM', which makes
-- that prefix the year-month — verify the column format.
SELECT
    COUNT(DISTINCT user_id)        AS user_num,
    SUBSTR(firstactivetime, 1, 7)  AS month
FROM user_info
GROUP BY SUBSTR(firstactivetime, 1, 7);


substr(string A,int start,int len)
備註:如果不指定截取長度則從起始位一直截取到最後

-- Distinct-user count per phone brand, where the brand is read from the
-- 'phonebrand' key of the JSON text held in extra1.
-- Rows where the key is absent or extra1 is not valid JSON yield NULL from
-- get_json_object and are grouped together under a NULL brand.
SELECT
    GET_JSON_OBJECT(extra1, '$.phonebrand') AS phone_brand,
    COUNT(DISTINCT user_id) user_num
FROM user_info
GROUP BY GET_JSON_OBJECT(extra1, '$.phonebrand');


5、聚合統計函數

#ELLA用戶的2018年的平均支付金額,以及2018年最大的支付日期與最小的支付日期的間隔

-- For user 'ELLA' in 2018: the average pay_amount, and the number of days
-- between the latest and the earliest payment date. pay_time is formatted
-- to 'yyyy-MM-dd' before taking max/min.
-- NOTE(review): year(dt) is compared with the string '2018', which relies on
-- implicit type conversion — confirm this is intended.
SELECT
    AVG(pay_amount) AS avg_amount,
    DATEDIFF(
        MAX(FROM_UNIXTIME(pay_time, 'yyyy-MM-dd')),
        MIN(FROM_UNIXTIME(pay_time, 'yyyy-MM-dd'))
    )
FROM user_trade
WHERE user_name = 'ELLA'
  AND YEAR(dt) = '2018';

#2018年購買的商品品類超過兩個(不含兩個)的用戶數
-- Number of users who bought from more than two distinct goods categories
-- in 2018. The inner query keeps one row per qualifying user; the outer
-- query counts those rows (rows with a NULL user_name are not counted).
SELECT
    COUNT(a.user_name)
FROM (
    SELECT
        user_name,
        COUNT(DISTINCT goods_category) AS category_num
    FROM user_trade
    WHERE YEAR(dt) = '2018'
    GROUP BY user_name
    HAVING COUNT(DISTINCT goods_category) > 2
) AS a;


#用戶激活時間在2018年,年齡段在20-30歲和30-40歲的婚姻狀況分佈

-- Distinct-user count by age band and marital status, for users whose first
-- activation date falls in 2018, restricted to the 20-30 and 30-40 bands.
-- The inner query labels every 2018-activated user with an age band and pulls
-- 'marraige_status' out of the extra1 JSON; the outer query filters the bands
-- and maps status 1 to '已婚', anything else (including NULL) to '未婚'.
-- The key spelling 'marraige_status' is kept as-is because it must match the
-- key stored in the JSON data.
SELECT
    a.age_type,
    IF(a.marraige_status = 1, '已婚', '未婚'),
    COUNT(DISTINCT a.user_id)
FROM (
    SELECT
        CASE
            WHEN age < 20 THEN '20歲以下'
            WHEN age >= 20 AND age < 30 THEN '20-30歲'
            WHEN age >= 30 AND age < 40 THEN '30-40歲'
            ELSE '40歲以上'
        END AS age_type,
        GET_JSON_OBJECT(extra1, '$.marraige_status') AS marraige_status,
        user_id
    FROM user_info
    WHERE TO_DATE(firstactivetime) BETWEEN '2018-01-01' AND '2018-12-31'
) AS a
WHERE a.age_type IN ('20-30歲', '30-40歲')
GROUP BY
    a.age_type,
    IF(a.marraige_status = 1, '已婚', '未婚');

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章