-- Return the names of at most 10 female users whose city is Beijing.
-- There is no ORDER BY, so which 10 rows come back is up to the engine.
select
    user_name
from user_info
where sex = 'female'
  and city = 'beijing'
limit 10;
-- For purchases made on 2019-04-09 in the 'food' category, list the
-- user name, the purchased quantity (piece) and the amount paid.
select
    user_name,
    piece,
    pay_amount
from user_trade
where goods_category = 'food'
  and dt = '2019-04-09';
-- Per goods category, for 2019-01-01 through 2019-04-30 (inclusive):
-- the number of distinct buyers and the total amount paid.
select
    goods_category,
    count(distinct user_name) as num,
    sum(pay_amount)           as total_amount
from user_trade
where dt >= '2019-01-01'
  and dt <= '2019-04-30'
group by
    goods_category;
-- group by 的作用:分類彙總
-- 常用聚合函數:
-- 1. count():計數;count(distinct ...):去重計數
-- 2. sum():求和
-- 3. avg():平均值
-- 4. max():最大值
-- 5. min():最小值
-- group by ... having
-- Users whose total payments in April 2019 exceeded 50,000.
-- The condition is on an aggregate, so it is applied in HAVING (after
-- grouping) rather than in WHERE.
select
    user_name,
    sum(pay_amount) as total_amount
from user_trade
where dt >= '2019-04-01'
  and dt <= '2019-04-30'
group by
    user_name
having
    sum(pay_amount) > 50000;
-- having:對 group by 分組後的結果進行篩選
-- The 5 users with the largest total payments in April 2019,
-- highest total first.
select
    user_name,
    sum(pay_amount) as total_amount
from user_trade
where dt >= '2019-04-01'
  and dt <= '2019-04-30'
group by
    user_name
order by
    total_amount desc
limit 5;
-- 常用函數
-- 1、如何把時間戳轉化爲日期?
-- Show pay_time next to its formatted date-time for trades on 2019-04-09.
-- pay_time is passed to from_unixtime, i.e. it is treated as a Unix
-- timestamp in seconds.
-- The pattern uses 'HH' (hour of day, 00-23). Lowercase 'hh' is the
-- 12-hour clock field; without an am/pm marker it renders 01:00 and 13:00
-- identically, so afternoon payments would look like morning ones.
select
    pay_time,
    from_unixtime(pay_time, 'yyyy-MM-dd HH:mm:ss')
from user_trade
where dt = '2019-04-09';
-- 2、如何計算日期間隔?
-- 用戶的首次激活時間,與2019年5月1日的日期間隔
-- Number of days from each user's first activation date to 2019-05-01
-- (datediff is end date minus start date), shown for at most 10 users.
-- to_date() reduces firstactivetime to its date part before the subtraction.
select
    user_name,
    datediff(
        '2019-05-01',
        to_date(firstactivetime)
    )
from user_info
limit 10;
-- 3、條件函數
-- case when
-- Number of distinct users in each of four age bands:
-- under 20, 20 to under 30, 30 to under 40, and 40 or older.
-- Band labels are pinyin: '20suiyixia' = under 20, '40suiyishang' = 40+.
-- The band is computed once in a derived table so the CASE expression does
-- not have to be repeated (and kept in sync) in GROUP BY.
select
    count(distinct a.user_id) as user_num,
    a.age_type
from (
    select
        user_id,
        case
            when age < 20 then '20suiyixia'
            when age >= 20 and age < 30 then '20-30sui'
            when age >= 30 and age < 40 then '30-40sui'
            when age >= 40 then '40suiyishang'
            -- A NULL age satisfies none of the comparisons above; report it
            -- separately instead of letting ELSE count it as 40 or older.
            else 'unknown'
        end as age_type
    from user_info
) as a
group by
    a.age_type;
-- if 函數
-- Distribution of user level (high vs. low) within each sex.
-- A level greater than 5 is labelled '高' (high); any other value takes
-- the else branch of if() and is labelled '低' (low).
-- if() is repeated in GROUP BY because the grouping key is the computed
-- label, not the raw level.
select
    sex,
    if(level > 5, '高', '低') as level_type,
    count(distinct user_id) as user_num
from user_info
group by
    sex,
    if(level > 5, '高', '低');
-- 4、字符串函數
-- Newly activated users per month.
-- The month key is the first 7 characters of firstactivetime
-- (presumably 'yyyy-MM' -- confirm the column's stored format).
select
    count(distinct user_id)       as user_num,
    substr(firstactivetime, 1, 7) as month
from user_info
group by
    substr(firstactivetime, 1, 7);
-- substr(string A, int start, int len)
-- 備註:如果不指定截取長度,則從起始位一直截取到最後
-- Distinct user count per phone brand. The brand is read from the
-- 'phonebrand' key of the JSON text stored in extra1.
select
    get_json_object(extra1, '$.phonebrand') as phone_brand,
    count(distinct user_id)                 as user_num
from user_info
group by
    get_json_object(extra1, '$.phonebrand');
-- 5、聚合統計函數
-- For user ELLA in 2018: the average payment amount, and the number of
-- days between the latest and the earliest payment date.
-- The year filter is a plain range on dt, the same form the other dt
-- filters in this file use. The previous year(dt) = '2018' wrapped the
-- column in a function and compared the integer result with a string.
-- Assumes dt holds 'yyyy-MM-dd' strings, as the literal comparisons
-- elsewhere in this file suggest -- confirm against the table definition.
select
    avg(pay_amount) as avg_amount,
    datediff(
        max(from_unixtime(pay_time, 'yyyy-MM-dd')),
        min(from_unixtime(pay_time, 'yyyy-MM-dd'))
    )
from user_trade
where dt >= '2018-01-01'
  and dt <= '2018-12-31'
  and user_name = 'ELLA';
-- Number of users who bought from more than two distinct goods
-- categories in 2018 (strictly greater than 2).
-- The inner query keeps one row per qualifying user; the outer query
-- counts those rows.
-- The year filter is a plain range on dt rather than year(dt) = '2018',
-- which wrapped the column in a function and compared the integer result
-- with a string. Assumes dt holds 'yyyy-MM-dd' strings, as the literal
-- comparisons elsewhere in this file suggest -- confirm.
select
    count(a.user_name)
from (
    select
        user_name,
        count(distinct goods_category) as category_num
    from user_trade
    where dt >= '2018-01-01'
      and dt <= '2018-12-31'
    group by
        user_name
    having
        count(distinct goods_category) > 2
) as a;
-- Marital-status distribution of users first activated in 2018,
-- restricted to the '20-30歲' and '30-40歲' age bands.
-- marraige_status (spelled as in the JSON key) is read from extra1; a
-- value equal to 1 is shown as '已婚' (married), anything else as
-- '未婚' (unmarried).
select
    a.age_type,
    if(a.marraige_status = 1, '已婚', '未婚'),
    count(distinct a.user_id)
from (
    select
        user_id,
        get_json_object(extra1, '$.marraige_status') as marraige_status,
        -- Branches are tested in order, so each threshold only sees ages
        -- that the previous branches did not match.
        case
            when age < 20 then '20歲以下'
            when age < 30 then '20-30歲'
            when age < 40 then '30-40歲'
            else '40歲以上'
        end as age_type
    from user_info
    where to_date(firstactivetime) >= '2018-01-01'
      and to_date(firstactivetime) <= '2018-12-31'
) as a
where a.age_type in ('20-30歲', '30-40歲')
group by
    a.age_type,
    if(a.marraige_status = 1, '已婚', '未婚');