某音短視頻
SQL156 各個視頻的平均完播率【簡單】
-- Average completion rate per video for plays that started in 2021.
-- A play counts as complete when the elapsed time between start_time and
-- end_time reaches the video's duration.
-- NOTE(review): assumes duration is stored in seconds; confirm against the schema.
select
    t1.video_id,
    round(
        -- timestampdiff gives real elapsed seconds. Subtracting two DATETIME
        -- values with "-" compares their numeric YYYYMMDDhhmmss forms and is
        -- wrong whenever a play crosses a minute, hour or day boundary.
        avg(if(timestampdiff(second, start_time, end_time) >= duration, 1, 0)),
        3
    ) as avg_comp_play_rate
from
    tb_user_video_log t1
    join tb_video_info t2 on t1.video_id = t2.video_id
where
    year(start_time) = 2021
group by
    t1.video_id
order by
    avg_comp_play_rate desc
使用timestampdiff會更好
avg(if(timestampdiff(second, start_time, end_time) >= duration, 1, 0))
SQL157 平均播放進度大於60%的視頻類別【簡單】
-- Average play progress (percent) per tag, keeping only tags above 60%.
-- Per-play progress is elapsed seconds / duration, capped at 100%.
select
    tag,
    concat(round(avg(play_progress), 2), "%") as avg_play_progress
from
    (
        select
            U.video_id,
            tag,
            duration,
            if(
                timestampdiff(second, start_time, end_time) / duration > 1,
                1,
                timestampdiff(second, start_time, end_time) / duration
            ) * 100 as play_progress
        from
            tb_user_video_log U
            left join tb_video_info V on U.video_id = V.video_id
    ) tb
group by
    tag
having
    -- Filter and sort on the numeric value. The avg_play_progress alias is a
    -- string such as "65.32%": comparing it relies on an implicit cast and
    -- sorting it is lexicographic (e.g. "100.00%" sorts below "60.50%").
    round(avg(play_progress), 2) > 60
order by
    round(avg(play_progress), 2) desc
時間差值直接用減號相減容易出錯,最好使用時間戳函數
TIMESTAMPDIFF(unit,begin,end)
TIMESTAMPDIFF函數返回end-begin的結果,其中begin和end是DATE或DATETIME表達式。
TIMESTAMPDIFF函數允許其參數具有混合類型,例如,begin是DATE值,end可以是DATETIME值。 如果使用DATE值,則TIMESTAMPDIFF函數將其視爲時間部分爲“00:00:00”的DATETIME值。
unit參數是確定(end-begin)的結果的單位,表示爲整數,取值可爲:YEAR/MONTH/DAY/HOUR/MINUTE/SECOND
SQL158 每類視頻近一個月的轉發量/率【中等】
DATEDIFF :求兩個日期間隔的函數
-- Retweet count and retweet rate per tag over the 30 days that end on the
-- most recent play date found in the log.
with latest as (
    -- single row: the newest play date in the whole log
    select max(date(start_time)) as last_dt
    from tb_user_video_log
)
select
    tag,
    sum(if_retweet) as retweet_cnt,
    round(sum(if_retweet) / count(U.uid), 3) as retweet_rate
from tb_user_video_log as U
cross join latest
left join tb_video_info as V
    on U.video_id = V.video_id
-- keep plays at most 29 days older than the newest play date
where datediff(latest.last_dt, date(start_time)) < 30
group by tag
order by retweet_rate desc
SQL159 每個創作者每月的漲粉率及截止當前的總粉絲量【中等】
難點 total_fans是當前月份的,不是某個月份分組的,需要累積,不能使用聚合函數,只能用窗口函數來統計。剩下的就是 case when 打標籤
-- Monthly fan growth rate and running fan total per author for 2021.
with monthly as (
    -- one row per author and month: net follower change and play count
    select
        author,
        date_format(start_time, '%Y-%m') as month,
        sum(
            case if_follow
                when 1 then 1   -- gained a follower
                when 2 then -1  -- lost a follower
                else 0
            end
        ) as follow_n_sum,
        count(author) as play_cnt
    from tb_user_video_log as U
    left join tb_video_info as V
        on U.video_id = V.video_id
    where year(start_time) = 2021
    group by author, month
)
select
    author,
    month,
    -- growth rate = net follower change / plays in that month
    round(follow_n_sum / play_cnt, 3) as fans_growth_rate,
    -- running total up to the current month: it accumulates across month
    -- groups, so it needs a window function rather than a plain aggregate
    sum(follow_n_sum) over (partition by author order by month) as total_fans
from monthly
order by author, total_fans
SQL160 國慶期間每類視頻點贊量和轉發量【較難】
開窗函數,滑動窗口
- rows n preceding:從當前行到前n行(一共n+1行)
- range/rows between 邊界規則1 and 邊界規則2:range表示按照值的範圍進行定義框架,rows表示按照行的範圍進行定義框架
rows between 2 preceding and 2 following # 當前行往前2行+當前行+當前行往後2行(一共5行)
rows between 1 following and 3 following # 當前行的後1——>後3(共3行)
rows between unbounded preceding and current row # 從第一行到當前行
sql
-- For each tag and each day in 2021-10-01..2021-10-03: total likes and the
-- maximum single-day retweet count over the trailing 7 calendar days
-- (current day included).
select
    tag,
    dt,
    sum_like_cnt_7d,
    max_retweet_cnt_7d
from
    (
        -- Evaluate the windows over the full history first and filter
        -- afterwards, so early October can still look back into September.
        select
            tag,
            dt,
            sum(add_likes) over w7 as sum_like_cnt_7d,
            max(add_retweets) over w7 as max_retweet_cnt_7d
        from
            (
                -- daily like / retweet totals per tag
                select
                    tag,
                    date(start_time) as dt,
                    sum(if_like) as add_likes,
                    sum(if_retweet) as add_retweets
                from
                    tb_user_video_log t1
                    join tb_video_info t2 on t1.video_id = t2.video_id
                group by
                    tag,
                    date(start_time)
            ) a
        window w7 as (
            partition by tag
            order by dt
            -- A RANGE frame measures calendar days. "rows 6 preceding" counts
            -- rows and reaches back more than a week whenever a tag has a day
            -- without any plays.
            range between interval 6 day preceding and current row
        )
    ) b
where
    dt between '2021-10-01' and '2021-10-03'
order by
    tag desc,
    dt
SQL161 近一個月發佈的視頻中熱度最高的top3視頻【困難】
新度 最近無播放的天數:所有數據的最新日期-某一視頻的最新播放日期(聚合分組)
這是個聚合分組後,每個分組中的數據
注意這裏要用date類型,否則過不了
-- Top 3 videos by hot index among videos released within the last 30 days,
-- measured from the latest end_time date in the play log.
-- hot index = (100 * completion rate + 5 * likes + 3 * comments + 2 * retweets) * freshness
with latest as (
    -- newest play date in the whole log (compared as a DATE, not a DATETIME)
    select max(date(end_time)) as last_dt
    from tb_user_video_log
),
video_stats as (
    select
        vl.video_id,
        -- share of plays that lasted at least the video's duration
        avg(if(timestampdiff(second, start_time, end_time) >= duration, 1, 0)) as complete_rate,
        sum(if_like) as like_cnt,
        count(comment_id) as comment_cnt,
        sum(if_retweet) as retweet_cnt,
        -- freshness = 1 / (days since this video's most recent play + 1)
        1 / (
            timestampdiff(
                day,
                max(date(end_time)),
                (select last_dt from latest)
            ) + 1
        ) as fresh_index
    from tb_user_video_log as vl
    join tb_video_info as vi
        on vl.video_id = vi.video_id
    -- released at most 29 days before the newest play date
    where timestampdiff(day, date(release_time), (select last_dt from latest)) <= 29
    group by vl.video_id
)
select
    video_id,
    round(
        (100 * complete_rate + 5 * like_cnt + 3 * comment_cnt + 2 * retweet_cnt) * fresh_index,
        0
    ) as hot_index
from video_stats
order by hot_index desc
limit 3
用戶增長場景(某度信息流)
SQL162 2021年11月每天的人均瀏覽文章時長【簡單】
- 計算每次文章瀏覽的時長和日期:
- 過濾目標時間窗的有效瀏覽記錄:WHERE artical_id != 0 AND DATE_FORMAT(in_time, "%Y%m") = "202111"
- 將進入時間轉化爲日期:DATE(in_time) as dt
- 計算瀏覽時長:TIMESTAMPDIFF(SECOND, in_time, out_time) as view_len_sec
- 按日期分組:GROUP BY dt
- 計算人均時長(=總時長÷人數):SUM(view_len_sec) / COUNT(DISTINCT uid) as avg_view_len_sec
- 保留1位小數:ROUND(x, 1)
timestampdiff()函數的作用是返回兩個日期時間之間的整數差。
-- Average article viewing time in seconds per distinct viewer, for each day
-- of November 2021 (rows with artical_id = 0 are not article views).
select
    dt,
    round(sum(view_len_sec) / count(distinct uid), 1) as avg_lensec
from (
    -- one row per article view: viewer, day and duration in seconds
    select
        uid,
        date(in_time) as dt,
        timestampdiff(second, in_time, out_time) as view_len_sec
    from tb_user_log
    where artical_id != 0
      and date_format(in_time, "%Y%m") = "202111"
) views
group by dt
order by avg_lensec
SQL163 每篇文章同一時刻最大在看人數【中等】【同時在線】
同時在線人數問題
本題的坑點:
題目要求在瞬時統計時遵循【先進後出】:如果同一時刻有進入也有離開時,先記錄用戶數增加,再記錄減少。
因此在ORDER BY層面,在遵循dt升序的同時,還要遵循先+1,再-1的原則,即diff DESC:
SUM(diff) OVER(PARTITION BY artical_id ORDER BY dt, diff DESC)
總體sql:
-- Peak number of simultaneous viewers per article.
with events as (
    -- +1 when a viewer enters an article, -1 when the viewer leaves
    select artical_id, in_time as event_time, 1 as flag
    from tb_user_log
    where artical_id != 0
    union all
    select artical_id, out_time as event_time, -1 as flag
    from tb_user_log
    where artical_id != 0
),
running as (
    select
        artical_id,
        -- at identical timestamps entries (+1) are applied before exits (-1)
        sum(flag) over (
            partition by artical_id
            order by event_time, flag desc
        ) as instant_viewer_cnt
    from events
)
select
    artical_id,
    max(instant_viewer_cnt) as max_uv
from running
group by artical_id
order by max_uv desc
SQL164 2021年11月每天新用戶的次日留存率【中等】【留存問題】
留存率的計算是經典的老問題了 就是left join
-- Next-day retention rate of new users for each day of November 2021.
with first_seen as (
    -- each user's first active date
    select uid, min(date(in_time)) as dt
    from tb_user_log
    group by uid
),
active_days as (
    -- distinct (user, date) pairs; both the entry and the exit date count as active
    select uid, date(in_time) as dt
    from tb_user_log
    union
    select uid, date(out_time) as dt
    from tb_user_log
)
select
    n.dt,
    round(count(distinct a.uid) / count(n.uid), 2) as uv_rate
from first_seen n
left join active_days a
    on a.uid = n.uid
    -- active exactly one day after the first active date
    and a.dt = date_add(n.dt, interval 1 day)
where n.dt like '2021-11%'
group by n.dt
order by n.dt;
上述代碼似乎有問題:如果一次登錄跨越了三天(進入和離開之間隔了完整的第二天),拆出來的活躍日期裏沒有第二天,那結果不是錯了?
SQL165 統計活躍間隔對用戶分級結果【較難】
打標籤的題 case when
-- Share of users in each activity grade, measured from the latest out_time
-- in the log. The four branches below map, in order, to:
--   active within the last 7 days and first seen at least 7 days ago
--   first seen within the last 7 days
--   last active 7-29 days ago
--   last active 30 or more days ago
select
    case
        when max_day < 7 and min_day >= 7 then '忠實用戶'
        when max_day < 7 and min_day < 7 then '新晉用戶'
        -- Dormancy depends on the LAST activity. Testing min_day here sent
        -- users first seen 30+ days ago but last active 7-29 days ago to
        -- the final (churned) bucket.
        when max_day between 7 and 29 then '沉睡用戶'
        else '流失用戶'
    end as user_grade,
    round(
        count(distinct uid) / (select count(distinct uid) from tb_user_log),
        2
    ) as ratio
from (
    select
        uid,
        -- days between the newest out_time overall and this user's latest one
        datediff(max(out_time) over (), max(out_time) over (partition by uid)) as max_day,
        -- days between the newest out_time overall and this user's earliest one
        datediff(max(out_time) over (), min(out_time) over (partition by uid)) as min_day
    from tb_user_log
) a
group by user_grade
order by ratio desc
這裏 引入了兩個概念
- 每個用戶最近一次登錄差多少天
- 每個用戶最遠一次登錄差多少天
使用開窗函數來實現,非常巧妙
SQL166 每天的日活數及新用戶佔比【較難】
-- Daily active users and the share of them active for the first time that day.
with active_days as (
    -- distinct (user, date) pairs; both the entry and the exit date count as active
    select uid, date(in_time) as dt
    from tb_user_log
    union
    select uid, date(out_time) as dt
    from tb_user_log
),
first_seen as (
    -- each user's first active date
    select uid, min(date(in_time)) as dt
    from tb_user_log
    group by uid
)
select
    a.dt as dt,
    count(distinct a.uid) as dau,
    round(count(distinct f.uid) / count(distinct a.uid), 2) as ub_new_ratio
from active_days a
left join first_seen f
    -- matches only on the user's first day, so f.uid is non-NULL for new users only
    on f.uid = a.uid
    and f.dt = a.dt
group by a.dt
order by a.dt
與SQL164留存率問題相反
SQL167 連續簽到領金幣【困難】
兩個難點:
- 如何確定簽到日期是否連續?(窗口排序)
- 如何確定每一次簽到獲取金幣的數量?(mod取餘)
-- Coins earned per user and month from daily sign-ins between 2021-07-07 and
-- 2021-10-31. Each sign-in earns 1 coin, except that within a run of
-- consecutive days every day with position mod 7 = 3 earns 3 and every day
-- with position mod 7 = 0 earns 7.
select
    uid,
    date_format(dt, '%Y%m') as month,
    sum(grade) as coin
from (
    select
        uid,
        dt,
        case mod(streak_day, 7)
            when 3 then 3
            when 0 then 7
            else 1
        end as grade
    from (
        -- position of each sign-in day inside its consecutive run
        select
            uid,
            dt,
            row_number() over (partition by uid, streak_key order by dt) as streak_day
        from (
            -- consecutive dates minus their rank share the same anchor date
            select
                uid,
                dt,
                date_sub(dt, interval row_number() over (partition by uid order by dt) day) as streak_key
            from (
                -- one row per user and sign-in day
                select distinct
                    uid,
                    date(in_time) as dt
                from tb_user_log
                where artical_id = 0
                  and sign_in = 1
                  and date(in_time) between '2021-07-07' and '2021-10-31'
            ) t1
        ) t2
    ) t3
) t4
group by uid, month
-- explicit ordering: without it the row order of the result is undefined
order by month, uid;
電商場景(某東商城)
SQL168 計算商城中2021年每月的GMV【簡單】
GMV(Gross Merchandise Volume,商品交易總額)爲已付款訂單和未付款訂單兩者之和。結果按GMV升序排序。
-- Monthly GMV for 2021, keyed by year-month. Rows with status = 2 (refunds)
-- are excluded; only months whose GMV exceeds 100000 are kept, ascending by GMV.
select
    date_format(event_time, '%Y-%m') as month,
    sum(total_amount) as GMV
from tb_order_overall
where year(event_time) = 2021
  and status != 2
group by month
having GMV > 100000
order by GMV
SQL169 統計2021年10月每個退貨率不大於0.5的商品各項指標【中等】
-- Funnel metrics per product for October 2021, keeping products whose refund
-- rate is at most 0.5.
-- A ratio whose denominator is zero is reported as 0 rather than NULL, so
-- such products are not silently dropped by the HAVING filter.
select
    product_id,
    -- click-through rate: clicks / impressions (every event row is one impression)
    round(sum(if_click) / count(1), 3) as ctr,
    -- add-to-cart rate: cart additions / clicks
    coalesce(round(sum(if_cart) / nullif(sum(if_click), 0), 3), 0) as cart_rate,
    -- payment rate: payments / cart additions
    coalesce(round(sum(if_payment) / nullif(sum(if_cart), 0), 3), 0) as payment_rate,
    -- refund rate: refunds / payments
    coalesce(round(sum(if_refund) / nullif(sum(if_payment), 0), 3), 0) as refund_rate
from
    tb_user_event
where
    -- October 2021
    date_format(event_time, '%Y-%m') = '2021-10'
group by
    product_id
having
    -- refund rate not greater than 0.5
    refund_rate <= 0.5
order by
    product_id
SQL170 某店鋪的各商品毛利率及店鋪整體毛利率【中等】
輸出要涵蓋 店鋪和商品 需要用到Union
-- Gross margin for shop 901 since 2021-10: one summary row for the whole
-- shop, followed by every product whose margin exceeds 24.9%.
--   margin = (1 - total purchase cost / total sales revenue) * 100
-- Both levels use the same quantity-weighted formula. An unweighted
-- avg(in_price / price) ignores how many units each order line sold.
-- NOTE(review): no order-status filter is applied; confirm whether unpaid
-- orders should be excluded.
select
    product_id,
    profit_rate
from
    (
        select
            '店鋪彙總' as product_id,
            concat(
                round((1 - sum(in_price * cnt) / sum(price * cnt)) * 100, 1), '%'
            ) as profit_rate,
            0 as sort_group,
            0 as sort_id
        from
            tb_order_overall
            join tb_order_detail using (order_id)
            join tb_product_info using (product_id)
        where
            date_format(event_time, '%Y-%m') >= '2021-10'
            and shop_id = 901
        union all
        select
            product_id,
            concat(
                round((1 - sum(in_price * cnt) / sum(price * cnt)) * 100, 1), '%'
            ) as profit_rate,
            1 as sort_group,
            product_id as sort_id
        from
            tb_order_overall
            join tb_order_detail using (order_id)
            join tb_product_info using (product_id)
        where
            date_format(event_time, '%Y-%m') >= '2021-10'
            and shop_id = 901
        group by
            product_id
        having
            (1 - sum(in_price * cnt) / sum(price * cnt)) * 100 > 24.9
    ) margins
-- An ORDER BY inside a UNION member (without LIMIT) does not order the
-- combined result, so sort here: summary row first, then products by id.
order by
    sort_group,
    sort_id
SQL171 零食類商品中復購率top3高的商品【中等】
某商品復購率 = 近90天內購買它至少兩次的人數 ÷ 購買它的總人數
近90天指包含最大日期(記爲當天)在內的近90天。
-- Top 3 snack products by repurchase rate over the 90 days that end on the
-- latest order date.
--   repurchase rate = buyers with at least two purchases / all buyers
select
    product_id,
    -- rk >= 2 means this buyer purchased the product more than once
    round(count(distinct if(rk >= 2, uid, null)) / count(distinct uid), 3) as rate
from (
    -- number each buyer's purchases of a product in time order
    select
        product_id,
        uid,
        row_number() over (partition by product_id, uid order by event_time) as rk
    from tb_order_detail
    left join tb_product_info using (product_id)
    left join tb_order_overall using (order_id)
    where tag = '零食'
      and status = 1
      and datediff(
            -- a scalar subquery passed as a function argument needs its own
            -- parentheses; date(select ...) is a syntax error
            date((select max(event_time) from tb_order_overall)),
            date(event_time)
          ) < 90 -- within 90 days, latest day included
) main
group by product_id
order by rate desc, product_id
limit 3
SQL172 10月的新戶客單價和獲客成本【較難】
一個uid在一個order_id中可能有多條記錄,所以要用SUM窗口函數,單純的聚合有可能出現一行對應多行的錯誤。同時窗口聚合的分組依據應該是order_id而不是uid
-- Average first-order amount and average acquisition cost for customers
-- whose first paid order falls in October 2021.
with paid_lines as (
    select
        uid,
        event_time,
        total_amount,
        -- rk = 1 marks one detail line of each user's earliest paid order
        row_number() over (partition by uid order by event_time) as rk,
        -- list-price total of the whole order: an order can have several
        -- detail lines, so sum per order_id rather than per uid
        sum(price * cnt) over (partition by order_id) as total
    from tb_order_detail
    join tb_order_overall using (order_id)
    where status = 1
)
select
    -- average amount paid on the first order
    round(avg(total_amount), 1) as avg_amount,
    -- average difference between list-price total and amount paid
    round(avg(total - total_amount), 1) as avg_cost
from paid_lines
where rk = 1
  and date_format(event_time, '%Y-%m') = '2021-10'
SQL173 店鋪901國慶期間的7日動銷率和滯銷率【困難】
動銷率定義爲店鋪中一段時間內有銷量的商品佔當前已上架總商品數的比例(有銷量的商品/已上架總商品數)。
滯銷率定義爲店鋪中一段時間內沒有銷量的商品佔當前已上架總商品數的比例。(沒有銷量的商品/已上架總商品數)。
-- Shop 901, 2021-10-01..2021-10-03: for each day, the share of on-sale
-- products that sold at least once in the trailing 7 days (sale_rate) and
-- its complement (unsale_rate).
select
    dt,
    round(count(distinct product_id) / avg(onsale_cnt), 3) as sale_rate,
    round(1 - count(distinct product_id) / avg(onsale_cnt), 3) as unsale_rate
from
    (
        -- Deliberate cross join: pair every product with every order event to
        -- count, per order date, the shop's products already released by then.
        select
            date(event_time) as dt,
            count(
                distinct
                case
                    when event_time >= release_time then product_id
                end
            ) as onsale_cnt
        from tb_product_info
        cross join tb_order_overall
        where shop_id = 901
          and date(event_time) between '2021-10-01' and '2021-10-03'
        group by dt
    ) tb_3dayreli
    left join
    (
        -- every paid order line of shop 901 with its order date
        select
            product_id,
            date(event_time) as dtt
        from tb_order_overall
        join tb_order_detail using (order_id)
        join tb_product_info using (product_id)
        where status = 1
          and shop_id = 901
    ) tb_sellcnt
    on datediff(tb_3dayreli.dt, tb_sellcnt.dtt) between 0 and 6 -- within 7 days
group by dt
-- explicit ordering: without it the row order of the result is undefined
order by dt
出行場景(某滴打車)
SQL174 2021年國慶在北京接單3次及以上的司機統計信息【簡單】
-- Average order count and average income of drivers who took at least 3
-- orders in the city '北京' between 2021-10-01 and 2021-10-07.
with busy_drivers as (
    -- one row per driver: order count and total fare in the period
    select
        city,
        driver_id,
        count(t1.order_id) as order_cnt,
        sum(fare) as income_amount
    from tb_get_car_order as t1
    join tb_get_car_record as t2
        on t1.order_id = t2.order_id
    where city = '北京'
      and date(order_time) between '2021-10-01' and '2021-10-07'
    group by city, driver_id
    having order_cnt >= 3
)
select
    city,
    round(avg(order_cnt), 3) as avg_order_num,
    round(avg(income_amount), 3) as avg_income
from busy_drivers
group by city;
-- Note: aggregating (SUM, AVG) over the per-driver rows needs its own
-- GROUP BY on the outer level, otherwise the query is rejected.
SQL175 有取消訂單記錄的司機平均評分【簡單】
-- Average rating of every driver who has a cancelled order in October 2021
-- (start_time is null), followed by one overall row for those drivers.
with cancelled_drivers as (
    -- ids of drivers with at least one cancelled order in 2021-10
    select distinct driver_id
    from tb_get_car_order
    where date_format(order_time, '%Y-%m') = '2021-10'
      and start_time is null
)
select
    id,
    avg_score
from (
    select
        driver_id as id,
        round(avg(grade), 1) as avg_score,
        0 as is_total,
        driver_id as sort_id
    from tb_get_car_order
    where driver_id in (select driver_id from cancelled_drivers)
    group by driver_id
    union all
    select
        "總體" as id,
        round(avg(grade), 1) as avg_score,
        1 as is_total,
        0 as sort_id
    from tb_get_car_order
    where driver_id in (select driver_id from cancelled_drivers)
) scores
-- An ORDER BY inside a UNION member (without LIMIT) does not order the
-- combined result, so sort here: drivers by id, overall row last.
order by
    is_total,
    sort_id
SQL176 每個城市中評分最高的司機信息【中等】
-- Highest-rated driver(s) in each city with their daily averages.
with driver_stats as (
    select
        b.city,
        a.driver_id,
        -- average rating
        round(avg(grade), 1) as avg_grade,
        -- orders per distinct order day (first 10 characters of order_time)
        round(count(a.order_id) / count(distinct left(order_time, 10)), 1) as cnt_order,
        -- mileage per distinct order day
        round(sum(mileage) / count(distinct left(order_time, 10)), 3) as cnt_mile,
        -- rank drivers inside a city by their unrounded average rating
        dense_rank() over (
            partition by b.city
            order by avg(grade) desc
        ) as rk
    from tb_get_car_order a
    -- the order table is the left side of the join
    left join tb_get_car_record b
        on a.order_id = b.order_id
    group by b.city, a.driver_id
)
select
    city,
    driver_id,
    avg_grade,
    cnt_order,
    cnt_mile
from driver_stats
where rk = 1
order by cnt_order
SQL177 國慶期間近7日日均取消訂單量【中等】
-- For 2021-10-01..2021-10-03: rolling average of daily completed and
-- cancelled orders over the current day and the 6 preceding daily rows.
with daily as (
    select
        date(order_time) as dt,
        -- completed orders: start_time is not null
        count(start_time) as order_done,
        -- cancelled orders: start_time is null
        sum(case when start_time is null then 1 else 0 end) as order_cancel
    from tb_get_car_order
    group by date(order_time)
),
rolling as (
    select
        dt,
        avg(order_done) over w as finish_num_7d,
        avg(order_cancel) over w as cancel_num_7d
    from daily
    window w as (order by dt rows 6 preceding)
)
select
    dt,
    round(finish_num_7d, 2) as finish_num_7d,
    round(cancel_num_7d, 2) as cancel_num_7d
from rolling
where dt between '2021-10-01' and '2021-10-03' -- first three days of the holiday
order by dt
SQL178 工作日各時段叫車量、等待接單時間和調度時間【較難】
-- Weekday ride requests grouped by time-of-day period: request count,
-- average wait time and average dispatch time, both in minutes.
with weekday_requests as (
    select
        -- the hour in which the request started decides the period
        hour(event_time) as hr,
        timestampdiff(second, event_time, end_time) as wait_time,        -- wait time in seconds
        timestampdiff(second, order_time, start_time) as dispatch_time   -- dispatch time in seconds
    from tb_get_car_record r
    join tb_get_car_order o
        on r.order_id = o.order_id
    -- drop Sunday (1) and Saturday (7)
    where dayofweek(event_time) not in (1, 7)
)
select
    case
        when hr between 7 and 8 then '早高峯'
        when hr between 9 and 16 then '工作時間'
        when hr between 17 and 19 then '晚高峯'
        else '休息時間'
    end as period,
    count(*) as get_car_num,
    round(avg(wait_time / 60), 1) as avg_wait_time, -- seconds to minutes
    round(avg(dispatch_time / 60), 1) as avg_dispatch_time
from weekday_requests
group by period
order by get_car_num;
-- timestampdiff() returns the integer difference between two datetimes in the given unit
-- dayofweek() returns the weekday index: Sunday = 1, Monday = 2, ..., Saturday = 7
SQL179 各城市最大同時等車人數 【較難】
-- Peak number of riders waiting at the same time in each city, October 2021.
select
    city,
    max(wait_uv) as max_wait_uv
from (
    select
        city,
        dt,
        -- at identical timestamps arrivals (+1) are applied before departures (-1)
        sum(uv) over (partition by city order by dt asc, uv desc) as wait_uv
    from (
        -- +1 when a rider starts requesting a car
        select
            city,
            event_time as dt,
            1 as uv
        from tb_get_car_record
        union all
        -- -1 when the wait ends: pickup time if there is one, otherwise the
        -- request's end_time. A LEFT JOIN keeps requests that never got an
        -- order; an inner join dropped their -1 event, so those riders
        -- stayed "waiting" for the rest of the running count.
        -- NOTE(review): for orders cancelled after acceptance this still uses
        -- the record's end_time; confirm that is the intended leave time.
        select
            city,
            coalesce(start_time, end_time) as dt,
            -1 as uv
        from tb_get_car_record as cr
        left join tb_get_car_order as co
            on cr.order_id = co.order_id
        -- a NULL leave time would sort first and subtract before any arrival
        where coalesce(start_time, end_time) is not null
    ) as tb1
) as tb2
where date_format(dt, '%Y-%m') = '2021-10'
group by city
order by max_wait_uv, city;
同時在線問題 union all 登錄登出 + 窗口排序
某寶店鋪分析(電商模式)
SQL180 某寶店鋪的SPU數量【簡單】
-- Number of product rows per style_id, reported as SPU_num, largest first.
select
    style_id,
    count(*) as SPU_num
from product_tb
group by style_id
order by count(*) desc
SPU與SKU區別?
SQL181 某寶店鋪的實際銷售額與客單價【簡單】
-- Total sales amount and average sales amount per distinct buyer.
select
    sales_total,
    round(sales_total / buyer_cnt, 2) as per_trans
from (
    select
        sum(sales_price) as sales_total,
        count(distinct user_id) as buyer_cnt
    from sales_tb
) totals;
SQL182 某寶店鋪折扣率【中等】
GMV即Gross Merchandise Volume,主要是指網站的成交金額,而這裏的成交金額包括:付款金額和未付款金額。類似題SQL168
1、GMV成交金額:累計結算金額,即 sum(sales_price)
2、吊牌總金額:產品數量*吊牌單價,即 sum(sales_num * tag_price)
-- Discount rate in percent: settled sales amount / (units sold * tag price).
select
    round(
        100 * sum(sales_price) / sum(sales_num * tag_price),
        2
    ) as 'discount_rate(%)'
from sales_tb
left join product_tb
    on sales_tb.item_id = product_tb.item_id
SQL183 某寶店鋪動銷率與售罄率【較難】
一、動銷率
SKU即Stock Keeping Unit (庫存量單位)。在很多業務場景下,是計算庫存進出計量,物理上不可分割的最小存貨單元,表示某款商品的具體配置(規格、顏色等)。
動銷率 = 有銷售的SKU數量 / 在售SKU數量
有銷售的SKU數量:售出的SKU數量總和(商品期間銷售數量)
在售的SKU數量 :剩餘(總庫存 - 售出)庫存SKU的數量總和(商品期末庫存數量)
二、售罄率
售罄率 = GMV / 備貨值(備貨值 = 吊牌價 * 庫存數)
GMV :所有訂單的金額總和(很多場景下,下單未付款的訂單金額也計算在內)
吊牌價 :商品詳情頁顯示的價格
問題:直接做連接做計算會導致有重複購買記錄的商品的inventory多次計算,計算結果會變小。 要將兩個表的計算分開進行。
-- Per style, both in percent:
--   pin rate          = units sold / (inventory - units sold)
--   sell-through rate = sales amount / (inventory * tag price)
-- Sales are summed per item before the join so that an item's inventory is
-- counted once, not once per sales row.
with item_sales as (
    select
        item_id,
        sum(sales_num) as num,
        sum(sales_price) as item_GMV
    from sales_tb
    group by item_id
)
select
    style_id,
    round(100 * sum(num) / (sum(inventory) - sum(num)), 2) as 'pin_rate(%)',
    round(100 * sum(item_GMV) / sum(inventory * tag_price), 2) as 'sell-through_rate(%)'
from product_tb a
join item_sales b
    on a.item_id = b.item_id
group by style_id
order by style_id
SQL184 某寶店鋪連續2天及以上購物的用戶及其對應的天數【較難】
MySQL的date_sub用法:
語法:date_sub(date,interval expr type),函數從日期減去指定的時間間隔
date_sub('2019-07-27', interval 30 day)表示往前推30天
可以直接 用日期-數字,但最好別這麼勇
-- Users who purchased on 2 or more consecutive days, with the length of each run.
with purchase_days as (
    -- one row per user and purchase date (several orders on one day collapse)
    select distinct
        user_id,
        sales_date
    from sales_tb
),
keyed as (
    select
        user_id,
        -- consecutive dates minus their rank all give the same anchor date
        date_sub(
            sales_date,
            interval row_number() over (partition by user_id order by sales_date) day
        ) as diff
    from purchase_days
)
select
    user_id,
    count(diff) as days_count
from keyed
group by user_id, diff
having count(diff) >= 2 -- at least two consecutive days
order by user_id
牛客直播課分析(在線教育行業)
SQL185 牛客直播轉換率【簡單】
-- Sign-up conversion per course in percent: sum(if_sign) / sum(if_vw).
select
    btb.course_id,
    course_name,
    round(sum(if_sign) / sum(if_vw) * 100, 2) as sign_rate
from behavior_tb btb
join course_tb ctb
    on ctb.course_id = btb.course_id
-- one output row per course
group by btb.course_id, course_name
order by btb.course_id
SQL186 牛客直播開始時各直播間在線人數【中等】
-- Attendance rows per course that cover 19:00 (compared at minute precision):
-- entered at or before 19:00 and left at or after 19:00.
select
    atb.course_id,
    course_name,
    count(*) as online_num
from course_tb ctb
join attend_tb atb
    on ctb.course_id = atb.course_id
where date_format(in_datetime, '%H:%i') <= '19:00'
  and date_format(out_datetime, '%H:%i') >= '19:00'
group by atb.course_id, course_name
order by atb.course_id
開播人數:用戶的進入時間在19點前(包含19點),離開時間在19點後(包含19點)
即,開播時間在進入後離開前,即 開播時間 between ‘進入時間’ and ‘離開時間’
時間的格式,h- i -s
select date_format(now(), '%Y-%m-%d %H:%i:%s'); -- result: 2020-12-07 22:18:58
select date_format(now(), '%Y-%m-%d %H:%i');    -- result: 2020-12-07 22:18
select date_format(now(), '%Y-%m-%d %H');       -- result: 2020-12-07 22
select date_format(now(), '%Y-%m-%d');          -- result: 2020-12-07
select date_format(now(), '%H:%i:%s');          -- result: 22:18:58
select date_format(now(), '%H');                -- result: 22
SQL187 牛客直播各科目平均觀看時長【中等】
-- Average watch time per course in whole minutes per attendance row,
-- rounded to 2 decimals, longest first.
select
    course_name,
    round(
        avg(timestampdiff(minute, in_datetime, out_datetime)),
        2
    ) as avg_Len
from attend_tb atb
join course_tb ctb
    on ctb.course_id = atb.course_id
group by course_name
order by avg_Len desc
timestampdiff 求時間差 函數
語法: timestampdiff(unit(單位),begin,end)
begin和end可以爲DATE或DATETIME類型,並且可允許參數爲混合類型。
DATEDIFF函數返回date1 - date2的計算結果,date1和date2兩個參數需是有效的日期或日期時間值;如果參數傳遞的是日期時間值,DATEDIFF函數僅將日期部分用於計算,並忽略時間部分(只有值的日期部分參與計算)
select datediff('2022-04-30', '2022-04-29');                   -- 1
select datediff('2022-04-30', '2022-04-30');                   -- 0
select datediff('2022-04-29', '2022-04-30');                   -- -1
select datediff('2022-04-30', '2022-04-30 14:00:00');          -- 0
select datediff('2022-04-30 13:00:00', '2022-04-29 14:00:00'); -- 1
select datediff('2017-06-25 09:34:21', '2017-06-15');          -- 10
TIMESTAMPDIFF函數用於計算兩個日期的時間差
-- difference: 25 seconds
select timestampdiff(second, '2022-04-23 14:57:00', '2022-04-23 14:57:25');
-- difference: 5 minutes
select timestampdiff(minute, '2022-04-23 15:00:00', '2022-04-23 15:05:00');
-- difference: 5 hours
select timestampdiff(hour, '2022-04-23 11:20:00', '2022-04-23 16:20:00');
-- difference: 2 days
select timestampdiff(day, '2022-04-23 11:20:00', '2022-04-25 11:20:00');
-- difference: 1 week
select timestampdiff(week, '2022-04-23 11:20:00', '2022-04-30 11:20:00');
-- difference: 1 month
select timestampdiff(month, '2022-04-23 11:20:00', '2022-05-30 11:20:00');
-- difference: 1 quarter
select timestampdiff(quarter, '2022-04-23 11:20:00', '2022-07-23 11:20:00');
-- difference: 1 year
select timestampdiff(year, '2022-04-23 11:20:00', '2023-04-23 11:20:00');
SQL188 牛客直播各科目出勤率【較難】
一開始想着這麼寫,後來發現attend_tb 中會有重複
-- Attendance rate per course in percent:
-- distinct users with an attendance row of 10+ minutes / sign-up rows.
with signed as (
    -- sign-up rows per course
    select
        course_id,
        count(user_id) as b
    from behavior_tb
    where if_sign = 1
    group by course_id
),
attended as (
    -- distinct users per course with an attendance row of at least 10
    -- minutes; attend_tb can hold several rows per user
    select
        course_id,
        count(distinct user_id) as a
    from attend_tb
    where timestampdiff(minute, in_datetime, out_datetime) >= 10
    group by course_id
)
select
    signed.course_id,
    course_name,
    round(100 * attended.a / signed.b, 2) as attend_rate
from signed
join attended
    on attended.course_id = signed.course_id
join course_tb
    on attended.course_id = course_tb.course_id
order by signed.course_id
SQL189 牛客直播各科目同時在線人數【較難】
-- Peak number of simultaneous attendees per course.
with events as (
    -- +1 when a user enters, -1 when the user leaves
    select course_id, in_datetime as dt, 1 as uv
    from attend_tb
    union all
    select course_id, out_datetime as dt, -1 as uv
    from attend_tb
),
running as (
    select
        course_id,
        course_name,
        -- at identical timestamps entries (+1) are applied before exits (-1)
        sum(uv) over (
            partition by course_id
            order by dt, uv desc
        ) as uv_cnt
    from events
    join course_tb using (course_id)
)
select
    course_id,
    course_name,
    max(uv_cnt) as max_num -- peak concurrent attendees
from running
group by course_id, course_name
order by course_id;
某乎問答(內容行業)
SQL190 某乎問答11月份日人均回答量【簡單】
-- Answers per distinct author for each answer date in November.
-- NOTE(review): only the month is filtered; confirm the data covers a single year.
select
    answer_date,
    -- authors are de-duplicated, answers are not
    round(count(issue_id) / count(distinct author_id), 2) as per_num
from
    answer_tb
where
    month(answer_date) = 11
group by
    answer_date
-- explicit ordering: GROUP BY alone does not guarantee the row order
order by
    answer_date
注意用戶id去重
SQL191 某乎問答高質量的回答中用戶屬於各級別的數量【中等】
-- Count of answers with char_len >= 100, bucketed by the author's level.
select
    level_cut,
    count(issue_id) as num
from (
    select
        issue_id,
        -- derived bucket label; levels outside 1-6 (or unknown authors) get ''
        case
            when b.author_level in (1, 2) then '1-2級'
            when b.author_level in (3, 4) then '3-4級'
            when b.author_level in (5, 6) then '5-6級'
            else ''
        end as level_cut
    from answer_tb a
    left join author_tb b
        on a.author_id = b.author_id
    where char_len >= 100 -- high-quality answers
) graded
group by level_cut
order by num desc
case when 打標籤
SQL192 某乎問答單日回答問題數大於等於3個的所有用戶【中等】
-- Authors who answered 3 or more questions on a single day in November.
select
    answer_date,
    author_id,
    count(issue_id) as answer_cnt
from answer_tb
where month(answer_date) = 11
group by answer_date, author_id
having answer_cnt >= 3 -- filter after grouping
order by answer_date
SQL193 某乎問答回答過教育類問題的用戶裏有多少用戶回答過職場類問題【中等】
-- Number of distinct authors who answered an 'Education' question and also
-- answered a 'Career' question.
with career_authors as (
    -- authors with at least one answer to a 'Career' question
    select author_id
    from issue_tb a
    join answer_tb b
        on a.issue_id = b.issue_id
    where issue_type = 'Career'
)
select
    count(distinct author_id) as num
from issue_tb t1
join answer_tb t2
    on t1.issue_id = t2.issue_id
where issue_type = 'Education'
  and author_id in (select author_id from career_authors)
SQL194 某乎問答最大連續回答問題天數大於等於3天的用戶及其對應等級【較難】【同時在線】
-- Authors whose longest run of consecutive answer days is at least 3,
-- together with their level.
select
    s.author_id,
    author_level,
    s.days_cnt
from
    (
        -- Longest run per author: an author can have several qualifying
        -- runs, so collapse them to a single row.
        select
            author_id,
            max(streak_len) as days_cnt
        from
            (
                select
                    author_id,
                    count(*) as streak_len
                from
                    (
                        select
                            author_id,
                            -- Consecutive dates minus their rank give the same
                            -- anchor date. date_sub does real calendar
                            -- arithmetic; "answer_date - n" subtracts from the
                            -- numeric YYYYMMDD form and breaks runs that cross
                            -- a month boundary.
                            date_sub(
                                answer_date,
                                interval row_number() over (
                                    partition by author_id
                                    order by answer_date
                                ) day
                            ) as diff
                        from
                            (
                                -- one row per author and answer date
                                select distinct
                                    author_id,
                                    answer_date
                                from
                                    answer_tb
                            ) t1
                    ) t2
                group by
                    author_id,
                    diff
            ) t3
        group by
            author_id
        having
            max(streak_len) >= 3 -- at least three consecutive days
    ) s
    join author_tb on s.author_id = author_tb.author_id
-- ordering belongs on the outermost query; inside a derived table it has no effect
order by
    s.author_id