-- 二刷:
-- 1082:方法2也可以處理並列問題
-- 1083:方法1和方法2(透視表方法)都不錯
-- 1084:方法2很重要,注意三表左連接的方式
-- 1097:方法3真的很厲害
-- 1098:方法1依舊可以改進(可以少用一個not in)
-- 1107:方法2中一定要有條件where activity='login'
-- 1112:看一下方法2中處理並列course的角度
-- 1127:方法4和方法5,尤其方法5中轉化表的方式值得學習
-- 1148:注意group by可以替代distinct
-- 1075. 項目員工 I
-- 簡單題目,重點注意小數點控制方法即可
-- Average experience_years of the employees assigned to each project,
-- rounded to two decimal places with ROUND(..., 2).
SELECT
    project_id,
    ROUND(AVG(experience_years), 2) AS average_years
FROM project AS proj
INNER JOIN employee AS emp
    ON proj.employee_id = emp.employee_id
GROUP BY project_id
-- 1076. 項目員工II
-- 考慮並列的情況,自己對這種情況掌握還不夠熟練
-- Projects with the largest number of rows in project. Comparing each
-- project's row count with the single largest count keeps every project that
-- ties for first place.
SELECT project_id
FROM project AS proj
GROUP BY project_id
HAVING COUNT(*) = (
    -- Largest per-project row count.
    SELECT COUNT(*) AS head_count
    FROM project
    GROUP BY project_id
    ORDER BY head_count DESC
    LIMIT 1
)
-- 1077. 項目員工 III
-- Approach 1 (author's own, originally not passing): walk the rows ordered by
-- project and descending experience, and keep, for each project, every
-- employee tied for the highest experience_years.
-- Fixes over the original attempt:
--   * The join now happens inside a derived table and the user variables are
--     evaluated over that single row source. The author traced the unstable
--     ordering of the original to running the variables directly over the
--     joined tables.
--   * A row is flagged as top only when it ties with a row that is itself
--     flagged. Previously any two adjacent rows with equal years (for example
--     the 5th and 6th of a project) caused the later one to be flagged.
-- NOTE(review): this still relies on the select list being evaluated left to
-- right over rows already sorted by ORDER BY, which MySQL does not guarantee;
-- confirm on the target server version.
SELECT project_id, employee_id
FROM (
    SELECT
        joined.project_id,
        joined.employee_id,
        @is_top := CASE
            -- Same project, previous row was a top row, same experience: a tie.
            WHEN @pro = joined.project_id
                 AND @is_top = 1
                 AND @year = joined.experience_years THEN 1
            -- Same project but below the top experience.
            WHEN @pro = joined.project_id THEN 0
            -- First row of a new project.
            ELSE 1
        END AS cnt,
        @pro := joined.project_id,
        @year := joined.experience_years
    FROM
        (SELECT @pro := NULL, @year := NULL, @is_top := 0) AS init,
        (
            SELECT p.project_id, e.employee_id, e.experience_years
            FROM project AS p
            INNER JOIN employee AS e
                ON p.employee_id = e.employee_id
        ) AS joined
    ORDER BY joined.project_id ASC, joined.experience_years DESC
) AS t1
WHERE cnt = 1;
-- Approach 2 (recorded score 0.5177): determine each project's top
-- experience_years, then join back so that every employee tied at that value
-- is returned.
-- Fix: the row-number alias is `rk` instead of `rank`. RANK is a reserved word
-- from MySQL 8.0.2 onward, so `AS rank` no longer parses there.
SELECT DISTINCT P.project_id, P.employee_id
FROM project P
INNER JOIN Employee E
    ON P.employee_id = E.employee_id
INNER JOIN (
    -- One (project_id, experience_years) pair per project: the first row in
    -- descending-experience order.
    SELECT T1.project_id, T1.experience_years
    FROM (
        SELECT
            -- Restart the counter whenever the project changes.
            IF(@project = e.project_id, @rank := @rank + 1, @rank := 1) AS rk,
            @project := e.project_id,
            e.*
        FROM
            (SELECT @project := NULL, @rank := 0) r,
            (
                SELECT P.project_id, P.employee_id, E.experience_years
                FROM Project P
                INNER JOIN Employee E
                    ON P.employee_id = E.employee_id
            ) e
        ORDER BY e.project_id ASC, e.experience_years DESC
    ) T1
    WHERE T1.rk = 1
) T2
    ON E.experience_years = T2.experience_years
    AND P.project_id = T2.project_id
ORDER BY P.project_id, P.employee_id;
-- Approach 3 (recorded score 0.9624), a refinement of approach 2: keep the
-- (project_id, experience_years) pairs that equal the per-project maximum.
-- Ties are kept because every employee at the maximum matches the IN list.
SELECT
    proj.project_id,
    emp.employee_id
FROM project AS proj
INNER JOIN employee AS emp
    ON proj.employee_id = emp.employee_id
    AND (proj.project_id, emp.experience_years) IN (
        -- Highest experience_years found on each project.
        SELECT
            p2.project_id,
            MAX(e2.experience_years)
        FROM project AS p2
        INNER JOIN employee AS e2
            ON p2.employee_id = e2.employee_id
        GROUP BY p2.project_id
    )
-- 1082. 銷售分析 I
-- 處理並列問題,大概就這兩種方法
-- Approach 1 (recorded score 0.9536): sellers whose summed price equals the
-- highest per-seller sum; the equality keeps every seller tied for first.
SELECT seller_id
FROM sales
GROUP BY seller_id
HAVING SUM(price) = (
    -- Highest per-seller total.
    SELECT SUM(price) AS seller_total
    FROM sales
    GROUP BY seller_id
    ORDER BY seller_total DESC
    LIMIT 1
);
-- Approach 2 (recorded score 0.6954, which the author flags as possibly
-- inaccurate): a seller qualifies when its summed price is greater than or
-- equal to every seller's summed price.
SELECT seller_id
FROM sales
GROUP BY seller_id
HAVING SUM(price) >= ALL (
    SELECT SUM(price)
    FROM sales
    GROUP BY seller_id
)
-- 1083. 銷售分析 II
-- Author's own approach (recorded score 0.8700; the author considers the
-- logic clumsy): buyers of a product named 'S8' who do not appear among the
-- buyers of a product named 'iPhone'.
SELECT DISTINCT buyer_id
FROM product AS prod
INNER JOIN sales AS sale
    ON prod.product_id = sale.product_id
WHERE product_name = 'S8'
    AND buyer_id NOT IN (
        -- Everyone who bought an 'iPhone'.
        SELECT buyer_id
        FROM product AS iphone_prod
        INNER JOIN sales AS iphone_sale
            ON iphone_prod.product_id = iphone_sale.product_id
        WHERE product_name = 'iPhone'
    )
-- Approach 2 (recorded score 0.6426): pivot-table style. The author finds the
-- logic clear even though the score is not great. Each buyer is reduced to
-- two 0/1 flags, one per product name, and the outer filter keeps buyers with
-- the S8 flag set and the iPhone flag clear.
SELECT flags.buyer_id
FROM (
    SELECT
        buyer_id,
        MAX(CASE WHEN product_name = 'S8' THEN 1 ELSE 0 END) AS s8,
        MAX(CASE WHEN product_name = 'iPhone' THEN 1 ELSE 0 END) AS iphone
    FROM Sales AS sale
    LEFT JOIN Product AS prod
        ON sale.product_id = prod.product_id
    GROUP BY buyer_id
) AS flags
WHERE flags.s8 = 1
    AND flags.iphone = 0
-- 1084. 銷售分析III
-- Author's own approach (recorded score 0.2394; the author calls it slow):
-- products with a sale dated 2019-01-01 through 2019-03-31 that never appear
-- in a sale dated outside that range.
SELECT DISTINCT
    prod.product_id,
    prod.product_name
FROM product AS prod
INNER JOIN sales AS sale
    ON prod.product_id = sale.product_id
WHERE sale_date BETWEEN '2019-01-01' AND '2019-03-31'
    AND prod.product_id NOT IN (
        -- Products with at least one sale before or after the range.
        SELECT product_id
        FROM sales
        WHERE sale_date < '2019-01-01'
            OR sale_date > '2019-03-31'
    )
ORDER BY prod.product_id;
-- Approach 2 (recorded score 94.36): anti-join. Products with a sale inside
-- 2019-01-01..2019-03-31 are left-joined to products with a sale outside that
-- range; rows that find no outside match were sold only inside it. Product is
-- left-joined last to pick up the name.
SELECT
    in_range.product_id,
    Product.product_name
FROM (
    SELECT DISTINCT product_id
    FROM Sales
    WHERE sale_date BETWEEN DATE('2019-01-01') AND DATE('2019-03-31')
) AS in_range
LEFT JOIN (
    SELECT DISTINCT product_id
    FROM Sales
    WHERE sale_date < DATE('2019-01-01')
        OR sale_date > DATE('2019-03-31')
) AS out_of_range
    ON in_range.product_id = out_of_range.product_id
LEFT JOIN Product
    ON Product.product_id = in_range.product_id
WHERE out_of_range.product_id IS NULL
-- 1097. 遊戲玩法分析 V
-- Author's own approach (recorded score 0.7164): number each player's rows in
-- date order with user variables, treat row 1 as the install date, and
-- left-join activity dated one day later to measure day-1 retention.
-- Fixes:
--   * The next-day match uses DATE_ADD instead of `event_date - 1`.
--     Subtracting an integer from a DATE makes MySQL treat the date as the
--     number YYYYMMDD, so the first day of a month never matched the last day
--     of the previous month.
--   * The counter reset no longer lives in `WHEN @player := player_id`, which
--     evaluates as false (leaving cnt NULL) when player_id is 0.
SELECT
    t2.event_date AS install_dt,
    COUNT(*) AS installs,
    -- COUNT(a2.event_date) counts only the installs that found a next-day row.
    ROUND(COUNT(a2.event_date) / COUNT(*), 2) AS Day1_retention
FROM (
    SELECT
        player_id,
        event_date,
        IF(@player = player_id, @cnt := @cnt + 1, @cnt := 1) AS cnt,
        @player := player_id
    FROM activity AS a1, (SELECT @cnt := NULL, @player := NULL) AS t1
    ORDER BY player_id, event_date
) AS t2
LEFT JOIN activity AS a2
    ON t2.player_id = a2.player_id
    AND a2.event_date = DATE_ADD(t2.event_date, INTERVAL 1 DAY)
WHERE t2.cnt = 1
GROUP BY t2.event_date
-- Approach 2 (recorded score 0.7537): the install date can also be obtained
-- as MIN(event_date) per player; activity dated exactly one day later is then
-- left-joined to measure day-1 retention.
-- Fix: the one-day gap is tested with DATEDIFF. The original
-- `b.event_date - a.install_dt = 1` subtracts the dates as YYYYMMDD numbers,
-- which is not 1 across a month boundary (e.g. 20160301 - 20160229 = 72).
SELECT
    a.install_dt,
    COUNT(a.player_id) AS installs,
    ROUND(COUNT(b.player_id) / COUNT(a.player_id), 2) AS Day1_retention
FROM (
    SELECT
        player_id,
        MIN(event_date) AS install_dt
    FROM Activity
    GROUP BY player_id
) AS a
LEFT JOIN Activity AS b
    ON a.player_id = b.player_id
    AND DATEDIFF(b.event_date, a.install_dt) = 1
GROUP BY a.install_dt
# 牛逼的方法 直接沒有使用join 0.9925 重點學習
# 最內層的臨時表中,大佬比我多取了一列(上一次玩遊戲的日期)
# 巧妙的把昨天 轉換爲 上一天!
-- Day-1 retention per install date, computed without joining Activity to
-- itself. Output: install_dt, installs (number of per-player rows sharing
-- that date) and Day1_retention (SUM(isCalc) / COUNT(*), two decimals).
SELECT event_date AS install_dt, COUNT(*) AS installs
, round(SUM(isCalc) / COUNT(*), 2) AS Day1_retention
FROM (
-- One row per player: the earliest of the kept dates, plus isCalc = 1 when
-- the player's second row is dated exactly one day after its lastDate.
SELECT a.player_id, MIN(a.event_date) AS event_date, MAX(CASE WHEN a.rk = 2 AND DATE_ADD(lastDate, INTERVAL 1 DAY) = event_date
THEN 1 ELSE 0 END) AS isCalc
FROM
-- Keep only the rows numbered 1 and 2 for each player.
(SELECT a.player_id, a.event_date, a.lastDate, a.rk
FROM (
-- rk numbers each player's rows in (player_id, event_date) order. lastDate
-- exposes @preDate, which holds the event_date assigned while processing the
-- previous row. NOTE(review): this relies on the select list being evaluated
-- left to right over already-sorted rows; confirm for the target MySQL version.
SELECT a.player_id, a.event_date, @preDate AS lastDate, CASE WHEN @prePid = a.player_id THEN @curRank := @curRank + 1
ELSE @curRank := 1 END AS rk, @prePid := player_id, @preDate := event_date
FROM Activity a, (SELECT @curRank := 0, @prePid := NULL, @preDate := NULL) r
ORDER BY a.player_id, a.event_date) a
WHERE a.rk = 1 OR a.rk = 2) a
GROUP BY a.player_id
) a
GROUP BY a.event_date;
-- 1098. 小衆書籍
-- 本題遺漏了要求!是考慮最近一年內的訂單!
-- Recorded score 0.9629. The author likes the logic but suspects one of the
-- NOT IN filters could be removed.
-- A book is returned unless (a) its orders with a dispatch_date strictly
-- between 2018-06-23 and 2019-06-23 total 10 or more in quantity, or (b) its
-- available_from is fewer than 30 days before 2019-06-23.
SELECT
    book_id,
    name
FROM Books
WHERE book_id NOT IN (
        SELECT ord.book_id
        FROM Orders AS ord
        WHERE ord.dispatch_date > '2018-06-23'
            AND ord.dispatch_date < '2019-06-23'
        GROUP BY ord.book_id
        HAVING SUM(ord.quantity) >= 10
    )
    AND book_id NOT IN (
        SELECT recent.book_id
        FROM Books AS recent
        WHERE DATEDIFF('2019-06-23', recent.available_from) < 30
    );
-- Approach 2 (author's rework, recorded score only 0.47; the author wants to
-- find out why): books available before 2019-05-23 are left-joined to orders
-- dispatched after 2018-06-23, and a book is kept when its ordered quantity
-- totals less than 10 or it has no matching order at all.
-- Fix: name is listed in GROUP BY. It was selected while grouping by book_id
-- only, so the query depended on the server inferring that name is determined
-- by book_id, or on ONLY_FULL_GROUP_BY being disabled.
-- NOTE(review): grouping by (book_id, name) returns the same rows as long as
-- book_id identifies a single Books row; confirm against the schema.
SELECT
    a.book_id,
    a.name
FROM (
    SELECT book_id, name
    FROM Books
    WHERE available_from < '2019-05-23'
) AS a
LEFT JOIN (
    SELECT book_id, quantity
    FROM orders
    WHERE dispatch_date > '2018-06-23'
) AS b
    ON a.book_id = b.book_id
GROUP BY a.book_id, a.name
-- SUM over no matching orders is NULL; IFNULL treats that as zero copies.
HAVING IFNULL(SUM(b.quantity), 0) < 10
-- 1107. 每日新用戶統計
-- 理解一個地方,這裏的90天是指2019-06-30之前的90天。
-- Approach 1: number each user's rows in date order with user variables and
-- count, per date, the users whose first row falls on or after the date 90
-- days before 2019-06-30.
-- Fix: only 'login' rows are numbered. Without the filter a user whose
-- earliest row is another activity (the author found users whose first row is
-- a logout) was dated by that row instead of by the first login.
SELECT
    activity_date AS login_date,
    COUNT(*) AS user_count
FROM (
    SELECT
        activity,
        activity_date,
        -- Restart the counter whenever user_id changes.
        IF(@id = user_id, @cnt := @cnt + 1, @cnt := 1) AS cnt,
        @id := user_id
    FROM traffic, (SELECT @id := 0, @cnt := 0) AS t1
    WHERE activity = 'login'
    ORDER BY user_id, activity_date
) AS t2
WHERE cnt = 1
    AND activity_date >= SUBDATE('2019-06-30', INTERVAL 90 DAY)
GROUP BY activity_date
-- Approach 2 (recorded score 0.7292): a user's first login date is
-- MIN(activity_date) over that user's 'login' rows. Users are then counted
-- per first-login date, keeping dates no earlier than 90 days before
-- 2019-06-30.
SELECT
    first_login.login_date,
    COUNT(first_login.user_id) AS user_count
FROM (
    SELECT
        user_id,
        MIN(activity_date) AS login_date
    FROM Traffic
    WHERE activity = 'login'
    GROUP BY user_id
) AS first_login
WHERE first_login.login_date >= SUBDATE('2019-06-30', INTERVAL 90 DAY)
GROUP BY first_login.login_date
ORDER BY first_login.login_date
-- Approach 1 revised after approach 2 (recorded score 0.5280): the same
-- user-variable row numbering, restricted to 'login' rows so that row 1 of
-- each user is that user's first login.
SELECT
    activity_date AS login_date,
    COUNT(*) AS user_count
FROM (
    SELECT
        activity,
        activity_date,
        CASE
            WHEN @id = user_id THEN @cnt := @cnt + 1
            ELSE @cnt := 1
        END AS cnt,
        @id := user_id
    FROM traffic
    CROSS JOIN (SELECT @id := 0, @cnt := 0) AS init
    WHERE activity = 'login'  -- the condition added in this revision
    ORDER BY user_id, activity_date
) AS numbered
WHERE cnt = 1
    AND activity_date >= SUBDATE('2019-06-30', INTERVAL 90 DAY)
GROUP BY activity_date
-- 1112. 每位學生的最高成績
-- Author's own approach (recorded as 100%): order each student's rows by
-- grade descending and then course_id ascending, number them with user
-- variables, and keep row 1, i.e. the highest grade with the smallest
-- course_id among ties.
SELECT
    student_id,
    course_id,
    grade
FROM (
    SELECT
        student_id,
        course_id,
        grade,
        CASE
            WHEN @id = student_id THEN @cnt := @cnt + 1
            ELSE @cnt := 1
        END AS cnt,
        @id := student_id
    FROM Enrollments
    CROSS JOIN (SELECT @id := NULL, @cnt := NULL) AS init
    ORDER BY student_id, grade DESC, course_id
) AS numbered
WHERE cnt = 1
ORDER BY student_id
-- Approach 2 (recorded score 0.3265): keep the rows whose (student_id, grade)
-- equals that student's MAX(grade), then resolve ties between courses with
-- MIN(course_id), which is the point the author wants to remember.
-- Fix: grade is added to GROUP BY. It was selected without being grouped or
-- aggregated, which MySQL rejects when ONLY_FULL_GROUP_BY is enabled. The
-- result is unchanged because, after the filter, every remaining row of a
-- student carries the same grade.
SELECT
    student_id,
    MIN(course_id) AS course_id,
    grade
FROM Enrollments
WHERE (student_id, grade) IN (
    SELECT student_id, MAX(grade)
    FROM Enrollments
    GROUP BY student_id
)
GROUP BY student_id, grade
ORDER BY student_id
-- 1113. Reported Posts
-- 表沒有主鍵,因此可能會存在重複行。列action是枚舉類型,取值可能爲(‘view’, ‘like’, ‘reaction’, ‘comment’, ‘report’, ‘share’)。列extra是關於action的可選信息,例如舉報的原因或者反應(reaction)的類型。
-- 需要查詢的是不重複的post_id的個數(當時沒讀懂英文題目)。
-- An easy problem (the author notes it needs no special attention): for each
-- value of extra, count the distinct posts that have a 'report' action on the
-- given date.
SELECT
    extra AS report_reason,
    COUNT(DISTINCT post_id) AS report_count
FROM Actions
WHERE action = 'report'
    AND action_date = '2019-07-4'
GROUP BY extra
-- 1126. Active Businesses
-- 總結一下:這道題就是查找牛逼的企業(至少有 兩項過硬的業務)
-- Approach 1 (recorded score 0.6970): a business qualifies when more than one
-- of its event rows has occurences above the average occurences of the same
-- event_type. The column name occurences is spelled as in the table.
SELECT business_id
FROM events AS ev
INNER JOIN (
    -- Average occurences per event_type over all rows.
    SELECT
        event_type,
        AVG(occurences) AS avg_occurences
    FROM events
    GROUP BY event_type
) AS type_avg
    ON ev.event_type = type_avg.event_type
    AND ev.occurences > type_avg.avg_occurences
GROUP BY business_id
HAVING COUNT(*) > 1
-- 1127. User Purchase Platform
-- 該表格記錄了用戶的在線購物網站購物的消費歷史。
-- 編寫一個SQL查詢來查找每個日期的用戶總數和僅使用移動設備、僅使用桌面設備以及同時使用移動設備和桌面設備的總開銷。
-- Author's first attempt, which the author marks as wrong. It aggregates by
-- the stored platform value per date, so it reports only the platform values
-- present in the rows and does not derive a combined 'both' bucket for users
-- who used two platforms on the same date.
-- Fix: the total column is spelled total_amount (it was `tltal_amount`),
-- matching the column name used by approaches 4 and 5 of this problem.
SELECT
    spend_date,
    platform,
    SUM(amount) AS total_amount,
    COUNT(DISTINCT user_id) AS total_users
FROM spending
GROUP BY spend_date, platform
-- Author's second attempt. Spending is reduced to one row per (user, date),
-- labelled 'both' when the user has two rows that day and with the single
-- platform otherwise, then summed per (date, platform).
-- Fixes:
--   * Every date is paired with all three platform labels and the per-user
--     rows are left-joined onto that grid, so a combination nobody used (the
--     author's example: 2019-07-02 | both) is reported as 0 | 0 instead of
--     being absent.
--   * The single-platform branch selects MAX(platform) rather than the bare
--     column, which was neither grouped nor aggregated.
--   * The total column is spelled total_amount (it was `tltal_amount`).
-- NOTE(review): like the original COUNT(*) tests, this assumes at most one row
-- per (user_id, spend_date, platform); confirm against the schema.
SELECT
    grid.spend_date,
    grid.platform,
    IFNULL(SUM(t.amount), 0) AS total_amount,
    COUNT(DISTINCT t.user_id) AS total_users
FROM (
    -- All (date, label) combinations that must appear in the output.
    SELECT dates.spend_date, labels.platform
    FROM (
        SELECT DISTINCT spend_date
        FROM spending
    ) AS dates
    CROSS JOIN (
        SELECT 'desktop' AS platform
        UNION ALL
        SELECT 'mobile' AS platform
        UNION ALL
        SELECT 'both' AS platform
    ) AS labels
) AS grid
LEFT JOIN (
    -- Users with two rows on the date: both platforms.
    SELECT user_id, spend_date, 'both' AS platform, SUM(amount) AS amount
    FROM spending
    GROUP BY spend_date, user_id
    HAVING COUNT(*) = 2
    UNION ALL
    -- Users with a single row on the date: that row's platform.
    SELECT user_id, spend_date, MAX(platform) AS platform, SUM(amount) AS amount
    FROM spending
    GROUP BY spend_date, user_id
    HAVING COUNT(*) = 1
) AS t
    ON t.spend_date = grid.spend_date
    AND t.platform = grid.platform
GROUP BY grid.spend_date, grid.platform
ORDER BY grid.platform
-- Approach 3 (the author could not tell what was wrong and does not recommend
-- it). Idea: emit a placeholder row with a NULL amount so that a
-- (date, platform) bucket exists even when nobody belongs to it;
-- COUNT(amount) then counts only the real members and IFNULL turns the empty
-- sum into 0.
-- Fixes:
--   * The original emitted placeholders only for 'both'. A date with no
--     desktop-only (or no mobile-only) user therefore produced no row for that
--     platform. Every (user, date) now yields one row per label, with the
--     amount filled in only for the label the user belongs to.
--   * The total column is spelled total_amount (it was `tltal_amount`).
-- NOTE(review): like the original COUNT(*) tests, this assumes at most one row
-- per (user_id, spend_date, platform); confirm against the schema.
SELECT
    spend_date,
    platform,
    IFNULL(SUM(amount), 0) AS total_amount,
    COUNT(amount) AS total_users
FROM (
    SELECT
        user_id,
        spend_date,
        'both' AS platform,
        IF(COUNT(*) = 1, NULL, SUM(amount)) AS amount
    FROM spending
    GROUP BY spend_date, user_id
    UNION ALL
    SELECT
        user_id,
        spend_date,
        'desktop' AS platform,
        IF(COUNT(*) = 1 AND MAX(platform) = 'desktop', SUM(amount), NULL) AS amount
    FROM spending
    GROUP BY spend_date, user_id
    UNION ALL
    SELECT
        user_id,
        spend_date,
        'mobile' AS platform,
        IF(COUNT(*) = 1 AND MAX(platform) = 'mobile', SUM(amount), NULL) AS amount
    FROM spending
    GROUP BY spend_date, user_id
) AS t
GROUP BY spend_date, platform
-- Approach 4 (recorded score 0.97), which the author finds very clear.
-- Two row sources are joined: a grid pairing every spend_date with each of
-- the three platform labels, and per-(date, label) totals built by two nested
-- subqueries. The innermost subquery flags, per user and date, whether mobile
-- and desktop were used; the next level turns the two flags into one of three
-- labels, so platform goes from two categories to three.
SELECT
    grid.spend_date,
    grid.platform,
    IFNULL(totals.total_amount, 0) AS total_amount,
    IFNULL(totals.total_users, 0) AS total_users
FROM (
    SELECT
        labels.platform,
        dates.spend_date
    FROM (
        SELECT 'desktop' AS platform FROM dual
        UNION
        SELECT 'mobile' AS platform FROM dual
        UNION
        SELECT 'both' AS platform FROM dual
    ) AS labels
    CROSS JOIN (
        SELECT DISTINCT spend_date
        FROM Spending
    ) AS dates
) AS grid
LEFT JOIN (
    SELECT
        classified.spend_date,
        classified.platform,
        SUM(allAmount) AS total_amount,
        COUNT(*) AS total_users
    FROM (
        SELECT
            flags.user_id,
            flags.spend_date,
            allAmount,
            CASE
                WHEN mobile = 1 AND desktop = 1 THEN 'both'
                WHEN desktop = 1 THEN 'desktop'
                ELSE 'mobile'
            END AS platform
        FROM (
            SELECT
                s.user_id,
                s.spend_date,
                MAX(CASE WHEN platform = 'mobile' THEN 1 ELSE 0 END) AS mobile,
                MAX(CASE WHEN platform = 'desktop' THEN 1 ELSE 0 END) AS desktop,
                SUM(amount) AS allAmount
            FROM Spending AS s
            GROUP BY s.user_id, s.spend_date
        ) AS flags
    ) AS classified
    GROUP BY classified.spend_date, classified.platform
) AS totals
    ON grid.platform = totals.platform
    AND grid.spend_date = totals.spend_date;
-- Approach 5 (recorded as 100%; the author expects it to perform about the
-- same as the earlier approach): same grid-plus-totals join, but the
-- reshaping of platform into three labels is done in a single grouped query.
-- A (user, date) with two distinct platforms becomes 'both'; otherwise
-- MAX(platform) returns the one platform used.
SELECT
    grid.spend_date,
    grid.platform,
    IFNULL(totals.total_amount, 0) AS total_amount,
    IFNULL(totals.total_users, 0) AS total_users
FROM (
    SELECT
        labels.platform,
        dates.spend_date
    FROM (
        SELECT 'desktop' AS platform FROM dual
        UNION
        SELECT 'mobile' AS platform FROM dual
        UNION
        SELECT 'both' AS platform FROM dual
    ) AS labels
    CROSS JOIN (
        SELECT DISTINCT spend_date
        FROM Spending
    ) AS dates
) AS grid
LEFT JOIN (
    SELECT
        classified.spend_date,
        classified.platform,
        SUM(allAmount) AS total_amount,
        COUNT(*) AS total_users
    FROM (
        SELECT
            s.user_id,
            s.spend_date,
            CASE
                WHEN COUNT(DISTINCT platform) = 2 THEN 'both'
                ELSE MAX(platform)
            END AS platform,
            SUM(amount) AS allAmount
        FROM Spending AS s
        GROUP BY s.user_id, s.spend_date
    ) AS classified
    GROUP BY classified.spend_date, classified.platform
) AS totals
    ON grid.platform = totals.platform
    AND grid.spend_date = totals.spend_date;
-- This query alone performs the reshaping: one row per (user_id, spend_date)
-- with platform set to 'both' when two distinct platforms were used and to
-- the single platform otherwise, plus the summed amount.
SELECT
    s.user_id,
    s.spend_date,
    CASE
        WHEN COUNT(DISTINCT platform) = 2 THEN 'both'
        ELSE MAX(platform)
    END AS platform,
    SUM(amount) AS allAmount
FROM Spending AS s
GROUP BY s.user_id, s.spend_date
-- 1132. Reported Posts II
-- 查詢被舉報爲垃圾內容(spam)的帖子每天被刪除的比例的平均值,並保留兩位小數。
-- First attempt (recorded score 0.2688): per action_date, the share of
-- distinct posts with extra = 'spam' that also appear in removals. The daily
-- shares are averaged, multiplied by 100 and rounded to two decimals.
SELECT ROUND(AVG(daily.removed_share) * 100, 2) AS average_daily_percent
FROM (
    SELECT
        COUNT(DISTINCT rem.post_id) / COUNT(DISTINCT act.post_id) AS removed_share
    FROM Actions AS act
    LEFT JOIN removals AS rem
        ON act.post_id = rem.post_id
    WHERE extra = 'spam'
    GROUP BY act.action_date
) AS daily
-- 1142. 過去30天的用戶活動 II
-- Average number of distinct sessions per user over activity dated after
-- 2019-06-27 and up to 2019-07-27 inclusive, rounded to two decimals.
-- IFNULL yields 0.00 when no row falls inside the window.
SELECT IFNULL(ROUND(AVG(per_user.session_count), 2), 0.00) AS average_sessions_per_user
FROM (
    SELECT
        user_id,
        COUNT(DISTINCT session_id) AS session_count
    FROM activity
    WHERE activity_date > '2019-06-27'
        AND activity_date <= '2019-07-27'
    GROUP BY user_id
) AS per_user
-- 這是一道簡單題,但是很久沒做出來,因爲忽略了一個重要條件:請注意,每個會話完全屬於一個用戶。
-- 1148. 文章瀏覽 I
-- Note from the author: GROUP BY can also remove duplicates, and the author
-- finds it better than DISTINCT here.
-- Approach 1 (recorded score 0.84): ids of viewers who viewed a row they
-- authored, de-duplicated with DISTINCT.
-- Fix: the ORDER BY clause had no sort key, which is a syntax error; the
-- result is now ordered by id.
SELECT DISTINCT viewer_id AS id
FROM views
WHERE viewer_id = author_id
ORDER BY id
-- Approach 2 (recorded score 0.92): the same result with GROUP BY doing the
-- de-duplication instead of DISTINCT.
SELECT viewer_id AS id
FROM Views
WHERE author_id = viewer_id
GROUP BY viewer_id
ORDER BY viewer_id