1. 計算歌曲完播率
請根據 user_listen_record、song_library計算出QQ音樂20230306歌曲完播率(聽歌時長>=歌曲時長即視爲完播),輸出表結構如下,其中完播率保留小數點後2位小數並按照完播率從大到小排序:
song_playback_history
已知QQ音樂部分用戶聽歌流水表格式和樣例數據如下: user_listen_record 其中ftime爲數據分區時間,uin爲用戶賬號(唯一標識),os_type爲設備端分類,song_id爲歌曲id,app_ver爲應用版本,play_duration爲聽歌時長(秒)
曲庫信息表:song_library 其中song_id爲歌曲id(唯一標識),song_name歌曲名稱,duration爲歌曲時長(秒),artist_id爲歌手id,artist_name爲歌手名
示例1
輸入例子:
-- =====================================================================
-- Fixture for problems 1 and 2: listening log + song catalogue.
-- =====================================================================

-- user_listen_record: one row per recorded play.
DROP TABLE IF EXISTS `user_listen_record`;
CREATE TABLE `user_listen_record` (
    `ftime`         bigint(20)   DEFAULT NULL,  -- data partition day (sample rows use yyyymmdd numbers)
    `uin`           varchar(255) DEFAULT NULL,  -- user account
    `os_type`       varchar(255) DEFAULT NULL,  -- device platform
    `song_id`       bigint(20)   DEFAULT NULL,  -- song that was played
    `app_ver`       varchar(255) DEFAULT NULL,  -- application version
    `play_duration` bigint(20)   DEFAULT NULL   -- seconds listened
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

-- Sample plays, loaded in one transaction.
BEGIN;
INSERT INTO `user_listen_record`
    (`ftime`, `uin`, `os_type`, `song_id`, `app_ver`, `play_duration`)
VALUES
    (20230306, 'a', 'ios',     1001, '10.0.1', 140),
    (20230306, 's', 'android', 1001, '10.0.1', 170),
    (20230306, 'm', 'ios',     1003, '10.0.5', 100),
    (20230306, 'u', 'android', 1004, '10.0.1', 229),
    (20230306, 'm', 'ios',     1002, '10.0.5', 230),
    (20230306, 'a', 'ios',     1003, '10.0.1', 257),
    (20230306, 'u', 'android', 1001, '10.0.1', 290),
    (20230306, 's', 'android', 1003, '10.0.1', 170),
    (20230306, 'a', 'ios',     1004, '10.0.1', 229);
COMMIT;

-- song_library: song catalogue, one row per song.
DROP TABLE IF EXISTS `song_library`;
CREATE TABLE `song_library` (
    `song_id`     bigint(20)   DEFAULT NULL,  -- song identifier
    `song_name`   varchar(255) DEFAULT NULL,  -- song title
    `duration`    bigint(20)   DEFAULT NULL,  -- full song length in seconds
    `artist_id`   bigint(20)   DEFAULT NULL,  -- artist identifier
    `artist_name` varchar(255) DEFAULT NULL   -- artist display name
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

-- Sample catalogue rows, loaded in one transaction.
BEGIN;
INSERT INTO `song_library`
    (`song_id`, `song_name`, `duration`, `artist_id`, `artist_name`)
VALUES
    (1001, '七里香', 297, 1, '周杰倫'),
    (1002, '逆戰', 230, 235, '張傑'),
    (1003, '烏梅子醬', 257, 23, '李榮浩'),
    (1004, '倒數', 229, 25, '鄧紫棋');
COMMIT;
輸出例子:
ftime|song_id|song_name|play_comp_rate
20230306|1002|逆戰|1.00
20230306|1004|倒數|1.00
20230306|1003|烏梅子醬|0.33
20230306|1001|七里香|0.00
答案
-- Per-song completion rate for partition day 20230306.
-- A play is "complete" when the seconds listened (play_duration) reach the
-- song's catalogue length (duration). The rate is the share of complete plays
-- among all plays of that song, rounded to 2 decimals.
-- The inner join drops plays whose song_id has no row in song_library.
SELECT
    u.ftime,
    u.song_id,
    s.song_name,
    ROUND(
        AVG(CASE WHEN u.play_duration >= s.duration THEN 1 ELSE 0 END),
        2
    ) AS play_comp_rate
FROM user_listen_record AS u
INNER JOIN song_library AS s
    ON u.song_id = s.song_id
-- ftime is BIGINT, so compare against a numeric literal instead of relying on
-- an implicit string-to-number conversion.
WHERE u.ftime = 20230306
GROUP BY
    u.ftime,
    u.song_id,
    s.song_name
ORDER BY
    play_comp_rate DESC,
    -- Tie-breaker: songs sharing a rate (1002 and 1004 in the sample) would
    -- otherwise come back in an arbitrary order.
    u.song_id ASC;
2. 聽歌時長前3名
請根據 user_listen_record按照每個用戶對歌曲的聽歌時長,排出每個用戶播放前3名歌曲(相同排名取song_id更小的歌曲),最後結果按用戶賬號(uin)從大到小,排名從1到3排序,輸出表結構如下:
user_play_rank
輸出例子:
uin|song_id|rank
u|1001|1
u|1004|2
s|1001|1
s|1003|2
m|1002|1
m|1003|2
a|1003|1
a|1004|2
a|1001|3
答案代碼:
-- Top 3 songs per user by total listening time.
--   totals : seconds listened per (user, song), summed over all log rows.
--   ranked : position of each song inside its user, longest total first;
--            equal totals are broken by the smaller song_id.
-- Only user_listen_record is read: the ranking needs no column from
-- song_library, and joining it could drop or duplicate log rows.
-- `rank` is a reserved word in MySQL 8.0, so it is back-quoted everywhere.
SELECT
    ranked.uin,
    ranked.song_id,
    ranked.`rank`
FROM (
    SELECT
        totals.uin,
        totals.song_id,
        ROW_NUMBER() OVER (
            PARTITION BY totals.uin
            ORDER BY
                totals.total_time DESC,
                totals.song_id ASC
        ) AS `rank`
    FROM (
        SELECT
            uin,
            song_id,
            SUM(play_duration) AS total_time
        FROM user_listen_record
        GROUP BY
            uin,
            song_id
    ) AS totals
) AS ranked
WHERE ranked.`rank` <= 3
-- Required presentation order: accounts from largest to smallest, then 1..3.
ORDER BY
    ranked.uin DESC,
    ranked.`rank` ASC;
3. 每個月Top3的周杰倫歌曲
輸入例子:
-- =====================================================================
-- Fixture for problem 3: play log, song metadata and user ages.
-- Every statement is terminated so the script can be run as a unit.
-- =====================================================================

-- play_log: one row per play (date, user, song).
DROP TABLE IF EXISTS play_log;
CREATE TABLE `play_log` (
    `fdate`   DATE,
    `user_id` INT,
    `song_id` INT
);
INSERT INTO play_log (fdate, user_id, song_id)
VALUES
    -- January 2022
    ('2022-01-08', 10000, 0),
    ('2022-01-16', 10000, 0),
    ('2022-01-20', 10000, 0),
    ('2022-01-25', 10000, 0),
    ('2022-01-02', 10000, 1),
    ('2022-01-12', 10000, 1),
    ('2022-01-13', 10000, 1),
    ('2022-01-14', 10000, 1),
    ('2022-01-10', 10000, 2),
    ('2022-01-11', 10000, 3),
    ('2022-01-16', 10000, 3),
    ('2022-01-11', 10000, 4),
    ('2022-01-27', 10000, 4),
    -- February 2022
    ('2022-02-05', 10000, 0),
    ('2022-02-19', 10000, 0),
    ('2022-02-07', 10000, 1),
    ('2022-02-27', 10000, 2),
    ('2022-02-25', 10000, 3),
    ('2022-02-03', 10000, 4),
    ('2022-02-16', 10000, 4);

-- song_info: song title and singer per song_id.
DROP TABLE IF EXISTS song_info;
CREATE TABLE `song_info` (
    `song_id`     INT,
    `song_name`   VARCHAR(255),
    `singer_name` VARCHAR(255)
);
INSERT INTO song_info (song_id, song_name, singer_name)
VALUES
    (0, '明明就', '周杰倫'),
    (1, '說好的幸福呢', '周杰倫'),
    (2, '江南', '林俊杰'),
    (3, '大笨鐘', '周杰倫'),
    (4, '黑鍵', '林俊杰');

-- user_info: age per user_id.
DROP TABLE IF EXISTS user_info;
CREATE TABLE `user_info` (
    `user_id` INT,
    `age`     INT
);
INSERT INTO user_info (user_id, age)
VALUES
    (10000, 18);
輸出例子:
month|ranking|song_name|play_pv
1|1|明明就|4
1|2|說好的幸福呢|4
1|3|大笨鐘|2
2|1|明明就|2
2|2|說好的幸福呢|1
2|3|大笨鐘|1
例子說明:
1月被18-25歲用戶播放次數最高的三首歌爲“明明就”、“說好的幸福呢”、“大笨鐘”,“明明就”和“說好的幸福呢”播放次數相同,排名先後由兩者的song_id先後順序決定。2月同理。
答案:
-- Monthly top 3 songs by 周杰倫 among plays made in 2022 by users aged 18-25.
--   monthly : play count per (calendar month, song) after the filters.
--   ranked  : position inside each month, most plays first; equal counts
--             are broken by the smaller song_id.
SELECT
    ranked.`month`,
    ranked.ranking,
    ranked.song_name,
    ranked.play_pv
FROM (
    SELECT
        monthly.`month`,
        ROW_NUMBER() OVER (
            PARTITION BY monthly.`month`
            ORDER BY
                monthly.play_pv DESC,
                monthly.song_id ASC
        ) AS ranking,
        monthly.song_name,
        monthly.play_pv
    FROM (
        SELECT
            MONTH(p.fdate) AS `month`,
            s.song_id,
            s.song_name,
            COUNT(*) AS play_pv
        FROM play_log AS p
        INNER JOIN song_info AS s
            ON p.song_id = s.song_id
        INNER JOIN user_info AS u
            ON p.user_id = u.user_id
        -- Half-open date range keeps the predicate usable by an index on
        -- fdate, unlike wrapping the column in YEAR().
        WHERE p.fdate >= DATE '2022-01-01'
          AND p.fdate < DATE '2023-01-01'
          AND u.age >= 18
          AND u.age <= 25
          AND s.singer_name = '周杰倫'
        GROUP BY
            MONTH(p.fdate),
            s.song_id,
            s.song_name
    ) AS monthly
) AS ranked
WHERE ranked.ranking <= 3
-- Without an explicit sort the row order of the result is not guaranteed;
-- the sample output lists months in order and rankings 1..3 within each.
ORDER BY
    ranked.`month` ASC,
    ranked.ranking ASC;
4. 語種播放量前三高所有歌曲
表:songplay
+--------------+---------+
| Column Name | Type |
+--------------+---------+
| id | int |
| playcnt | int |
|languageid | int |
+--------------+---------+
id是該表的主鍵列。
languageid是language表中id的外鍵。
該表的每一行都表示歌曲的ID、播放量,語種id。
表: language
+-------------+---------+
| Column Name | Type |
+-------------+---------+
| id | int |
| name | varchar |
+-------------+---------+
Id是該表的主鍵列。
該表的每一行表示語種ID和語種名。
示例1
輸入例子:
-- =====================================================================
-- Fixture for problem 4: per-song play counts and the language lookup.
-- Inserts name their target columns so they do not depend on the
-- physical column order of the tables.
-- =====================================================================

-- songplay: one row per song with its play count and language id.
DROP TABLE IF EXISTS songplay;
CREATE TABLE `songplay` (
    `id`         INT,
    `playcnt`    INT,
    `languageid` INT
);
INSERT INTO songplay (id, playcnt, languageid)
VALUES
    (1, 85001, 1),
    (2, 80001, 2),
    (3, 60001, 2),
    (4, 90001, 1),
    (5, 69001, 1),
    (6, 85001, 1),
    (7, 70001, 1);

-- language: language id to language name.
DROP TABLE IF EXISTS `language`;
CREATE TABLE `language` (
    `id`   INT,
    `name` VARCHAR(255)
);
INSERT INTO `language` (id, name)
VALUES
    (1, '中文'),
    (2, '英文');
輸出例子:
language_name|songid|playcnt
中文|4|90001
中文|1|85001
中文|6|85001
中文|7|70001
英文|2|80001
英文|3|60001
代碼:
開始搞錯思路了,看這個樣例還以爲是求播放總量前三的語種下的所有歌曲,實際上是求每個語種播放量前三的歌曲(存在並列現象)
-- Songs holding one of the three highest play counts within their language.
-- DENSE_RANK keeps ties: songs with the same play count share a rank, so a
-- language can return more than three songs (中文 returns four in the sample).
SELECT
    ranked.language_name,
    ranked.songid,
    ranked.playcnt
FROM (
    SELECT
        l.name AS language_name,
        sp.id AS songid,
        sp.playcnt,
        DENSE_RANK() OVER (
            PARTITION BY sp.languageid
            ORDER BY sp.playcnt DESC
        ) AS rk
    FROM songplay AS sp
    INNER JOIN `language` AS l
        ON sp.languageid = l.id
) AS ranked
WHERE ranked.rk <= 3
-- Explicit sort: the sample output groups rows by language, highest play
-- count first, and lists tied songs by ascending song id.
ORDER BY
    ranked.language_name ASC,
    ranked.playcnt DESC,
    ranked.songid ASC;
5. 最長連續登錄天數
你正在搭建一個用戶活躍度的畫像,其中一個與活躍度相關的特徵是“最長連續登錄天數”, 請用SQL實現“2023年1月1日-2023年1月31日用戶最長的連續登錄天數”
示例1
輸入例子:
-- Fixture for problem 5: each row records that user_id logged in on fdate.
DROP TABLE IF EXISTS tb_dau;
CREATE TABLE `tb_dau` (
    `fdate`   DATE,
    `user_id` INT
);
-- Two consecutive days, a one-day gap, then a single day.
INSERT INTO tb_dau (fdate, user_id) VALUES ('2023-01-01', 10000);
INSERT INTO tb_dau (fdate, user_id) VALUES ('2023-01-02', 10000);
INSERT INTO tb_dau (fdate, user_id) VALUES ('2023-01-04', 10000);
輸出例子:
user_id|max_consec_days
10000|2
例子說明:
id爲10000的用戶在1月1日及1月2日連續登錄2日,1月4日登錄1日,故最長連續登錄天數爲2日
答案
-- Longest run of consecutive login days per user within
-- 2023-01-01 .. 2023-01-31 (gaps-and-islands).
WITH january_logins AS (
    -- One row per user and day: a repeated login on the same day would
    -- otherwise advance the row number and split a streak.
    SELECT DISTINCT
        user_id,
        fdate
    FROM tb_dau
    WHERE fdate >= DATE '2023-01-01'
      AND fdate < DATE '2023-02-01'
),
numbered AS (
    -- Sequence number of each login day inside its user.
    SELECT
        user_id,
        fdate,
        ROW_NUMBER() OVER (
            PARTITION BY user_id
            ORDER BY fdate
        ) AS rn
    FROM january_logins
),
streaks AS (
    -- Days of one streak share the same (fdate - rn days) anchor date.
    -- DATE_SUB does real calendar arithmetic; a bare `fdate - rn` would turn
    -- the date into a YYYYMMDD number and break across month boundaries.
    SELECT
        user_id,
        COUNT(*) AS consecutive_days
    FROM numbered
    GROUP BY
        user_id,
        DATE_SUB(fdate, INTERVAL rn DAY)
)
SELECT
    user_id,
    MAX(consecutive_days) AS max_consec_days
FROM streaks
GROUP BY user_id
ORDER BY user_id;
6.SQL實現文本處理
現有試卷信息表examination_info(exam_id試卷ID, tag試卷類別, difficulty試卷難度, duration考試時長):
+----+---------+------------------+------------+----------+---------------------+
| id | exam_id | tag | difficulty | duration | release_time |
+----+---------+------------------+------------+----------+---------------------+
| 1 | 9001 | 算法 | hard | 60 | 2020-01-01 10:00:00 |
| 2 | 9002 | 算法 | hard | 80 | 2020-01-01 10:00:00 |
| 3 | 9003 | SQL | medium | 70 | 2020-01-01 10:00:00 |
| 4 | 9004 | 算法,medium,80 | | 0 | 2020-01-01 10:00:00 |
+----+---------+------------------+------------+----------+---------------------+
錄題同學有一次手誤將部分記錄的試題類別tag、難度、時長同時錄入到了tag字段,
請幫忙找出這些錄錯了的記錄,並拆分後按正確的列類型輸出。
由示例數據結果輸出如下:
+---------+--------+------------+----------+
| exam_id | tag | difficulty | duration |
+---------+--------+------------+----------+
| 9004 | 算法 | medium | 80 |
+---------+--------+------------+----------+
示例1
輸入例子:
-- Fixture for problem 6: exam metadata. Row 9004 is the mis-entered record
-- whose tag column carries "tag,difficulty,duration" packed together.
DROP TABLE IF EXISTS examination_info, exam_record;
CREATE TABLE examination_info (
    id           INT PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
    exam_id      INT UNIQUE NOT NULL COMMENT '試卷ID',
    tag          VARCHAR(32) COMMENT '類別標籤',
    difficulty   VARCHAR(8) COMMENT '難度',
    duration     INT NOT NULL COMMENT '時長',
    release_time DATETIME COMMENT '發佈時間'
) CHARACTER SET utf8 COLLATE utf8_general_ci;
INSERT INTO examination_info
    (exam_id, tag, difficulty, duration, release_time)
VALUES
    (9001, '算法', 'hard', 60, '2020-01-01 10:00:00'),
    (9002, '算法', 'hard', 80, '2020-01-01 10:00:00'),
    (9003, 'SQL', 'medium', 70, '2020-01-01 10:00:00'),
    (9004, '算法,medium,80', '', 0, '2020-01-01 10:00:00');
輸出例子:
exam_id|tag|difficulty|duration
9004|算法|medium|80
答案代碼
牛客原題:https://www.nowcoder.com/practice/a5475ed3b5ab4de58e2ea426b4b2db76
-- Find the mis-entered exam rows whose tag column holds
-- "tag,difficulty,duration" and split them back into separate columns.
SELECT
    exam_id,
    -- Everything to the left of the first comma.
    SUBSTRING_INDEX(tag, ',', 1) AS tag,
    -- Middle field: keep up to the second comma, then take the last piece.
    SUBSTRING_INDEX(SUBSTRING_INDEX(tag, ',', 2), ',', -1) AS difficulty,
    -- Everything to the right of the last comma, converted to an integer so
    -- the column type matches examination_info.duration.
    CAST(SUBSTRING_INDEX(tag, ',', -1) AS SIGNED) AS duration
FROM examination_info
-- A comma inside tag marks a row where the fields were packed together.
WHERE tag LIKE '%,%';