需求背景:
訂單表中有每筆訂單的下單時間、用戶ID、訂單金額等信息,需要統計每個月在接下來幾個月用戶復購情況。
create table order_info(
order_id int primary key,
user_id int,
amount decimal(10,2),
create_time datetime
);
insert into order_info values
(101,11211,749.00,'2020-01-01 00:04:00'),
(102,75205,939.00,'2020-01-05 09:15:00'),
(103,81384,349.00,'2020-01-08 22:19:00'),
(104,50437,687.00,'2020-01-11 22:17:00'),
(105,30321,658.00,'2020-01-12 22:18:00'),
(106,49811,355.00,'2020-01-16 22:18:00'),
(107,30352,363.00,'2020-01-19 22:24:00'),
(108,30362,435.00,'2020-01-22 22:18:00'),
(109,30363,270.00,'2020-01-27 22:19:00'),
(110,30324,552.00,'2020-01-30 22:22:00'),
(111,11211,692.00,'2020-02-04 08:35:00'),
(112,75205,536.00,'2020-02-09 11:03:00'),
(113,81384,478.00,'2020-02-13 09:32:00'),
(114,30362,675.00,'2020-02-17 11:18:00'),
(115,30363,723.00,'2020-02-20 08:47:00'),
(116,30324,914.00,'2020-02-21 10:48:00'),
(117,49262,444.00,'2020-02-24 18:35:00'),
(118,12074,617.00,'2020-02-29 20:16:00'),
(119,50437,911.00,'2020-03-02 12:35:00'),
(120,30321,695.00,'2020-03-14 23:53:00'),
(121,12074,275.00,'2020-03-15 15:38:00'),
(122,81384,1066.00,'2020-03-18 16:49:00'),
(123,30362,431.00,'2020-03-23 09:01:00'),
(124,27727,687.00,'2020-03-25 15:38:00'),
(125,27727,858.00,'2020-03-28 17:44:00');
select * from order_info;
+----------+---------+---------+---------------------+
| order_id | user_id | amount | create_time |
+----------+---------+---------+---------------------+
| 101 | 11211 | 749.00 | 2020-01-01 00:04:00 |
| 102 | 75205 | 939.00 | 2020-01-05 09:15:00 |
| 103 | 81384 | 349.00 | 2020-01-08 22:19:00 |
| 104 | 50437 | 687.00 | 2020-01-11 22:17:00 |
| 105 | 30321 | 658.00 | 2020-01-12 22:18:00 |
| 106 | 49811 | 355.00 | 2020-01-16 22:18:00 |
| 107 | 30352 | 363.00 | 2020-01-19 22:24:00 |
| 108 | 30362 | 435.00 | 2020-01-22 22:18:00 |
| 109 | 30363 | 270.00 | 2020-01-27 22:19:00 |
| 110 | 30324 | 552.00 | 2020-01-30 22:22:00 |
| 111 | 11211 | 692.00 | 2020-02-04 08:35:00 |
| 112 | 75205 | 536.00 | 2020-02-09 11:03:00 |
| 113 | 81384 | 478.00 | 2020-02-13 09:32:00 |
| 114 | 30362 | 675.00 | 2020-02-17 11:18:00 |
| 115 | 30363 | 723.00 | 2020-02-20 08:47:00 |
| 116 | 30324 | 914.00 | 2020-02-21 10:48:00 |
| 117 | 49262 | 444.00 | 2020-02-24 18:35:00 |
| 118 | 12074 | 617.00 | 2020-02-29 20:16:00 |
| 119 | 50437 | 911.00 | 2020-03-02 12:35:00 |
| 120 | 30321 | 695.00 | 2020-03-14 23:53:00 |
| 121 | 12074 | 275.00 | 2020-03-15 15:38:00 |
| 122 | 81384 | 1066.00 | 2020-03-18 16:49:00 |
| 123 | 30362 | 431.00 | 2020-03-23 09:01:00 |
| 124 | 27727 | 687.00 | 2020-03-25 15:38:00 |
| 125 | 27727 | 858.00 | 2020-03-28 17:44:00 |
+----------+---------+---------+---------------------+
解析思路:
第一步:查詢每個月下過單的用戶
select month(create_time) as dt,user_id
from order_info
group by month(create_time),user_id;
+------+---------+
| dt | user_id |
+------+---------+
| 1 | 11211 |
| 1 | 75205 |
| 1 | 81384 |
| 1 | 50437 |
| 1 | 30321 |
| 1 | 49811 |
| 1 | 30352 |
| 1 | 30362 |
| 1 | 30363 |
| 1 | 30324 |
| 2 | 11211 |
| 2 | 75205 |
| 2 | 81384 |
| 2 | 30362 |
| 2 | 30363 |
| 2 | 30324 |
| 2 | 49262 |
| 2 | 12074 |
| 3 | 50437 |
| 3 | 30321 |
| 3 | 12074 |
| 3 | 81384 |
| 3 | 30362 |
| 3 | 27727 |
+------+---------+
第二步:查詢每個月下過單的總用戶數
select month(create_time) 月份,count(distinct user_id) 總用戶數
from order_info
group by month(create_time);
+------+----------+
| 月份 | 總用戶數 |
+------+----------+
| 1 | 10 |
| 2 | 8 |
| 3 | 6 |
+------+----------+
第三步:將每個月下過單的用戶的查詢結果作爲臨時表進行自連接,用月份和用戶ID判斷是否復購,計算次月的復購用戶數
select t1.dt 自然月份,t2.dt 復購月份,count(distinct t2.user_id) 復購用戶數
from (select month(create_time) dt,user_id from order_info group by month(create_time),user_id) t1
join (select month(create_time) dt,user_id from order_info group by month(create_time),user_id) t2
on t1.user_id=t2.user_id and t1.dt < t2.dt
group by t1.dt,t2.dt;
+----------+----------+------------+
| 自然月份 | 復購月份 | 復購用戶數 |
+----------+----------+------------+
| 1 | 2 | 6 |
| 1 | 3 | 4 |
| 2 | 3 | 3 |
+----------+----------+------------+
第四步:復購率=復購用戶數/總用戶數
select 自然月份,復購月份,復購用戶數,總用戶數,round(復購用戶數/總用戶數,2) 復購率
from
(select t1.dt 自然月份,t2.dt 復購月份,count(distinct t2.user_id) 復購用戶數
from (select month(create_time) dt,user_id from order_info group by month(create_time),user_id) t1
join (select month(create_time) dt,user_id from order_info group by month(create_time),user_id) t2
on t1.user_id=t2.user_id and t1.dt < t2.dt
group by t1.dt,t2.dt) a
join
(select month(create_time) 月份,count(distinct user_id) 總用戶數
from order_info
group by month(create_time)) b
on a.自然月份=b.月份;
+----------+----------+------------+----------+--------+
| 自然月份 | 復購月份 | 復購用戶數 | 總用戶數 | 復購率 |
+----------+----------+------------+----------+--------+
| 1 | 2 | 6 | 10 | 0.60 |
| 1 | 3 | 4 | 10 | 0.40 |
| 2 | 3 | 3 | 8 | 0.38 |
+----------+----------+------------+----------+--------+