Hive SQL題庫-中級

第一章 環境準備

1.1 用戶信息表

1)表結構

user_id(用戶id) gender(性別) birthday(生日)
101 男 1990-01-01
102 女 1991-02-01
103 女 1992-03-01
104 男 1993-04-01

2)建表語句

hive> 
-- User dimension table. Dropped first so the script can be re-run from scratch.
-- All three columns are plain strings; the data is stored as tab-delimited text.
DROP TABLE IF EXISTS user_info;
CREATE TABLE user_info
(
    user_id  STRING COMMENT '用戶id',
    gender   STRING COMMENT '性別',
    birthday STRING COMMENT '生日'
)
    COMMENT '用戶信息表'
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

3)數據裝載

hive> 
-- Load the ten sample users. OVERWRITE replaces whatever an earlier run left behind.
INSERT OVERWRITE TABLE user_info
VALUES
    ('101', '男', '1990-01-01'),
    ('102', '女', '1991-02-01'),
    ('103', '女', '1992-03-01'),
    ('104', '男', '1993-04-01'),
    ('105', '女', '1994-05-01'),
    ('106', '男', '1995-06-01'),
    ('107', '女', '1996-07-01'),
    ('108', '男', '1997-08-01'),
    ('109', '女', '1998-09-01'),
    ('1010', '男', '1999-10-01');

1.2 商品信息表

1)表結構

sku_id(商品id) name(商品名稱) category_id(分類id) from_date(上架日期) price(商品價格)
1 xiaomi 10 1 2020-01-01 2000
6 洗碗機 2 2020-02-01 2000
9 自行車 3 2020-01-01 1000

2)建表語句

hive> 
-- Product (SKU) dimension table: name, owning category, listing date and list price.
-- Dropped first so the script can be re-run from scratch.
DROP TABLE IF EXISTS sku_info;
CREATE TABLE sku_info
(
    sku_id      STRING         COMMENT '商品id',
    name        STRING         COMMENT '商品名稱',
    category_id STRING         COMMENT '所屬分類id',
    from_date   STRING         COMMENT '上架日期',
    -- Exact decimal instead of DOUBLE: prices are money, and this matches
    -- order_detail.price and sku_price_modify_detail.new_price.
    price       DECIMAL(16, 2) COMMENT '商品單價'
)
    COMMENT '商品屬性表'
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

3)數據裝載

hive> 
-- Load the twelve sample products: four each in categories 1, 2 and 3.
INSERT OVERWRITE TABLE sku_info
VALUES
    ('1', 'xiaomi 10', '1', '2020-01-01', 2000),
    ('2', '手機殼', '1', '2020-02-01', 10),
    ('3', 'apple 12', '1', '2020-03-01', 5000),
    ('4', 'xiaomi 13', '1', '2020-04-01', 6000),
    ('5', '破壁機', '2', '2020-01-01', 500),
    ('6', '洗碗機', '2', '2020-02-01', 2000),
    ('7', '熱水壺', '2', '2020-03-01', 100),
    ('8', '微波爐', '2', '2020-04-01', 600),
    ('9', '自行車', '3', '2020-01-01', 1000),
    ('10', '帳篷', '3', '2020-02-01', 100),
    ('11', '燒烤架', '3', '2020-02-01', 50),
    ('12', '遮陽傘', '3', '2020-03-01', 20);

1.3 商品分類信息表

1)表結構

category_id(分類id) category_name(分類名稱)
1 數碼
2 廚衛
3 戶外

2)建表語句

hive> 
-- Product category lookup table (category id -> display name).
-- Column comments added so this table is documented like every other table in the script.
DROP TABLE IF EXISTS category_info;
CREATE TABLE category_info
(
    category_id   STRING COMMENT '分類id',
    category_name STRING COMMENT '分類名稱'
)
    COMMENT '品類表'
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

3)數據裝載

hive> 
-- Load the three sample categories referenced by sku_info.category_id.
INSERT OVERWRITE TABLE category_info
VALUES
    ('1', '數碼'),
    ('2', '廚衛'),
    ('3', '戶外');

1.4 訂單信息表

1)表結構

order_id(訂單id) user_id(用戶id) create_date(下單日期) total_amount(訂單金額)
1 101 2021-09-27 29000.00
10 103 2021-10-02 28000.00

2)建表語句

hive> 
-- Order header table: one row per order with its buyer, order day and total amount.
-- Dropped first so the script can be re-run from scratch.
DROP TABLE IF EXISTS order_info;
CREATE TABLE order_info
(
    order_id     STRING         COMMENT '訂單id',
    user_id      STRING         COMMENT '用戶id',
    create_date  STRING         COMMENT '下單日期',
    total_amount DECIMAL(16, 2) COMMENT '訂單總金額'
)
    COMMENT '訂單表'
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

3)數據裝載

hive> 
-- Load the forty sample orders (four per user).
-- Orders 35-40 are dated 2021-10-08 (not 2020-10-08): that is the order_date recorded
-- for them in delivery_info, and it is what the expected results of exercises 2.4 and
-- 2.11 require (every user has four orders in 2021).
INSERT OVERWRITE TABLE order_info
VALUES
    ('1', '101', '2021-09-27', 29000.00),
    ('2', '101', '2021-09-28', 70500.00),
    ('3', '101', '2021-09-29', 43300.00),
    ('4', '101', '2021-09-30', 860.00),
    ('5', '102', '2021-10-01', 46180.00),
    ('6', '102', '2021-10-01', 50000.00),
    ('7', '102', '2021-10-01', 75500.00),
    ('8', '102', '2021-10-02', 6170.00),
    ('9', '103', '2021-10-02', 18580.00),
    ('10', '103', '2021-10-02', 28000.00),
    ('11', '103', '2021-10-02', 23400.00),
    ('12', '103', '2021-10-03', 5910.00),
    ('13', '104', '2021-10-03', 13000.00),
    ('14', '104', '2021-10-03', 69500.00),
    ('15', '104', '2021-10-03', 2000.00),
    ('16', '104', '2021-10-03', 5380.00),
    ('17', '105', '2021-10-04', 6210.00),
    ('18', '105', '2021-10-04', 68000.00),
    ('19', '105', '2021-10-04', 43100.00),
    ('20', '105', '2021-10-04', 2790.00),
    ('21', '106', '2021-10-04', 9390.00),
    ('22', '106', '2021-10-05', 58000.00),
    ('23', '106', '2021-10-05', 46600.00),
    ('24', '106', '2021-10-05', 5160.00),
    ('25', '107', '2021-10-05', 55350.00),
    ('26', '107', '2021-10-05', 14500.00),
    ('27', '107', '2021-10-06', 47400.00),
    ('28', '107', '2021-10-06', 6900.00),
    ('29', '108', '2021-10-06', 56570.00),
    ('30', '108', '2021-10-06', 44500.00),
    ('31', '108', '2021-10-07', 50800.00),
    ('32', '108', '2021-10-07', 3900.00),
    ('33', '109', '2021-10-07', 41480.00),
    ('34', '109', '2021-10-07', 88000.00),
    ('35', '109', '2021-10-08', 15000.00),
    ('36', '109', '2021-10-08', 9020.00),
    ('37', '1010', '2021-10-08', 9260.00),
    ('38', '1010', '2021-10-08', 12000.00),
    ('39', '1010', '2021-10-08', 23900.00),
    ('40', '1010', '2021-10-08', 6790.00);

1.5 訂單明細表

1)表結構

order_detail_id(訂單明細id) order_id(訂單id) sku_id(商品id) create_date(下單日期) price(商品單價) sku_num(商品件數)
1 1 1 2021-09-27 2000.00 2
2 1 3 2021-09-27 5000.00 5
22 10 4 2021-10-02 6000.00 1
23 10 5 2021-10-02 500.00 24
24 10 6 2021-10-02 2000.00 5

2)建表語句

hive> 
-- Order line-item table: one row per (order, product) with the unit price at order
-- time and the quantity bought. Dropped first so the script can be re-run from scratch.
DROP TABLE IF EXISTS order_detail;
CREATE TABLE order_detail
(
    order_detail_id STRING         COMMENT '訂單明細id',
    order_id        STRING         COMMENT '訂單id',
    sku_id          STRING         COMMENT '商品id',
    create_date     STRING         COMMENT '下單日期',
    price           DECIMAL(16, 2) COMMENT '下單時的商品單價',
    sku_num         INT            COMMENT '下單商品件數'
)
    COMMENT '訂單明細表'
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

3)數據裝載

hive> 
-- Load the 99 sample order lines.
-- Corrections relative to the previous version of this statement:
--   * rows 31 and 47 were missing their trailing comma, so the statement did not parse;
--   * row 20 is dated 2021-10-02, the date of its order (order 9) and the date
--     expected by exercise 2.9;
--   * rows 21, 28 and 37 carry quantities 58, 13 and 54: with these values
--     SUM(price * sku_num) equals order_info.total_amount for orders 9, 12 and 16
--     (18580, 5910, 5380) and the per-product totals match the expected results
--     (sku 2 = 302, sku 12 = 349);
--   * rows 87-99 are dated 2021-10-08, matching delivery_info.order_date for
--     orders 35-40 and the expected results of exercises 2.6, 2.7 and 2.9.
INSERT OVERWRITE TABLE order_detail
VALUES
    ('1', '1', '1', '2021-09-27', 2000.00, 2),
    ('2', '1', '3', '2021-09-27', 5000.00, 5),
    ('3', '2', '4', '2021-09-28', 6000.00, 9),
    ('4', '2', '5', '2021-09-28', 500.00, 33),
    ('5', '3', '7', '2021-09-29', 100.00, 37),
    ('6', '3', '8', '2021-09-29', 600.00, 46),
    ('7', '3', '9', '2021-09-29', 1000.00, 12),
    ('8', '4', '12', '2021-09-30', 20.00, 43),
    ('9', '5', '1', '2021-10-01', 2000.00, 8),
    ('10', '5', '2', '2021-10-01', 10.00, 18),
    ('11', '5', '3', '2021-10-01', 5000.00, 6),
    ('12', '6', '4', '2021-10-01', 6000.00, 8),
    ('13', '6', '6', '2021-10-01', 2000.00, 1),
    ('14', '7', '7', '2021-10-01', 100.00, 17),
    ('15', '7', '8', '2021-10-01', 600.00, 48),
    ('16', '7', '9', '2021-10-01', 1000.00, 45),
    ('17', '8', '10', '2021-10-02', 100.00, 48),
    ('18', '8', '11', '2021-10-02', 50.00, 15),
    ('19', '8', '12', '2021-10-02', 20.00, 31),
    ('20', '9', '1', '2021-10-02', 2000.00, 9),
    ('21', '9', '2', '2021-10-02', 10.00, 58),
    ('22', '10', '4', '2021-10-02', 6000.00, 1),
    ('23', '10', '5', '2021-10-02', 500.00, 24),
    ('24', '10', '6', '2021-10-02', 2000.00, 5),
    ('25', '11', '8', '2021-10-02', 600.00, 39),
    ('26', '12', '10', '2021-10-03', 100.00, 47),
    ('27', '12', '11', '2021-10-03', 50.00, 19),
    ('28', '12', '12', '2021-10-03', 20.00, 13),
    ('29', '13', '1', '2021-10-03', 2000.00, 4),
    ('30', '13', '3', '2021-10-03', 5000.00, 1),
    ('31', '14', '4', '2021-10-03', 6000.00, 5),
    ('32', '14', '5', '2021-10-03', 500.00, 47),
    ('33', '14', '6', '2021-10-03', 2000.00, 8),
    ('34', '15', '7', '2021-10-03', 100.00, 20),
    ('35', '16', '10', '2021-10-03', 100.00, 22),
    ('36', '16', '11', '2021-10-03', 50.00, 42),
    ('37', '16', '12', '2021-10-03', 20.00, 54),
    ('38', '17', '1', '2021-10-04', 2000.00, 3),
    ('39', '17', '2', '2021-10-04', 10.00, 21),
    ('40', '18', '4', '2021-10-04', 6000.00, 8),
    ('41', '18', '5', '2021-10-04', 500.00, 28),
    ('42', '18', '6', '2021-10-04', 2000.00, 3),
    ('43', '19', '7', '2021-10-04', 100.00, 55),
    ('44', '19', '8', '2021-10-04', 600.00, 11),
    ('45', '19', '9', '2021-10-04', 1000.00, 31),
    ('46', '20', '11', '2021-10-04', 50.00, 45),
    ('47', '20', '12', '2021-10-04', 20.00, 27),
    ('48', '21', '1', '2021-10-04', 2000.00, 2),
    ('49', '21', '2', '2021-10-04', 10.00, 39),
    ('50', '21', '3', '2021-10-04', 5000.00, 1),
    ('51', '22', '4', '2021-10-05', 6000.00, 8),
    ('52', '22', '5', '2021-10-05', 500.00, 20),
    ('53', '23', '7', '2021-10-05', 100.00, 58),
    ('54', '23', '8', '2021-10-05', 600.00, 18),
    ('55', '23', '9', '2021-10-05', 1000.00, 30),
    ('56', '24', '10', '2021-10-05', 100.00, 27),
    ('57', '24', '11', '2021-10-05', 50.00, 28),
    ('58', '24', '12', '2021-10-05', 20.00, 53),
    ('59', '25', '1', '2021-10-05', 2000.00, 5),
    ('60', '25', '2', '2021-10-05', 10.00, 35),
    ('61', '25', '3', '2021-10-05', 5000.00, 9),
    ('62', '26', '4', '2021-10-05', 6000.00, 1),
    ('63', '26', '5', '2021-10-05', 500.00, 13),
    ('64', '26', '6', '2021-10-05', 2000.00, 1),
    ('65', '27', '7', '2021-10-06', 100.00, 30),
    ('66', '27', '8', '2021-10-06', 600.00, 19),
    ('67', '27', '9', '2021-10-06', 1000.00, 33),
    ('68', '28', '10', '2021-10-06', 100.00, 37),
    ('69', '28', '11', '2021-10-06', 50.00, 46),
    ('70', '28', '12', '2021-10-06', 20.00, 45),
    ('71', '29', '1', '2021-10-06', 2000.00, 8),
    ('72', '29', '2', '2021-10-06', 10.00, 57),
    ('73', '29', '3', '2021-10-06', 5000.00, 8),
    ('74', '30', '4', '2021-10-06', 6000.00, 3),
    ('75', '30', '5', '2021-10-06', 500.00, 33),
    ('76', '30', '6', '2021-10-06', 2000.00, 5),
    ('77', '31', '8', '2021-10-07', 600.00, 13),
    ('78', '31', '9', '2021-10-07', 1000.00, 43),
    ('79', '32', '10', '2021-10-07', 100.00, 24),
    ('80', '32', '11', '2021-10-07', 50.00, 30),
    ('81', '33', '1', '2021-10-07', 2000.00, 8),
    ('82', '33', '2', '2021-10-07', 10.00, 48),
    ('83', '33', '3', '2021-10-07', 5000.00, 5),
    ('84', '34', '4', '2021-10-07', 6000.00, 10),
    ('85', '34', '5', '2021-10-07', 500.00, 44),
    ('86', '34', '6', '2021-10-07', 2000.00, 3),
    ('87', '35', '8', '2021-10-08', 600.00, 25),
    ('88', '36', '10', '2021-10-08', 100.00, 57),
    ('89', '36', '11', '2021-10-08', 50.00, 44),
    ('90', '36', '12', '2021-10-08', 20.00, 56),
    ('91', '37', '1', '2021-10-08', 2000.00, 2),
    ('92', '37', '2', '2021-10-08', 10.00, 26),
    ('93', '37', '3', '2021-10-08', 5000.00, 1),
    ('94', '38', '6', '2021-10-08', 2000.00, 6),
    ('95', '39', '7', '2021-10-08', 100.00, 35),
    ('96', '39', '8', '2021-10-08', 600.00, 34),
    ('97', '40', '10', '2021-10-08', 100.00, 37),
    ('98', '40', '11', '2021-10-08', 50.00, 51),
    ('99', '40', '12', '2021-10-08', 20.00, 27);

1.6 登錄明細表

1)表結構

user_id(用戶id) ip_address(ip地址) login_ts(登錄時間) logout_ts(登出時間)
101 180.149.130.161 2021-09-21 08:00:00 2021-09-27 08:30:00
102 120.245.11.2 2021-09-22 09:00:00 2021-09-27 09:30:00
103 27.184.97.3 2021-09-23 10:00:00 2021-09-27 10:30:00

2)建表語句

hive> 
-- Login session log: one row per session with the client IP and login/logout times.
-- Dropped first so the script can be re-run from scratch.
DROP TABLE IF EXISTS user_login_detail;
CREATE TABLE user_login_detail
(
    user_id    STRING COMMENT '用戶id',
    ip_address STRING COMMENT 'ip地址',
    login_ts   STRING COMMENT '登錄時間',
    logout_ts  STRING COMMENT '登出時間'
)
    COMMENT '用戶登錄明細表'
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

3)數據裝載

hive> 
-- Load the sample login sessions. Timestamps are 'yyyy-MM-dd HH:mm:ss' strings.
INSERT OVERWRITE TABLE user_login_detail
VALUES
    ('101', '180.149.130.161', '2021-09-21 08:00:00', '2021-09-27 08:30:00'),
    ('101', '180.149.130.161', '2021-09-27 08:00:00', '2021-09-27 08:30:00'),
    ('101', '180.149.130.161', '2021-09-28 09:00:00', '2021-09-28 09:10:00'),
    ('101', '180.149.130.161', '2021-09-29 13:30:00', '2021-09-29 13:50:00'),
    ('101', '180.149.130.161', '2021-09-30 20:00:00', '2021-09-30 20:10:00'),
    ('102', '120.245.11.2', '2021-09-22 09:00:00', '2021-09-27 09:30:00'),
    ('102', '120.245.11.2', '2021-10-01 08:00:00', '2021-10-01 08:30:00'),
    ('102', '180.149.130.174', '2021-10-01 07:50:00', '2021-10-01 08:20:00'),
    ('102', '120.245.11.2', '2021-10-02 08:00:00', '2021-10-02 08:30:00'),
    ('103', '27.184.97.3', '2021-09-23 10:00:00', '2021-09-27 10:30:00'),
    ('103', '27.184.97.3', '2021-10-03 07:50:00', '2021-10-03 09:20:00'),
    ('104', '27.184.97.34', '2021-09-24 11:00:00', '2021-09-27 11:30:00'),
    ('104', '27.184.97.34', '2021-10-03 07:50:00', '2021-10-03 08:20:00'),
    ('104', '27.184.97.34', '2021-10-03 08:50:00', '2021-10-03 10:20:00'),
    ('104', '120.245.11.89', '2021-10-03 08:40:00', '2021-10-03 10:30:00'),
    ('105', '119.180.192.212', '2021-10-04 09:10:00', '2021-10-04 09:30:00'),
    ('106', '119.180.192.66', '2021-10-04 08:40:00', '2021-10-04 10:30:00'),
    ('106', '119.180.192.66', '2021-10-05 21:50:00', '2021-10-05 22:40:00'),
    ('107', '219.134.104.7', '2021-09-25 12:00:00', '2021-09-27 12:30:00'),
    ('107', '219.134.104.7', '2021-10-05 22:00:00', '2021-10-05 23:00:00'),
    ('107', '219.134.104.7', '2021-10-06 09:10:00', '2021-10-06 10:20:00'),
    ('107', '27.184.97.46', '2021-10-06 09:00:00', '2021-10-06 10:00:00'),
    ('108', '101.227.131.22', '2021-10-06 09:00:00', '2021-10-06 10:00:00'),
    ('108', '101.227.131.22', '2021-10-06 22:00:00', '2021-10-06 23:00:00'),
    ('109', '101.227.131.29', '2021-09-26 13:00:00', '2021-09-27 13:30:00'),
    ('109', '101.227.131.29', '2021-10-06 08:50:00', '2021-10-06 10:20:00'),
    ('109', '101.227.131.29', '2021-10-08 09:00:00', '2021-10-08 09:10:00'),
    ('1010', '119.180.192.10', '2021-09-27 14:00:00', '2021-09-27 14:30:00'),
    ('1010', '119.180.192.10', '2021-10-09 08:50:00', '2021-10-09 10:20:00');

1.7 商品價格變更明細表

1)表結構

sku_id(商品id) new_price(本次變更之後的價格) change_date(變更日期)
1 1900.00 2021-09-25
1 2000.00 2021-09-26
2 80.00 2021-09-29
2 10.00 2021-09-30

2)建表語句

hive> 
-- Price change history: one row per (product, change date) holding the price that
-- applies after the change. Dropped first so the script can be re-run from scratch.
DROP TABLE IF EXISTS sku_price_modify_detail;
CREATE TABLE sku_price_modify_detail
(
    sku_id      STRING         COMMENT '商品id',
    new_price   DECIMAL(16, 2) COMMENT '更改後的價格',
    change_date STRING         COMMENT '變動日期'
)
    COMMENT '商品價格變更明細表'
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

3)數據裝載

hive> 
-- Load two price changes for each of the twelve products.
INSERT OVERWRITE TABLE sku_price_modify_detail
VALUES
    ('1', 1900, '2021-09-25'),
    ('1', 2000, '2021-09-26'),
    ('2', 80, '2021-09-29'),
    ('2', 10, '2021-09-30'),
    ('3', 4999, '2021-09-25'),
    ('3', 5000, '2021-09-26'),
    ('4', 5600, '2021-09-26'),
    ('4', 6000, '2021-09-27'),
    ('5', 490, '2021-09-27'),
    ('5', 500, '2021-09-28'),
    ('6', 1988, '2021-09-30'),
    ('6', 2000, '2021-10-01'),
    ('7', 88, '2021-09-28'),
    ('7', 100, '2021-09-29'),
    ('8', 800, '2021-09-28'),
    ('8', 600, '2021-09-29'),
    ('9', 1100, '2021-09-27'),
    ('9', 1000, '2021-09-28'),
    ('10', 90, '2021-10-01'),
    ('10', 100, '2021-10-02'),
    ('11', 66, '2021-10-01'),
    ('11', 50, '2021-10-02'),
    ('12', 35, '2021-09-28'),
    ('12', 20, '2021-09-29');

1.8 配送信息表

1)表結構

delivery_id(運單id) order_id(訂單id) user_id(用戶id) order_date(下單日期) custom_date(期望配送日期)
1 1 101 2021-09-27 2021-09-29
2 2 101 2021-09-28 2021-09-28
3 3 101 2021-09-29 2021-09-30

2)建表語句

hive> 
-- Delivery table: one row per waybill with the order, the buyer, the order day and
-- the delivery day the customer asked for. Dropped first so the script can be re-run.
DROP TABLE IF EXISTS delivery_info;
CREATE TABLE delivery_info
(
    delivery_id STRING COMMENT '配送單id',
    order_id    STRING COMMENT '訂單id',
    user_id     STRING COMMENT '用戶id',
    order_date  STRING COMMENT '下單日期',
    custom_date STRING COMMENT '期望配送日期'
)
    COMMENT '郵寄信息表'
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

3)數據裝載

hive> 
-- Load one delivery record for each of the forty sample orders.
INSERT OVERWRITE TABLE delivery_info
VALUES
    ('1', '1', '101', '2021-09-27', '2021-09-29'),
    ('2', '2', '101', '2021-09-28', '2021-09-28'),
    ('3', '3', '101', '2021-09-29', '2021-09-30'),
    ('4', '4', '101', '2021-09-30', '2021-10-01'),
    ('5', '5', '102', '2021-10-01', '2021-10-01'),
    ('6', '6', '102', '2021-10-01', '2021-10-01'),
    ('7', '7', '102', '2021-10-01', '2021-10-03'),
    ('8', '8', '102', '2021-10-02', '2021-10-02'),
    ('9', '9', '103', '2021-10-02', '2021-10-03'),
    ('10', '10', '103', '2021-10-02', '2021-10-04'),
    ('11', '11', '103', '2021-10-02', '2021-10-02'),
    ('12', '12', '103', '2021-10-03', '2021-10-03'),
    ('13', '13', '104', '2021-10-03', '2021-10-04'),
    ('14', '14', '104', '2021-10-03', '2021-10-04'),
    ('15', '15', '104', '2021-10-03', '2021-10-03'),
    ('16', '16', '104', '2021-10-03', '2021-10-03'),
    ('17', '17', '105', '2021-10-04', '2021-10-04'),
    ('18', '18', '105', '2021-10-04', '2021-10-06'),
    ('19', '19', '105', '2021-10-04', '2021-10-06'),
    ('20', '20', '105', '2021-10-04', '2021-10-04'),
    ('21', '21', '106', '2021-10-04', '2021-10-04'),
    ('22', '22', '106', '2021-10-05', '2021-10-05'),
    ('23', '23', '106', '2021-10-05', '2021-10-05'),
    ('24', '24', '106', '2021-10-05', '2021-10-07'),
    ('25', '25', '107', '2021-10-05', '2021-10-05'),
    ('26', '26', '107', '2021-10-05', '2021-10-06'),
    ('27', '27', '107', '2021-10-06', '2021-10-06'),
    ('28', '28', '107', '2021-10-06', '2021-10-07'),
    ('29', '29', '108', '2021-10-06', '2021-10-06'),
    ('30', '30', '108', '2021-10-06', '2021-10-06'),
    ('31', '31', '108', '2021-10-07', '2021-10-09'),
    ('32', '32', '108', '2021-10-07', '2021-10-09'),
    ('33', '33', '109', '2021-10-07', '2021-10-08'),
    ('34', '34', '109', '2021-10-07', '2021-10-08'),
    ('35', '35', '109', '2021-10-08', '2021-10-10'),
    ('36', '36', '109', '2021-10-08', '2021-10-09'),
    ('37', '37', '1010', '2021-10-08', '2021-10-10'),
    ('38', '38', '1010', '2021-10-08', '2021-10-10'),
    ('39', '39', '1010', '2021-10-08', '2021-10-09'),
    ('40', '40', '1010', '2021-10-08', '2021-10-09');

1.9 好友關係表

1)表結構

user1_id(用戶1 id) user2_id(用戶2 id)
101 1010
101 108
101 106

注:表中一行數據中的兩個user_id,表示兩個用戶互爲好友。

2)建表語句

hive> 
-- Friendship table: each row pairs two user ids.
-- Dropped first so the script can be re-run from scratch.
DROP TABLE IF EXISTS friendship_info;
CREATE TABLE friendship_info
(
    user1_id STRING COMMENT '用戶1id',
    user2_id STRING COMMENT '用戶2id'
)
    COMMENT '用戶關係表'
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

3)數據裝載

hive> 
-- Load the sample friendships: every user is paired with 1010, 108, 106, 104 and 102.
-- Note that this includes rows pairing a user with itself (e.g. 102-102).
INSERT OVERWRITE TABLE friendship_info
VALUES
    ('101', '1010'),
    ('101', '108'),
    ('101', '106'),
    ('101', '104'),
    ('101', '102'),
    ('102', '1010'),
    ('102', '108'),
    ('102', '106'),
    ('102', '104'),
    ('102', '102'),
    ('103', '1010'),
    ('103', '108'),
    ('103', '106'),
    ('103', '104'),
    ('103', '102'),
    ('104', '1010'),
    ('104', '108'),
    ('104', '106'),
    ('104', '104'),
    ('104', '102'),
    ('105', '1010'),
    ('105', '108'),
    ('105', '106'),
    ('105', '104'),
    ('105', '102'),
    ('106', '1010'),
    ('106', '108'),
    ('106', '106'),
    ('106', '104'),
    ('106', '102'),
    ('107', '1010'),
    ('107', '108'),
    ('107', '106'),
    ('107', '104'),
    ('107', '102'),
    ('108', '1010'),
    ('108', '108'),
    ('108', '106'),
    ('108', '104'),
    ('108', '102'),
    ('109', '1010'),
    ('109', '108'),
    ('109', '106'),
    ('109', '104'),
    ('109', '102'),
    ('1010', '1010'),
    ('1010', '108'),
    ('1010', '106'),
    ('1010', '104'),
    ('1010', '102');

1.10 收藏信息表

1)表結構

user_id(用戶id) sku_id(商品id) create_date(收藏日期)
101 3 2021-09-23
101 12 2021-09-23
101 6 2021-09-25

2)建表語句

hive> 
-- Favourites table: one row per (user, product) with the day it was favourited.
-- Dropped first so the script can be re-run from scratch.
DROP TABLE IF EXISTS favor_info;
CREATE TABLE favor_info
(
    user_id     STRING COMMENT '用戶id',
    sku_id      STRING COMMENT '商品id',
    create_date STRING COMMENT '收藏日期'
)
    COMMENT '用戶收藏表'
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

3)數據裝載

hive> 
-- Load the sample favourites for all ten users.
INSERT OVERWRITE TABLE favor_info
VALUES
    ('101', '3', '2021-09-23'),
    ('101', '12', '2021-09-23'),
    ('101', '6', '2021-09-25'),
    ('101', '10', '2021-09-21'),
    ('101', '5', '2021-09-25'),
    ('102', '1', '2021-09-24'),
    ('102', '2', '2021-09-24'),
    ('102', '8', '2021-09-23'),
    ('102', '12', '2021-09-22'),
    ('102', '11', '2021-09-23'),
    ('102', '9', '2021-09-25'),
    ('102', '4', '2021-09-25'),
    ('102', '6', '2021-09-23'),
    ('102', '7', '2021-09-26'),
    ('103', '8', '2021-09-24'),
    ('103', '5', '2021-09-25'),
    ('103', '6', '2021-09-26'),
    ('103', '12', '2021-09-27'),
    ('103', '7', '2021-09-25'),
    ('103', '10', '2021-09-25'),
    ('103', '4', '2021-09-24'),
    ('103', '11', '2021-09-25'),
    ('103', '3', '2021-09-27'),
    ('104', '9', '2021-09-28'),
    ('104', '7', '2021-09-28'),
    ('104', '8', '2021-09-25'),
    ('104', '3', '2021-09-28'),
    ('104', '11', '2021-09-25'),
    ('104', '6', '2021-09-25'),
    ('104', '12', '2021-09-28'),
    ('105', '8', '2021-10-08'),
    ('105', '9', '2021-10-07'),
    ('105', '7', '2021-10-07'),
    ('105', '11', '2021-10-06'),
    ('105', '5', '2021-10-07'),
    ('105', '4', '2021-10-05'),
    ('105', '10', '2021-10-07'),
    ('106', '12', '2021-10-08'),
    ('106', '1', '2021-10-08'),
    ('106', '4', '2021-10-04'),
    ('106', '5', '2021-10-08'),
    ('106', '2', '2021-10-04'),
    ('106', '6', '2021-10-04'),
    ('106', '7', '2021-10-08'),
    ('107', '5', '2021-09-29'),
    ('107', '3', '2021-09-28'),
    ('107', '10', '2021-09-27'),
    ('108', '9', '2021-10-08'),
    ('108', '3', '2021-10-10'),
    ('108', '8', '2021-10-10'),
    ('108', '10', '2021-10-07'),
    ('108', '11', '2021-10-07'),
    ('109', '2', '2021-09-27'),
    ('109', '4', '2021-09-29'),
    ('109', '5', '2021-09-29'),
    ('109', '9', '2021-09-30'),
    ('109', '8', '2021-09-26'),
    ('1010', '2', '2021-09-29'),
    ('1010', '9', '2021-09-29'),
    ('1010', '1', '2021-10-01');

第二章 練習題

2.1 查詢累積銷量排名第二的商品

2.1.1 題目需求

查詢訂單明細表(order_detail)中銷量(下單件數)排名第二的商品id,如果不存在返回null,如果存在多個排名第二的商品則需要全部返回。期望結果如下:

sku_id
11

2.1.2 代碼實現

hive> 
-- Exercise 2.1: product(s) whose total units sold rank second.
-- Returns every product tied for second place, or a single NULL row when no
-- second place exists.
WITH sku_sales AS (
    -- Total units sold per product.
    SELECT sku_id,
           SUM(sku_num) AS order_num
    FROM order_detail
    GROUP BY sku_id
),
ranked_sales AS (
    -- Global ranking: ordered but not partitioned. DENSE_RANK leaves no gap after
    -- ties, so rank 2 is always the second-highest distinct total.
    SELECT sku_id,
           DENSE_RANK() OVER (ORDER BY order_num DESC) AS rk
    FROM sku_sales
),
runner_up AS (
    SELECT sku_id
    FROM ranked_sales
    WHERE rk = 2
)
-- The one-row anchor on the preserved side of the outer join guarantees that a row
-- (with NULL sku_id) is still returned when runner_up is empty.
SELECT runner_up.sku_id
FROM (SELECT 1 AS placeholder) anchor
LEFT JOIN runner_up
    ON 1 = 1;

窗口排序函數

  • row_number:在每個分組中,爲每行分配一個從1開始的唯一序列號,遞增,不考慮重複;(1234567……)

  • rank: 在每個分組中,爲每行分配一個從1開始的序列號,考慮重複,擠佔後續位置;(12225……)

  • dense_rank: 在每個分組中,爲每行分配一個從1開始的序列號,考慮重複,不擠佔後續位置(12223……)

也有人會將分組聚合子查詢和窗口排序子查詢合在一起,要理解這種方法,但可讀性不高

-- One-pass variant: the window function ranks directly on the aggregate, so the
-- grouping and the ranking happen in the same SELECT (ordered, not partitioned).
SELECT
    sku_id,
    SUM(sku_num) AS order_num,
    DENSE_RANK() OVER (ORDER BY SUM(sku_num) DESC) AS rk
FROM order_detail
GROUP BY sku_id

2.2 查詢至少連續三天下單的用戶

2.2.1 題目需求

查詢訂單信息表(order_info)中最少連續3天下單的用戶id,期望結果如下:

user_id
101

2.2.2 代碼實現

hive中的去重

  • distinct關鍵字

  • group by

  • 窗口函數排序(思路同上一題,按分區窗口排序後,取第一個)

方案1

使用lead滑窗,datediff求差值

image-20230119105954346

-- Exercise 2.2, option 1: users who ordered on at least three consecutive days.
-- For each distinct order day, look two order days ahead; if that day is exactly
-- two calendar days later, the three days are consecutive.
WITH order_days AS (
    -- One row per (user, day); several orders on the same day collapse to one.
    SELECT DISTINCT
        user_id,
        create_date
    FROM order_info
),
with_third_day AS (
    SELECT user_id,
           create_date,
           -- '9999-12-31' is the default when fewer than two later days exist,
           -- which can never be two days away.
           LEAD(create_date, 2, '9999-12-31')
               OVER (PARTITION BY user_id ORDER BY create_date) AS lead2
    FROM order_days
)
SELECT DISTINCT user_id
FROM with_third_day
WHERE DATEDIFF(lead2, create_date) = 2;

方案2

統計相同日期差值的個數

image-20230119111457416

-- Exercise 2.2, option 2: users who ordered on at least three consecutive days.
-- A date minus its row number is constant within a run of consecutive days, so
-- counting rows per (user, date - row_number) gives the length of each run.
WITH order_days AS (
    -- GROUP BY used as de-duplication: a user may place several orders on one day.
    SELECT user_id,
           create_date
    FROM order_info
    GROUP BY user_id, create_date
),
streak_keys AS (
    SELECT user_id,
           create_date,
           DATE_SUB(create_date,
                    ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY create_date)) AS diff
    FROM order_days
),
long_streaks AS (
    -- Keep runs of three or more consecutive order days.
    SELECT user_id
    FROM streak_keys
    GROUP BY user_id, diff
    HAVING COUNT(diff) >= 3
)
SELECT DISTINCT user_id
FROM long_streaks;

date函數

  • datediff('endTime','startTime')返回前後日期之間的天數差(endTime減去startTime)

    hive> select datediff('2019-06-25','2019-06-20');
    5
     
    hive> select datediff('2019-06-25','2019-06-27');
    -2
    
  • date_sub('yyyy-MM-dd',n)返回初始日期n天前的日期;若n爲負數(-m),則返回m天後的日期

    hive> select date_sub('2019-06-25',4);
    2019-06-21
     
    hive> select date_sub('2019-06-25',-2);
    2019-06-27
    

方案3

利用滑動窗口的範圍

image-20230119112453889

-- Exercise 2.2, option 3: users who ordered on at least three consecutive days.
-- Each order day is converted to epoch seconds; a RANGE window then counts the
-- order days falling within one day (86400 s) before and after the current one.
-- A count of 3 means the previous, current and next day all have orders.
WITH order_days AS (
    SELECT DISTINCT
        user_id,
        create_date
    FROM order_info
),
day_epochs AS (
    -- Date string -> Unix timestamp in seconds.
    SELECT user_id,
           UNIX_TIMESTAMP(create_date, 'yyyy-MM-dd') AS ts
    FROM order_days
),
neighbour_counts AS (
    SELECT user_id,
           ts,
           COUNT(*) OVER (
               PARTITION BY user_id
               ORDER BY ts
               RANGE BETWEEN 86400 PRECEDING AND 86400 FOLLOWING
           ) AS cnt
    FROM day_epochs
)
SELECT DISTINCT user_id
FROM neighbour_counts
WHERE cnt = 3;

2.3 查詢各品類銷售商品的種類數及銷量最高的商品

2.3.1 題目需求

從訂單明細表(order_detail)統計各品類銷售出的商品種類數及累積銷量最好的商品,期望結果如下:

category_id(分類id) category_name(分類名稱) sku_id(銷量最好的商品id) name(商品名稱) order_num(銷量最好的商品銷量) order_cnt(商品種類數量)
1 數碼 2 手機殼 302 4
2 廚衛 8 微波爐 253 4
3 戶外 12 遮陽傘 349 4

2.3.2 代碼實現

-- Exercise 2.3: per category, the best-selling product(s) and the number of
-- distinct products sold in that category.
WITH sku_sales AS (
    -- Total units sold per product.
    SELECT sku_id,
           SUM(sku_num) AS order_num
    FROM order_detail
    GROUP BY sku_id
),
ranked AS (
    SELECT sales.sku_id,
           sku.name,
           sku.category_id,
           cate.category_name,
           sales.order_num,
           -- Top-K within each category; RANK keeps every product tied for first.
           RANK() OVER (PARTITION BY sku.category_id ORDER BY sales.order_num DESC) AS rk,
           -- Windowed count so every row carries its category's product count.
           COUNT(DISTINCT sales.sku_id) OVER (PARTITION BY sku.category_id) AS sku_cnt
    FROM sku_sales sales
    -- sku_info supplies the product name and category id.
    LEFT JOIN sku_info sku
        ON sales.sku_id = sku.sku_id
    -- category_info supplies the category name.
    LEFT JOIN category_info cate
        ON sku.category_id = cate.category_id
)
SELECT category_id,
       category_name,
       sku_id,
       name,
       order_num,
       sku_cnt
FROM ranked
WHERE rk = 1;

2.4 查詢用戶的累計消費金額及VIP等級

2.4.1 題目需求

從訂單信息表(order_info)中統計每個用戶截止其每個下單日期的累積消費金額,以及每個用戶在其每個下單日期的VIP等級。

用戶vip等級根據累積消費金額計算,計算規則如下:

設累積消費總額爲X,

若0<=X<10000,則vip等級爲普通會員

若10000<=X<30000,則vip等級爲青銅會員

若30000<=X<50000,則vip等級爲白銀會員

若50000<=X<80000,則vip等級爲黃金會員

若80000<=X<100000,則vip等級爲白金會員

若X>=100000,則vip等級爲鑽石會員

期望結果如下:

user_id(用戶id) create_date(下單日期) sum_so_far(截至每個下單日期的累計下單金額) vip_level(每個下單日期的VIP等級)
101 2021-09-27 29000.00 青銅會員
101 2021-09-28 99500.00 白金會員
101 2021-09-29 142800.00 鑽石會員
101 2021-09-30 143660.00 鑽石會員
102 2021-10-01 171680.00 鑽石會員
102 2021-10-02 177850.00 鑽石會員
103 2021-10-02 69980.00 黃金會員
103 2021-10-03 75890.00 黃金會員
104 2021-10-03 89880.00 白金會員
105 2021-10-04 120100.00 鑽石會員
106 2021-10-04 9390.00 普通會員
106 2021-10-05 119150.00 鑽石會員
107 2021-10-05 69850.00 黃金會員
107 2021-10-06 124150.00 鑽石會員
108 2021-10-06 101070.00 鑽石會員
108 2021-10-07 155770.00 鑽石會員
109 2021-10-07 129480.00 鑽石會員
109 2021-10-08 153500.00 鑽石會員
1010 2021-10-08 51950.00 黃金會員

2.4.2 代碼實現

-- Exercise 2.4: cumulative spend per user as of each order day, plus the VIP level
-- that cumulative spend maps to.
WITH daily_spend AS (
    -- Amount each user spent on each day.
    SELECT user_id,
           create_date,
           SUM(total_amount) AS total_amount_per_day
    FROM order_info
    GROUP BY user_id, create_date
),
running_spend AS (
    -- Running total from the user's first order day up to the current row.
    SELECT user_id,
           create_date,
           SUM(total_amount_per_day) OVER (
               PARTITION BY user_id
               ORDER BY create_date
               ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
           ) AS sum_so_far
    FROM daily_spend
)
SELECT user_id,
       create_date,
       sum_so_far,
       -- Thresholds are tested from highest to lowest, so the first match wins.
       CASE
           WHEN sum_so_far >= 100000 THEN '鑽石會員'
           WHEN sum_so_far >= 80000  THEN '白金會員'
           WHEN sum_so_far >= 50000  THEN '黃金會員'
           WHEN sum_so_far >= 30000  THEN '白銀會員'
           WHEN sum_so_far >= 10000  THEN '青銅會員'
           WHEN sum_so_far >= 0      THEN '普通會員'
           ELSE NULL
       END AS vip_level
FROM running_spend;

2.5 查詢首次下單後第二天連續下單的用戶比率

2.5.1 題目需求

從訂單信息表(order_info)中查詢首次下單後第二天仍然下單的用戶佔所有下單用戶的比例,結果保留一位小數,使用百分數顯示,期望結果如下:

percentage
70.0%

2.5.2 代碼實現

先找到每個用戶的前兩個下單日期

-- Exercise 2.5: share of ordering users who ordered again on the day right after
-- their first order, shown as a percentage with one decimal place.
WITH order_days AS (
    SELECT DISTINCT
        user_id,
        create_date
    FROM order_info
),
ranked_days AS (
    SELECT user_id,
           create_date,
           RANK() OVER (PARTITION BY user_id ORDER BY create_date) AS rk
    FROM order_days
),
first_two_days AS (
    -- Earliest two order days per user. With a single order day both columns hold
    -- the same date, so the difference is 0 and the user does not qualify.
    SELECT user_id,
           MIN(create_date) AS buy_date_first,
           MAX(create_date) AS buy_date_second
    FROM ranked_days
    WHERE rk <= 2
    GROUP BY user_id
)
SELECT CONCAT(
           ROUND(
               SUM(CASE WHEN DATEDIFF(buy_date_second, buy_date_first) = 1 THEN 1 ELSE 0 END)
                   / COUNT(*) * 100,
               1),
           '%') AS percentage
FROM first_two_days;

2.6 每個商品銷售首年的年份、銷售數量和銷售金額

2.6.1 題目需求

從訂單明細表(order_detail)統計每個商品銷售首年的年份,銷售數量和銷售總額。

期望結果如下:

sku_id(商品id) year(銷售首年年份) order_num(首年銷量) order_amount(首年銷售金額)
1 2021 51 102000.00
2 2021 302 3020.00
3 2021 36 180000.00
4 2021 53 318000.00
5 2021 242 121000.00
6 2021 32 64000.00
7 2021 252 25200.00
8 2021 253 151800.00
9 2021 194 194000.00
10 2021 299 29900.00
11 2021 320 16000.00
12 2021 349 6980.00

2.6.2 代碼實現

依舊是Top K排序問題,得到每個商品最早的銷售時間

-- Exercise 2.6: for each product, its first sales year plus the units and revenue
-- sold in that year only.
WITH ranked_lines AS (
    -- Rank order lines by sales year within each product; every line from the
    -- earliest year gets rank 1.
    SELECT order_id,
           sku_id,
           price,
           sku_num,
           create_date,
           RANK() OVER (PARTITION BY sku_id ORDER BY YEAR(create_date)) AS rk
    FROM order_detail
)
SELECT sku_id,
       YEAR(create_date)    AS `year`,
       SUM(sku_num)         AS order_num,
       SUM(price * sku_num) AS order_amount
FROM ranked_lines
WHERE rk = 1
GROUP BY sku_id, YEAR(create_date);

最開始的思路是直接使用聚合函數

-- Naive first attempt, kept for comparison; it does NOT answer exercise 2.6.
-- MIN(YEAR(create_date)) does return the first sales year, but both SUMs run over
-- all of the product's order lines, so order_num and order_amount are lifetime
-- totals rather than first-year totals.
select sku_id,
    min(year(create_date)) as year,
    sum(sku_num) as order_num,
    sum(price*sku_num) as order_amount
from  order_detail  
group by sku_id;

但這樣雖然能用min得到銷售首年的年份,sum卻是對該商品所有年份的訂單求和,無法只統計首年的銷量和金額,因此這裏只能使用開窗函數先篩選出首年的明細,注意與2.8的區別

2.7 篩選去年總銷量小於100的商品

2.7.1 題目需求

從訂單明細表(order_detail)中篩選出去年總銷量小於100的商品及其銷量,假設今天的日期是2022-01-10,不考慮上架時間小於一個月的商品,期望結果如下:

sku_id(商品id) name(商品名稱) order_num(銷量)
1 xiaomi 10 51
3 apple 12 36
4 xiaomi 13 53
6 洗碗機 32

2.7.2 代碼實現

-- Exercise 2.7: products that sold fewer than 100 units in 2021, ignoring products
-- listed 30 days or less before the assumed current date 2022-01-10.
WITH low_sales_2021 AS (
    SELECT od.sku_id,
           SUM(od.sku_num) AS order_num
    FROM order_detail od
    WHERE YEAR(od.create_date) = '2021'
      -- Only products that have been on sale for more than a month.
      AND od.sku_id IN (
          SELECT sku_id
          FROM sku_info
          WHERE DATEDIFF('2022-01-10', from_date) > 30
      )
    GROUP BY od.sku_id
    HAVING SUM(od.sku_num) < 100
)
SELECT sales.sku_id,
       sku.name,
       sales.order_num
FROM low_sales_2021 sales
-- Attach the product name.
LEFT JOIN sku_info sku
    ON sales.sku_id = sku.sku_id;

2.8 查詢每日新用戶數

2.8.1 題目需求

從用戶登錄明細表(user_login_detail)中查詢每天的新增用戶數,若一個用戶在某天登錄了,且在這一天之前沒登錄過,則認爲該用戶爲這一天的新增用戶。期望結果如下:

login_date_first(日期) user_count(新增用戶數)
2021-09-21 1
2021-09-22 1
2021-09-23 1
2021-09-24 1
2021-09-25 1
2021-09-26 1
2021-09-27 1
2021-10-04 2
2021-10-06 1

2.8.2 代碼實現

-- Exercise 2.8: number of new users per day, where a user is "new" on the day of
-- their earliest login.
WITH first_logins AS (
    -- Earliest login day per user, with the time of day stripped.
    SELECT user_id,
           MIN(DATE_FORMAT(login_ts, 'yyyy-MM-dd')) AS login_date_first
    FROM user_login_detail
    GROUP BY user_id
)
SELECT login_date_first,
       COUNT(*) AS user_count
FROM first_logins
GROUP BY login_date_first;

2.9 統計每個商品的銷量最高的日期

2.9.1 題目需求

從訂單明細表(order_detail)中統計出每種商品銷售件數最多的日期及當日銷量,如果有同一商品多日銷量並列的情況,取其中的最小日期。期望結果如下:

sku_id(商品id) create_date(銷量最高的日期) sum_num(銷量)
1 2021-10-02 9
2 2021-10-04 60
3 2021-10-05 9
4 2021-10-07 10
5 2021-10-03 47
6 2021-10-03 8
7 2021-10-05 58
8 2021-10-08 59
9 2021-10-01 45
10 2021-10-08 94
11 2021-10-08 95
12 2021-10-08 83

2.9.2 代碼實現

-- Exercise 2.9: for each product, the day with the most units sold and that day's
-- total. When several days tie, the earliest one is returned.
WITH daily_sales AS (
    -- Units sold per product per day.
    SELECT sku_id,
           create_date,
           SUM(sku_num) AS sum_num
    FROM order_detail
    GROUP BY sku_id, create_date
),
ranked_days AS (
    SELECT sku_id,
           create_date,
           sum_num,
           -- Highest volume first; the ascending date breaks ties towards the earliest day.
           ROW_NUMBER() OVER (
               PARTITION BY sku_id
               ORDER BY sum_num DESC, create_date ASC
           ) AS rn
    FROM daily_sales
)
SELECT sku_id,
       create_date,
       sum_num
FROM ranked_days
WHERE rn = 1;

2.10 查詢銷售件數高於品類平均數的商品

2.10.1 題目需求

從訂單明細表(order_detail)中查詢累積銷售件數高於其所屬品類平均數的商品,期望結果如下:

sku_id name sum_num cate_avg_num
2 手機殼 302 110.5
5 破壁機 242 194.75
7 熱水壺 252 194.75
8 微波爐 253 194.75
10 帳篷 299 290.5
11 燒烤架 320 290.5
12 遮陽傘 349 290.5

2.10.2 代碼實現

-- Exercise 2.10: products whose total units sold exceed the average of the sold
-- products in the same category.
WITH sku_sales AS (
    -- Total units sold per product.
    SELECT sku_id,
           SUM(sku_num) AS sum_num
    FROM order_detail
    GROUP BY sku_id
),
with_category_avg AS (
    SELECT sales.sku_id,
           sku.category_id,
           sku.name,
           sales.sum_num,
           -- Windowed average so every product row carries its category's mean.
           AVG(sales.sum_num) OVER (PARTITION BY sku.category_id) AS cate_avg_num
    FROM sku_sales sales
    -- Attach the name and category of each product.
    LEFT JOIN sku_info sku
        ON sales.sku_id = sku.sku_id
)
SELECT sku_id,
       name,
       sum_num,
       cate_avg_num
FROM with_category_avg
WHERE sum_num > cate_avg_num;

2.11 用戶註冊、登錄、下單綜合統計

2.11.1 題目需求

從用戶登錄明細表(user_login_detail)和訂單信息表(order_info)中查詢每個用戶的註冊日期(首次登錄日期)、總登錄次數以及其在2021年的登錄次數、訂單數和訂單總額。期望結果如下:

user_id(用戶id) register_date(註冊日期) total_login_count(累積登錄次數) login_count_2021(2021年登錄次數) order_count_2021(2021年下單次數) order_amount_2021(2021年訂單金額)
101 2021-09-21 5 5 4 143660.00
102 2021-09-22 4 4 4 177850.00
103 2021-09-23 2 2 4 75890.00
104 2021-09-24 4 4 4 89880.00
105 2021-10-04 1 1 4 120100.00
106 2021-10-04 2 2 4 119150.00
107 2021-09-25 4 4 4 124150.00
108 2021-10-06 2 2 4 155770.00
109 2021-09-26 3 3 4 153500.00
1010 2021-09-27 2 2 4 51950.00

2.11.2 代碼實現

-- Exercise 2.11: per user, the registration date (first login day), lifetime login
-- count, and the 2021 login count, order count and order amount.
-- The login side drives the result and the order side is LEFT JOINed, so a user who
-- logged in but placed no order in 2021 is still reported, with 0 orders and amount 0.
SELECT
    login.user_id,
    login.register_date,
    login.total_login_count,
    login.login_count_2021,
    COALESCE(oi.order_count_2021, 0)  AS order_count_2021,
    COALESCE(oi.order_amount_2021, 0) AS order_amount_2021
FROM (
    SELECT user_id,
           -- First login day, treated as the registration date.
           MIN(DATE_FORMAT(login_ts, 'yyyy-MM-dd')) AS register_date,
           -- Lifetime number of logins.
           COUNT(1) AS total_login_count,
           -- Logins in 2021; YEAR() returns an integer, so compare with an integer.
           COUNT(IF(YEAR(login_ts) = 2021, 1, NULL)) AS login_count_2021
    FROM user_login_detail
    GROUP BY user_id
) login
LEFT JOIN (
    SELECT user_id,
           -- Number of orders placed in 2021.
           COUNT(DISTINCT order_id) AS order_count_2021,
           -- Total order amount in 2021.
           SUM(total_amount) AS order_amount_2021
    FROM order_info
    WHERE YEAR(create_date) = 2021
    GROUP BY user_id
) oi
    ON login.user_id = oi.user_id;

2.12 查詢指定日期的全部商品價格

2.12.1 題目需求

從商品價格修改明細表(sku_price_modify_detail)中查詢2021-10-01的全部商品的價格,假設所有商品初始價格默認都是99。期望結果如下:

sku_id(商品id) price(商品價格)
1 2000.00
2 10.00
3 5000.00
4 6000.00
5 500.00
6 2000.00
7 100.00
8 600.00
9 1000.00
10 90.00
11 66.00
12 20.00

2.12.2 代碼實現

某個商品在2021-10-01不一定會更改價格,需要獲得最近日期的價格(TopK問題)

-- Exercise 2.12: price of every product on 2021-10-01.
-- A product's price on that day is the one set by its most recent change on or
-- before 2021-10-01; products with no such change get the default price 99.
WITH changes_to_date AS (
    SELECT sku_id,
           new_price,
           change_date,
           -- Newest change first within each product.
           ROW_NUMBER() OVER (PARTITION BY sku_id ORDER BY change_date DESC) AS rn
    FROM sku_price_modify_detail
    -- Ignore changes made after the target date.
    WHERE change_date <= '2021-10-01'
),
latest_price AS (
    SELECT sku_id,
           new_price
    FROM changes_to_date
    WHERE rn = 1
)
SELECT sku.sku_id,
       NVL(latest.new_price, 99) AS price
FROM sku_info sku
LEFT JOIN latest_price latest
    ON sku.sku_id = latest.sku_id;

2.13 即時訂單比例

2.13.1 題目需求

訂單配送中,如果期望配送日期和下單日期相同,稱爲即時訂單,如果期望配送日期和下單日期不同,稱爲計劃訂單。

請從配送信息表(delivery_info)中求出每個用戶的首單(用戶的第一個訂單)中即時訂單的比例,保留兩位小數,以小數形式顯示。期望結果如下:

percentage
0.5

2.13.2 代碼實現

每個用戶的首單(TopK問題)

-- Share of users' first orders that are "immediate" (requested delivery date
-- equals the order date), as a decimal with two fractional digits.
with numbered_orders as (
    -- number each user's deliveries by order date, earliest first
    select user_id,
           order_date,
           custom_date,
           row_number() over (partition by user_id order by order_date) as order_seq
    from delivery_info
)
select
    cast(
        sum(case when order_date = custom_date then 1 else 0 end) / count(*)
        as decimal(10,2)
    ) percentage
from numbered_orders
-- order_seq = 1 is each user's first order
where order_seq = 1;

2.14 向用戶推薦朋友收藏的商品

2.14.1 題目需求

現需要向所有用戶推薦其朋友收藏但用戶自己未收藏的商品,請從好友關係表(friendship_info)和收藏表(favor_info)中查詢出應向哪位用戶推薦哪些商品。期望結果如下:

1)部分結果展示

user_id(用戶id) sku_id(應向該用戶推薦的商品id)
101 2
101 4
101 7
101 9
101 8
101 11
101 1

2.14.2 代碼實現

-- Recommend to each user the products favorited by a friend that the user
-- has not favorited themselves.
select
  distinct t1.user_id,
  friend_favor.sku_id
from
(
  -- friendship_info stores each friendship once; emit it in both directions
  -- so every user appears on the user_id side
  select
    user1_id user_id,
    user2_id friend_id
  from friendship_info
  union
  select
    user2_id,
    user1_id
  from friendship_info
)t1
-- Inner join: only friends that actually have favorites can contribute a
-- recommendation. A left join here would emit (user_id, NULL) rows for
-- friends without any favorites.
join favor_info friend_favor
on t1.friend_id=friend_favor.user_id
-- Anti-join: keep a friend's favorite only when the user has no matching
-- favorite of their own.
left join favor_info user_favor
on t1.user_id=user_favor.user_id
and friend_favor.sku_id=user_favor.sku_id
where user_favor.sku_id is null;

2.15 查詢所有用戶的連續登錄兩天及以上的日期區間

2.15.1 題目需求

從登錄明細表(user_login_detail)中查詢出,所有用戶的連續登錄兩天及以上的日期區間,以登錄時間(login_ts)爲準。期望結果如下:

user_id(用戶id) start_date(開始日期) end_date(結束日期)
101 2021-09-27 2021-09-30
102 2021-10-01 2021-10-02
106 2021-10-04 2021-10-05
107 2021-10-05 2021-10-06

2.15.2 代碼實現

注意該題需要獲得的是時間區間,而不是用戶id。這裏不同於2.2查詢至少連續三天下單的用戶

-- Date ranges in which a user logged in on two or more consecutive days.
with daily_login as (
    -- one row per user per login day
    select distinct
           user_id,
           date_format(login_ts, 'yyyy-MM-dd') as login_date
    from user_login_detail
),
login_island as (
    -- consecutive days share the same (date - row number) value
    select user_id,
           login_date,
           date_sub(
               login_date,
               row_number() over (partition by user_id order by login_date)
           ) as flag
    from daily_login
)
select user_id,
       min(login_date) start_date,
       max(login_date) end_date
from login_island
group by user_id, flag
-- a group of two or more days is a streak of at least two consecutive days
having count(*) >= 2;

2.16 男性和女性每日的購物總金額統計

2.16.1 題目需求

從訂單信息表(order_info)和用戶信息表(user_info)中,分別統計每天男性和女性用戶的訂單總金額,如果當天男性或者女性沒有購物,則統計結果爲0。期望結果如下:

create_date(日期) total_amount_male(男性用戶總金額) total_amount_female(女性用戶總金額)
2021-09-27 29000.00 0.00
2021-09-28 70500.00 0.00
2021-09-29 43300.00 0.00
2021-09-30 860.00 0.00
2021-10-01 0.00 171680.00
2021-10-02 0.00 76150.00
2021-10-03 89880.00 5910.00
2021-10-04 9390.00 120100.00
2021-10-05 109760.00 69850.00
2021-10-06 101070.00 54300.00
2021-10-07 54700.00 129480.00
2021-10-08 51950.00 24020.00

2.16.2 代碼實現

-- Daily order amount split by the buyer's gender; a gender with no orders on
-- a given day contributes 0.
select oi.create_date,
       sum(case when ui.gender = '男' then oi.total_amount else 0 end) total_amount_male,
       sum(case when ui.gender = '女' then oi.total_amount else 0 end) total_amount_female
from order_info oi
left join user_info ui
    on oi.user_id = ui.user_id
group by oi.create_date;

2.17 訂單金額趨勢分析

2.17.1 題目需求

查詢截止每天的最近3天內的訂單金額總和以及訂單金額日平均值,保留兩位小數,四捨五入。期望結果如下:

create_date(日期) total_3d(最近3日訂單金額總和) avg_3d(最近3日訂單金額日平均值)
2021-09-27 29000.00 29000.00
2021-09-28 99500.00 49750.00
2021-09-29 142800.00 47600.00
2021-09-30 114660.00 38220.00
2021-10-01 215840.00 71946.67
2021-10-02 248690.00 82896.67
2021-10-03 343620.00 114540.00
2021-10-04 301430.00 100476.67
2021-10-05 404890.00 134963.33
2021-10-06 464470.00 154823.33
2021-10-07 519160.00 173053.33
2021-10-08 415520.00 138506.67

2.17.2 代碼實現

注意要求是3天內,不能用rows,而要用range

-- Trailing 3-calendar-day total and daily average of order amounts.
with daily_total as (
    -- total order amount per day
    select create_date,
           sum(total_amount) as total_amount_by_day
    from order_info
    group by create_date
)
select create_date,
       round(sum(total_amount_by_day) over last_3_days, 2) total_3d,
       round(avg(total_amount_by_day) over last_3_days, 2) avg_3d
from daily_total
-- RANGE works on the ordering value (a day number counted from 2010-01-01),
-- so the frame covers the current day and the two calendar days before it
-- even when some days have no rows.
window last_3_days as (
    order by datediff(create_date, '2010-01-01')
    range between 2 preceding and current row
);

2.18 購買過商品1和商品2但是沒有購買商品3的顧客

2.18.1 題目需求

從訂單明細表(order_detail)中查詢出所有購買過商品1和商品2,但是沒有購買過商品3的用戶,期望結果如下:

user_id
103
105

2.18.2 代碼實現

高級函數

  • collect_set

    對於非group by字段,用Hive的collect_set函數收集這些字段,返回一個數組;

  • array_contains

    用於判定包含(array_contains)或不包含(!array_contains)關係。

-- Users who bought products 1 and 2 but never product 3.
with user_skus as (
    -- distinct set of products bought by each user
    select oi.user_id,
           collect_set(od.sku_id) as bought
    from order_detail od
    left join order_info oi
        on od.order_id = oi.order_id
    group by oi.user_id
)
select user_id
from user_skus
where array_contains(bought, '1')
  and array_contains(bought, '2')
  and not array_contains(bought, '3');

2.19 統計每日商品1和商品2銷量的差值

2.19.1 題目需求

從訂單明細表(order_detail)中統計每天商品1和商品2銷量(件數)的差值(商品1銷量-商品2銷量),期望結果如下:

create_date diff
2021-09-27 2
2021-10-01 -10
2021-10-02 -49
2021-10-03 4
2021-10-04 -55
2021-10-05 -30
2021-10-06 -49
2021-10-07 -40
2021-10-08 -24

2.19.2 代碼實現

-- Daily difference between units sold of product 1 and product 2.
select create_date,
       sum(case when sku_id = '1' then sku_num else 0 end)
     - sum(case when sku_id = '2' then sku_num else 0 end) diff
from order_detail
-- only days on which product 1 or product 2 was sold appear in the result
where sku_id = '1'
   or sku_id = '2'
group by create_date;

2.20 查詢出每個用戶的最近三筆訂單

2.20.1 題目需求

從訂單信息表(order_info)中查詢出每個用戶的最近三筆訂單,期望結果如下:

user_id order_id create_date
101 2 2021-09-28
101 3 2021-09-29
101 4 2021-09-30
102 5 2021-10-01
102 6 2021-10-01
102 8 2021-10-02
103 9 2021-10-02
103 10 2021-10-02
103 12 2021-10-03
104 13 2021-10-03
104 14 2021-10-03
104 15 2021-10-03
105 17 2021-10-04
105 18 2021-10-04
105 19 2021-10-04
106 22 2021-10-05
106 23 2021-10-05
106 24 2021-10-05
107 25 2021-10-05
107 27 2021-10-06
107 28 2021-10-06
108 29 2021-10-06
108 31 2021-10-07
108 32 2021-10-07
109 33 2021-10-07
109 35 2021-10-08
109 36 2021-10-08
1010 37 2021-10-08
1010 38 2021-10-08

2.20.2 代碼實現

TopK問題

-- The three most recent orders of every user.
with ordered as (
    -- number each user's orders from newest to oldest
    select user_id,
           order_id,
           create_date,
           row_number() over (partition by user_id order by create_date desc) as recency
    from order_info
)
select user_id,
       order_id,
       create_date
from ordered
where recency <= 3;

如果是查詢出每個用戶的最近三個下單日期的所有訂單

則將row_number()換成dense_rank()

窗口排序函數

  • row_number:在每個分組中,爲每行分配一個從1開始的唯一序列號,遞增,不考慮重複;(1234567……)

  • rank: 在每個分組中,爲每行分配一個從1開始的序列號,考慮重複,擠佔後續位置;(12225……)

  • dense_rank: 在每個分組中,爲每行分配一個從1開始的序列號,考慮重複,不擠佔後續位置(12223……)

2.21 查詢每個用戶登錄日期的最大空檔期

2.21.1 題目需求

從登錄明細表(user_login_detail)中查詢每個用戶兩個登錄日期(以login_ts爲準)之間的最大的空檔期。統計最大空檔期時,用戶最後一次登錄至今的空檔也要考慮在內,假設今天爲2021-10-10。期望結果如下:

user_id(用戶id) max_diff(最大空檔期)
101 10
102 9
103 10
104 9
105 6
106 5
107 10
108 4
109 10
1010 12

2.21.2 代碼實現

-- Longest gap in days between two successive login days of each user; the
-- gap from the last login to "today" (2021-10-10) is included.
with daily_login as (
    -- one row per user per login day
    select distinct
           user_id,
           date_format(login_ts, 'yyyy-MM-dd') as login_date
    from user_login_detail
),
login_gap as (
    select user_id,
           datediff(
               -- next login day; the last login is paired with 2021-10-10
               lead(login_date, 1, '2021-10-10')
                   over (partition by user_id order by login_date),
               login_date
           ) as diff
    from daily_login
)
select user_id,
       max(diff) max_diff
from login_gap
group by user_id;

窗口函數lead

  • 功能:用於從當前數據中基於當前行的數據向後偏移取值
  • 語法:lead(colName,N,defaultValue)
    • colName:取哪一列的值
    • N:向後偏移N行
    • defaultValue:如果取不到返回的默認值
  • 分析

當前數據中記錄了每個用戶每一次登陸的日期,一個用戶在一天只有1條信息,我們可以基於用戶的登陸信息,找到如下規律:

連續兩天登陸 : 用戶下次登陸時間 = 本次登陸以後的第二天

連續三天登陸 : 用戶下下次登陸時間 = 本次登陸以後的第三天
……依次類推。

我們可以對用戶ID進行分區,按照登陸時間進行排序,通過lead函數計算出用戶下次登陸時間,通過日期函數計算出登陸以後第二天的日期,如果相等即爲連續兩天登錄。

  • 統計連續2天登錄
-- For each login row: the following calendar day and the user's next login.
SELECT userid,
       logintime,
       -- the calendar day right after this login
       date_add(logintime, 1) AS nextday,
       -- the user's next login in time order; 0 when there is none
       lead(logintime, 1, 0) OVER (PARTITION BY userid ORDER BY logintime) AS nextlogin
FROM tb_login;

img

-- Users who logged in on two consecutive days.
WITH login_with_next AS (
    SELECT userid,
           logintime,
           -- the calendar day right after this login
           date_add(logintime, 1) AS nextday,
           -- the user's next login in time order; 0 when there is none
           lead(logintime, 1, 0) OVER (PARTITION BY userid ORDER BY logintime) AS nextlogin
    FROM tb_login
)
SELECT DISTINCT userid
FROM login_with_next
-- the next login falls exactly on the following day
WHERE nextday = nextlogin;
  • 統計連續3天登錄
-- For each login row: the day two days later and the user's login after next.
SELECT userid,
       logintime,
       -- the third day of a streak that starts at this login
       date_add(logintime, 2) AS nextday,
       -- the login two rows ahead in time order; 0 when there is none
       lead(logintime, 2, 0) OVER (PARTITION BY userid ORDER BY logintime) AS nextlogin
FROM tb_login;

img

-- Users who logged in on three consecutive days.
WITH login_with_next AS (
    SELECT userid,
           logintime,
           -- the third day of a streak that starts at this login
           date_add(logintime, 2) AS nextday,
           -- the login two rows ahead in time order; 0 when there is none
           lead(logintime, 2, 0) OVER (PARTITION BY userid ORDER BY logintime) AS nextlogin
    FROM tb_login
)
SELECT DISTINCT userid
FROM login_with_next
-- the login two rows ahead falls exactly two days later
WHERE nextday = nextlogin;
  • 統計連續N天登錄
-- Template for detecting N consecutive login days. N is a placeholder, not a
-- column: substitute a concrete integer before running.
select
  userid,
  logintime,
  -- the N-th day of a streak that starts at this login
  date_add(logintime,N-1) as nextday,
  -- partition by user, order by login date, take the login N-1 rows ahead; 0 when there is none
  lead(logintime,N-1,0) over (partition by userid order by logintime) as nextlogin
from tb_login;

2.22 查詢相同時刻多地登陸的用戶

2.22.1 題目需求

從登錄明細表(user_login_detail)中查詢在相同時刻,多地登陸(ip_address不同)的用戶,期望結果如下:

user_id (用戶id)
101
102
104
107

2.22.2 代碼實現

-- Users with a session that starts no later than the latest logout of their
-- earlier sessions, i.e. users with overlapping sessions.
-- NOTE(review): ip_address is never compared here although the exercise asks
-- for logins from different addresses - confirm whether that is intended.
with session_history as (
    select user_id,
           login_ts,
           -- latest logout among this user's earlier sessions (by login time);
           -- NULL for the user's first session
           max(logout_ts) over (
               partition by user_id
               order by login_ts
               rows between unbounded preceding and 1 preceding
           ) as prev_max_logout
    from user_login_detail
),
flagged as (
    select user_id,
           -- 2: no earlier session, 1: starts after every earlier logout,
           -- 0: overlaps an earlier session
           case
               when prev_max_logout is null then 2
               when prev_max_logout < login_ts then 1
               else 0
           end as flag
    from session_history
)
select distinct user_id
from flagged
where flag = 0;

2.23 銷售額完成任務指標的商品

2.23.1 題目需求

商家要求每個商品每個月需要售賣出一定的銷售總額

假設1號商品銷售總額大於21000,2號商品銷售總額大於10000,其餘商品沒有要求

請寫出SQL從訂單詳情表中(order_detail)查詢連續兩個月銷售總額大於等於任務總額的商品

結果如下:

sku_id(商品id)
1

2.23.2 代碼實現及步驟

-- Products that met their monthly sales target in two consecutive months.
with monthly_hit as (
    -- monthly sales amount of products 1 and 2, keeping only the months in
    -- which the product reached its target
    select sku_id,
           concat(substr(create_date, 0, 7), '-01') as ymd,
           sum(price * sku_num) as sku_sum
    from order_detail
    where sku_id = 1
       or sku_id = 2
    group by sku_id, substr(create_date, 0, 7)
    having (sku_id = 1 and sku_sum >= 21000)
        or (sku_id = 2 and sku_sum >= 10000)
),
month_island as (
    -- consecutive months share the same (month - row number) value
    select sku_id,
           add_months(ymd, -row_number() over (partition by sku_id order by ymd)) as rymd
    from monthly_hit
),
island_size as (
    -- number of months in each run of consecutive months
    select sku_id,
           count(*) over (partition by sku_id, rymd) as cn
    from month_island
)
select distinct sku_id
from island_size
where cn >= 2;

2.24 根據商品銷售情況進行商品分類

2.24.1 題目需求

從訂單詳情表(order_detail)中按銷售件數對商品進行分類,0-5000爲冷門商品,5001-19999爲一般商品,20000及以上爲熱門商品,並求出不同類別商品的數量

結果如下:

Category(類型) Cn(數量)
一般商品 1
冷門商品 10
熱門商品 1

2.24.2 代碼實現

-- Number of products in each popularity bucket, bucketed by total units sold.
with sku_units as (
    -- total units sold per product
    select sku_id,
           sum(sku_num) as sku_sum
    from order_detail
    group by sku_id
),
bucketed as (
    select sku_id,
           -- totals outside every range yield a NULL category
           case
               when sku_sum between 0 and 5000 then '冷門商品'
               when sku_sum between 5001 and 19999 then '一般商品'
               when sku_sum >= 20000 then '熱門商品'
           end as category
    from sku_units
)
select category,
       count(*) cn
from bucketed
group by category;

2.25 各品類銷量前三的所有商品

2.25.1 題目需求

從訂單詳情表中(order_detail)和商品(sku_info)中查詢各個品類銷售數量前三的商品。如果該品類小於三個商品,則輸出所有的商品銷量。

結果如下:

Sku_id(商品id) Category_id(品類id)
2 1
4 1
1 1
8 2
7 2
5 2
12 3
11 3
10 3

2.25.2 代碼實現

-- Products ranked in the top three by units sold within their category.
with sku_units as (
    -- total units sold per product
    select sku_id,
           sum(sku_num) as sku_sum
    from order_detail
    group by sku_id
),
ranked as (
    select su.sku_id,
           si.category_id,
           -- rank() gives tied products the same rank, so ties are all kept
           rank() over (partition by si.category_id order by su.sku_sum desc) as sales_rank
    from sku_units su
    join sku_info si
        on su.sku_id = si.sku_id
)
select sku_id,
       category_id
from ranked
where sales_rank <= 3;

2.26 各品類中商品價格的中位數

2.26.1 題目需求

從商品(sku_info)中求中位數如果是偶數則輸出中間兩個值的平均值,如果是奇數,則輸出中間數即可。

結果如下:

Category_id(品類id) Medprice(中位數)
1 3500.0
2 1250.0
3 510.0

2.26.2 代碼實現

-- Median product price per category.
with priced as (
    -- position of each product by price within its category, plus the
    -- category's product count
    select category_id,
           price,
           row_number() over (partition by category_id order by price desc) as pos,
           count(1) over (partition by category_id) as total
    from sku_info
)
select category_id,
       cast(avg(price) as decimal(10,2)) as medprice
from priced
-- even count: average the two middle rows; odd count: take the single middle row
where case
          when total % 2 = 0 then pos in (total / 2, total / 2 + 1)
          else pos = (total + 1) / 2
      end
group by category_id;

2.27 找出銷售額連續3天超過100的商品

2.27.1 題目需求

從訂單詳情表(order_detail)中找出銷售額連續3天超過100的商品

結果如下:

Sku_id(商品id)
1
10
11
12
2
3
4
5
6
7
8
9

2.27.2代碼實現

統計相同日期差值的個數,同2.2的方案2思路一致

-- Products whose daily sales amount was at least 100 on three or more
-- consecutive days.
with qualifying_day as (
    -- days on which a product's sales amount reached 100
    select sku_id,
           create_date,
           sum(price * sku_num) as sku_sum
    from order_detail
    group by sku_id, create_date
    having sum(price * sku_num) >= 100
),
day_island as (
    -- consecutive days share the same (date - row number) value
    select sku_id,
           date_sub(
               create_date,
               row_number() over (partition by sku_id order by create_date)
           ) as diff
    from qualifying_day
),
streak as (
    -- runs of three or more consecutive qualifying days
    select sku_id
    from day_island
    group by sku_id, diff
    having count(diff) >= 3
)
select distinct sku_id
from streak;

2.28 查詢有新註冊用戶的當天的新用戶數量、新用戶的第一天留存率

2.28.1 題目需求

從用戶登錄明細表(user_login_detail)中首次登錄算作當天新增,第二天也登錄了算作一日留存

結果如下:

first_login(註冊時間) Register(新增用戶數) Retention(留存率)
2021-09-21 1 0.0
2021-09-22 1 0.0
2021-09-23 1 0.0
2021-09-24 1 0.0
2021-09-25 1 0.0
2021-09-26 1 0.0
2021-09-27 1 0.0
2021-10-04 2 0.5
2021-10-06 1 0.0

2.28.2 代碼實現

-- For every day that has newly registered users: the number of new users and
-- their next-day retention rate.
select
 t3.first_login,
 t3.register,
 -- retained users / new users (the original referenced a nonexistent column
 -- "registeras")
 cast(t3.remain_1/t3.register as decimal(10,2)) as retention
from
 (
  -- new users per first-login day, and how many of them logged in again on
  -- the following day
  select
   t1.first_login,
   -- distinct: a user with several logins on the following day matches
   -- several t2 rows and must still be counted once
   count(distinct t1.user_id) register,
   -- t2.user_id is NULL for users that did not return, so they are not counted
   count(distinct t2.user_id) remain_1
  from
   (
   -- first login day of every user
   select
    user_id,
    date_format(min(login_ts),'yyyy-MM-dd')  first_login
   from
    user_login_detail
   group by
    user_id
   )t1
  left join
   user_login_detail t2
  on
   -- keep only logins made exactly one day after the first login
   t1.user_id=t2.user_id and datediff(date_format(t2.login_ts,'yyyy-MM-dd'),t1.first_login)=1
  group by
   t1.first_login
)t3;

2.29 求出商品連續售賣的時間區間

2.29.1 題目需求

從訂單詳情表(order_detail)中,求出商品連續售賣的時間區間

結果如下(截取部分):

Sku_id(商品id) Start_date(起始時間) End_date(結束時間)
1 2021-09-27 2021-09-27
1 2021-09-30 2021-10-01
1 2021-10-03 2021-10-08
10 2021-10-02 2021-10-03
10 2021-10-05 2021-10-08
11 2021-10-02 2021-10-08
12 2021-09-30 2021-09-30
12 2021-10-02 2021-10-06
12 2021-10-08 2021-10-08

2.29.2 代碼實現

-- Start and end date of every run of consecutive sale days per product.
with sale_day as (
    -- One row per product per sale day. Consecutive days share the same
    -- (date - row number) value; rows are unique per product and date after
    -- the GROUP BY, so row_number() numbers them without ties.
    select sku_id,
           create_date,
           date_sub(
               create_date,
               row_number() over (partition by sku_id order by create_date)
           ) as island_key
    from order_detail
    group by sku_id, create_date
)
select distinct
       sku_id,
       first_value(create_date) over whole_island as start_date,
       last_value(create_date) over whole_island as end_date
from sale_day
-- the frame spans the whole run, so every row of a run sees the same first
-- and last date
window whole_island as (
    partition by sku_id, island_key
    order by create_date
    rows between unbounded preceding and unbounded following
);

新學到的窗口函數

  • first_value取分組內排序後,截止到當前行,第一個值

  • last_value取分組內排序後,截止到當前行,最後一個值

2.30 登錄次數及交易次數統計

2.30.1 題目需求

分別從登陸明細表(user_login_detail)和配送信息表中用戶登錄時間和下單時間統計登陸次數和交易次數

結果如下(截取部分):

User_id(用戶id) Login_date(登錄時間) login_count(登陸次數) order_count(交易次數)
101 2021-09-21 1 0
101 2021-09-27 1 1
101 2021-09-28 1 1
101 2021-09-29 1 1
101 2021-09-30 1 1
1010 2021-09-27 1 0
1010 2021-10-09 1 0
102 2021-09-22 1 0
102 2021-10-01 2 3

2.30.2 代碼實現

-- Logins and orders per user per login day.
with daily_login as (
    -- number of login records per user per day
    select user_id,
           date_format(login_ts, 'yyyy-MM-dd') as login_date,
           count(*) as login_count
    from user_login_detail
    group by user_id, date_format(login_ts, 'yyyy-MM-dd')
)
select dl.user_id,
       dl.login_date,
       -- login_count is constant within a (user, day) group; max() just
       -- carries it through the aggregation
       max(dl.login_count) login_count,
       -- di.user_id is NULL when there was no order that day, so this counts 0
       count(di.user_id) order_count
from daily_login dl
left join delivery_info di
    on dl.user_id = di.user_id
   and dl.login_date = di.order_date
group by dl.user_id, dl.login_date;

2.31 按年度列出每個商品銷售總額

2.31.1 題目需求

從訂單明細表(order_detail)中列出每個商品每個年度的購買總額

結果如下(截取部分):

Sku_id(商品id) Year_date(年份) Sku_sum(銷售總額)
1 2021 102000.00
10 2021 29900.00
11 2021 16000.00
12 2021 413640.00
2 2021 60440.00
3 2021 180000.00
4 2021 318000.00
5 2021 121000.00
6 2021 64000.00
7 2021 25200.00
8 2021 151800.00
9 2021 194000.00

2.31.2 代碼實現

-- Total sales amount of every product per calendar year.
with order_line as (
    -- amount and year of each order line
    select sku_id,
           year(create_date) as year_date,
           price * sku_num as line_amount
    from order_detail
)
select sku_id,
       year_date,
       sum(line_amount) sku_sum
from order_line
group by sku_id, year_date;

2.32. 某周內每件商品每天銷售情況

2.32.1 題目需求

從訂單詳情表(order_detail)中查詢2021年9月27號-2021年10月3號這一週所有商品每天銷售情況。

結果如下:

Sku_id(商品id) Monday Tuesday Wednesday Thursday Friday Saturday Sunday
1 0 0 9 8 0 4 2
10 0 0 0 0 48 69 0
11 0 0 0 0 15 61 0
12 0 0 43 0 31 20400 0
2 0 0 0 18 5800 0 0
3 0 0 0 6 0 1 5
4 9 0 0 8 1 5 0
5 33 0 0 0 24 47 0
6 0 0 0 1 5 8 0
7 0 37 0 17 0 20 0
8 0 46 0 48 39 0 0
9 0 12 0 45 0 0 0

2.32.2 代碼實現

-- Units sold per product for each weekday of 2021-09-27 .. 2021-10-03.
with week_line as (
    select sku_id,
           sku_num,
           -- the mapping below treats dayofweek() as 1 = Sunday .. 7 = Saturday
           dayofweek(create_date) as dow
    from order_detail
    where create_date between '2021-09-27' and '2021-10-03'
)
select sku_id,
       sum(case when dow = 2 then sku_num else 0 end) Monday,
       sum(case when dow = 3 then sku_num else 0 end) Tuesday,
       sum(case when dow = 4 then sku_num else 0 end) Wednesday,
       sum(case when dow = 5 then sku_num else 0 end) Thursday,
       sum(case when dow = 6 then sku_num else 0 end) Friday,
       sum(case when dow = 7 then sku_num else 0 end) Saturday,
       sum(case when dow = 1 then sku_num else 0 end) Sunday
from week_line
group by sku_id;
  • dayofweek() 獲取一個日期是星期幾的方法

2.33 查看每件商品的售價漲幅情況

2.33.1 題目需求

從商品價格變更明細表(sku_price_modify_detail),得到最近一次價格的漲幅情況,並按照漲幅升序排序。

結果如下:

Sku_id(商品id) Price_change(漲幅)
8 -200.00
9 -100.00
2 -70.00
11 -16.00
12 -15.00
3 1.00
5 10.00
10 10.00
7 12.00
6 12.00
1 100.00
4 400.00

2.33.2 代碼實現

-- Size of the most recent price change of every product, smallest first.
with change_history as (
    select sku_id,
           -- Rows are ordered newest first, so lead() returns the price set by
           -- the change before this one; 0 is used when no earlier change exists.
           new_price
             - lead(new_price, 1, 0) over (partition by sku_id order by change_date desc)
             as price_change,
           rank() over (partition by sku_id order by change_date desc) as recency
    from sku_price_modify_detail
)
select sku_id,
       price_change
from change_history
-- recency = 1 is the latest change of each product
where recency = 1
order by price_change;

2.34 銷售訂單首購和次購分析

2.34.1 題目需求

通過商品信息表(sku_info)訂單信息表(order_info)訂單明細表(order_detail)分析如果有一個用戶成功下單兩個及兩個以上的購買成功的手機訂單(購買商品爲xiaomi 10,apple 12,小米13)那麼輸出這個用戶的id及第一次成功購買手機的日期和第二次成功購買手機的日期,以及購買手機成功的次數。

結果如下:

User_id(用戶id) First_date(首次時間) Last_value(末次時間) Cn(購買次數)
101 2021-09-27 2021-09-28 3
1010 2021-10-08 2021-10-08 2
102 2021-10-01 2021-10-01 3
103 2021-09-30 2021-10-02 2
104 2021-10-03 2021-10-03 3
105 2021-10-04 2021-10-04 2
106 2021-10-04 2021-10-05 3
107 2021-10-05 2021-10-05 3
108 2021-10-06 2021-10-06 3
109 2021-10-07 2021-10-07 3

2.34.2 代碼實現

-- Users with at least two phone purchases (xiaomi 10, apple 12, xiaomi 13):
-- date of the first and of the last such purchase, and the purchase count.
select
 oi.user_id,
 min(od.create_date) first_date,
 max(od.create_date) last_date,
 -- one order_detail row of a phone product counts as one purchase
 count(*) cn
from
  order_info oi
join
  order_detail od
on
 oi.order_id=od.order_id
join
 sku_info si
on
 od.sku_id=si.sku_id
where
 si.name in('xiaomi 10','apple 12','xiaomi 13')
group by
 oi.user_id
-- the exercise only wants users with two or more phone purchases; without
-- this filter single-purchase users would be reported as well
having
 count(*)>=2;

2.35 同期商品售賣分析表

2.35.1 題目需求

從訂單明細表(order_detail)中。

求出同一個商品在2020年和2021年中同一個月的售賣情況對比。

結果如下(截取部分):

Sku_id(商品id) Month(月份) 2020_skusum(2020銷售量) 2021_skusum(2021銷售量)
1 9 0 11
1 10 2 38
1 10 94 205
11 10 95 225
12 9 0 43
12 10 83 20556
2 10 26 6018
3 9 0 5
3 10 1 30
4 9 0 9

2.35.2 代碼實現

-- Units sold per product and month, 2020 next to 2021.
with sales_2020 as (
    -- monthly units sold in 2020; ym is the first day of the month
    select sku_id,
           concat(date_format(create_date, 'yyyy-MM'), '-01') as ym,
           sum(sku_num) as sku_sum
    from order_detail
    where year(create_date) = 2020
    group by sku_id, date_format(create_date, 'yyyy-MM')
),
sales_2021 as (
    -- monthly units sold in 2021; ym is the first day of the month
    select sku_id,
           concat(date_format(create_date, 'yyyy-MM'), '-01') as ym,
           sum(sku_num) as sku_sum
    from order_detail
    where year(create_date) = 2021
    group by sku_id, date_format(create_date, 'yyyy-MM')
)
select
    -- the full join leaves one side NULL when a product sold in only one of
    -- the two years for that month, so take whichever side is present
    coalesce(y20.sku_id, y21.sku_id) as sku_id,
    month(coalesce(y20.ym, y21.ym)) as month,
    coalesce(y20.sku_sum, 0) as `2020_skusum`,
    coalesce(y21.sku_sum, 0) as `2021_skusum`
from sales_2020 y20
full join sales_2021 y21
    on y20.sku_id = y21.sku_id
   and month(y20.ym) = month(y21.ym);

2.36 國慶期間每個品類的商品的收藏量和購買量

2.36.1 題目需求

從訂單明細表(order_detail)和收藏信息表(favor_info)統計2021國慶期間,每個商品總收藏量和購買量

結果如下:

Sku_id Sku_sum(購買量) Favor_cn(收藏量)
1 38 1
10 205 2
11 225 2
12 20556 0
2 6018 1
3 30 0
4 44 2
5 209 1
6 26 1
7 180 1
8 148 0
9 182 1

2.36.2 代碼實現

-- Units sold and number of favorites per product during the 2021 National Day
-- holiday (2021-10-01 .. 2021-10-07).
select
 t1.sku_id,
 t1.sku_sum,
 -- products nobody favorited during the holiday have no t2 row; report 0
 nvl(t2.favor_cn,0) favor_cn
from
 (
  -- units sold per product during the holiday
  select
   sku_id,
   sum(sku_num) sku_sum
  from
   order_detail
  where
   create_date>='2021-10-01' and create_date<='2021-10-07'
  group by 
   sku_id
)t1
-- Left join: keep every sold product. An inner join would drop products that
-- were sold but not favorited, which the expected output lists with 0.
left join
 (
  -- favorites per product during the holiday
  select
   sku_id,
   count(*) favor_cn
  from
   favor_info
  where
   create_date>='2021-10-01' and create_date<='2021-10-07'
  group by 
   sku_id
)t2
on 
 t1.sku_id=t2.sku_id;

2.37 統計活躍間隔對用戶分級結果

2.37.1 題目需求

用戶等級:

忠實用戶:近7天活躍且非新用戶

新晉用戶:近7天新增

沉睡用戶:近7天未活躍但是在7天前活躍

流失用戶:近30天未活躍但是在30天前活躍

假設今天是數據中所有日期的最大值,從用戶登錄明細表中的用戶登錄時間給各用戶分級,求出各等級用戶的人數

結果如下:

Level(用戶等級) Cn(用戶數量)
忠實用戶 6
新增用戶 3
沉睡用戶 1

2.37.2 代碼實現

-- Number of users per activity level, where "today" is the latest login date
-- found in the data.
with today_ref as (
    -- the latest login date in the table serves as today
    select date_format(max(login_ts), 'yyyy-MM-dd') as today
    from user_login_detail
),
user_span as (
    -- first and last login day per user, with today attached to every row
    select uld.user_id,
           tr.today,
           date_format(min(uld.login_ts), 'yyyy-MM-dd') as first_login,
           date_format(max(uld.login_ts), 'yyyy-MM-dd') as last_login
    from user_login_detail uld
    join today_ref tr
        on 1 = 1
    group by uld.user_id, tr.today
),
leveled as (
    select user_id,
           -- branch order matters: the first matching branch wins
           case
               -- last login is 30 or more days ago
               when last_login <= date_sub(today, 30)
                   then '流失用戶'
               -- first login 7 or more days ago, and active within the last 7 days
               when first_login <= date_sub(today, 7) and last_login >= date_sub(today, 7)
                   then '忠實用戶'
               -- first login within the last 7 days
               when first_login >= date_sub(today, 7)
                   then '新增用戶'
               -- first and last login both 7 or more days ago
               when first_login <= date_sub(today, 7) and last_login <= date_sub(today, 7)
                   then '沉睡用戶'
           end as level
    from user_span
)
select level,
       count(*) as cn
from leveled
group by level;

2.38 連續簽到領金幣數

2.38.1 題目需求

用戶每天簽到可以領1金幣,並可以累計簽到天數,連續簽到的第3、7天分別可以額外領2和6金幣。

每連續簽到7天重新累積簽到天數。

從用戶登錄明細表中求出每個用戶金幣總數,並按照金幣總數倒序排序

結果如下:

User_id(用戶id) Sum_coin_cn(金幣總數)
101 7
109 3
107 3
102 3
106 2
104 2
103 2
1010 2
108 1
105 1

2.38.2 代碼實現

兩個難點:

  • 如何確定簽到日期是否連續?

  • 如何確定每一次簽到獲取金幣的數量?

-- Total coins earned per user from daily sign-ins, highest total first.
-- Rules: 1 coin per sign-in day; the 3rd and 7th day of a consecutive streak
-- earn 2 and 6 extra coins; the streak day counter restarts after 7 days.
select
 t3.user_id,
 sum(t3.coin_cn) sum_coin_cn
from 
 (
  -- coins earned within one consecutive streak
  select
   t2.user_id,
   -- max(counti_cn) is the streak length, i.e. 1 coin per day.
   -- The bonuses use the position inside the 7-day cycle (counti_cn % 7):
   -- 3 is the 3rd day of a cycle, 0 is the 7th. Testing counti_cn % 3 = 0
   -- instead would wrongly reward days 6, 9, 12, ... and ignore the reset.
   max(t2.counti_cn)
     +sum(if(t2.counti_cn%7=3,2,0))
     +sum(if(t2.counti_cn%7=0,6,0)) coin_cn
  from
   (
    -- mark each sign-in day with its position inside its consecutive streak
    select
     t1.user_id,
     t1.login_date,
     -- consecutive days share the same (date - rank) value
     date_sub(t1.login_date,t1.rk) login_date_rk,
     -- running count inside the streak: 1 for its first day, 2 for its second, ...
     count(*)over(partition by t1.user_id, date_sub(t1.login_date,t1.rk) order by t1.login_date) counti_cn
    from
     (
      -- one row per user per sign-in day, ranked by date
      select
       user_id,
       date_format(login_ts,'yyyy-MM-dd') login_date,
       rank()over(partition by user_id order by date_format(login_ts,'yyyy-MM-dd')) rk
      from
       user_login_detail
      group by
       user_id,date_format(login_ts,'yyyy-MM-dd')
     )t1
   )t2
  group by
   t2.user_id,t2.login_date_rk
 )t3
group by
 t3.user_id
order by
 sum_coin_cn desc;

2.39 國慶期間的7日動銷率和滯銷率

2.39.1 題目需求

動銷率定義爲品類商品中一段時間內有銷量的商品佔當前已上架總商品數的比例(有銷量的商品/已上架總商品數)。

滯銷率定義爲品類商品中一段時間內沒有銷量的商品佔當前已上架總商品數的比例。(沒有銷量的商品 / 已上架總商品數)。

只要當天任一店鋪有任何商品的銷量就輸出該天的結果

從訂單明細表(order_detail)和商品信息表(sku_info)表中求出國慶7天每天每個品類的商品的動銷率和滯銷率

結果如下(截取部分):

Category_id(品類id) 1號(動銷) 1號(滯銷) 2號(動銷) 2號(滯銷) 3號(動銷) 3號(滯銷)
1 1.0 0.0 0.5 0.5 0.75 0.25
2 0.75 0.25 0.75 0.25 0.75 0.25
3 0.25 0.75 0.75 0.25 0.75 0.25

2.39.2 代碼實現

-- Sell-through and unsold rate per category for each of the seven National
-- Day holiday days (2021-10-01 .. 2021-10-07).
with sold_item as (
    -- one row per category, day and product name that had a sale in the holiday
    select distinct
           si.category_id,
           od.create_date,
           si.name
    from order_detail od
    join sku_info si
        on od.sku_id = si.sku_id
    where od.create_date between '2021-10-01' and '2021-10-07'
),
sold_per_day as (
    -- number of products sold per category on each holiday day
    select category_id,
           sum(case when create_date = '2021-10-01' then 1 else 0 end) as d1,
           sum(case when create_date = '2021-10-02' then 1 else 0 end) as d2,
           sum(case when create_date = '2021-10-03' then 1 else 0 end) as d3,
           sum(case when create_date = '2021-10-04' then 1 else 0 end) as d4,
           sum(case when create_date = '2021-10-05' then 1 else 0 end) as d5,
           sum(case when create_date = '2021-10-06' then 1 else 0 end) as d6,
           sum(case when create_date = '2021-10-07' then 1 else 0 end) as d7
    from sold_item
    group by category_id
),
listed as (
    -- number of products listed in each category
    select category_id,
           count(*) as cn
    from sku_info
    group by category_id
)
select
    s.category_id,
    -- sale rate = products sold that day / products listed; unsold rate = 1 - sale rate
    cast(s.d1 / l.cn as decimal(10,2)) as first_sale_rate,
    cast(1 - s.d1 / l.cn as decimal(10,2)) as first_unsale_rate,
    cast(s.d2 / l.cn as decimal(10,2)) as second_sale_rate,
    cast(1 - s.d2 / l.cn as decimal(10,2)) as second_unsale_rate,
    cast(s.d3 / l.cn as decimal(10,2)) as third_sale_rate,
    cast(1 - s.d3 / l.cn as decimal(10,2)) as third_unsale_rate,
    cast(s.d4 / l.cn as decimal(10,2)) as fourth_sale_rate,
    cast(1 - s.d4 / l.cn as decimal(10,2)) as fourth_unsale_rate,
    cast(s.d5 / l.cn as decimal(10,2)) as fifth_sale_rate,
    cast(1 - s.d5 / l.cn as decimal(10,2)) as fifth_unsale_rate,
    cast(s.d6 / l.cn as decimal(10,2)) as sixth_sale_rate,
    cast(1 - s.d6 / l.cn as decimal(10,2)) as sixth_unsale_rate,
    cast(s.d7 / l.cn as decimal(10,2)) as seventh_sale_rate,
    cast(1 - s.d7 / l.cn as decimal(10,2)) as seventh_unsale_rate
from sold_per_day s
join listed l
    on s.category_id = l.category_id;

2.40 同時在線最多的人數

2.40.1 題目需求

根據用戶登錄明細表(user_login_detail),求出平臺同時在線最多的人數。

結果如下:

Cn(人數)
7

2.40.2 代碼實現

  • 時間有交集,纔會出現多個用戶同時在線

  • 登入+1

  • 登出-1

-- Peak number of users online at the same time.
-- Every login is a +1 event and every logout a -1 event; the running sum of
-- the events in time order is the number of users online, and its maximum is
-- the peak.
select
 max(sum_l_time) as cn
from
 (
  select
   -- running total of the events up to and including the current one
   -- NOTE(review): events sharing the same timestamp are not ordered relative
   -- to each other here - confirm whether a logout should be applied before a
   -- login that happens at the same instant.
   sum(flag)over(order by t1.l_time rows between unbounded preceding and current row) sum_l_time
  from
   (
    -- UNION ALL, not UNION: UNION removes duplicate rows, so two users
    -- logging in (or out) at the same timestamp would collapse into a single
    -- event and the peak would be undercounted.
    select
     login_ts l_time,
     1 flag
    from
     user_login_detail
    union all
    select
     logout_ts l_time,
     -1 flag
    from
     user_login_detail
  )t1 
)t2;

當order by後面的rows/range缺失時,默認是range between unbounded preceding and current row

rows是基於行數,range是基於值的大小

  • range是邏輯窗口,是指定當前行對應值的範圍取值,行數不固定,只要行值在範圍內,對應行都包含在內
  • rows是物理窗口,即根據order by 子句排序後,取的前N行及後N行的數據計算(與當前行的值無關,只與排序後的行號相關)

如 rows 2 preceding,表示取當前行及其前2行的數據計算;如 range 2 preceding,表示取排序值與當前行的差值不超過2的所有行(含當前行)的數據計算

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章