【數據分析可視化】數據分箱技術Binning

分箱(Binning):把連續的數值按區間劃分爲若干類別,可以類比爲把蘋果按大小分級後裝進不同的箱子。
在這裏插入圖片描述

import string

import numpy as np
import pandas as pd
from pandas import Series,DataFrame
# Simulate 20 exam scores to be binned: random integers from 35 up to (but not including) 100
score_list = np.random.randint(low=35, high=100, size=20)
score_list
array([93, 35, 83, 44, 56, 62, 37, 86, 44, 82, 49, 91, 49, 82, 53, 89, 47,
       56, 38, 86])
# Grade boundaries; pd.cut turns consecutive edges into right-closed intervals such as (0, 59]
bins = [0, 59, 70, 80, 100]
# Assign every score to its interval (the result is a pandas Categorical)
score_cut = pd.cut(score_list, bins=bins)
score_cut
[(80, 100], (0, 59], (80, 100], (0, 59], (0, 59], ..., (80, 100], (0, 59], (0, 59], (0, 59], (80, 100]]
Length: 20
Categories (4, interval[int64]): [(0, 59] < (59, 70] < (70, 80] < (80, 100]]
# Count how many scores landed in each bin (most populated bin first).
# The top-level pd.value_counts helper is deprecated since pandas 2.1;
# wrapping the Categorical in a Series yields the same counts.
pd.Series(score_cut).value_counts()
(0, 59]      11
(80, 100]     8
(59, 70]      1
(70, 80]      0
dtype: int64
# Wrap the simulated scores in a DataFrame with a single 'score' column
df = DataFrame({'score': score_list})
df
score
0 93
1 35
2 83
3 44
4 56
5 62
6 37
7 86
8 44
9 82
10 49
11 91
12 49
13 82
14 53
15 89
16 47
17 56
18 38
19 86
# Give every row a random 3-character student ID made of ASCII letters and digits.
# pd.util.testing.rands was removed in pandas 2.0, so the IDs are drawn directly
# with NumPy; one ID is generated per existing row instead of a hard-coded count.
id_chars = list(string.ascii_letters + string.digits)
df['student'] = [''.join(np.random.choice(id_chars, size=3)) for _ in range(len(df))]
df
score student
0 93 8c1
1 35 cHy
2 83 6xy
3 44 6gY
4 56 tc5
5 62 r5T
6 37 3z3
7 86 vsy
8 44 F6h
9 82 hgC
10 49 xA9
11 91 iLZ
12 49 BVK
13 82 E9C
14 53 rbE
15 89 hSL
16 47 AIt
17 56 Gdk
18 38 AFX
19 86 JhU
# Bin the DataFrame's 'score' column with the same edges.
# The result is only displayed here; it is not stored back into the DataFrame.
pd.cut(x=df['score'], bins=bins)
0     (80, 100]
1       (0, 59]
2     (80, 100]
3       (0, 59]
4       (0, 59]
5      (59, 70]
6       (0, 59]
7     (80, 100]
8       (0, 59]
9     (80, 100]
10      (0, 59]
11    (80, 100]
12      (0, 59]
13    (80, 100]
14      (0, 59]
15    (80, 100]
16      (0, 59]
17      (0, 59]
18      (0, 59]
19    (80, 100]
Name: score, dtype: category
Categories (4, interval[int64]): [(0, 59] < (59, 70] < (70, 80] < (80, 100]]
# Bin the scores again, this time naming each interval with a grade label,
# and store the result as a new 'Categories' column.
df['Categories'] = pd.cut(
    df['score'],
    bins=bins,
    labels=['low', 'ok', 'good', 'great'],
)
df
score student Categories
0 93 8c1 great
1 35 cHy low
2 83 6xy great
3 44 6gY low
4 56 tc5 low
5 62 r5T ok
6 37 3z3 low
7 86 vsy great
8 44 F6h low
9 82 hgC great
10 49 xA9 low
11 91 iLZ great
12 49 BVK low
13 82 E9C great
14 53 rbE low
15 89 hSL great
16 47 AIt low
17 56 Gdk low
18 38 AFX low
19 86 JhU great
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章