| | StratifiedShuffleSplit | StratifiedKFold |
| --- | --- | --- |
| 訓練集、測試集劃分 | 通過指定 test_size 或 train_size | 通過指定 n_splits(即 cv 折數) |
| 是否支持 shuffle | 是 | 是 |
| 區別 | 保證每組訓練集(或測試集)的數據分佈一致,但不一定與原始數據分佈一致 | 保證訓練集(或測試集)與原始數據的分佈一致。 |
from sklearn.model_selection import StratifiedShuffleSplit,StratifiedKFold
import numpy as np
# Toy data set: one 10-sample pattern repeated five times (50 samples total).
# Feature matrix, shape (50, 2).
X = np.tile(
    np.array([
        [1, 2], [3, 4], [1, 2], [3, 4], [1, 2],
        [3, 4], [1, 2], [3, 4], [3, 4], [3, 4],
    ]),
    (5, 1),
)
# Class labels, shape (50,): per 10-sample pattern, six 0s and four 1s.
y = np.tile(np.array([0, 0, 0, 0, 1, 1, 0, 0, 1, 1]), 5)
# Draw 5 independent, stratified random train/test splits of (X, y).
# NOTE(review): the original call was truncated ("tra") and did not parse.
# Its comment mentioned a 0.25 test ratio, but the sample output listed for
# this loop shows 35 training / 15 test labels out of 50 samples, i.e. a
# 0.3 test ratio, so test_size=0.3 is used here -- confirm the intended value.
ss = StratifiedShuffleSplit(n_splits=5, test_size=0.3, random_state=0)
for train_index, test_index in ss.split(X, y):
    # Class labels selected for the training and test part of this split.
    y_train, y_test = y[train_index], y[test_index]
    print(y_train, y_test)
    # Blank separator line between splits (the original printed the literal
    # text "/n" because of a mistyped newline escape).
    print()
[1 1 1 1 0 0 0 1 1 0 1 0 0 0 0 1 0 1 0 1 0 0 0 1 0 0 0 0 0 0 1 0 1 0 1]
[1 0 0 0 0 0 0 1 1 0 1 1 0 1 0]
[0 0 0 0 0 0 0 0 1 1 0 0 1 1 1 0 0 0 0 1 1 1 0 1 0 0 1 0 1 0 0 1 1 1 0]
[0 1 1 1 1 1 0 1 0 0 0 0 0 0 0]
[1 1 0 0 1 1 0 0 0 1 1 0 0 0 0 0 0 1 1 0 1 0 0 1 0 1 1 0 0 0 0 0 0 1 1]
[0 0 1 0 1 0 0 0 0 1 0 1 1 1 0]
[1 1 1 1 1 1 0 0 0 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 0 0 0 0 0]
[1 0 0 1 0 0 0 0 0 1 1 0 1 0 1]
[1 0 1 1 0 1 0 1 0 0 0 1 1 1 0 1 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 1 1 0 0]
[0 1 1 0 0 1 0 0 0 0 1 0 1 0 1]
# Split (X, y) into 5 stratified folds; each fold serves once as the
# validation part while the remaining four form the training part.
# random_state is fixed so the shuffled folds are reproducible between runs,
# matching the seeded StratifiedShuffleSplit example in this file.
sk = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
for train_idx, valid_idx in sk.split(X, y):
    # Class labels selected for the training and validation part of this fold.
    y_train, y_test = y[train_idx], y[valid_idx]
    print(y_train, y_test)
    # Blank separator line between folds (the original printed the literal
    # text "/n" because of a mistyped newline escape).
    print()
[0 0 0 0 1 1 0 1 0 0 1 1 0 1 0 0 0 0 1 1 0 0 1 1 0 0 0 1 0 0 1 0 0 0 0 1 1 0 1 1]
[0 1 0 0 0 1 0 1 1 0]
[0 0 0 1 1 0 0 1 1 0 0 0 0 1 1 0 0 1 0 0 1 1 0 0 0 0 0 0 1 1 1 1 0 0 0 1 1 0 0 1]
[0 1 0 0 1 1 0 0 0 1]
[0 0 0 0 1 1 0 1 1 0 0 0 0 0 1 1 0 0 1 1 0 0 1 1 0 0 0 1 0 0 1 0 0 0 1 1 0 0 1 1]
[0 0 1 1 0 0 0 1 1 0]
[0 0 1 0 0 1 0 0 0 0 1 1 0 1 1 0 0 0 0 1 0 1 1 0 0 0 0 1 1 0 0 1 1 0 0 0 1 0 1 1]
[0 0 1 1 0 1 0 0 1 0]
[0 0 0 1 0 0 1 1 0 0 0 1 1 0 0 1 1 0 0 0 0 1 0 1 1 0 0 1 1 0 0 1 1 0 0 0 1 0 0 1]
[0 1 0 1 0 0 0 0 1 1]