from sklearn import datasets,svm
digits = datasets.load_digits() # load the handwritten-digits dataset (8x8 images, labels 0-9) — NOT iris
x_digits = digits.data # feature matrix: one flattened 64-pixel row per sample
y_digits = digits.target # class labels: the digit (0-9) for each sample
svc = svm.SVC(C=1,kernel = 'linear') # linear-kernel SVM *classifier* (not a regression model)
svc.fit(x_digits[:-100],y_digits[:-100]).score(x_digits[-100:],y_digits[-100:]) # train on all but the last 100 samples; accuracy on the held-out 100 is ~0.98
# Manual k-fold (k=3) cross validation: for each fold, hold it out as the
# test set, train on the remaining folds, and record the accuracy score.
import numpy as np
x_folds = np.array_split(x_digits,3)
y_folds = np.array_split(y_digits,3) # labels are the classes 0-9; 3 folds of ~599 samples each
scores = list()
for k in range(3):
    x_train = list(x_folds)
    x_test = x_train.pop(k) # hold fold k out as the TEST set; the remaining folds are the training data
    x_train = np.concatenate(x_train) # join the remaining folds into a single training array
    y_train = list(y_folds)
    y_test = y_train.pop(k) # matching labels for the held-out fold
    y_train = np.concatenate(y_train)
    svc = svm.SVC(C=1,kernel='linear') # fresh classifier for each fold
    scores.append(svc.fit(x_train,y_train).score(x_test,y_test))
print(scores)
# Simpler, built-in way to do the same thing with sklearn's model_selection.
# KFold.split yields (train_indices, test_indices) index arrays for each
# iteration of the chosen CV strategy — demonstrated on a toy list first.
from sklearn.model_selection import KFold,cross_val_score
X = ["a","a","b","c","c","d","d"]
k_fold = KFold(n_splits=3)
for train_indices,test_indices in k_fold.split(X):
    print("train:%s | test:%s" % (train_indices,test_indices))
cross_val_score(svc,x_digits,y_digits,cv=k_fold,n_jobs=-1) # n_jobs=-1: run the folds in parallel on all CPU cores