Python源碼
#coding=utf-8
"""Classify scikit-learn's handwritten digits with a linear SVM.

Splits the digits dataset into 75% training / 25% testing data, standardizes
the features, fits a LinearSVC and prints the test accuracy together with a
per-class precision/recall/F1 report.
"""
# print() as a function keeps the script runnable on both Python 2 and 3.
from __future__ import print_function

from sklearn.datasets import load_digits
# train_test_split lives in sklearn.model_selection since scikit-learn 0.18;
# sklearn.cross_validation was removed in 0.20. Fall back for old installs.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
from sklearn.metrics import classification_report
# Feature standardization
from sklearn.preprocessing import StandardScaler
# LinearSVC: support vector classifier based on a linear hypothesis
from sklearn.svm import LinearSVC

#------------- load the handwritten digits dataset
digits = load_digits()
print('Total dataset shape', digits.data.shape)

#------------- data preparation
# 75% training set, 25% testing set; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    digits.data, digits.target, test_size=0.25, random_state=33)
print('training data shape', y_train.shape)
print('testing data shape', y_test.shape)

#------------- training
# Fit the scaler on the training data only and reuse it for the test data,
# so the test set does not influence the scaling parameters.
ss = StandardScaler()
X_train = ss.fit_transform(X_train)
X_test = ss.transform(X_test)

lsvc = LinearSVC()
lsvc.fit(X_train, y_train)
# Predictions on the held-out test set, used by the report below.
y_predict = lsvc.predict(X_test)

#------------- performance measure
# Label matches the recorded output of this script ("... of Linear SVC is").
print('The Accuracy of Linear SVC is', lsvc.score(X_test, y_test))
print(classification_report(
    y_test, y_predict, target_names=digits.target_names.astype(str)))
Result:
Total dataset shape (1797, 64)
training data shape (1347,)
testing data shape (450,)
The Accuracy of Linear SVC is 0.953333333333
             precision    recall  f1-score   support
          0       0.92      1.00      0.96        35
          1       0.96      0.98      0.97        54
          2       0.98      1.00      0.99        44
          3       0.93      0.93      0.93        46
          4       0.97      1.00      0.99        35
          5       0.94      0.94      0.94        48
          6       0.96      0.98      0.97        51
          7       0.92      1.00      0.96        35
          8       0.98      0.84      0.91        58
          9       0.95      0.91      0.93        44
avg / total       0.95      0.95      0.95       450