sklearn學習筆記——svm

三維圖顯示

from sklearn.datasets import make_blobs
from sklearn.datasets import make_circles
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
import numpy as np
# Two concentric rings of 2-D points; y is the ring label used to colour the plot.
x,y=make_circles(n_samples=250, random_state=20,factor=0.5)
# Radial feature exp(-||x||^2): the squared coordinates have to be summed per
# sample *before* exponentiating, so that r depends only on the distance from
# the origin (summing after exp gives exp(-x0^2) + exp(-x1^2) instead).
r=np.exp(-(x**2).sum(1))
# The third argument of np.linspace is a sample count and must be an integer;
# the former value 0.2 raises TypeError on current NumPy. 50 evenly spaced
# levels between the smallest and largest r are generated instead.
rlim=np.linspace(r.min(),r.max(),50)

def plot_3D(elev=30,azim=30,x=x,y=y):
    """Show the samples as a 3-D scatter plot.

    The two columns of ``x`` give the horizontal coordinates and ``y`` the
    point colours. The height of every point is read from the module-level
    array ``r`` (it is not recomputed from ``x``). ``elev`` and ``azim`` are
    forwarded to ``view_init`` to set the camera angles.
    """
    axes3d=plt.subplot(projection='3d')
    first_col,second_col=x[:,0],x[:,1]
    axes3d.scatter3D(first_col,second_col,r,c=y,s=10)
    axes3d.set_xlabel('x')
    axes3d.set_ylabel('y')
    axes3d.set_zlabel('r')
    axes3d.view_init(elev=elev,azim=azim)
    plt.show()


# Draw the lifted data with the camera raised to 70 degrees of elevation.
plot_3D(elev=70,azim=30,x=x,y=y)

在這裏插入圖片描述

from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
from sklearn import svm
from sklearn.datasets import make_blobs
# Toy two-class data set: 50 points drawn around 2 centres (fixed seed).
X,y=make_blobs(
    n_samples=50,
    centers=2,
    cluster_std=0.6,
    random_state=2,
)
def plot_svm_decision_bounder(X,y):
    """Fit a linear SVC on (X, y) and draw its boundary on the current axes.

    The decision function is evaluated on a 40x40 grid spanning the current
    axis limits, then contoured at level 0 (solid line, the decision
    boundary) and at levels -1 and 1 (dashed lines, the margins).

    The grid is built from the limits the axes already have, so the data
    should be plotted on the current axes before this function is called.
    Nothing is returned; the function only draws.
    """
    ax=plt.gca()
    xlim=ax.get_xlim()
    ylim=ax.get_ylim()

    # Regular grid covering the visible area, flattened to (n_points, 2).
    grid_x,grid_y=np.meshgrid(np.linspace(xlim[0],xlim[1],40),
                              np.linspace(ylim[0],ylim[1],40))
    xy=np.vstack([grid_x.ravel(),grid_y.ravel()]).T

    clf=svm.SVC(kernel='linear').fit(X,y)
    # Signed decision-function value of every grid point, back in grid shape.
    p=clf.decision_function(xy).reshape(grid_x.shape)

    # The contour keywords are `colors` and `linestyles`; the previous
    # spellings (`color`, `linstyles`) are not contour options, so the
    # requested line colour and dash pattern were never applied.
    ax.contour(grid_x,grid_y,p,colors='k',
               levels=[-1,0,1],alpha=0.5,linestyles=['--','-','--'])

    # Restore the original view; the y limits must go through set_ylim --
    # assigning to `ax.ylim` only created an unused attribute.
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

# Scatter the samples first: the boundary plot reads the axis limits that
# this call establishes.
plt.scatter(X[:,0],X[:,1],c=y,s=10)
plot_svm_decision_bounder(X,y)
# Display the figure explicitly, as the other plotting snippets in this note
# do; without it nothing is shown when the code runs as a plain script.
plt.show()

在這裏插入圖片描述
調參

# load_breast_cancer is not imported anywhere else in this note, so import it
# next to its first use to avoid a NameError.
from sklearn.datasets import load_breast_cancer

data=load_breast_cancer()
X=data.data
y=data.target
np.unique(y)  # notebook-style inspection of the class labels; no effect in a script
# Hold out 30% of the samples for testing (no fixed seed, so scores vary per run).
X_train,X_test,Y_train,Y_test=train_test_split(X,y,test_size=0.3)

# Compare the four kernels on the unscaled features.
# The unused `time0=time()` timestamp was dropped: `time` was never imported
# and the value was never read.
kernels=[ 'linear', 'poly', 'rbf', 'sigmoid']
for kernel in kernels:
    clf=svm.SVC(kernel=kernel
                ,degree=1
                ,gamma='auto'
                ,cache_size=5000).fit(X_train,Y_train)
    print('the accuracy under kernel %s is %f' %(kernel,clf.score(X_test,Y_test)))

the accuracy under kernel linear is 0.964912
the accuracy under kernel poly is 0.959064
the accuracy under kernel rbf is 0.619883
the accuracy under kernel sigmoid is 0.619883

查看數據分佈

import pandas as pd
from sklearn.preprocessing import StandardScaler

data=pd.DataFrame(X)
# Per-column summary statistics with extra percentiles, transposed so that
# each row describes one feature. The plain describe() call that used to
# follow overwrote this result and has been removed.
a=data.describe([0.01,0.05,0.1,0.25,0.5,0.75,0.9,0.99]).T
# Standardise the features before fitting the SVMs again.
X=StandardScaler().fit_transform(X)

# Re-split the standardised data (30% test) and repeat the kernel comparison.
X_train,X_test,Y_train,Y_test=train_test_split(X,y,test_size=0.3)
# The unused `time0=time()` timestamp was dropped: `time` is never imported in
# this note, so the call raised NameError, and the value was never read.
kernels=[ 'linear', 'poly', 'rbf', 'sigmoid']
for kernel in kernels:
    clf=svm.SVC(kernel=kernel
                ,degree=1
                ,gamma='auto'
                ,cache_size=5000).fit(X_train,Y_train)
    print('the accuracy under kernel %s is %f' %(kernel,clf.score(X_test,Y_test)))

the accuracy under kernel linear is 0.976608
the accuracy under kernel poly is 0.982456
the accuracy under kernel rbf is 0.976608
the accuracy under kernel sigmoid is 0.959064
# Sweep gamma for the RBF kernel over 50 log-spaced values and record the
# test-set accuracy obtained with each one.
gamma_ranges=np.logspace(-10,1,50)
score=[]
for i in gamma_ranges:
    clf=svm.SVC(kernel='rbf',gamma=i,cache_size=5000)
    clf.fit(X_train,Y_train)
    score.append(clf.score(X_test,Y_test))

# Report the best accuracy and the first gamma that reaches it.
best_score=max(score)
best_index=score.index(best_score)
print(best_score,gamma_ranges[best_index])

plt.plot(gamma_ranges,score)
plt.show()

在這裏插入圖片描述
0.9883040935672515 0.020235896477251554

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedShuffleSplit  # splits off the test samples

# Grid over gamma and coef0 for a degree-1 polynomial kernel.
gamma_ranges=np.logspace(-10,1,10)
coef0_ranges=np.logspace(0,5,10)
param_grid=dict(gamma=gamma_ranges,coef0=coef0_ranges)
cv=StratifiedShuffleSplit(n_splits=5,test_size=0.3,random_state=50)
clf=svm.SVC(kernel='poly',cache_size=5000,degree=1)
# Hand the splitter built above to the search; previously `cv=10` was
# hard-coded here and the StratifiedShuffleSplit object was never used.
Gs=GridSearchCV(clf,param_grid,cv=cv)
Gs.fit(X,y)
# Print the results explicitly: bare expressions display nothing in a script.
print(Gs.best_params_)
print(Gs.best_score_)
# Tune the penalty coefficient C: 50 evenly spaced values in [0.01, 30],
# with gamma held at a fixed value.
C_ranges=np.linspace(0.01,30,50)
score=[]
for i in C_ranges:
    clf=svm.SVC(kernel='rbf',C=i,gamma=0.020235896477251554,cache_size=5000)
    clf.fit(X_train,Y_train)
    score.append(clf.score(X_test,Y_test))

# Report the best accuracy and the first C that reaches it.
best_score=max(score)
best_index=score.index(best_score)
print(best_score,C_ranges[best_index])

plt.plot(C_ranges,score)
plt.show()

在這裏插入圖片描述
0.9883040935672515 1.2340816326530613

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章