sklearn學習筆記——svm

原創

2020-06-16 14:17

三維圖顯示

from sklearn.datasets import make_blobs
from sklearn.datasets import make_circles
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
import numpy as np
# Two concentric rings: the classes cannot be separated by a straight line
# in the original 2-D plane.
x, y = make_circles(n_samples=250, random_state=20, factor=0.5)


def _radial_feature(points):
    """Return exp(-||p||^2) for every row p of *points* (an (n, 2) array)."""
    # The negation and the row-sum must both happen *inside* the exponential;
    # summing exp(-x_i^2) per coordinate is not a function of the radius.
    return np.exp(-(points ** 2).sum(axis=1))


# Extra dimension that lifts the inner ring above the outer ring.
r = _radial_feature(x)
# The third argument of np.linspace is a sample *count* and must be an
# integer; the former value 0.2 is rejected by NumPy.
rlim = np.linspace(r.min(), r.max(), 100)


def plot_3D(elev=30, azim=30, x=x, y=y):
    """Scatter the 2-D samples in 3-D using the radial feature as height.

    Parameters
    ----------
    elev, azim : viewing elevation and azimuth, in degrees, passed to
        ``view_init``.
    x : array whose first two columns are the plotted coordinates.
    y : per-sample labels, used only to colour the points.
    """
    ax = plt.subplot(projection='3d')
    # Derive the height from the *argument* x so that the function also works
    # for data other than the module-level sample set.
    ax.scatter3D(x[:, 0], x[:, 1], _radial_feature(x), c=y, s=10)
    ax.view_init(elev=elev, azim=azim)
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_zlabel('r')
    plt.show()


plot_3D(elev=70, azim=30, x=x, y=y)

from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
from sklearn import svm
from sklearn.datasets import make_blobs
# Synthetic data set: 50 samples drawn around 2 centres with a fixed seed.
X, y = make_blobs(
    n_samples=50,
    centers=2,
    cluster_std=0.6,
    random_state=2,
)
def plot_svm_decision_bounder(X,y):
    """Fit a linear SVC on (X, y) and overlay its boundary on the current axes.

    The current axes must already contain the data (their limits define the
    evaluation grid). Three contour lines are drawn: the decision boundary
    (level 0, solid) and the two margins (levels -1 and 1, dashed).

    Parameters
    ----------
    X : array of samples; the first two columns are the plotted features.
    y : class labels for the samples.
    """
    ax = plt.gca()
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()

    # 40 x 40 grid covering the visible area, flattened to (1600, 2) points.
    grid_x, grid_y = np.meshgrid(
        np.linspace(xlim[0], xlim[1], 40),
        np.linspace(ylim[0], ylim[1], 40),
    )
    xy = np.vstack([grid_x.ravel(), grid_y.ravel()]).T

    clf = svm.SVC(kernel='linear').fit(X, y)
    # Signed decision value of every grid point relative to the boundary.
    p = clf.decision_function(xy).reshape(grid_x.shape)

    # The contour keywords are `colors` and `linestyles`; the misspelled
    # `color` / `linstyles` are not contour options.
    ax.contour(grid_x, grid_y, p, colors='k',
               levels=[-1, 0, 1], alpha=0.5, linestyles=['--', '-', '--'])

    # Restore the limits captured before drawing. Assigning to `ax.ylim`
    # only created an unused attribute, so the y-limit was never restored.
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

# Draw the samples first: the boundary helper reads the limits of the
# current axes to build its evaluation grid.
plt.scatter(X[:, 0], X[:, 1], s=10, c=y)
plot_svm_decision_bounder(X, y)

調參

# `load_breast_cancer` was used without being imported anywhere above.
from sklearn.datasets import load_breast_cancer

data = load_breast_cancer()
X = data.data
y = data.target
np.unique(y)  # notebook-style inspection of the labels; no effect in a script
X_train, X_test, Y_train, Y_test = train_test_split(
    data.data, data.target, test_size=0.3)

# Compare the four built-in kernels on the raw (unscaled) features.
# The undefined `time()` call was dropped: its result was only referenced by
# commented-out code.
kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for kernel in kernels:
    clf = svm.SVC(kernel=kernel,
                  degree=1,  # keeps the polynomial kernel first-order
                  gamma='auto',
                  cache_size=5000).fit(X_train, Y_train)
    print('the accuracy under kernel %s is %f' % (kernel, clf.score(X_test, Y_test)))

the accuracy under kernel linear is 0.964912
the accuracy under kernel poly is 0.959064
the accuracy under kernel rbf is 0.619883
the accuracy under kernel sigmoid is 0.619883

查看數據分佈

import pandas as pd

data = pd.DataFrame(X)
# Per-feature summary with extra percentiles, one row per feature. It is kept
# under its own name; previously it was overwritten by the next statement
# before it could be inspected.
a_percentiles = data.describe(
    percentiles=[0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99]).T
# Default summary (count, mean, std, min, quartiles, max).
a = data.describe()

from sklearn.preprocessing import StandardScaler

# Standardize the feature matrix in place of the raw values.
# NOTE(review): the scaler is fitted on all samples before the train/test
# split, so test-set statistics influence the scaling — confirm this is
# acceptable for these notes.
X = StandardScaler().fit(X).transform(X)

X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.3)

# Repeat the kernel comparison on the current X.
# The undefined `time()` call was dropped: `time` is never imported and its
# result was only referenced by commented-out code.
kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for kernel in kernels:
    clf = svm.SVC(kernel=kernel,
                  degree=1,  # keeps the polynomial kernel first-order
                  gamma='auto',
                  cache_size=5000).fit(X_train, Y_train)
    print('the accuracy under kernel %s is %f' % (kernel, clf.score(X_test, Y_test)))

the accuracy under kernel linear is 0.976608
the accuracy under kernel poly is 0.982456
the accuracy under kernel rbf is 0.976608
the accuracy under kernel sigmoid is 0.959064

# Sweep gamma for the RBF kernel over 50 log-spaced values in [1e-10, 1e1].
# The accuracy recorded for each candidate is measured on the test split.
gamma_ranges = np.logspace(-10, 1, 50)
score = []
for gamma in gamma_ranges:
    clf = svm.SVC(kernel='rbf', gamma=gamma, cache_size=5000)
    clf.fit(X_train, Y_train)
    score.append(clf.score(X_test, Y_test))

# Report the best accuracy and the first gamma that reaches it.
print(max(score), gamma_ranges[int(np.argmax(score))])
plt.plot(gamma_ranges, score)
plt.show()

0.9883040935672515 0.020235896477251554

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedShuffleSplit  # draws the validation splits

# Joint search over gamma and coef0 for a first-degree polynomial kernel.
gamma_ranges = np.logspace(-10, 1, 10)
coef0_ranges = np.logspace(0, 5, 10)
param_grid = dict(gamma=gamma_ranges, coef0=coef0_ranges)

# 5 stratified random splits, 30% held out each time, with a fixed seed.
cv = StratifiedShuffleSplit(n_splits=5, test_size=0.3, random_state=50)
clf = svm.SVC(kernel='poly', cache_size=5000, degree=1)

# Pass the splitter built above; previously it was created but ignored in
# favour of a hard-coded `cv=10`.
Gs = GridSearchCV(clf, param_grid, cv=cv)
Gs.fit(X, y)

# Bare expressions only display in a notebook; print so a script shows them.
print(Gs.best_params_)
print(Gs.best_score_)

#調懲罰係數
score=[]
C_ranges=np.linspace(0.01,30,50)
for i in C_ranges:
    clf=svm.SVC(kernel='rbf'
                ,C=i
                ,gamma=0.020235896477251554
                ,cache_size=5000).fit(X_train,Y_train)
    score.append(clf.score(X_test,Y_test))
print(max(score),C_ranges[score.index(max(score))])
plt.plot(C_ranges,score)
plt.show()

0.9883040935672515 1.2340816326530613

發表評論

所有評論

還沒有人評論，想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.

sklearn學習筆記——svm

EPnP: An Accurate O(n) Solution to the PnP Problem

多視圖幾何筆記二——從圖像恢復仿射度量性質

CVXPnPL論文筆記

相機標定——論文筆記

sklearn學習筆記——svm

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結