Machine Learning - Perceptron - Scikit-Learn

Section I: Load package
#Section 1: Load package
from sklearn import datasets
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score

plt.rcParams['figure.dpi']=200
plt.rcParams['savefig.dpi']=200
font = {'family': 'Times New Roman',
        'weight': 'light'}
plt.rc("font", **font)
Section II: Load data and split it into train/test sets
#Section 2: Load data and split it into train/test sets
iris=datasets.load_iris()
X=iris.data[:,[2,3]]
y=iris.target

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=1,stratify=y)
print('Label counts in y:',np.bincount(y))
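As a quick sanity check (an added snippet, not part of the original listing), stratify=y should preserve the 1:1:1 class balance of the Iris labels in both subsets:

#Added check: the stratified split preserves class proportions
print('Label counts in y_train:',np.bincount(y_train))   #expected [35 35 35]
print('Label counts in y_test:',np.bincount(y_test))     #expected [15 15 15]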
Section III: Train perceptron model
#Section 3: Train perceptron model
sc=StandardScaler()
sc.fit(X_train)
X_train_std=sc.transform(X_train)
X_test_std=sc.transform(X_test)

ppn=Perceptron(n_iter_no_change=40,eta0=0.1,random_state=1)
ppn.fit(X_train_std,y_train)
y_pred=ppn.predict(X_test_std)
print('Misclassified samples: %d' % (y_test!=y_pred).sum())
print('Accuracy: %.2f' % accuracy_score(y_test,y_pred))
print('Accuracy: %.2f' % ppn.score(X_test_std,y_test))
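For reference, the fitted objects can be inspected (an added snippet; attribute names follow the scikit-learn API): StandardScaler stores the per-feature mean and standard deviation used for z=(x-mean)/std, and the multi-class Perceptron stores one weight vector and one bias per class.

#Added inspection of the fitted scaler and model
print('Feature means:',sc.mean_)                 #per-feature training means
print('Feature scales:',sc.scale_)               #per-feature training standard deviations
print('Weight matrix shape:',ppn.coef_.shape)    #(3, 2): one weight vector per class
print('Intercept shape:',ppn.intercept_.shape)   #(3,): one bias per class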
Section IV: Visualize decision boundary

This subsection has two parts: the definition of the plot_decision_regions function, and its invocation together with the visualization settings.
Part 1

from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt
import numpy as np

def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02):
    markers=('s','x','o','^','v')
    colors=('red','blue','lightgreen','gray','cyan')
    cmap=ListedColormap(colors[:len(np.unique(y))])
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, resolution),
                         np.arange(y_min, y_max, resolution))

    Z = classifier.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    plt.contourf(xx, yy, Z, alpha=0.4,cmap=cmap)

    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())

    for idx,cl in enumerate(np.unique(y)):
        plt.scatter(x=X[y==cl,0],
                    y=X[y==cl,1],
                    alpha=0.8,
                    c=colors[idx],
                    marker=markers[idx],
                    label=cl,
                    edgecolors='black')

    #Highlight test samples
    if test_idx:
        X_test,y_test=X[test_idx,:],y[test_idx]
        plt.scatter(X_test[:,0],X_test[:,1],facecolors='none',edgecolor='black',alpha=1.0,
                    linewidth=1,marker='o',s=100,label='test set')

Part 2

#Section 4: Visualize decision boundary
from Perceptron_Sklearn.visualize import plot_decision_regions  #the helper defined in Part 1

X_combined_std=np.vstack((X_train_std,X_test_std))
y_combined=np.hstack((y_train,y_test))

plot_decision_regions(X=X_combined_std,
                      y=y_combined,
                      classifier=ppn,
                      test_idx=range(105,150))
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.show()

(Figure: perceptron decision regions on the standardized petal features, with test samples circled)
Summary
Using petal length and petal width as the two features, we construct the feature space to be partitioned. After training scikit-learn's Perceptron and visualizing the result with the plot_decision_regions helper, the decision surface splits the plane into three regions. Unlike the hand-coded Perceptron from the earlier post, scikit-learn's Perceptron supports multi-class classification, mainly through the One-vs-Rest (OvR) scheme: one binary classifier is trained for each class, with all samples not belonging to that class treated as the single opposing class.
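A minimal sketch of this OvR behaviour (an added snippet, not from the original post): decision_function returns one score per class for every sample, and predict picks the class with the largest score.

#Added illustration of the One-vs-Rest decision
scores=ppn.decision_function(X_test_std[:5])     #shape (5, 3): one score per class
print('OvR scores:\n',scores)
print('argmax of scores:',scores.argmax(axis=1))
print('ppn.predict     :',ppn.predict(X_test_std[:5]))
print('classes         :',ppn.classes_)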

References
Sebastian Raschka, Vahid Mirjalili. Python Machine Learning, 2nd Edition (Chinese translation). Nanjing: Southeast University Press, 2018.
