Homework 3
報告
Problem 1
In this problem, we will write a program to implement the SVM algorithm. Let us start with a toy example (which can be found at SVM_matlab_Prof_olga_Veksler.pdf) and then work on more complicated cases. The toy example (credit goes to Prof. Olga Veksler, University of Western Ontario) provides detailed implementation of SVM using Matlab. It is noted that this example works in the original feature space, rather than the augmented one.
(a) (5 points) Try the toy example, and plot the separating hyperplane and the support vectors.
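Algorithm steps (simplified SMO):
Step 1: Calculate the error $E_i = f(x_i) - y_i$
Step 2: Calculate the upper and lower limits $H$ and $L$
Step 3: Calculate $\eta$
Step 4: Update $\alpha_j$
Step 5: Clip $\alpha_j$ according to the value range $[L, H]$
Step 6: Update $\alpha_i$
Step 7: Update $b_1$ and $b_2$
Step 8: Update $b$ according to $b_1$ and $b_2$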
(b) (10 points) Train an SVM classifier with TrainSet1.txt, and plot the separating hyperplane and the support vectors.
(c) (15 points) Design a quadratic kernel and train an SVM classifier with TrainSet2.txt. Plot the separating boundary and support vectors in the original feature space.
Problem 2
DHS p276-Problem 33 (p225-33 in the translated book).
(Hint for (b): You may use the original feature space rather than the augmented feature space.)
This problem asks you to follow the Kuhn-Tucker theorem to convert the constrained optimization problem in Support Vector Machines into a dual, unconstrained one. For SVMs, the goal is to find the minimum length weight vector $\mathbf{a}$ subject to the (classification) constraints
$$z_k \mathbf{a}^t \mathbf{y}_k \ge 1, \quad k = 1, \ldots, n,$$
where $z_k = \pm 1$ indicates the target category of each of the $n$ patterns $\mathbf{y}_k$. Note that $\mathbf{a}$ and $\mathbf{y}$ are augmented (by $a_0$ and $y_0 = 1$, respectively).
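(a) Consider the unconstrained optimization associated with SVMs:
$$L(\mathbf{a}, \boldsymbol{\alpha}) = \frac{1}{2}\|\mathbf{a}\|^2 - \sum_{k=1}^{n} \alpha_k \left[ z_k \mathbf{a}^t \mathbf{y}_k - 1 \right]$$
In the space determined by the components of $\mathbf{a}$, and the $n$ (scalar) undetermined multipliers $\alpha_k$, the desired solution is a saddle point, rather than a global maximum or minimum. Explain.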
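(b) Next eliminate the dependency of this ("primal") functional upon $\mathbf{a}$, i.e., reformulate the optimization in a dual form, by the following steps. Note that at the saddle point of the primal functional, we have
$$\frac{\partial L(\mathbf{a}^*, \boldsymbol{\alpha}^*)}{\partial \mathbf{a}} = 0.$$
Solve for the partial derivatives and conclude
$$\sum_{k=1}^{n} \alpha_k^* z_k = 0, \qquad \alpha_k^* \ge 0, \quad k = 1, \ldots, n.$$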
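(c) The second partial derivative (the one with respect to the non-augmented part $\mathbf{a}_r$ of the weight vector) vanishing implies
$$\frac{\partial L(\mathbf{a}, \boldsymbol{\alpha})}{\partial \mathbf{a}_r} = \mathbf{a}_r - \sum_{k=1}^{n} \alpha_k^* z_k \mathbf{y}_k = 0$$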
and thus
$$\mathbf{a}_r = \sum_{k=1}^{n} \alpha_k^* z_k \mathbf{y}_k.$$
Since $\sum_{k=1}^{n} \alpha_k^* z_k = 0$, we can thus write the solution in augmented form as
$$\mathbf{a} = \sum_{k=1}^{n} \alpha_k^* z_k \mathbf{y}_k.$$
(d) According to the Kuhn-Tucker theorem (cf. Bibliography), an undetermined multiplier $\alpha_k^*$ is non-zero only if the corresponding sample $\mathbf{y}_k$ satisfies $z_k \mathbf{a}^t \mathbf{y}_k = 1$. Show that this can be expressed as
$$\alpha_k^* \left[ z_k \mathbf{a}^{*t} \mathbf{y}_k - 1 \right] = 0, \quad k = 1, \ldots, n.$$
(The samples where $\alpha_k^*$ are nonzero, i.e., $z_k \mathbf{a}^t \mathbf{y}_k = 1$, are the support vectors.)
(e) Use the results from parts (b)–(c) to eliminate the weight vector in the functional, and thereby construct the dual functional
$$\tilde{L}(\mathbf{a}, \boldsymbol{\alpha}) = -\frac{1}{2}\|\mathbf{a}\|^2 + \sum_{k=1}^{n} \alpha_k.$$
(f) Substitute the solution $\mathbf{a}^*$ from part (c) to find the dual functional
$$\tilde{L}(\boldsymbol{\alpha}) = -\frac{1}{2} \sum_{j,k=1}^{n} \alpha_j \alpha_k z_j z_k \left( \mathbf{y}_j^t \mathbf{y}_k \right) + \sum_{j=1}^{n} \alpha_j.$$
代碼
線性SVM
import time
import matplotlib.pyplot as plt
import numpy as np
import random
import types
import math
# Load the data set
def loadDataSet(fileName):
    """
    Read a labelled two-feature data set from a text file.

    fileName: path of a text file with one sample per line, laid out as
              `feature1 feature2 label` separated by tabs (any whitespace
              is accepted)

    Returns (dataMat, labelMat): a list of [feature1, feature2] float pairs
    and the parallel list of float labels.
    Raises ValueError / IndexError if a non-blank line is malformed.
    """
    dataMat = []  # feature rows
    labelMat = []  # labels, parallel to dataMat
    # `with` guarantees the handle is closed even if parsing fails.
    with open(fileName) as file:
        for line in file:
            lineArr = line.split()
            if not lineArr:
                # Skip blank lines (e.g. a trailing newline at end of file).
                continue
            dataMat.append([float(lineArr[0]), float(lineArr[1])])
            labelMat.append(float(lineArr[2]))
    return dataMat, labelMat
# Visualise the data set
def showDataSet(dataMat, labelMat):
    """
    Scatter-plot the samples, one colour per class, and show the figure.

    dataMat: data matrix, one sample per row; the first two columns are
             used as the x / y coordinates
    labelMat: labels, one per sample; label > 0 is drawn as the positive
              class, everything else as the negative class
    """
    points = np.asarray(dataMat, dtype=float)
    labels = np.asarray(labelMat, dtype=float).ravel()
    if points.size == 0:
        # Keep a 2-D shape so the column indexing below still works.
        points = points.reshape(0, 2)
    # Boolean masks instead of per-class lists: an empty class simply
    # yields an empty scatter rather than an IndexError.
    positive = labels > 0
    plt.scatter(points[positive, 0], points[positive, 1])  # positive samples
    plt.scatter(points[~positive, 0], points[~positive, 1])  # negative samples
    plt.show()
# Randomly choose the second alpha
def selectJrand(alpha, n):
    """
    Pick a random index j in [0, n) that differs from `alpha`.

    alpha: index of the first Lagrange multiplier (despite the name this
           is an index, not a multiplier value)
    n: number of alpha parameters

    Returns the chosen index as an int.
    Raises ValueError when no valid index exists (n < 1, or n == 1 while
    `alpha` is 0), where the original would loop forever or return an
    out-of-range index.
    """
    if n < 1 or (n == 1 and alpha == 0):
        raise ValueError("no index other than %r available in range(%r)" %
                         (alpha, n))
    # randrange never returns n, whereas int(random.uniform(0, n)) can hit
    # the upper end-point through floating-point rounding.
    j = random.randrange(n)
    while j == alpha:
        j = random.randrange(n)
    return j
# Clip alpha
def clipAlpha(aj, H, L):
    """
    Clamp an alpha value into its feasible interval.

    aj: alpha value
    H: upper bound for alpha
    L: lower bound for alpha

    Returns aj capped at H and then raised to at least L.
    """
    # Cap first, then floor: if the bounds are inverted (L > H) the lower
    # bound wins, exactly as with two sequential `if` statements.
    return max(min(aj, H), L)
The algorithm steps:
Step 1: Calculate the error $E_i = f(x_i) - y_i$
Step 2: Calculate the upper and lower limits $H$ and $L$
Step 3: Calculate $\eta$
Step 4: Update $\alpha_j$
Step 5: Clip $\alpha_j$ according to the value range $[L, H]$
Step 6: Update $\alpha_i$
Step 7: Update $b_1$ and $b_2$
Step 8: Update $b$ according to $b_1$ and $b_2$
# Simplified SMO algorithm
def smoSimple(dataMatIn, classLabels, C, toler, maxIter):
    """
    Train a linear SVM with the simplified SMO algorithm (random choice of
    the second multiplier).

    dataMatIn: data matrix, one sample per row
    classLabels: labels, one per sample (presumably +1 / -1; verify
                 against the caller)
    C: penalty factor, upper bound of every alpha
    toler: tolerance used when testing whether a sample violates the
           optimality conditions
    maxIter: number of consecutive full passes without any alpha update
             required before stopping

    Returns (b, alphas): the bias (int 0 if never updated, otherwise a 1x1
    numpy matrix) and the m x 1 numpy matrix of Lagrange multipliers.
    """
    # np.mat was removed in NumPy 2.0; np.asmatrix is the function it
    # aliased, so the matrix semantics below are unchanged.
    dataMatrix = np.asmatrix(dataMatIn)
    labelMat = np.asmatrix(classLabels).transpose()
    # Initialise the bias
    b = 0
    # Number of samples
    m = np.shape(dataMatrix)[0]
    # Initialise all alphas to 0
    alphas = np.asmatrix(np.zeros((m, 1)))
    # Counts consecutive passes in which nothing changed
    iter_num = 0
    while (iter_num < maxIter):
        alphaPairsChanged = 0
        for i in range(m):
            # Step 1: compute the error Ei
            # (.item() extracts the scalar; float() on a 1x1 matrix is
            # deprecated in recent NumPy)
            fXi = float(
                (np.multiply(alphas, labelMat).T *
                 (dataMatrix * dataMatrix[i, :].T)).item()) + b
            Ei = fXi - float(labelMat[i, 0])
            # Optimise alpha_i only if it violates the conditions by more
            # than the tolerance and can still move inside [0, C].
            if ((labelMat[i] * Ei < -toler) and
                (alphas[i] < C)) or ((labelMat[i] * Ei > toler) and
                                     (alphas[i] > 0)):
                # Randomly choose alpha_j to optimise together with alpha_i
                j = selectJrand(i, m)
                # Step 1 (continued): compute the error Ej
                fXj = float(
                    (np.multiply(alphas, labelMat).T *
                     (dataMatrix * dataMatrix[j, :].T)).item()) + b
                Ej = fXj - float(labelMat[j, 0])
                # Keep copies of the old values; alphas[i] / alphas[j] are
                # views that the in-place updates below would change.
                alphaIold = alphas[i].copy()
                alphaJold = alphas[j].copy()
                # Step 2: compute the bounds L and H
                if (labelMat[i] != labelMat[j]):
                    L = max(0, alphas[j] - alphas[i])
                    H = min(C, C + alphas[j] - alphas[i])
                else:
                    L = max(0, alphas[j] + alphas[i] - C)
                    H = min(C, alphas[j] + alphas[i])
                if L == H:
                    # Empty feasible interval: nothing to optimise.
                    continue
                # Step 3: compute eta
                eta = 2.0 * dataMatrix[i, :] * dataMatrix[j, :].T - dataMatrix[
                    i, :] * dataMatrix[i, :].T - dataMatrix[j, :] * dataMatrix[
                        j, :].T
                if eta >= 0:
                    continue
                # Step 4: update alpha_j
                alphas[j] -= labelMat[j] * (Ei - Ej) / eta
                # Step 5: clip alpha_j
                alphas[j] = clipAlpha(alphas[j], H, L)
                if (abs(alphas[j] - alphaJold) < 0.00001):
                    # Change too small to count as progress.
                    continue
                # Step 6: update alpha_i by the same amount in the
                # opposite direction
                alphas[i] += labelMat[j] * labelMat[i] * (alphaJold -
                                                          alphas[j])
                # Step 7: update b_1 and b_2
                b1 = b - Ei - labelMat[i] * (
                    alphas[i] - alphaIold
                ) * dataMatrix[i, :] * dataMatrix[i, :].T - labelMat[j] * (
                    alphas[j] -
                    alphaJold) * dataMatrix[i, :] * dataMatrix[j, :].T
                b2 = b - Ej - labelMat[i] * (
                    alphas[i] - alphaIold
                ) * dataMatrix[i, :] * dataMatrix[j, :].T - labelMat[j] * (
                    alphas[j] -
                    alphaJold) * dataMatrix[j, :] * dataMatrix[j, :].T
                # Step 8: update b according to b_1 and b_2
                if (0 < alphas[i]) and (C > alphas[i]):
                    b = b1
                elif (0 < alphas[j]) and (C > alphas[j]):
                    b = b2
                else:
                    b = (b1 + b2) / 2.0
                # Count the successful pair update
                alphaPairsChanged += 1
        # Any successful update resets the counter, so the loop ends only
        # after maxIter consecutive passes without a change.
        if (alphaPairsChanged == 0):
            iter_num += 1
        else:
            iter_num = 0
    return b, alphas
# Visualise the classification result
def showClassifer(dataMat, w, b, labelMat=None, alphas=None):
    """
    Plot the samples, the separating line w . x + b = 0 and the support
    vectors (samples whose alpha is > 0, circled in red).

    dataMat: data matrix, one [x, y] sample per row
    w: normal vector of the line, two components (nested as [[w1], [w2]]
       or flat)
    b: intercept of the line (scalar or 1x1 matrix)
    labelMat: labels, one per sample; when omitted, the module-level
              `labelMat` is used (the original always read that global)
    alphas: Lagrange multipliers, one per sample; when omitted, the
            module-level `alphas` is used
    """
    # Backward compatibility: existing three-argument calls keep reading
    # the module-level variables.
    if labelMat is None:
        labelMat = globals()["labelMat"]
    if alphas is None:
        alphas = globals()["alphas"]
    points = np.asarray(dataMat, dtype=float)
    labels = np.asarray(labelMat, dtype=float).ravel()
    multipliers = np.asarray(alphas, dtype=float).ravel()
    # Draw the sample points
    positive = labels > 0
    plt.scatter(points[positive, 0], points[positive, 1], s=30,
                alpha=0.7)  # positive samples
    plt.scatter(points[~positive, 0], points[~positive, 1], s=30,
                alpha=0.7)  # negative samples
    # Draw the separating line across the x-range of the data
    x1 = points[:, 0].max()
    x2 = points[:, 0].min()
    a1, a2 = np.asarray(w, dtype=float).ravel()
    b = float(np.asarray(b, dtype=float).ravel()[0])
    if a2 != 0:
        y1, y2 = (-b - a1 * x1) / a2, (-b - a1 * x2) / a2
        plt.plot([x1, x2], [y1, y2])
    elif a1 != 0:
        # Vertical boundary: solving for y would divide by zero.
        x0 = -b / a1
        plt.plot([x0, x0], [points[:, 1].min(), points[:, 1].max()])
    # (w == 0 defines no line, so nothing is drawn in that case.)
    # Circle the support vectors
    support = multipliers > 0
    plt.scatter(points[support, 0], points[support, 1],
                s=150,
                c='none',
                alpha=0.7,
                linewidth=1.5,
                edgecolor='red')
    plt.show()
# Compute w
def get_w(dataMat, labelMat, alphas):
    """
    Compute the weight vector w = sum_k alpha_k * label_k * x_k.

    dataMat: data matrix, one sample per row (any number of features)
    labelMat: labels, one per sample
    alphas: alpha values; an (m, 1) column gives a nested result

    Returns w as a list; for (m, 1) alphas this is [[w_1], ..., [w_d]].
    """
    alphas, dataMat, labelMat = np.array(alphas), np.array(dataMat), np.array(
        labelMat)  # convert to numpy arrays
    # Broadcasting the label column over the features replaces the former
    # np.tile(..., (1, 2)), which only worked for exactly two features.
    weighted = labelMat.reshape(-1, 1) * dataMat
    w = np.dot(weighted.T, alphas)
    return w.tolist()
# Hard-coded location of the homework data; adjust to your own machine.
path = 'C:/Users/86187/Desktop/HW3_data/'
# Problem 1(b): train on TrainSet1.txt.
dataMat, labelMat = loadDataSet(path + 'TrainSet1.txt')
showDataSet(dataMat, labelMat)
# Arguments are C=0.6, toler=0.001, maxIter=50.
b,alphas = smoSimple(dataMat, labelMat, 0.6, 0.001, 50)
w = get_w(dataMat, labelMat, alphas)
# Called with three arguments, showClassifer also reads the module-level
# `labelMat` and `alphas` assigned above, so the order of these
# statements matters.
showClassifer(dataMat, w, b)
# Toy sample
dataMat = [[1, 6], [1, 10], [4, 11], [5, 2], [7, 6], [10, 4]]
labelMat = np.transpose([1, 1, 1, -1, -1, -1])
# Arguments are C=0.6, toler=0.001, maxIter=40.
b, alphas = smoSimple(dataMat, labelMat, 0.6, 0.001, 40)
w = get_w(dataMat, labelMat, alphas)
showClassifer(dataMat, w, b)
非線性可分SVM
import numpy as np
from numpy import linalg
import matplotlib.pyplot as plt
from sklearn.svm import SVC
%matplotlib inline
# With these two lines a notebook cell displays the value of every
# expression statement, not only the last one, without calling print.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
# Load the data set
def loadDataSet(fileName):
    """
    Read a labelled two-feature data set from a text file.

    fileName: path of a text file with one sample per line, laid out as
              `feature1 feature2 label` separated by tabs (any whitespace
              is accepted)

    Returns (dataMat, labelMat): a list of [feature1, feature2] float pairs
    and the parallel list of float labels.
    Raises ValueError / IndexError if a non-blank line is malformed.
    """
    dataMat = []  # feature rows
    labelMat = []  # labels, parallel to dataMat
    # `with` guarantees the handle is closed even if parsing fails.
    with open(fileName) as file:
        for line in file:
            lineArr = line.split()
            if not lineArr:
                # Skip blank lines (e.g. a trailing newline at end of file).
                continue
            dataMat.append([float(lineArr[0]), float(lineArr[1])])
            labelMat.append(float(lineArr[2]))
    return dataMat, labelMat
# Visualise the data set
def showDataSet(dataMat, labelMat):
    """
    Scatter-plot the samples, one colour per class, and show the figure.

    dataMat: data matrix, one sample per row; the first two columns are
             used as the x / y coordinates
    labelMat: labels, one per sample; label > 0 is drawn as the positive
              class, everything else as the negative class
    """
    points = np.asarray(dataMat, dtype=float)
    labels = np.asarray(labelMat, dtype=float).ravel()
    if points.size == 0:
        # Keep a 2-D shape so the column indexing below still works.
        points = points.reshape(0, 2)
    # Boolean masks instead of per-class lists: an empty class simply
    # yields an empty scatter rather than an IndexError.
    positive = labels > 0
    plt.scatter(points[positive, 0], points[positive, 1])  # positive samples
    plt.scatter(points[~positive, 0], points[~positive, 1])  # negative samples
    plt.show()
# Load TrainSet2 and inspect it visually
# (hard-coded data location; adjust to your own machine)
path = 'C:/Users/86187/Desktop/HW3_data/'
X, y = loadDataSet(path + 'TrainSet2.txt')
showDataSet(X, y)
def plot_svc_decision_function(model, ax=None):
    """
    Draw the decision boundary (level 0) and the two margins (levels -1
    and +1) of a fitted classifier on a 30 x 30 grid over the current
    axis limits.

    model: object exposing decision_function(points), called with an
           (n_points, 2) array
    ax: axes to draw on; the current matplotlib axes when None
    """
    axes = plt.gca() if ax is None else ax
    x_bounds = axes.get_xlim()
    y_bounds = axes.get_ylim()
    # 'ij' indexing makes the first grid axis follow x and the second
    # follow y.
    grid_x, grid_y = np.meshgrid(
        np.linspace(x_bounds[0], x_bounds[1], 30),
        np.linspace(y_bounds[0], y_bounds[1], 30),
        indexing="ij",
    )
    samples = np.column_stack((grid_x.ravel(), grid_y.ravel()))
    scores = model.decision_function(samples).reshape(grid_x.shape)
    axes.contour(grid_x, grid_y, scores,
                 colors="k",
                 levels=[-1, 0, 1],
                 alpha=0.5,
                 linestyles=["--", "-", "--"])
    # Restore the limits that were in place before drawing the contours.
    axes.set_xlim(x_bounds)
    axes.set_ylim(y_bounds)
# Convert the loaded lists to arrays so that column slicing (X[:, 0]) works.
X = np.array(X); y = np.array(y)
# Baseline: fit a linear-kernel SVC on the raw two-dimensional data.
clf = SVC(kernel = "linear").fit(X,y)
plt.scatter(X[:,0],X[:,1],c=y,s=50,cmap="rainbow")
plot_svc_decision_function(clf)
# Score on the same data the model was trained on.
clf.score(X,y)
0.8
# Define a new dimension r computed from X
r = np.exp(-(X**2).sum(1))
# NOTE(review): rlim is not used anywhere in the visible code.
rlim = np.linspace(min(r),max(r),100)
from mpl_toolkits import mplot3d
# Define a function that draws the 3-D figure
# elev is the up/down rotation angle
# azim is the horizontal rotation angle
def plot_3D(elev=30, azim=30, X=X, y=y):
    """
    Show the samples in 3-D with r = exp(-||x||^2) as the height.

    elev: elevation angle of the view
    azim: azimuth angle of the view
    X: data array, one [x, y] sample per row (defaults to the module-level
       X as it was when this function was defined)
    y: labels used to colour the points (same default rule as X)
    """
    # Derive r from the X actually passed in; the original plotted the
    # module-level r, which mismatches whenever a different X is supplied.
    # NOTE(review): for coordinates in the tens, as in the printed X,
    # exp(-||x||^2) is numerically ~0 for nearly every sample - confirm
    # whether the data should be scaled first.
    r = np.exp(-(X**2).sum(1))
    ax = plt.subplot(projection="3d")
    ax.scatter3D(X[:, 0], X[:, 1], r, c=y, s=50, cmap='rainbow')
    ax.view_init(elev=elev, azim=azim)
    ax.set_xlabel("x")
    ax.set_ylabel("y")
    ax.set_zlabel("r")
    plt.show()
# Draw the 3-D view with the default angles.
plot_3D()
# If this is run inside a Jupyter notebook
plt.scatter(X[:,0],X[:,1],c=y,s=50,cmap="rainbow")
def plot_svc_decision_function(model, ax=None):
    """
    Draw the decision boundary (level 0) and the two margins (levels -1
    and +1) of a fitted classifier on a 30 x 30 grid over the current
    axis limits.

    model: object exposing decision_function(points), called with an
           (n_points, 2) array
    ax: axes to draw on; the current matplotlib axes when None
    """
    axes = plt.gca() if ax is None else ax
    x_bounds = axes.get_xlim()
    y_bounds = axes.get_ylim()
    # 'ij' indexing makes the first grid axis follow x and the second
    # follow y.
    grid_x, grid_y = np.meshgrid(
        np.linspace(x_bounds[0], x_bounds[1], 30),
        np.linspace(y_bounds[0], y_bounds[1], 30),
        indexing="ij",
    )
    samples = np.column_stack((grid_x.ravel(), grid_y.ravel()))
    scores = model.decision_function(samples).reshape(grid_x.shape)
    axes.contour(grid_x, grid_y, scores,
                 colors="k",
                 levels=[-1, 0, 1],
                 alpha=0.5,
                 linestyles=["--", "-", "--"])
    # Restore the limits that were in place before drawing the contours.
    axes.set_xlim(x_bounds)
    axes.set_ylim(y_bounds)
# Fit the linear-kernel baseline and draw its boundary over the samples.
clf = SVC(kernel = "linear").fit(X,y)
plt.scatter(X[:,0],X[:,1],c=y,s=50,cmap="rainbow")
plot_svc_decision_function(clf)
# New dimension r computed from X
r = np.exp(-(X**2).sum(1))
# The third argument of np.linspace is the number of samples and must be
# an integer; the former 0.2 raised a TypeError. 100 matches the earlier
# definition of rlim in this file.
rlim = np.linspace(min(r),max(r),100)
from mpl_toolkits import mplot3d
def plot_3D(elev=30, azim=30, X=X, y=y):
    """
    Show the samples in 3-D with r = exp(-||x||^2) as the height.

    elev: elevation angle of the view
    azim: azimuth angle of the view
    X: data array, one [x, y] sample per row (defaults to the module-level
       X as it was when this function was defined)
    y: labels used to colour the points (same default rule as X)
    """
    # Derive r from the X actually passed in; the original plotted the
    # module-level r, which mismatches whenever a different X is supplied
    # (for example through the interactive widget).
    # NOTE(review): for coordinates in the tens, as in the printed X,
    # exp(-||x||^2) is numerically ~0 for nearly every sample - confirm
    # whether the data should be scaled first.
    r = np.exp(-(X**2).sum(1))
    ax = plt.subplot(projection="3d")
    ax.scatter3D(X[:, 0], X[:, 1], r, c=y, s=50, cmap='rainbow')
    ax.view_init(elev=elev, azim=azim)
    ax.set_xlabel("x")
    ax.set_ylabel("y")
    ax.set_zlabel("r")
    plt.show()
# Interactive 3-D view
from ipywidgets import interact,fixed
# The keyword has to match plot_3D's parameter name `azim`. The former
# spelling `azip` matched no parameter, so the (-180, 180) range was
# never applied to the azimuth control.
interact(plot_3D,elev=[0,30,60,90],azim=(-180,180),X=fixed(X),y=fixed(y))
plt.show()
<function __main__.plot_3D(elev=30, azim=30, X=array([[-51.84331797, -57.01754386],
[-51.84331797, -48.24561404],
[-47.69585253, -30.11695906],
[-44.00921659, -11.98830409],
[-33.87096774, 16.08187135],
[-27.41935484, 27.77777778],
[-13.59447005, 38.88888889],
[ -5.76036866, 39.47368421],
[ 18.20276498, 38.30409357],
[ 35.25345622, 31.28654971],
[ 47.69585253, 21.92982456],
[ 59.67741935, 10.81871345],
[ 61.98156682, 30.70175439],
[ 62.90322581, 42.98245614],
[ 55.06912442, 55.84795322],
[ 36.6359447 , 68.12865497],
[ 1.15207373, 74.56140351],
[ 43.5483871 , 51.75438596],
[ -1.15207373, 58.77192982],
[-32.94930876, 68.12865497],
[ -0.69124424, 20.1754386 ],
[-11.29032258, 11.40350877],
[-22.35023041, -4.97076023],
[-27.41935484, -28.94736842],
[-29.26267281, -48.83040936],
[-28.80184332, -64.61988304],
[-20.96774194, -42.39766082],
[-10.36866359, -11.40350877],
[ -2.99539171, 0.87719298],
[ 7.60368664, 20.1754386 ],
[ 16.359447 , 1.4619883 ],
[ 43.5483871 , -29.53216374],
[ 41.24423963, 9.64912281],
[ 25.57603687, -18.42105263],
[ 4.83870968, -31.87134503],
[ 9.44700461, -14.32748538],
[ 19.12442396, -51.75438596],
[-10.36866359, -65.78947368],
[ 59.21658986, -16.66666667],
[ 67.51152074, -44.73684211]]), y=array([-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
-1., -1., -1., -1., -1., -1., -1., 1., 1., 1., 1., 1., 1.,
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
1.]))>
# Fit the linear-kernel SVC again and draw its decision boundary.
clf = SVC(kernel = "linear").fit(X,y)
plt.scatter(X[:,0],X[:,1],c=y,s=50,cmap="rainbow")
plot_svc_decision_function(clf)
# Score on the same data the model was trained on.
clf.score(X,y)
1.0
# Compare several kernel functions experimentally
Kernel = ["linear","poly","rbf","sigmoid"]
from sklearn import svm
# The grid size has to be known before the figure is created; it used to
# be defined after plt.subplots(), which raised a NameError on `ncols`.
nrows=1
ncols=len(Kernel) + 1
fig, axes = plt.subplots(nrows, ncols,figsize=(20,4))
# First column: distribution of the original data
ax = axes[0]
ax.set_title("Input data")
ax.scatter(X[:, 0], X[:, 1], c=y, zorder=10, cmap=plt.cm.Paired,edgecolors='k')
ax.set_xticks(())
ax.set_yticks(())
# Loop over the kernel functions,
# filling in one classification result per column from the second column on
for est_idx, kernel in enumerate(Kernel):
    # Sub-plot position
    ax = axes[est_idx + 1]
    # Fit the model
    # NOTE(review): gamma=2 on unscaled coordinates in the tens may make
    # the poly kernel very slow to fit - confirm, or scale X first.
    clf = svm.SVC(kernel=kernel, gamma=2).fit(X, y)
    score = clf.score(X, y)
    # Scatter plot of the data itself
    ax.scatter(X[:, 0], X[:, 1], c=y
               ,zorder=10
               ,cmap=plt.cm.Paired,edgecolors='k')
    # Support vectors (facecolors='none' leaves the markers transparent)
    ax.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], s=50,
               facecolors='none', zorder=10, edgecolors='k')
    # Decision boundary
    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    # np.mgrid combines the np.linspace and np.meshgrid calls used earlier:
    # the grid is built directly from the minimum and maximum values,
    # written as [start:stop:step].
    # When the step is a complex number, its magnitude is the number of
    # points to create between start and stop, and stop is included.
    XX, YY = np.mgrid[x_min:x_max:200j, y_min:y_max:200j]
    # np.c_ stacks the two flattened grids as the columns of one array
    Z = clf.decision_function(np.c_[XX.ravel(), YY.ravel()]).reshape(XX.shape)
    # Colour the two sides of the decision boundary
    ax.pcolormesh(XX, YY, Z > 0, cmap=plt.cm.Paired)
    # Contour lines for the boundary and the margins
    ax.contour(XX, YY, Z, colors=['k', 'k', 'k'], linestyles=['--', '-', '--'],
               levels=[-1, 0, 1])
    # Hide the axis ticks
    ax.set_xticks(())
    ax.set_yticks(())
    # Put the kernel name on top of the column
    ax.set_title(kernel)
    # Add the classification score to each plot
    ax.text(0.95, 0.06, ('%.2f' % score).lstrip('0')
            , size=15
            , bbox=dict(boxstyle='round', alpha=0.8, facecolor='white')
            # white box as the background of the score
            , transform=ax.transAxes  # coordinates are relative to this sub-plot's axes
            , horizontalalignment='right'  # horizontal anchor of the text
            )
plt.tight_layout()
plt.show()
學習筆記
參考內容
- https://cuijiahua.com/blog/2017/11/ml_8_svm_1.html
- https://cuijiahua.com/blog/2017/11/ml_9_svm_2.html
- https://space.bilibili.com/97068901/