Multiclass Support Vector Machine exercise
Complete and hand in this completed worksheet (including its outputs and any supporting code outside of the worksheet) with your assignment submission. For more details see the assignments page on the course website.
In this exercise you will:
- implement a fully-vectorized loss function for the SVM
- implement the fully-vectorized expression for its analytic gradient
- check your implementation using numerical gradient
- use a validation set to tune the learning rate and regularization strength
- optimize the loss function with SGD
- visualize the final learned weights
這個題目主要難點是 Loss 對
首先參考 lecture3 中的相關公式:
首先求一個樣本的
每一個大於零的項會給導數的兩個列帶來貢獻,對於
對
# linear_svm.py
import numpy as np
from random import shuffle
def svm_loss_naive(W, X, y, reg):
"""
Structured SVM loss function, naive implementation (with loops).
Inputs have dimension D, there are C classes, and we operate on minibatches
of N examples.
Inputs:
- W: A numpy array of shape (D, C) containing weights.
- X: A numpy array of shape (N, D) containing a minibatch of data.
- y: A numpy array of shape (N,) containing training labels; y[i] = c means
that X[i] has label c, where 0 <= c < C.
- reg: (float) regularization strength
Returns a tuple of:
- loss as single float
- gradient with respect to weights W; an array of same shape as W
"""
dW = np.zeros(W.shape) # initialize the gradient as zero
# compute the loss and the gradient
num_classes = W.shape[1]
num_train = X.shape[0]
num_dimension = W.shape[0]
loss = 0.0
for i in xrange(num_train):
scores = X[i].dot(W)
correct_class_score = scores[y[i]]
for j in xrange(num_classes):
if j == y[i]:
continue
margin = scores[j] - correct_class_score + 1 # note delta = 1
if margin > 0:
loss += margin
# calculate the dW, Sj - Syi + 1(j!=yi)
dW[:,j] += X[i,:].T
dW[:,y[i]] -= X[i,:].T
# Right now the loss is a sum over all training examples, but we want it
# to be an average instead so we divide by num_train.
loss /= num_train
dW /= num_train
# Add regularization to the loss.
loss += 0.5 * reg * np.sum(W * W)
dW += reg*W
#############################################################################
# TODO: #
# Compute the gradient of the loss function and store it dW. #
# Rather that first computing the loss and then computing the derivative, #
# it may be simpler to compute the derivative at the same time that the #
# loss is being computed. As a result you may need to modify some of the #
# code above to compute the gradient. #
#############################################################################
return loss, dW
def svm_loss_vectorized(W, X, y, reg):
"""
Structured SVM loss function, vectorized implementation.
Inputs and outputs are the same as svm_loss_naive.
"""
loss = 0.0
dW = np.zeros(W.shape) # initialize the gradient as zero
#############################################################################
# TODO: #
# Implement a vectorized version of the structured SVM loss, storing the #
# result in loss. #
#############################################################################
XW = X.dot(W)
num_train = X.shape[0]
Sy = np.zeros(num_train)
for i in xrange(num_train):
Sy[i] = XW[i, y[i]]
WX = XW.T - Sy + 1
for i in xrange(num_train):
WX[y[i],i] -= 1
loss = np.sum( WX[WX > 0] )
loss /= num_train
#############################################################################
# END OF YOUR CODE #
#############################################################################
#############################################################################
# TODO: #
# Implement a vectorized version of the gradient for the structured SVM #
# loss, storing the result in dW. #
# #
# Hint: Instead of computing the gradient from scratch, it may be easier #
# to reuse some of the intermediate values that you used to compute the #
# loss. #
#############################################################################
# keep only positive elements
XW = WX.T
num_classes = W.shape[1]
for i in xrange(num_train):
for j in xrange(num_classes):
if (XW[i, j] > 0):
dW[:,j] += X[i,:].T
dW[:,y[i]] -= X[i,:].T
dW /= num_train
dW += reg * W
#############################################################################
# END OF YOUR CODE #
#############################################################################
return loss, dW
# svm.ipynb
# Use the validation set to tune hyperparameters (regularization strength and
# learning rate). You should experiment with different ranges for the learning
# rates and regularization strengths; if you are careful you should be able to
# get a classification accuracy of about 0.4 on the validation set.
learning_rates = [1.4e-7, 1.5e-7, 1.6e-7]
regularization_strengths = [3e4, 3.1e4, 3.2e4, 3.3e4, 3.4e4]
# results is dictionary mapping tuples of the form
# (learning_rate, regularization_strength) to tuples of the form
# (training_accuracy, validation_accuracy). The accuracy is simply the fraction
# of data points that are correctly classified.
results = {}
best_val = -1 # The highest validation accuracy that we have seen so far.
best_svm = None # The LinearSVM object that achieved the highest validation rate.
################################################################################
# TODO: #
# Write code that chooses the best hyperparameters by tuning on the validation #
# set. For each combination of hyperparameters, train a linear SVM on the #
# training set, compute its accuracy on the training and validation sets, and #
# store these numbers in the results dictionary. In addition, store the best #
# validation accuracy in best_val and the LinearSVM object that achieves this #
# accuracy in best_svm. #
# #
# Hint: You should use a small value for num_iters as you develop your #
# validation code so that the SVMs don't take much time to train; once you are #
# confident that your validation code works, you should rerun the validation #
# code with a larger value for num_iters. #
################################################################################
params = [(x,y) for x in learning_rates for y in regularization_strengths]
for lrate, regular in params:
svm = LinearSVM()
loss_hist = svm.train(X_train, y_train, learning_rate=lrate, reg=regular,
num_iters=700, verbose=False)
y_train_pred = svm.predict(X_train)
accuracy_train = np.mean(y_train == y_train_pred)
y_val_pred = svm.predict(X_val)
accuracy_val = np.mean(y_val == y_val_pred)
results[(lrate, regular)]=(accuracy_train, accuracy_val)
if (best_val < accuracy_val):
best_val = accuracy_val
best_svm = svm
################################################################################
# END OF YOUR CODE #
################################################################################
# Print out results.
for lr, reg in sorted(results):
train_accuracy, val_accuracy = results[(lr, reg)]
print 'lr %e reg %e train accuracy: %f val accuracy: %f' % (
lr, reg, train_accuracy, val_accuracy)
print 'best validation accuracy achieved during cross-validation: %f' % best_val