Andrew Ng Machine Learning programming assignment: Python implementation of ex5

# -*- coding: utf-8 -*-
"""
Created on Thu Jul  2 12:09:38 2020

@author: cheetah023
"""

import numpy as np
import matplotlib.pyplot as plt
import scipy.io as sci
import scipy.optimize as opt

# Function definitions
def linearRegCostFunction(theta, X, y, lamda):
    m,n = X.shape
    # fmin_cg passes theta in as shape (n,), so reshape it to a column vector
    theta = np.reshape(theta,[n,1])
    cost = np.sum((np.dot(X, theta) - y) ** 2) / (2 * m)
    reg = (lamda / (2 * m)) * np.sum(theta[1:] ** 2)
    cost = cost + reg
    return cost
def linearRegGradient(theta, X, y, lamda):
    (m,n) = X.shape
    theta = np.reshape(theta,[n,1])
    #print('theta',theta.shape)
    grad = np.dot(X.T, (np.dot(X, theta) - y)) / m
    #print('grad',grad.shape)
    reg = (lamda / m) * theta
    reg[0] = 0
    #print('reg',reg.shape)
    grad = grad + reg
    # fmin_cg expects the gradient back as shape (n,), so flatten before returning
    grad = grad.reshape([n])
    return grad
def trainLinearReg(X, y, lamda):
    initial_theta = np.zeros([X.shape[1],1])
    theta = opt.fmin_cg(f=linearRegCostFunction, x0=initial_theta, fprime=linearRegGradient,
                     args=(X, y, lamda),
                     maxiter=50, disp=False)
    return theta
def learningCurve(X, y, Xval, yval, lamda):
    m = X.shape[0]
    error_train = np.zeros([m, 1])
    error_val   = np.zeros([m, 1])
    for i in range(0,m):
        theta = trainLinearReg(X[0:i+1,:], y[0:i+1], lamda)
        # Note: lambda is only used for training; the errors are computed with lambda = 0
        error_train[i] = linearRegCostFunction(theta, X[0:i+1,:], y[0:i+1], 0)
        error_val[i] = linearRegCostFunction(theta, Xval, yval, 0)
    return error_train,error_val
def polyFeatures(X, p):
    print('polyFeatures X',X.shape)
    X_poly = np.zeros([X.shape[0],p])
    for i in range(0,p):
        X_poly[:,i] = (X ** (i+1)).reshape([X.shape[0]])
    return X_poly
def featureNormalize(X):
    mu = np.mean(X,axis=0)
    # ddof=1 gives the sample standard deviation (same as Octave's std)
    sigma = np.std(X,axis=0,ddof=1)
    X_norm = (X-mu)/sigma
    return X_norm,mu,sigma
def plotFit(min_x, max_x, mu, sigma, theta, p):
    x = np.linspace(min_x - 15, max_x + 25, 50)
    X_poly = polyFeatures(x, p)
    X_poly = (X_poly - mu) / sigma
    ones = np.ones([X_poly.shape[0],1])
    X_poly = np.column_stack([ones,X_poly])
    plt.plot(x, np.dot(X_poly,theta), '--',c='b')
def validationCurve(X_poly, y, X_poly_val, yval):
    lambda_vec = np.array([0, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10])
    lambda_vec = np.reshape(lambda_vec,[len(lambda_vec),1])
    error_train = np.zeros([len(lambda_vec),1])
    error_val   = np.zeros([len(lambda_vec),1])
    for i in range(0,len(lambda_vec)):
        theta = trainLinearReg(X_poly, y, lambda_vec[i])
        # Note: lambda is only used for training; the errors are computed with lambda = 0
        error_train[i] = linearRegCostFunction(theta, X_poly, y, 0)
        error_val[i] = linearRegCostFunction(theta, X_poly_val, yval, 0)
    return lambda_vec,error_train,error_val
#Part 1: Loading and Visualizing Data
data = sci.loadmat('ex5data1.mat')
#print('data.keys',data.keys())
X = data['X']
y = data['y']
Xtest = data['Xtest']
ytest = data['ytest']
Xval = data['Xval']
yval = data['yval']
print('X:',X.shape)
print('y:',y.shape)
print('Xval:',Xval.shape)
print('yval:',yval.shape)
print('Xtest:',Xtest.shape)
print('ytest:',ytest.shape)

plt.figure(0)
plt.scatter(X,y,marker='x',c='r')
plt.xlabel('Change in water level (x)')
plt.ylabel('Water flowing out of the dam (y)')

#Part 2: Regularized Linear Regression Cost
theta = np.array([[1] ,[1]])
ones = np.ones([X.shape[0],1])
onesval = np.ones([Xval.shape[0],1])
onestest = np.ones([Xtest.shape[0],1])
X = np.column_stack([ones,X])
Xval = np.column_stack([onesval,Xval])
Xtest = np.column_stack([onestest,Xtest])
m,n = X.shape
cost = linearRegCostFunction(theta, X, y, 1)
print('Cost at theta = [1 ; 1]:',cost)
print('(this value should be about 303.993192)')

#Part 3: Regularized Linear Regression Gradient
grad = linearRegGradient(theta, X, y, 1)
print('Gradient at theta = [1 ; 1]:\n',grad)
print('(this value should be about \n[-15.303016; \n598.250744])')

#Part 4: Train Linear Regression
lamda = 0
theta = trainLinearReg(X, y, lamda)

print('theta_re',theta)
theta = theta.reshape([n,1])
h = np.dot(X,theta)
X_t = data['X']
plt.plot(X_t,h,'--',c='b')
#Part 5: Learning Curve for Linear Regression 
error_train,error_val = learningCurve(X, y, Xval, yval, lamda)
#print('error_train:',error_train)
#print('error_val:',error_val)
plt.figure(1)
plt.plot(range(1,m+1),error_train,range(1,m+1),error_val)
plt.title('Learning curve for linear regression')
plt.legend(['Train', 'Cross Validation'])
plt.xlabel('Number of training examples')
plt.ylabel('Error')

#Part 6: Feature Mapping for Polynomial Regression
Xtest_t = data['Xtest']
Xval_t = data['Xval']
p = 8
X_poly = polyFeatures(X_t, p)
#print('X_poly',X_poly)
[X_poly, mu, sigma] = featureNormalize(X_poly)
#print('X_poly',X_poly)
#print('mu',mu)
#print('sigma',sigma)
ones = np.ones([X_poly.shape[0],1])
X_poly = np.column_stack([ones,X_poly])

X_poly_test = polyFeatures(Xtest_t, p)
X_poly_test= (X_poly_test - mu) / sigma
ones = np.ones([X_poly_test.shape[0],1])
X_poly_test = np.column_stack([ones,X_poly_test])

X_poly_val = polyFeatures(Xval_t, p)
X_poly_val= (X_poly_val - mu) / sigma
ones = np.ones([X_poly_val.shape[0],1])
X_poly_val = np.column_stack([ones,X_poly_val])

#Part 7: Learning Curve for Polynomial Regression
lamda = 0
theta = trainLinearReg(X_poly, y, lamda)
#print('theta:',theta)
plt.figure(2)
plt.scatter(X_t,y,marker='x',c='r')
plotFit(np.min(X_t), np.max(X_t), mu, sigma, theta, p)
plt.xlabel('Change in water level (x)')
plt.ylabel('Water flowing out of the dam (y)')
plt.title('Polynomial Regression Fit (lambda = {})'.format(lamda))

plt.figure(3)
error_train,error_val = learningCurve(X_poly, y, X_poly_val, yval, lamda)
plt.plot(range(1,m+1),error_train,range(1,m+1),error_val)
plt.title('Polynomial Regression Learning Curve (lambda = {})'.format(lamda))
plt.legend(['Train', 'Cross Validation'])
plt.xlabel('Number of training examples')
plt.ylabel('Error')

for i in range(0,m):
    print("Training Examples:{},error_train:{},error_val:{}".format(i+1,
                                                         error_train[i],
                                                         error_val[i]))

#Part 8: Validation for Selecting Lambda
lambda_vec, error_train, error_val = validationCurve(X_poly, y, X_poly_val, yval)
plt.figure(4)
print('lambda_vec',lambda_vec.shape)
print('error_train',error_train.shape)
print('error_val',error_val.shape)
#plt.plot(lambda_vec, error_train, lambda_vec, error_val)
plt.plot(lambda_vec,error_train,label='Train')
plt.plot(lambda_vec,error_val,label='Cross Validation')
plt.legend(['Train', 'Cross Validation'])
plt.xlabel('lambda')
plt.ylabel('Error')
for i in range(0,len(lambda_vec)):
    print("lambda:{},error_train:{},error_val:{}".format(lambda_vec[i],
                                                         error_train[i],
                                                         error_val[i]))

Output:

X: (12, 1)
y: (12, 1)
Xval: (21, 1)
yval: (21, 1)
Xtest: (21, 1)
ytest: (21, 1)
Cost at theta = [1 ; 1]: 303.9931922202643
(this value should be about 303.993192)
Gradient at theta = [1 ; 1]:
 [-15.30301567 598.25074417]
(this value should be about 
[-15.303016; 
598.250744])
theta_re [13.08790351  0.36777923]
polyFeatures X (12, 1)
polyFeatures X (21, 1)
polyFeatures X (21, 1)
polyFeatures X (50,)
Training Examples:1,error_train:[0.],error_val:[160.72189969]
Training Examples:2,error_train:[3.08148791e-31],error_val:[160.12151033]
Training Examples:3,error_train:[1.91978245e-11],error_val:[61.75500548]
Training Examples:4,error_train:[5.966442e-24],error_val:[61.92889541]
Training Examples:5,error_train:[0.00028918],error_val:[7.10331359]
Training Examples:6,error_train:[4.79989886e-06],error_val:[10.60573856]
Training Examples:7,error_train:[0.05025379],error_val:[7.1182165]
Training Examples:8,error_train:[0.09112559],error_val:[5.24899194]
Training Examples:9,error_train:[0.21622546],error_val:[12.19001157]
Training Examples:10,error_train:[0.23174864],error_val:[10.17504437]
Training Examples:11,error_train:[0.21649865],error_val:[9.29555017]
Training Examples:12,error_train:[0.20825645],error_val:[21.22143456]
lambda_vec (10, 1)
error_train (10, 1)
error_val (10, 1)
lambda:[0.],error_train:[0.20825645],error_val:[21.22143456]
lambda:[0.001],error_train:[0.2226438],error_val:[20.09383565]
lambda:[0.003],error_train:[0.19333881],error_val:[22.39393165]
lambda:[0.01],error_train:[0.24293414],error_val:[16.31434454]
lambda:[0.03],error_train:[0.27992966],error_val:[13.08782916]
lambda:[0.1],error_train:[0.45931256],error_val:[7.58722655]
lambda:[0.3],error_train:[0.92176393],error_val:[4.63682878]
lambda:[1.],error_train:[2.07618042],error_val:[4.26062099]
lambda:[3.],error_train:[4.90134755],error_val:[3.82289867]
lambda:[10.],error_train:[16.09220999],error_val:[9.94550365]

Summary:

1. The lambda parameter is used only when training (optimizing for theta); it is not used when computing the training error and validation error with that theta (see the first sketch below).

2. When calling np.std(), pass ddof=1 to get the sample standard deviation, which is what Octave's std() computes (see the second sketch below).

3. For the cost function linearRegCostFunction(theta, X, y, lamda), the theta that the fmin_cg optimizer passes in has shape (n,), so it must be reshaped before use (n is the length of theta).

4. For the gradient function linearRegGradient(theta, X, y, lamda), the grad it returns to fmin_cg must also have shape (n,), so reshape it before returning (points 3 and 4 are illustrated in the last sketch below).
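
Point 1 in code, reusing trainLinearReg and linearRegCostFunction from the script above (theta_reg, train_err and val_err are just illustrative names): regularization is applied only when fitting, and both errors are then measured with lamda = 0.

# fit with regularization (lamda = 1), then measure the errors without it
theta_reg = trainLinearReg(X_poly, y, 1)
train_err = linearRegCostFunction(theta_reg, X_poly, y, 0)        # lamda = 0 for the error
val_err   = linearRegCostFunction(theta_reg, X_poly_val, yval, 0) # same for validation
print(train_err, val_err)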
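
A minimal sketch of point 2; the array values are made up purely for illustration:

import numpy as np

x = np.array([1.0, 2.0, 3.0, 4.0])
print(np.std(x))           # 1.1180..., population std (divides by m)
print(np.std(x, ddof=1))   # 1.2909..., sample std (divides by m - 1), same as Octave's std()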
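
A minimal sketch of points 3 and 4 on a toy quadratic objective (toy_cost and toy_grad are made-up names, not part of the assignment): fmin_cg hands the parameters over as a 1-D array of shape (n,) and expects the gradient back in the same shape, so the wrappers reshape to a column vector on entry and flatten the gradient on exit.

import numpy as np
import scipy.optimize as opt

def toy_cost(theta, n):
    theta = np.reshape(theta, [n, 1])   # fmin_cg passes theta as shape (n,)
    return float(np.sum(theta ** 2))    # toy objective: ||theta||^2

def toy_grad(theta, n):
    theta = np.reshape(theta, [n, 1])
    grad = 2 * theta                    # column-vector gradient, shape (n, 1)
    return grad.reshape([n])            # fmin_cg expects shape (n,)

theta_opt = opt.fmin_cg(f=toy_cost, x0=np.ones(2), fprime=toy_grad,
                        args=(2,), maxiter=50, disp=False)
print(theta_opt)   # approximately [0. 0.]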

 
