Andrew Ng Deep Learning Course 2, Week 2 Programming Assignment

Goals of this assignment

Get familiar with the three optimization algorithms covered in the course: gradient descent, gradient descent with momentum, and Adam.

Observe how each of the three algorithms optimizes the model and compare their effects.

Download

Link: https://pan.baidu.com/s/1av5v-tEbnx0cMjIlLLk_JQ
Extraction code: wk8o

Code:

# -*- coding: utf-8 -*-
# 1. Split the dataset into mini-batches
# 2. Optimize gradient descent:
#    2.1 Plain gradient descent without any optimization
#    2.2 Mini-batch gradient descent
#    2.3 Gradient descent with momentum
#    2.4 Adam

import numpy as np
import matplotlib.pyplot as plt
import scipy.io
import math
import sklearn
import sklearn.datasets
import opt_utils
import testCase

plt.rcParams['figure.figsize'] = (7.0, 4.0)
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'
# Gradient descent
def update_parameters_with_gd(parameters, grads, learning_rate):
    L = len(parameters) // 2  # number of layers in the network

    for l in range(L):
        parameters['W' + str(l+1)] = parameters['W' + str(l+1)] - learning_rate * grads['dW' + str(l+1)]
        parameters['b' + str(l+1)] = parameters['b' + str(l+1)] - learning_rate * grads['db' + str(l+1)]

    return parameters
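
# A quick sanity check for update_parameters_with_gd (a sketch added for this
# write-up, not part of the assignment): the tiny parameter/gradient dicts below
# are made up just to exercise the rule W := W - learning_rate * dW on every layer.
gd_test_params = {'W1': np.zeros((2, 3)), 'b1': np.zeros((2, 1)),
                  'W2': np.zeros((1, 2)), 'b2': np.zeros((1, 1))}
gd_test_grads = {'dW1': np.ones((2, 3)), 'db1': np.ones((2, 1)),
                 'dW2': np.ones((1, 2)), 'db2': np.ones((1, 1))}
gd_test_params = update_parameters_with_gd(gd_test_params, gd_test_grads, learning_rate=0.1)
print('W1 after one gd step (expected -0.1 everywhere):\n', gd_test_params['W1'])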
# Mini-batch gradient descent
def random_mini_batches(X, Y, mini_batches_size=64, seed=0):
    np.random.seed(seed)
    m = X.shape[1]
    mini_batches = []
    # Step 1: shuffle the examples
    permutation = list(np.random.permutation(m))  # random permutation of the m column indices
    shuffled_X = X[:, permutation]
    shuffled_Y = Y[:, permutation].reshape((1, m))
    # Step 2: partition into mini-batches
    num_complete_minibatches = math.floor(m / mini_batches_size)  # m may not be divisible by the batch size
    for k in range(0, num_complete_minibatches):
        mini_batch_X = shuffled_X[:, k * mini_batches_size:(k + 1) * mini_batches_size]
        mini_batch_Y = shuffled_Y[:, k * mini_batches_size:(k + 1) * mini_batches_size]
        mini_batch = (mini_batch_X, mini_batch_Y)
        mini_batches.append(mini_batch)
    # Handle the last, smaller mini-batch when m is not divisible by the batch size
    if m % mini_batches_size != 0:
        mini_batch_X = shuffled_X[:, mini_batches_size * num_complete_minibatches:]
        mini_batch_Y = shuffled_Y[:, mini_batches_size * num_complete_minibatches:]
        mini_batch = (mini_batch_X, mini_batch_Y)
        mini_batches.append(mini_batch)
    return mini_batches
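
# Shape check for random_mini_batches (a sketch added here, not in the original
# post): with 148 synthetic examples and a batch size of 64 we expect three
# mini-batches of sizes 64, 64 and 20, and the labels must stay shaped (1, m).
check_X = np.random.randn(5, 148)
check_Y = np.random.randint(0, 2, (1, 148))
check_batches = random_mini_batches(check_X, check_Y, mini_batches_size=64, seed=1)
print('number of mini-batches:', len(check_batches))
print('mini-batch sizes:', [mb_X.shape[1] for (mb_X, mb_Y) in check_batches])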
# Gradient descent with momentum
def initialize_velocity(parameters):
    L = len(parameters) // 2
    v = {}
    for l in range(L):
        # the velocity starts at zero, with the same shape as each parameter
        v['dW' + str(l+1)] = np.zeros_like(parameters['W' + str(l+1)])
        v['db' + str(l+1)] = np.zeros_like(parameters['b' + str(l+1)])
    return v

def update_parameters_with_momentum(parameters, grads, v, beta, learning_rate):
    L = len(parameters) // 2

    for l in range(L):
        # exponentially weighted average of the gradients
        v['dW' + str(l+1)] = beta * v['dW' + str(l+1)] + (1 - beta) * grads['dW' + str(l+1)]
        v['db' + str(l+1)] = beta * v['db' + str(l+1)] + (1 - beta) * grads['db' + str(l+1)]

        # update the parameters with the velocity instead of the raw gradient
        parameters['W' + str(l+1)] = parameters['W' + str(l+1)] - learning_rate * v['dW' + str(l+1)]
        parameters['b' + str(l+1)] = parameters['b' + str(l+1)] - learning_rate * v['db' + str(l+1)]
    return parameters, v
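
# Sanity check for the momentum update (added as a sketch): with zero-initialized
# velocity and beta = 0.9, the first step scales a unit gradient by (1 - beta),
# so with learning_rate = 1.0 every entry of W1 should move to -0.1.
mom_params = {'W1': np.zeros((2, 2)), 'b1': np.zeros((2, 1))}
mom_grads = {'dW1': np.ones((2, 2)), 'db1': np.ones((2, 1))}
mom_v = initialize_velocity(mom_params)
mom_params, mom_v = update_parameters_with_momentum(mom_params, mom_grads, mom_v, beta=0.9, learning_rate=1.0)
print('W1 after one momentum step (expected -0.1 everywhere):\n', mom_params['W1'])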
# Adam
def initialize_adam(parameters):
    L = len(parameters) // 2
    v = {}  # first-moment (momentum-style) estimates
    s = {}  # second-moment (RMSprop-style) estimates

    for l in range(L):
        v['dW' + str(l+1)] = np.zeros_like(parameters['W' + str(l+1)])
        v['db' + str(l+1)] = np.zeros_like(parameters['b' + str(l+1)])

        s['dW' + str(l+1)] = np.zeros_like(parameters['W' + str(l+1)])
        s['db' + str(l+1)] = np.zeros_like(parameters['b' + str(l+1)])
    return (v, s)
def update_parameters_with_adam(parameters, grads, v, s, t, learning_rate=0.01, beta1=0.9, beta2=0.999, epsilon=1e-8):
    L = len(parameters) // 2
    v_corrected = {}
    s_corrected = {}

    for l in range(L):
        # moving average of the gradients (first moment)
        v['dW' + str(l+1)] = beta1 * v['dW' + str(l+1)] + (1 - beta1) * grads['dW' + str(l+1)]
        v['db' + str(l+1)] = beta1 * v['db' + str(l+1)] + (1 - beta1) * grads['db' + str(l+1)]

        # bias correction for the first moment
        v_corrected['dW' + str(l+1)] = v['dW' + str(l+1)] / (1 - np.power(beta1, t))
        v_corrected['db' + str(l+1)] = v['db' + str(l+1)] / (1 - np.power(beta1, t))

        # moving average of the squared gradients (second moment)
        s['dW' + str(l+1)] = beta2 * s['dW' + str(l+1)] + (1 - beta2) * np.square(grads['dW' + str(l+1)])
        s['db' + str(l+1)] = beta2 * s['db' + str(l+1)] + (1 - beta2) * np.square(grads['db' + str(l+1)])

        # bias correction for the second moment
        s_corrected['dW' + str(l+1)] = s['dW' + str(l+1)] / (1 - np.power(beta2, t))
        s_corrected['db' + str(l+1)] = s['db' + str(l+1)] / (1 - np.power(beta2, t))

        # parameter update; note that W must be scaled by sqrt(s_corrected['dW']), not 'db'
        parameters['W' + str(l+1)] = parameters['W' + str(l+1)] - learning_rate \
                                     * v_corrected['dW' + str(l+1)] / (np.sqrt(s_corrected['dW' + str(l+1)]) + epsilon)
        parameters['b' + str(l+1)] = parameters['b' + str(l+1)] - learning_rate \
                                     * v_corrected['db' + str(l+1)] / (np.sqrt(s_corrected['db' + str(l+1)]) + epsilon)
    return parameters, v, s
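
# Sanity check for Adam (added as a sketch): at t = 1 the bias corrections cancel
# the (1 - beta1) and (1 - beta2) factors, so a constant unit gradient gives an
# update of about learning_rate / (sqrt(1) + epsilon), i.e. roughly -0.01 here.
adam_params = {'W1': np.zeros((2, 2)), 'b1': np.zeros((2, 1))}
adam_grads = {'dW1': np.ones((2, 2)), 'db1': np.ones((2, 1))}
adam_v, adam_s = initialize_adam(adam_params)
adam_params, adam_v, adam_s = update_parameters_with_adam(adam_params, adam_grads, adam_v, adam_s,
                                                          t=1, learning_rate=0.01)
print('W1 after one Adam step (expected about -0.01 everywhere):\n', adam_params['W1'])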

train_X,train_Y=opt_utils.load_dataset(is_plot=False)
def model(X, Y, layers_dims, optimizer, learning_rate=0.0007, mini_batch_size=64,
          beta=0.9, beta1=0.9, beta2=0.999, epsilon=1e-8, num_epochs=10000, print_cost=True, is_plot=True):
    L = len(layers_dims)
    costs = []
    t = 0  # Adam step counter, incremented after every mini-batch
    seed = 10

    # Initialize the parameters
    parameters = opt_utils.initialize_parameters(layers_dims)
    # Initialize the chosen optimizer
    if optimizer == 'gd':
        pass  # plain mini-batch gradient descent needs no extra state
    elif optimizer == 'momentum':
        v = initialize_velocity(parameters)
    elif optimizer == 'adam':
        v, s = initialize_adam(parameters)
    else:
        print('Unknown optimizer: ' + str(optimizer))
        exit(1)
    for i in range(num_epochs):
        seed += 1  # new seed each epoch so the data is reshuffled differently
        minibatches = random_mini_batches(X, Y, mini_batch_size, seed)
        for minibatch in minibatches:
            # select one mini-batch
            (minibatch_X, minibatch_Y) = minibatch
            # forward propagation
            A3, cache = opt_utils.forward_propagation(minibatch_X, parameters)
            # compute the cost
            cost = opt_utils.compute_cost(A3, minibatch_Y)
            # backward propagation
            grads = opt_utils.backward_propagation(minibatch_X, minibatch_Y, cache)
            # update the parameters
            if optimizer == 'gd':
                parameters = update_parameters_with_gd(parameters, grads, learning_rate)
            elif optimizer == 'momentum':
                parameters, v = update_parameters_with_momentum(parameters, grads, v, beta, learning_rate)
            elif optimizer == 'adam':
                t = t + 1
                parameters, v, s = update_parameters_with_adam(parameters, grads, v, s, t, learning_rate, beta1, beta2, epsilon)
        if i % 100 == 0:
            costs.append(cost)
            if print_cost and i % 1000 == 0:
                print('Cost after epoch ' + str(i) + ': ' + str(cost))
    if is_plot:
        plt.plot(costs)
        plt.ylabel('cost')
        plt.xlabel('epochs (per 100)')
        plt.title("Learning rate = " + str(learning_rate))
        plt.show()
    return parameters


layers_dims = [train_X.shape[0], 5, 2, 1]
# Train the model with Adam-optimized gradient descent
parameters = model(train_X, train_Y, layers_dims, optimizer="adam",is_plot=True)
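
# To actually compare the three optimizers (the stated goal of the assignment),
# the same model can be re-run with the other two settings; uncomment to try:
# parameters_gd = model(train_X, train_Y, layers_dims, optimizer="gd", is_plot=True)
# parameters_momentum = model(train_X, train_Y, layers_dims, optimizer="momentum", is_plot=True)
#
# The accuracy/decision-boundary code below is a sketch that assumes opt_utils
# also provides predict, predict_dec and plot_decision_boundary, as the helper
# file from the original course assignment does; adjust if your copy differs.
predictions = opt_utils.predict(train_X, train_Y, parameters)
plt.title("Model with Adam optimization")
axes = plt.gca()
axes.set_xlim([-1.5, 2.5])
axes.set_ylim([-1, 1.5])
opt_utils.plot_decision_boundary(lambda x: opt_utils.predict_dec(parameters, x.T), train_X, train_Y)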



Note: reference: https://blog.csdn.net/u013733326/article/details/79907419
