DL Gradient Descent (three variants of gradient descent, with hands-on code)

What will you learn from this article? It walks through three variants of gradient descent for linear regression, each with runnable NumPy code: stochastic, mini-batch, and batch gradient descent. For study purposes only; if you have any questions, please leave a comment.

 

# -*- coding: utf-8 -*-
# Author       :   szy
# Create Date  :   2019/10/30
# Three variants of gradient descent
# 1. Stochastic gradient descent (stochastic_gradient_descent)
# Examples 1 and 2 below are kept in string literals so that only the
# batch gradient descent example at the end of the script actually runs.
s = '''
import numpy as np

X = 2*np.random.rand(100, 1)
y = 4 + 3*X + np.random.randn(100, 1)
X_b = np.c_[np.ones((100, 1)), X]

n_epochs = 10000
m = 100
t0, t1 = 5, 500

# Learning-rate schedule: decay the learning rate as training progresses
def learning_rate_schedule(t):
    return t0/(t+t1)

theta = np.random.randn(2, 1)
for epoch in range(n_epochs):
    # Between the two for loops: shuffle the data indices at the start of
    # each epoch, before iterating over individual samples
    arr = np.arange(len(X_b))
    np.random.shuffle(arr)
    X_b = X_b[arr]
    y = y[arr]
    for i in range(m):
        # Take one sample at a time (slicing with i:i+1 keeps the 2-D shape)
        xi = X_b[i:i+1]
        yi = y[i:i+1]
        # Gradient of the squared error for a single sample
        # (the constant factor is absorbed into the learning rate)
        gradients = xi.T.dot(xi.dot(theta)-yi)
        learning_rate = learning_rate_schedule(epoch*m + i)
        theta = theta - learning_rate * gradients

print(theta)
'''
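
For comparison, the same kind of stochastic gradient descent can be done with scikit-learn's SGDRegressor instead of a hand-written loop. A minimal sketch, assuming scikit-learn is installed; the hyperparameters here are illustrative only, not tuned:

# Minimal sketch: SGD via scikit-learn's SGDRegressor on the same synthetic data.
# Assumes scikit-learn is installed; hyperparameters are illustrative only.
import numpy as np
from sklearn.linear_model import SGDRegressor

X = 2*np.random.rand(100, 1)
y = 4 + 3*X + np.random.randn(100, 1)

sgd_reg = SGDRegressor(max_iter=10000, penalty=None, eta0=0.01,
                       learning_rate='invscaling')
sgd_reg.fit(X, y.ravel())                 # SGDRegressor expects a 1-D target
print(sgd_reg.intercept_, sgd_reg.coef_)  # should come out close to 4 and 3
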
# 2. Mini-batch gradient descent (mini_batch_gradient_descent)
b = """
import numpy as np

X = 2*np.random.rand(100, 1)
y = 4 + 3*X + np.random.randn(100, 1)
X_b = np.c_[np.ones((100, 1)), X]

t0, t1 = 5, 500

# Learning-rate schedule: decay the learning rate as training progresses
def learning_rate_schedule(t):
    return t0/(t+t1)

n_epochs = 100000
m = 100
batch_size = 10
num_batches = int(m / batch_size)

theta = np.random.randn(2, 1)
for epoch in range(n_epochs):
    arr = np.arange(len(X_b))
    np.random.shuffle(arr)
    X_b = X_b[arr]
    y = y[arr]
    for i in range(num_batches):
        x_batch = X_b[i*batch_size: i*batch_size + batch_size]
        y_batch = y[i*batch_size: i*batch_size + batch_size]
        # Gradient of the squared error summed over the mini-batch
        # (constant factors are absorbed into the learning rate)
        gradients = x_batch.T.dot(x_batch.dot(theta)-y_batch)
        # Advance the schedule once per mini-batch update
        learning_rate = learning_rate_schedule(epoch * num_batches + i)
        theta = theta - learning_rate*gradients

print(theta)
"""

# 3. Batch gradient descent (batch_gradient_descent)
import numpy as np


# Create the dataset X, y
np.random.seed(1)
X = np.random.rand(100, 1)
y = 4 + 3*X + np.random.randn(100, 1)
X_b = np.c_[np.ones((100, 1)), X]

# Hyperparameters
n_iterations = 10000

t0, t1 = 5, 500


# Learning-rate schedule: decay the learning rate as training progresses
def learning_rate_schedule(t):
    return t0/(t+t1)


# 1. Initialize θ (W0...Wn) by drawing W from the standard normal distribution
theta = np.random.randn(2, 1)

# 4. Convergence check: in practice, rather than testing against a tolerance
#    threshold, we simply run a relatively large, fixed number of iterations
for i in range(n_iterations):
    # 2. Compute the gradient
    gradients = X_b.T.dot(X_b.dot(theta)-y)
    # 3. Apply the gradient descent update: θ(t+1) = θ(t) - η * gradient
    learning_rate = learning_rate_schedule(i)
    theta = theta - learning_rate * gradients

print(theta)
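
As a sanity check, the closed-form normal-equation solution should give roughly the same θ (close to the true parameters 4 and 3). A minimal sketch reusing the X_b and y defined in the batch example above:

# Sanity check: closed-form solution via the normal equation,
# theta = (X_b^T X_b)^(-1) X_b^T y. Reuses X_b and y defined above.
theta_closed_form = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y)
print(theta_closed_form)  # should be close to the gradient descent result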