一、線性迴歸的基本要素
1、模型定義
建立基於輸入 x1 和 x2 來計算輸出 y 的表達式,也就是模型(model)。顧名思義,線性迴歸假設輸出與各個輸入之間是線性關係:
2、損失函數
通常選擇使用平方誤差(平方損失)作爲損失函數:
3、優化算法
當模型和損失函數形式較爲簡單時,上面的誤差最小化問題的解可以直接用公式表達出來。這類解叫作解析解(analytical solution)。本次使用的線性迴歸和平方誤差剛好屬於這個範疇。然而,大多數深度學習模型並沒有解析解,只能通過優化算法有限次迭代模型參數來儘可能降低損失函數的值。這類解叫作數值解(numerical solution)。
在求數值解的優化算法中,小批量隨機梯度下降(mini-batch stochastic gradient descent)在深度學習中被廣泛使用。它的算法很簡單:先選取一組模型參數的初始值,如隨機選取;接下來對參數進行多次迭代,使每次迭代都可能降低損失函數的值。在每次迭代中,先隨機均勻採樣一個由固定數目訓練數據樣本所組成的小批量(mini-batch)B,然後求小批量中數據樣本的平均損失有關模型參數的導數(梯度),最後用此結果與預先設定的一個正數的乘積作爲模型參數在本次迭代的減小量。
代碼實現(註解)
import tensorflow as tf
print(tf.__version__) # 採用tensorflow2.1.0版本
from matplotlib import pyplot as plt
import random
# Ground-truth parameters of the synthetic model: y = 2*x1 - 3.4*x2 + 4.2.
num_inputs = 2        # two input features per example
num_examples = 1000   # dataset size
true_w = [2, -3.4]    # true weights: w1 = 2, w2 = -3.4
true_b = 4.2          # true bias

# Features drawn from a standard normal distribution, shape (1000, 2).
features = tf.random.normal((num_examples, num_inputs), stddev=1)
# Labels follow the linear model exactly...
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
# ...plus Gaussian noise (mean 0, std 0.01) that models meaningless
# interference in the dataset.
labels += tf.random.normal(labels.shape, stddev=0.01)


def set_figsize(figsize=(3.5, 2.5)):
    """Set the default matplotlib figure size before plotting."""
    plt.rcParams['figure.figsize'] = figsize


set_figsize()
plt.scatter(features[:, 1], labels, 1)  # second feature vs. label
plt.show()
def data_iter(batch_size, features, labels):
    """Yield mini-batches of (features, labels) in random order.

    Each iteration yields batch_size randomly chosen examples; the final
    batch may be smaller when num_examples is not divisible by batch_size.
    """
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)  # visit the examples in random order
    for i in range(0, num_examples, batch_size):
        j = indices[i: min(i + batch_size, num_examples)]
        yield tf.gather(features, axis=0, indices=j), tf.gather(labels, axis=0, indices=j)
batch_size = 10

# Read and print the first mini-batch: the feature batch has shape (10, 2)
# (batch size, number of inputs); the label batch has shape (10,).
for X, y in data_iter(batch_size, features, labels):
    print(X, y)
    break
# Initialize the weights from N(0, 0.01^2) and the bias to 0.
w = tf.Variable(tf.random.normal((num_inputs, 1), stddev=0.01))
b = tf.Variable(tf.zeros((1,)))
def linreg(X, w, b):
    """Vectorized linear regression: X @ w + b (b broadcasts over the batch)."""
    return tf.matmul(X, w) + b
def squared_loss(y_hat, y):
    """Squared loss: (y_hat - y)^2 / 2, reshaping y to match y_hat's shape."""
    return (y_hat - tf.reshape(y, y_hat.shape)) ** 2 / 2
def sgd(params, lr, batch_size, grads):
    """Mini-batch stochastic gradient descent.

    The gradients from automatic differentiation are the SUM over the batch,
    so each update divides by batch_size to step by the average gradient.
    """
    for i, param in enumerate(params):
        param.assign_sub(lr * grads[i] / batch_size)
# Train the model.
lr = 0.03       # learning rate (hyperparameter)
num_epochs = 3  # number of full passes over the data
net = linreg
loss = squared_loss

for epoch in range(num_epochs):
    # One pass over all mini-batches.
    for X, y in data_iter(batch_size, features, labels):
        with tf.GradientTape() as t:
            t.watch([w, b])  # explicitly watch the variables on the tape
            l = loss(net(X, w, b), y)
        grads = t.gradient(l, [w, b])  # mini-batch gradient of the summed loss
        sgd([w, b], lr, batch_size, grads)
    # Report the mean training loss over the whole dataset after each epoch.
    train_l = loss(net(features, w, b), labels)
    print('epoch %d, loss %f' % (epoch + 1, tf.reduce_mean(train_l)))
數據圖像:
輸出:
tf.Tensor(
[[-0.34988216 1.7806039 ]
[-1.8917446 -0.13573697]
[-0.17453413 -0.34349066]
[ 0.5370717 1.1191437 ]
[-0.01104847 0.38538134]
[ 0.33266178 0.6521724 ]
[-0.33420295 -1.5981245 ]
[-0.70956767 -1.2573007 ]
[ 0.8754746 -1.1428775 ]
[-0.10740936 0.494466 ]], shape=(10, 2), dtype=float32) tf.Tensor(
[-2.5482752 0.8646454 5.0283184 1.4668634 2.8685973 2.6552503
8.958068 7.057969 9.846524 2.3113618], shape=(10,), dtype=float32)
epoch 1, loss 0.028413
epoch 2, loss 0.000093
epoch 3, loss 0.000049
代碼實現(無註解)
import tensorflow as tf
print(tf.__version__)
from matplotlib import pyplot as plt
import random
# Generate the synthetic dataset: y = 2*x1 - 3.4*x2 + 4.2 + noise.
num_inputs = 2
num_examples = 1000
true_w = [2, -3.4]
true_b = 4.2
features = tf.random.normal((num_examples, num_inputs), stddev=1)
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
labels += tf.random.normal(labels.shape, stddev=0.01)


def set_figsize(figsize=(3.5, 2.5)):
    """Set the default matplotlib figure size."""
    plt.rcParams['figure.figsize'] = figsize


set_figsize()
plt.scatter(features[:, 1], labels, 1)
plt.show()
def data_iter(batch_size, features, labels):
    """Yield shuffled mini-batches of (features, labels)."""
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)
    for i in range(0, num_examples, batch_size):
        j = indices[i: min(i + batch_size, num_examples)]
        yield tf.gather(features, axis=0, indices=j), tf.gather(labels, axis=0, indices=j)
batch_size = 10

# Print the first mini-batch to inspect its shapes.
for X, y in data_iter(batch_size, features, labels):
    print(X, y)
    break

# Initialize weights from N(0, 0.01^2) and the bias to 0.
w = tf.Variable(tf.random.normal((num_inputs, 1), stddev=0.01))
b = tf.Variable(tf.zeros((1,)))
def linreg(X, w, b):
    """Vectorized linear regression: X @ w + b."""
    return tf.matmul(X, w) + b
def squared_loss(y_hat, y):
    """Squared loss: (y_hat - y)^2 / 2."""
    return (y_hat - tf.reshape(y, y_hat.shape)) ** 2 / 2
def sgd(params, lr, batch_size, grads):
    """Mini-batch stochastic gradient descent (grads are batch sums)."""
    for i, param in enumerate(params):
        param.assign_sub(lr * grads[i] / batch_size)
# Train for num_epochs passes over the data with learning rate lr.
lr = 0.03
num_epochs = 3
net = linreg
loss = squared_loss

for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        with tf.GradientTape() as t:
            t.watch([w, b])
            l = loss(net(X, w, b), y)
        grads = t.gradient(l, [w, b])
        sgd([w, b], lr, batch_size, grads)
    train_l = loss(net(features, w, b), labels)
    print('epoch %d, loss %f' % (epoch + 1, tf.reduce_mean(train_l)))