Back-Propagation Neural Network (BP) Algorithm

01 Network Description

A BP network is composed of a large number of simple processing units that are densely interconnected; it is a multi-layer mapping network whose weights are trained to represent nonlinear functions. It has excellent nonlinear mapping ability and, in theory, can approximate any nonlinear function to arbitrary accuracy. This article uses a BP neural network to solve the following function-fitting problem.

Function approximation: design a neural network that fits the functions below:

02 Building the BP Neural Network Model

(1) Generating the sample data

To build a BP neural network model for function fitting, the following steps are generally involved:

Generating the sample data. Two methods are used here:

  • Method 1: first generate 500 uniformly spaced data points for x in [-π, π], shuffle the indices of these 500 samples, then take the first 30% of the points as the test set and the remaining 70% as the training set, as shown in the figure (see the sketch after this list).

  • Method 2: generate 350 uniformly spaced data points in [-π, π] as the training set, and another 150 uniformly spaced data points in [-π, π] as the test set.
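
A minimal sketch of Method 1 (shuffle and split at a 30%/70% ratio); the full train_test_split helper appears in the code section below:

# Method 1 sketch: shuffle the indices, first 30% become the test set, the rest the training set
import numpy as np

x = np.linspace(-np.pi, np.pi, 500)      # 500 uniformly spaced points in [-pi, pi]
idx = np.random.permutation(len(x))      # shuffled sample indices
test_size = int(len(x) * 0.3)            # 30% of the samples
test_idx, train_idx = idx[:test_size], idx[test_size:]
x_test, x_train = x[test_idx], x[train_idx]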

(2) Data preprocessing

The sample data usually needs to be normalized; since the data here does not vary over a large range, normalization is not applied for now.
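
If normalization were needed, a standard min-max scaling into [0, 1] could be used; this mirrors the normalize helper defined in the code section below:

# min-max normalization: rescale a numpy array into the [0, 1] range
def min_max_scale(data):
    data_min, data_max = data.min(), data.max()
    return (data - data_min) / (data_max - data_min)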

(3) The BP neural network

The structure of a typical 3-layer BP neural network is shown in Figure 1: an input layer, a hidden layer, and an output layer. The hidden layer shapes the relationship between input and output, i.e. by changing the hidden-layer weights, the behaviour of the multi-layer network can be changed.

The learning process of a BP neural network consists of forward propagation and back-propagation.
In the forward pass, the output is computed through matrix multiplications, and comparing the true value with the output value gives the error between them.
In the backward pass, the gradient of the loss function with respect to every parameter in the model is computed, and each parameter is then updated by gradient descent.
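
As a rough single-hidden-layer sketch of the forward pass (the multi-layer version is the predict method in the code section below; the shapes and the true value here are only for illustration):

# forward pass for one sample through one tanh hidden layer and a linear output layer
import numpy as np

x = np.array([[0.5]])                              # one input sample as a column vector
W1, b1 = np.random.randn(5, 1), np.zeros((5, 1))   # hidden layer with 5 neurons
W2, b2 = np.random.randn(1, 5), np.zeros((1, 1))   # linear output layer

a1 = np.tanh(np.dot(W1, x) + b1)                   # hidden activation
output = np.dot(W2, a1) + b2                       # network output
error = output - 1.0                               # gap to a (hypothetical) true value of 1.0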

Gradient descent can be pictured as standing somewhere on a large mountain without knowing the way down, and deciding to proceed one step at a time: at every position, compute the gradient at the current point and take one step along the negative gradient, i.e. the locally steepest downhill direction, then repeat, until we believe we have reached the foot of the mountain. Of course, proceeding this way we may never actually reach the foot, but only some local low point on the mountainside.

Of course, if the loss function is convex, the solution found by gradient descent is guaranteed to be the global optimum.

The essence of the BP algorithm is minimizing the error function: through repeated training over many samples, the weights are adjusted along the negative gradient of the error function, usually via gradient descent.
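
As a minimal, concrete illustration of gradient descent (on a one-dimensional convex loss, not the network itself), each step moves the parameter along the negative gradient:

# gradient descent on L(w) = (w - 3)^2, whose minimum is at w = 3
eta = 0.1                  # learning rate
w = 0.0                    # initial parameter value
for _ in range(100):
    grad = 2 * (w - 3)     # dL/dw
    w = w - eta * grad     # step along the negative gradient
print(w)                   # approaches 3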

(4) Training and testing

Using the first sampling method, generate the training and test data, set batch_size to 32, the number of training epochs to 3000, and the learning rate to 0.001, then feed the training data into the BP network for training.
The test data is then fed into the model with its trained parameters to make predictions, giving the fitted results for the two functions above.
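
Assuming numpy is imported as np and the definitions from the code section below (load_data, BP, tanh, tanh_derivative, loss_derivative) are available, a training and prediction call matching these settings might look like:

# hypothetical call matching the settings above (Method 1 data, batch_size 32, 3000 epochs, lr 0.001)
x_train, y_train, x_test, y_test = load_data(500, 2, 2, 2, 2)
train_data = [(np.array([xv]), np.array([yv])) for xv, yv in zip(x_train, y_train)]
model = BP([1, 5, 5, 1], tanh, tanh_derivative, loss_derivative)
model.fit(train_data, epochs=3000, batch_size=32, learning_rate=0.001,
          validation_data=(x_test, y_test))
predict = model.predict(x_test)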

03 Problems and Solutions

The model fits the boundary data poorly: with the first sampling method, few samples fall near the boundaries, so the model underfits the boundary data.
Remedy: use cross-validation. Split the dataset into K folds, train on K-1 folds with the remaining fold as the test set, and repeat the training K times; this way the whole dataset is fully used for training (see the sketch below).
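
A minimal K-fold split sketch with numpy (a hypothetical helper, not part of the code section below), where each fold serves once as the test set and the remaining K-1 folds form the training set:

# K-fold cross-validation indices: each fold is used once as the test set
import numpy as np

def k_fold_indices(n_samples, k, seed=2019):
    rng = np.random.RandomState(seed)
    idx = rng.permutation(n_samples)
    folds = np.array_split(idx, k)
    for i in range(k):
        test_idx = folds[i]
        train_idx = np.concatenate([folds[j] for j in range(k) if j != i])
        yield train_idx, test_idx

# example: 5 folds over 500 samples; train a fresh model on each split
for train_idx, test_idx in k_fold_indices(500, 5):
    pass  # fit a BP model on the training indices, evaluate on the test indices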

04 Algorithm Evaluation

(1) Problem complexity

(2) Sampling method

(3) Learning rate

(4) Number of samples

(5) Batch size

(6) Activation function

(7) Number of hidden layers and node counts

05 Code

# python- Back Propagation
# coding=utf-8
import numpy as np
import matplotlib.pyplot as plt


plt.rcParams['font.sans-serif'] = ['SimHei']  # display CJK labels correctly
plt.rcParams['axes.unicode_minus'] = False  # display the minus sign correctly


# dataset split helper: shuffle the samples and split into training and test sets
def train_test_split(x, y, test_ratio=0.3, seed=None):
    if seed is not None:
        np.random.seed(seed)
    shuffled_indexs = np.random.permutation(len(x))
    test_size = int(len(x) * test_ratio)
    train_index = shuffled_indexs[test_size:]
    test_index = shuffled_indexs[:test_size]
    train_index = np.sort(train_index)
    test_index = np.sort(test_index)
    return x[train_index], x[test_index], y[train_index], y[test_index]


# target functions to fit
def f(a, b, c, d, x):
    return a * np.sin(b * x) + c * np.cos(d * x)

def f1(a, b, c, d, x):
    return a * x * np.sin(b * x) + c * x * np.cos(d * x)

# random sampling: shuffle-and-split (Method 1)
def load_data(step, a, b, c, d):
    x = np.linspace(-np.pi, np.pi, step).T
    x = np.expand_dims(x, -1)
    y = f(a, b, c, d, x) + f(3,3,3,3,x)
    x_train, x_test, y_train, y_test = train_test_split(x, y, seed=2019)
    return x_train, y_train, x_test, y_test

# uniform sampling: separate uniformly spaced training and test sets (Method 2)
def load_train_data(step, a, b, c, d):
    x_train = np.linspace(-np.pi, np.pi, step).T
    x_train = np.expand_dims(x_train, -1)
    y_train = f(a, b, c, d, x_train)  + f(3,3,3,3,x_train)

    return x_train, y_train


def load_test_data(step, a, b, c, d):
    x_test = np.linspace(-np.pi, np.pi, step).T
    x_test = np.expand_dims(x_test, -1)
    y_test = f(a, b, c, d, x_test) + f(3, 3, 3, 3, x_test)
    return x_test, y_test

# min-max normalization (not used in this example)
def normalize(data):
    data_min, data_max = data.min(), data.max()
    data = (data - data_min) / (data_max - data_min)
    return data

# activation functions and their derivatives
def tanh(z):
    return np.tanh(z)


def tanh_derivative(z):
    return 1.0 - np.tanh(z) * np.tanh(z)


def sigmoid(z):
    return 1 / (1 + np.exp(-z))


def sigmoid_derivative(z):
    return sigmoid(z) * (1 - sigmoid(z))


def relu(z):
    return np.maximum(0, z)


def relu_derivative(z):
    # derivative of relu: 1 where z >= 0, 0 elsewhere (element-wise, works on arrays)
    return np.where(z >= 0, 1.0, 0.0)


# derivative of the squared-error loss with respect to the output
def loss_derivative(output_activations, y):
    return 2 * (output_activations - y)


def mean_squared_error(predictY, realY):
    Y = np.array(realY)
    return np.sum((predictY - Y) ** 2) / realY.shape[0]


# BP neural network class
class BP:
    # initialize the BP network: zero biases, Gaussian-initialized weights
    def __init__(self, sizes, activity, activity_derivative, loss_derivative):
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.biases = [np.zeros((neuron, 1)) for neuron in sizes[1:]]
        self.weights = [np.random.randn(next_layer_neuron, neuron) for neuron, next_layer_neuron in
                        zip(sizes[:-1], sizes[1:])]
        self.activity = activity
        self.activity_derivative = activity_derivative
        self.loss_derivative = loss_derivative

    # forward pass / prediction: hidden layers use the activation, the output layer is linear
    def predict(self, a):
        re = a.T
        n = len(self.biases) - 1
        for i in range(n):
            b, w = self.biases[i], self.weights[i]
            re = self.activity(np.dot(w, re) + b)
        re = np.dot(self.weights[n], re) + self.biases[n]
        return re.T

    # accumulate the gradients over one batch and update the weights and biases
    def update_batch(self, batch, learning_rate):
        temp_b = [np.zeros(b.shape) for b in self.biases]
        temp_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in batch:
            delta_temp_b, delta_temp_w = self.update_parameter(x, y)
            temp_w = [w + dw for w, dw in zip(temp_w, delta_temp_w)]
            temp_b = [b + db for b, db in zip(temp_b, delta_temp_b)]
        self.weights = [sw - (learning_rate / len(batch)) * w for sw, w in zip(self.weights, temp_w)]
        self.biases = [sb - (learning_rate / len(batch)) * b for sb, b in zip(self.biases, temp_b)]

    # one forward + backward pass for a single sample: returns the gradients of the
    # loss with respect to every bias and weight
    def update_parameter(self, x, y):
        temp_b = [np.zeros(b.shape) for b in self.biases]
        temp_w = [np.zeros(w.shape) for w in self.weights]
        activation = x
        activations = [x]   # layer-by-layer activations
        zs = []             # layer-by-layer pre-activation values
        n = len(self.biases)
        # forward pass: hidden layers use the activation, the output layer is linear
        for i in range(n):
            b, w = self.biases[i], self.weights[i]
            z = np.dot(w, activation) + b
            zs.append(z)
            if i != n - 1:
                activation = self.activity(z)
            else:
                activation = z
            activations.append(activation)
        # backward pass: start from the derivative of the loss at the output layer
        d = self.loss_derivative(activations[-1], y)
        temp_b[-1] = d
        temp_w[-1] = np.dot(d, activations[-2].T)
        # propagate the error backwards through the hidden layers
        for i in range(2, self.num_layers):
            z = zs[-i]
            d = np.dot(self.weights[-i + 1].T, d) * self.activity_derivative(z)
            temp_b[-i] = d
            temp_w[-i] = np.dot(d, activations[-i - 1].T)
        return (temp_b, temp_w)

    # mini-batch training loop; returns the per-epoch validation loss history
    def fit(self, train_data, epochs, batch_size, learning_rate, validation_data=None):
        epoches, losses = [], []
        n = len(train_data)
        for j in range(epochs):
            np.random.shuffle(train_data)
            batches = [train_data[k:k + batch_size] for k in range(0, n, batch_size)]
            for batch in batches:
                self.update_batch(batch, learning_rate)
            if validation_data is not None:
                val_pre = self.predict(validation_data[0])
                val_loss = mean_squared_error(val_pre, validation_data[1])
                print("Epoch", j + 1, '/', epochs, '  val loss:%12.12f' % val_loss)
                losses.append(val_loss)
                epoches.append(j + 1)
        return epoches, losses


if __name__ == "__main__":
    # set the random seed for reproducibility
    np.random.seed(2019)
    # coefficients of the target function
    a, b, c, d = 2, 2, 2, 2
    num_step = 500
    # random sampling (Method 1)
    x_train, y_train, x_test, y_test = load_data(num_step, a, b, c, d)
    # uniform sampling (Method 2)
    # rate = 0.3
    # num_train_step = int(num_step * (1 - rate))
    # num_test_step = int(num_step * rate)
    # x_train, y_train = load_train_data(num_train_step, a, b, c, d)
    # x_test, y_test = load_test_data(num_test_step, a, b, c, d)
    print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)
    data = [(np.array([x_value]), np.array([y_value])) for x_value, y_value in zip(x_train, y_train)]
    # BP network hyperparameters
    beta = 1e-2
    layer = [1, 5, 5, 1]
    epochs = 1000
    model = BP(layer, tanh, tanh_derivative, loss_derivative)
    # train the BP network
    epoches, losses = model.fit(train_data=data, epochs=epochs, batch_size=8, learning_rate=beta,
                                 validation_data=(x_test, y_test))
    # predict on the test set
    predict = model.predict(x_test)
    # prediction error: mean absolute error over the test set
    mae = np.mean(np.abs(predict - y_test))
    print("Mean absolute error: %12.12f" % mae)
    # plot the fitted curve against the original curve
    plt.figure()
    plt.title("BP neural network fit of the nonlinear target function")
    plt.plot(x_test, y_test, "-r", linewidth=2, label='origin')
    plt.plot(x_test, predict, "-b", linewidth=1, label='predict')

    plt.legend()
    plt.grid(True)
    plt.show()

    plt.figure()
    plt.title("BP神經網絡誤差下降曲線")
    plt.plot(epoches, losses, "-r", linewidth=2, label="誤差曲線")
    plt.legend()
    plt.show()