First, the data file: https://pan.baidu.com/s/1BPCNWVSFUG_zryJuk4CiXA
This seems to be the only file findable online: a 4000×58 table whose last column is the label (0 means salary below 50k, 1 means salary above 50k). The first 57 columns are a person's attributes, and the task is to predict from those attributes whether the person's salary exceeds 50k.
Since there is only this one file, 80% of it is used for training and the rest for testing. The code below is split across several files, matching the imports that appear later: read_Data.py (data loading), util.py (layers and optimizers), logistic_regression.py (the network), and a main training script.
# read_Data.py
import numpy as np
import pandas as pd

def read2train():
    path = 'F:\\python_book\\machine_learning\\spam_train.csv'
    t = pd.read_csv(path)
    t = t.iloc[:, 1:]  # drop the leading id column
    data = np.array(t, float)
    index = int(data.shape[0] * 0.8)  # 80/20 train/test split
    train_x = data[:index, :-1]
    train_y = data[:index, -1]
    # The file has a single label column, but this is a two-class problem,
    # so the label is expanded into two columns here. Since 1 means > 50k
    # and 0 means < 50k, 1 - train_y is the probability of < 50k (1 when
    # certain, 0 otherwise); this makes the labels directly comparable with
    # the two sigmoid outputs when computing the error.
    a = 1 - train_y
    # Transpose the (2, N) stack so each row holds one sample's two labels.
    train_y = np.vstack((train_y, a)).T
    test_x = data[index:, :-1]
    test_y = data[index:, -1]
    a = 1 - test_y
    test_y = np.vstack((test_y, a)).T
    return train_x, train_y, test_x, test_y
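As a quick sanity check (my addition, not part of the original files; it assumes the CSV exists at the path above), the shapes returned by the loader can be printed:

train_x, train_y, test_x, test_y = read2train()
print(train_x.shape, train_y.shape)  # with 4000 rows: (3200, n_features) (3200, 2)
print(test_x.shape, test_y.shape)    # (800, n_features) (800, 2)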
# util.py
import numpy as np

def softmax(x):
    if x.ndim == 2:  # batched input: normalize each row independently
        x = x - np.max(x, axis=1, keepdims=True)  # overflow countermeasure
        return np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True)
    x = x - np.max(x)  # overflow countermeasure
    return np.exp(x) / np.sum(np.exp(x))
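A quick check with made-up logits confirms that the batched branch turns each row into a probability distribution:

scores = np.array([[2.0, 1.0],
                   [0.5, 3.0]])  # illustrative values only
p = softmax(scores)
print(p.sum(axis=1))  # each row sums to 1: [1. 1.]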
'''
In the one-hot representation, the cross-entropy terms where t is 0 are
themselves 0, so those elements can be skipped: we only need the network's
output at the correct-label position. The 1e-7 prevents log(0), which would
give negative infinity.
'''
def cross_entropy_error(y, t):
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)
    # If the supervision data is one-hot, convert it to correct-label indices.
    if t.size == y.size:
        t = t.argmax(axis=1)
    batch_size = int(y.shape[0])
    return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size
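A tiny worked example (values made up for illustration): with one-hot targets, the loss is just the average of -log of the probability assigned to each correct class:

y = np.array([[0.8, 0.2],
              [0.3, 0.7]])  # illustrative softmax outputs
t = np.array([[1, 0],
              [0, 1]])      # one-hot targets
# -(log(0.8) + log(0.7)) / 2 ≈ 0.290
print(cross_entropy_error(y, t))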
# Momentum update for the parameters
class Momentum:
    def __init__(self, lr=0.01, momentum=0.9):
        self.lr = lr
        self.momentum = momentum
        self.v = None

    def update(self, params, grads):
        if self.v is None:
            self.v = {}
            for key, val in params.items():
                self.v[key] = np.zeros_like(val)
        for key in params.keys():
            self.v[key] = self.momentum * self.v[key] - self.lr * grads[key]
            params[key] += self.v[key]
class AdaGrad:
    def __init__(self, lr=0.01):
        self.lr = lr
        self.h = None

    def update(self, params, grads):
        if self.h is None:
            self.h = {}
            for key, val in params.items():
                self.h[key] = np.zeros_like(val)
        for key in params.keys():
            self.h[key] += grads[key] ** 2
            params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7)
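For reference, the two classes implement the standard update rules below, where \eta is the learning rate, \alpha the momentum coefficient, \odot elementwise multiplication, and \varepsilon = 10^{-7} in the code:

v \leftarrow \alpha v - \eta \frac{\partial L}{\partial W}, \qquad W \leftarrow W + v

h \leftarrow h + \frac{\partial L}{\partial W} \odot \frac{\partial L}{\partial W}, \qquad W \leftarrow W - \frac{\eta}{\sqrt{h} + \varepsilon} \frac{\partial L}{\partial W}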
# Affine (fully connected) layer: forward and backward passes for x·w + b
class Affine:
    def __init__(self, w, b):
        self.w = w
        self.b = b
        self.x = None
        self.dw = None
        self.db = None

    def forward(self, x):
        self.x = x
        return np.dot(x, self.w) + self.b

    def backward(self, dout):
        self.dw = np.dot(self.x.T, dout)
        dx = np.dot(dout, self.w.T)
        self.db = np.sum(dout, axis=0)
        return dx
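The backward formulas come from differentiating y = xw + b with standard matrix calculus; dout is \partial L / \partial y:

\frac{\partial L}{\partial x} = \frac{\partial L}{\partial y} w^{\mathsf T}, \qquad \frac{\partial L}{\partial w} = x^{\mathsf T} \frac{\partial L}{\partial y}, \qquad \frac{\partial L}{\partial b} = \sum_{i=1}^{N} \left(\frac{\partial L}{\partial y}\right)_i

where the sum runs over the N samples in the batch, hence np.sum(dout, axis=0).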
# Sigmoid layer: forward and backward passes of the sigmoid function
class Sigmoid:
    def __init__(self):
        self.out = None

    def forward(self, x):
        self.out = 1 / (1 + np.exp(-x))
        return self.out

    def backward(self, dout):
        dx = dout * (1 - self.out) * self.out
        return dx
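The backward pass relies on the standard identity for the sigmoid derivative, which is why caching self.out is all that is needed:

\sigma(x) = \frac{1}{1 + e^{-x}}, \qquad \frac{d\sigma}{dx} = \sigma(x)\,\bigl(1 - \sigma(x)\bigr)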
# After softmax turns the scores into probabilities, compute the
# cross-entropy error. The backward pass of this layer is y - t; the final
# division by batch_size means the error passed back to the earlier layers
# is per sample. (I tried omitting the division and convergence seemed to
# take larger steps, but this form follows the reference book, so I kept it.)
class SoftMaxWithLoss:
    def __init__(self):
        self.loss = None  # cross-entropy error
        self.y = None     # softmax output
        self.t = None     # supervision data

    def forward(self, x, t):
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)
        return self.loss

    def backward(self):
        batch_size = self.y.shape[0]
        dx = (self.y - self.t) / batch_size
        return dx
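The reason backward returns y - t (before the batch-size division) is the standard softmax-plus-cross-entropy simplification:

y_k = \frac{e^{x_k}}{\sum_j e^{x_j}}, \qquad L = -\sum_k t_k \log y_k \quad\Longrightarrow\quad \frac{\partial L}{\partial x_k} = y_k - t_k

using \sum_k t_k = 1 for one-hot t.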
# logistic_regression.py
from util import *
from collections import OrderedDict

class Net:
    def __init__(self, input_size, output_size):
        self.params = {}
        self.params['w1'] = np.random.randn(input_size, output_size)
        self.params['b1'] = np.zeros(output_size)
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['w1'], self.params['b1'])
        self.layers['Sigmoid1'] = Sigmoid()
        self.lastLayer = SoftMaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        y = self.predict(x)
        return self.lastLayer.forward(y, t)

    def gradient(self, x, t):
        # Forward pass fills the layer caches, then backpropagate.
        loss = self.loss(x, t)
        dout = self.lastLayer.backward()
        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)
        grads = {}
        grads['w1'] = self.layers['Affine1'].dw
        grads['b1'] = self.layers['Affine1'].db
        return grads

    def accuracy(self, x, t):
        y = self.predict(x)
        # Compare predicted classes (argmax over the two outputs) with the
        # true classes encoded in the two-column labels.
        correct = np.argmax(y, axis=1) == np.argmax(t, axis=1)
        return np.sum(correct) / float(x.shape[0])
# main script
from read_Data import *
from logistic_regression import *

train_x, train_y, test_x, test_y = read2train()
network = Net(train_x.shape[1], 2)
Ada = AdaGrad(lr=0.001)
Mom = Momentum()
for i in range(100000):
    grads = network.gradient(train_x, train_y)
    # Mom.update(network.params, grads)
    Ada.update(network.params, grads)
    if i % 1000 == 0:
        acc = network.accuracy(test_x, test_y)
        print(i, acc)
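To verify the backpropagation above, a numerical gradient check can be run (ideally right after constructing the network, before the long training loop). The sketch below is my own addition, not part of the original files; numerical_gradient is a hypothetical helper:

# Central finite differences over every element of x (my addition).
def numerical_gradient(f, x, eps=1e-4):
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        orig = x[idx]
        x[idx] = orig + eps
        fxh1 = f()
        x[idx] = orig - eps
        fxh2 = f()
        grad[idx] = (fxh1 - fxh2) / (2 * eps)
        x[idx] = orig  # restore the weight
        it.iternext()
    return grad

x_batch, t_batch = train_x[:5], train_y[:5]
analytic = network.gradient(x_batch, t_batch)['w1']
numeric = numerical_gradient(lambda: network.loss(x_batch, t_batch),
                             network.params['w1'])
print(np.max(np.abs(analytic - numeric)))  # should be close to 0

This works because network.params['w1'] is the same array object the Affine layer holds, so perturbing it in place changes the forward pass (the same aliasing the optimizers rely on).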
In the end the accuracy reaches over 90%.
(Here the original post included images illustrating how the formulas above are derived.)
P.S.: I'm a beginner, and there is little material online about Professor 李宏毅 (Hung-yi Lee)'s homework 2, so I wrote this up and posted it. Neither the depth nor the polish of the code is anything to boast about, but I still hope it helps others who are just getting started. Just go for it!
Feel free to discuss in the comments below so we can improve together!