First, the data file: https://pan.baidu.com/s/1BPCNWVSFUG_zryJuk4CiXA
This seems to be the only file findable online: a 4000×58 table whose last column is the label (0 means salary below 50k, 1 means salary above 50k). The first 57 columns are a person's attributes, and the task is to predict from those attributes whether the person's salary exceeds 50k.
Since there is only this one file, 80% of it is used for training and the rest for testing. The code below is split across several files, matching the imports that appear later: read_Data.py (data loading), util.py (layers and optimizers), logistic_regression.py (the network), and a main training script.
# read_Data.py
import numpy as np
import pandas as pd

def read2train():
    path = 'F:\\python_book\\machine_learning\\spam_train.csv'
    t = pd.read_csv(path)
    t = t.iloc[:, 1:]  # drop the leading id column
    data = np.array(t, float)
    index = int(data.shape[0] * 0.8)  # 80/20 train/test split
    train_x = data[:index, :-1]
    train_y = data[:index, -1]
    # The file has a single label column, but this is a two-class problem,
    # so the label is expanded into two columns here. Since 1 means > 50k
    # and 0 means < 50k, 1 - train_y is the probability of < 50k (1 when
    # certain, 0 otherwise); this makes the labels directly comparable with
    # the two sigmoid outputs when computing the error.
    a = 1 - train_y
    # Transpose the (2, N) stack so each row holds one sample's two labels.
    train_y = np.vstack((train_y, a)).T
    test_x = data[index:, :-1]
    test_y = data[index:, -1]
    a = 1 - test_y
    test_y = np.vstack((test_y, a)).T
    return train_x, train_y, test_x, test_y
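As a quick sanity check (my addition, not part of the original files; it assumes the CSV exists at the path above), the shapes returned by the loader can be printed:

train_x, train_y, test_x, test_y = read2train()
print(train_x.shape, train_y.shape)  # with 4000 rows: (3200, n_features) (3200, 2)
print(test_x.shape, test_y.shape)    # (800, n_features) (800, 2)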
# util.py
import numpy as np

def softmax(x):
    if x.ndim == 2:  # batched input: normalize each row independently
        x = x - np.max(x, axis=1, keepdims=True)  # overflow countermeasure
        return np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True)
    x = x - np.max(x)  # overflow countermeasure
    return np.exp(x) / np.sum(np.exp(x))
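A quick check with made-up logits confirms that the batched branch turns each row into a probability distribution:

scores = np.array([[2.0, 1.0],
                   [0.5, 3.0]])  # illustrative values only
p = softmax(scores)
print(p.sum(axis=1))  # each row sums to 1: [1. 1.]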
'''
In the one-hot representation, the cross-entropy terms where t is 0 are
themselves 0, so those elements can be skipped: we only need the network's
output at the correct-label position. The 1e-7 prevents log(0), which would
give negative infinity.
'''
def cross_entropy_error(y, t):
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)
    # If the supervision data is one-hot, convert it to correct-label indices.
    if t.size == y.size:
        t = t.argmax(axis=1)
    batch_size = int(y.shape[0])
    return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size
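A tiny worked example (values made up for illustration): with one-hot targets, the loss is just the average of -log of the probability assigned to each correct class:

y = np.array([[0.8, 0.2],
              [0.3, 0.7]])  # illustrative softmax outputs
t = np.array([[1, 0],
              [0, 1]])      # one-hot targets
# -(log(0.8) + log(0.7)) / 2 ≈ 0.290
print(cross_entropy_error(y, t))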
# Momentum update for the parameters
class Momentum:
    def __init__(self, lr=0.01, momentum=0.9):
        self.lr = lr
        self.momentum = momentum
        self.v = None

    def update(self, params, grads):
        if self.v is None:
            self.v = {}
            for key, val in params.items():
                self.v[key] = np.zeros_like(val)
        for key in params.keys():
            self.v[key] = self.momentum * self.v[key] - self.lr * grads[key]
            params[key] += self.v[key]
class AdaGrad:
    def __init__(self, lr=0.01):
        self.lr = lr
        self.h = None

    def update(self, params, grads):
        if self.h is None:
            self.h = {}
            for key, val in params.items():
                self.h[key] = np.zeros_like(val)
        for key in params.keys():
            self.h[key] += grads[key] ** 2
            params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7)
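For reference, the two classes implement the standard update rules below, where \eta is the learning rate, \alpha the momentum coefficient, \odot elementwise multiplication, and \varepsilon = 10^{-7} in the code:

v \leftarrow \alpha v - \eta \frac{\partial L}{\partial W}, \qquad W \leftarrow W + v

h \leftarrow h + \frac{\partial L}{\partial W} \odot \frac{\partial L}{\partial W}, \qquad W \leftarrow W - \frac{\eta}{\sqrt{h} + \varepsilon} \frac{\partial L}{\partial W}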
# Affine (fully connected) layer: forward and backward passes for x·w + b
class Affine:
    def __init__(self, w, b):
        self.w = w
        self.b = b
        self.x = None
        self.dw = None
        self.db = None

    def forward(self, x):
        self.x = x
        return np.dot(x, self.w) + self.b

    def backward(self, dout):
        self.dw = np.dot(self.x.T, dout)
        dx = np.dot(dout, self.w.T)
        self.db = np.sum(dout, axis=0)
        return dx
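The backward formulas come from differentiating y = xw + b with standard matrix calculus; dout is \partial L / \partial y:

\frac{\partial L}{\partial x} = \frac{\partial L}{\partial y} w^{\mathsf T}, \qquad \frac{\partial L}{\partial w} = x^{\mathsf T} \frac{\partial L}{\partial y}, \qquad \frac{\partial L}{\partial b} = \sum_{i=1}^{N} \left(\frac{\partial L}{\partial y}\right)_i

where the sum runs over the N samples in the batch, hence np.sum(dout, axis=0).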
# Sigmoid layer: forward and backward passes of the sigmoid function
class Sigmoid:
    def __init__(self):
        self.out = None

    def forward(self, x):
        self.out = 1 / (1 + np.exp(-x))
        return self.out

    def backward(self, dout):
        dx = dout * (1 - self.out) * self.out
        return dx
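The backward pass relies on the standard identity for the sigmoid derivative, which is why caching self.out is all that is needed:

\sigma(x) = \frac{1}{1 + e^{-x}}, \qquad \frac{d\sigma}{dx} = \sigma(x)\,\bigl(1 - \sigma(x)\bigr)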
# After softmax turns the scores into probabilities, compute the
# cross-entropy error. The backward pass of this layer is y - t; the final
# division by batch_size means the error passed back to the earlier layers
# is per sample. (I tried omitting the division and convergence seemed to
# take larger steps, but this form follows the reference book, so I kept it.)
class SoftMaxWithLoss:
    def __init__(self):
        self.loss = None  # cross-entropy error
        self.y = None     # softmax output
        self.t = None     # supervision data

    def forward(self, x, t):
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)
        return self.loss

    def backward(self):
        batch_size = self.y.shape[0]
        dx = (self.y - self.t) / batch_size
        return dx
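The reason backward returns y - t (before the batch-size division) is the standard softmax-plus-cross-entropy simplification:

y_k = \frac{e^{x_k}}{\sum_j e^{x_j}}, \qquad L = -\sum_k t_k \log y_k \quad\Longrightarrow\quad \frac{\partial L}{\partial x_k} = y_k - t_k

using \sum_k t_k = 1 for one-hot t.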
# logistic_regression.py
from util import *
from collections import OrderedDict

class Net:
    def __init__(self, input_size, output_size):
        self.params = {}
        self.params['w1'] = np.random.randn(input_size, output_size)
        self.params['b1'] = np.zeros(output_size)
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['w1'], self.params['b1'])
        self.layers['Sigmoid1'] = Sigmoid()
        self.lastLayer = SoftMaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        y = self.predict(x)
        return self.lastLayer.forward(y, t)

    def gradient(self, x, t):
        # Forward pass fills the layer caches, then backpropagate.
        loss = self.loss(x, t)
        dout = self.lastLayer.backward()
        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)
        grads = {}
        grads['w1'] = self.layers['Affine1'].dw
        grads['b1'] = self.layers['Affine1'].db
        return grads

    def accuracy(self, x, t):
        y = self.predict(x)
        # Compare predicted classes (argmax over the two outputs) with the
        # true classes encoded in the two-column labels.
        correct = np.argmax(y, axis=1) == np.argmax(t, axis=1)
        return np.sum(correct) / float(x.shape[0])
# main script
from read_Data import *
from logistic_regression import *

train_x, train_y, test_x, test_y = read2train()
network = Net(train_x.shape[1], 2)
Ada = AdaGrad(lr=0.001)
Mom = Momentum()
for i in range(100000):
    grads = network.gradient(train_x, train_y)
    # Mom.update(network.params, grads)
    Ada.update(network.params, grads)
    if i % 1000 == 0:
        acc = network.accuracy(test_x, test_y)
        print(i, acc)
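To verify the backpropagation above, a numerical gradient check can be run (ideally right after constructing the network, before the long training loop). The sketch below is my own addition, not part of the original files; numerical_gradient is a hypothetical helper:

# Central finite differences over every element of x (my addition).
def numerical_gradient(f, x, eps=1e-4):
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        orig = x[idx]
        x[idx] = orig + eps
        fxh1 = f()
        x[idx] = orig - eps
        fxh2 = f()
        grad[idx] = (fxh1 - fxh2) / (2 * eps)
        x[idx] = orig  # restore the weight
        it.iternext()
    return grad

x_batch, t_batch = train_x[:5], train_y[:5]
analytic = network.gradient(x_batch, t_batch)['w1']
numeric = numerical_gradient(lambda: network.loss(x_batch, t_batch),
                             network.params['w1'])
print(np.max(np.abs(analytic - numeric)))  # should be close to 0

This works because network.params['w1'] is the same array object the Affine layer holds, so perturbing it in place changes the forward pass (the same aliasing the optimizers rely on).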
In the end the accuracy reaches over 90%.
(Here the original post included images illustrating how the formulas above are derived.)
P.S.: I'm a beginner, and there is little material online about Professor 李宏毅 (Hung-yi Lee)'s homework 2, so I wrote this up and posted it. Neither the depth nor the polish of the code is anything to boast about, but I still hope it helps others who are just getting started. Just go for it!
Feel free to discuss in the comments below so we can improve together!