一、問題描述
時裝類別識別問題是預測一張圖片中的時裝類別。
數據集:fashionMnist
訓練集:60000張時裝圖片,每張圖片是28*28的灰度矩陣,有一個{0,1,...,9}的類標籤,表示時裝的類別。測試數據:10000張測試數據。
二、實驗目的
導入fashionMnist數據。
設計神經網絡算法,完成時裝類別的預測問題。
注意:fashionMnist數據集的導入,會遇到一些問題,自主嘗試解決。
三、實驗內容
1. 數據導入:讀取 fashionMnist 的 idx 格式二進制文件(訓練/測試圖片與標籤)
2. 數據處理
四、實驗結果及分析
五、完整代碼
model.py
import numpy as np
import struct
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler
import machine_learning.homework.week13.neural as nn
def read_image(file_name):
    """Parse an IDX3-format image file and return a (num_images, width*height) array.

    The IDX3 layout is four big-endian uint32 header fields
    (magic, image count, rows, cols) followed by one unsigned byte per pixel.
    """
    # Context manager guarantees the handle is closed (the original
    # opened the file and never closed it).
    with open(file_name, "rb") as fh:
        content = fh.read()
    magic, img_num, width, height = struct.unpack_from('>IIII', content, 0)
    offset = struct.calcsize('>IIII')
    n_pixels = img_num * width * height
    fmt = '>' + str(n_pixels) + 'B'  # e.g. '>47040000B' for the 60000-image train set
    pixels = struct.unpack_from(fmt, content, offset)
    # One flattened row of width*height pixel values per image.
    return np.array(pixels).reshape((img_num, width * height))
def out_image(img):
    """Display a single image (e.g. a 28x28 grayscale matrix) in a matplotlib window."""
    plt.figure()
    plt.imshow(img)
    plt.show()
def read_label(file_name):
    """Parse an IDX1-format label file and return a 1-D array of labels.

    The IDX1 layout is two big-endian uint32 header fields
    (magic, label count) followed by one unsigned byte per label.
    """
    # Context manager guarantees the handle is closed (the original
    # opened the file and never closed it).
    with open(file_name, "rb") as fh:
        content = fh.read()
    magic, label_num = struct.unpack_from('>II', content, 0)
    offset = struct.calcsize('>II')
    fmt = '>' + str(label_num) + 'B'  # e.g. '>60000B' for the train labels
    labels = struct.unpack_from(fmt, content, offset)
    return np.array(labels)
def process_features(X):
    """Rescale every feature column of X into the [0, 1] range."""
    # float multiplication up-casts integer input before scaling.
    scaler = MinMaxScaler(feature_range=(0, 1))
    return scaler.fit_transform(X * 1.0)
def get_data(m_train, m_test,
             data_dir="D:/PyCharm/Project/machine_learning/machine_learning/homework/week13"):
    """Load fashion-MNIST train/test subsets from idx-format files.

    Parameters
    ----------
    m_train, m_test : number of training / test samples to keep.
    data_dir : directory containing the four idx files. The default is
        the previously hard-coded location, so existing callers are
        unaffected; new callers can point elsewhere.

    Returns (train_x, test_x, train_y, test_y) with features scaled to [0, 1].
    """
    # Build the four file paths from the (now configurable) directory.
    train_image = data_dir + "/train-images-idx3-ubyte"
    test_image = data_dir + "/t10k-images-idx3-ubyte"
    train_label = data_dir + "/train-labels-idx1-ubyte"
    test_label = data_dir + "/t10k-labels-idx1-ubyte"
    # Read raw pixels and labels.
    train_x = read_image(train_image)
    test_x = read_image(test_image)
    train_y = read_label(train_label)
    test_y = read_label(test_label)
    train_x = train_x.reshape(-1, 784)
    test_x = test_x.reshape(-1, 784)
    # Normalize to [0, 1] and keep only the requested number of samples.
    train_x = (train_x / 255)[:m_train]
    test_x = (test_x / 255)[:m_test]
    # NOTE(review): MinMaxScaler after /255 rescales each column again;
    # redundant but kept for exact compatibility with the original.
    train_x = process_features(train_x)
    test_x = process_features(test_x)
    train_y = train_y[:m_train]
    test_y = test_y[:m_test]
    return train_x, test_x, train_y, test_y
def create_layers():
    """Build the 784-300-100-10 fully connected network topology."""
    n_features = 28 * 28   # input dimensionality (flattened image)
    n_hidden1 = 300        # first hidden layer width
    n_hidden2 = 100        # second hidden layer width
    n_classes = 10         # output neurons, one per clothing class
    relu = nn.ReLUActivator()
    # Two ReLU hidden layers; the output layer keeps the default
    # (identity) activation so logits feed straight into the loss.
    return [
        nn.Layer(n_features, n_hidden1, activator=relu),
        nn.Layer(n_hidden1, n_hidden2, activator=relu),
        nn.Layer(n_hidden2, n_classes),
    ]
def convert_to_vector(y, k=None):
    """One-hot encode integer class labels.

    Parameters
    ----------
    y : sequence of int class labels.
    k : number of classes; defaults to max(y) + 1, matching the
        original behavior. Passing k explicitly also makes the
        function work for an empty label sequence.

    Returns an (len(y), k) array of 0.0/1.0 values.
    """
    y = np.asarray(y, dtype=int)
    if k is None:
        k = int(y.max()) + 1  # infer class count from the labels seen
    v = np.zeros((len(y), k))
    # Vectorized fancy-index assignment replaces the Python loop.
    v[np.arange(len(y)), y] = 1
    return v
def run():
    """Train the network on a fashion-MNIST subset and print test accuracy."""
    n_train, n_test = 2000, 200
    X_train, X_test, y_train, y_test = get_data(n_train, n_test)
    # Sanity check on the sampled shapes.
    print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
    model = nn.NeuralNetwork(create_layers(), nn.SoftmaxCrossEntropy())
    iterations = 5000
    model.fit(X_train, convert_to_vector(y_train), iterations, 0.01)
    # Raw network outputs -> class probabilities -> predicted labels.
    scores = model.predict(X_test)
    proba = nn.softmax(scores)
    y_pred = np.argmax(proba, axis=1)
    print(accuracy_score(y_test, y_pred))
if __name__ == "__main__":
    # Script entry point: train and evaluate the model.
    run()
neural.py
import numpy as np
class IdentityActivator:
    """Linear (pass-through) activation: f(s) = s."""

    def value(self, s):
        """Return the input unchanged."""
        return s

    def derivative(self, s):
        """The derivative of the identity is 1 everywhere."""
        return 1
class ReLUActivator:
    """Rectified linear unit activation: f(s) = max(0, s)."""

    def value(self, s):
        """Element-wise max(0, s)."""
        return np.maximum(0, s)

    def derivative(self, s):
        """Sub-gradient of ReLU: 1 where s > 0, else 0.

        Uses the builtin ``int`` — the ``np.int`` alias was removed in
        NumPy 1.24, so the original raised AttributeError on modern NumPy.
        """
        return (s > 0).astype(int)
class Layer:
    """One fully connected layer: outputs = activator(W @ inputs + b).

    Inputs and outputs are column vectors of shape (n, 1).
    """

    def __init__(self, n_input, n_output, activator=IdentityActivator()):
        self.activator = activator
        # Xavier/Glorot uniform initialization bound.
        r = np.sqrt(6.0 / (n_input + n_output))
        self.W = np.random.uniform(-r, r, (n_output, n_input))
        self.b = np.zeros((n_output, 1))
        self.outputs = np.zeros((n_output, 1))

    def forward(self, inputs):
        # Cache inputs and pre-activation sums for back-propagation.
        self.inputs = inputs
        self.sums = self.W.dot(inputs) + self.b
        self.outputs = self.activator.value(self.sums)

    def back_propagation(self, delta_in, learning_rate):
        # Gradient with respect to the pre-activation sums.
        d = self.activator.derivative(self.sums) * delta_in
        # NOTE: delta_out must be computed with the *pre-update* W,
        # which is why this line precedes the weight update below.
        self.delta_out = self.W.T.dot(d)
        self.W_grad = d.dot(self.inputs.T)
        self.b_grad = d
        # In-place SGD step on the layer's own parameters.
        self.W -= learning_rate * self.W_grad
        self.b -= learning_rate * self.b_grad
class MSE:
    """Element-wise squared-error loss between target y and output v."""

    def value(self, y, v):
        """Return (v - y)^2."""
        diff = v - y
        return diff ** 2

    def derivative(self, y, v):
        """Gradient of (v - y)^2 with respect to v."""
        return (v - y) * 2
def softmax(v):
    """Column-wise softmax of v.

    Each column of ``v`` is one sample's logits, matching the
    (n_classes, 1) column vectors this network produces; 1-D input
    is handled as a single logit vector.

    Fixes two defects in the original:
    * it divided ROWS by COLUMN sums (``e[i] /= s[i]``), so for a
      (k, 1) column vector only the first row was normalized and the
      result did not sum to 1;
    * ``np.exp`` overflowed for large logits — subtracting the
      column max first is mathematically a no-op but numerically safe.
    """
    v = np.asarray(v, dtype=float)
    e = np.exp(v - v.max(axis=0))  # shift for numerical stability
    return e / e.sum(axis=0)
class SoftmaxCrossEntropy:
    """Cross-entropy loss applied on top of a softmax layer."""

    def value(self, y, v):
        """Cross-entropy between the one-hot target y and softmax(v)."""
        probs = softmax(v)
        return -np.sum(y * np.log(probs))

    def derivative(self, y, v):
        """Combined softmax + cross-entropy gradient: softmax(v) - y."""
        return softmax(v) - y
class NeuralNetwork:
    """A feed-forward network trained with single-sample SGD."""

    def __init__(self, layers, loss):
        self.layers = layers
        self.loss = loss

    def forward(self, x):
        """Propagate a column vector through every layer; return the output."""
        signal = x
        for layer in self.layers:
            layer.forward(signal)
            signal = layer.outputs
        return signal

    def back_propagation(self, y, outputs, learning_rate):
        """Walk the layers in reverse, feeding each its incoming delta."""
        delta = self.loss.derivative(y, outputs)
        for layer in reversed(self.layers):
            layer.back_propagation(delta, learning_rate)
            delta = layer.delta_out

    def fit(self, X, y, N, learning_rate):
        """Run N iterations of stochastic gradient descent on (X, y)."""
        for _ in range(N):
            idx = np.random.randint(0, len(X))
            out = self.forward(X[idx].reshape(-1, 1))
            self.back_propagation(y[idx].reshape(-1, 1), out, learning_rate)

    def predict(self, X):
        """Return raw network outputs, one row per sample in X."""
        rows = [self.forward(sample.reshape(-1, 1)).reshape(-1) for sample in X]
        return np.array(rows)