這幾天又在玩樹莓派,先是搞了個物聯網,又在嘗試在樹莓派上搞一些簡單的神經網絡,這次搞得是LSTM識別mnist手寫數字識別
訓練代碼在電腦上,cpu就能訓練,很快的:
import torch import torch.nn as nn import torchvision import numpy as np import os from PIL import Image # 定義LSTM模型 class LSTMModel(nn.Module): def __init__(self, input_size, hidden_size, num_layers, num_classes): super(LSTMModel, self).__init__() self.hidden_size = hidden_size self.num_layers = num_layers self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True) self.fc = nn.Linear(hidden_size, num_classes) def forward(self, x): h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device) c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device) out, (h_n,c_n) = self.lstm(x, (h0, c0)) out = self.fc(out[:, -1, :]) return out # 設置超參數 input_size = 28 sequence_length = 28 hidden_size = 128 num_layers = 2 num_classes = 10 batch_size = 100 num_epochs = 1 learning_rate = 0.001 # 加載MNIST數據集 train_dataset = torchvision.datasets.MNIST(root='./data', train=True, transform=torchvision.transforms.ToTensor(), download=True) test_dataset = torchvision.datasets.MNIST(root='./data', train=False, transform=torchvision.transforms.ToTensor()) # 創建數據加載器 train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True) test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False) # 創建LSTM模型 model = LSTMModel(input_size, hidden_size, num_layers, num_classes) # 定義損失函數和優化器 criterion = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) # 訓練模型 total_step = len(train_loader) for epoch in range(num_epochs): for i, (images, labels) in enumerate(train_loader): images = images.reshape(-1, sequence_length, input_size) outputs = model(images) loss = criterion(outputs, labels) predictions = torch.argmax(outputs,dim=1) # acc = torch.eq(predictions,labels).sum().item() # print(acc) optimizer.zero_grad() loss.backward() optimizer.step() if (i + 1) % 100 == 0: print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch + 1, num_epochs, i + 1, total_step, loss.item())) # 保存模型 torch.save(model.state_dict(), 'model.pth') # 加載模型 model.load_state_dict(torch.load('model.pth')) with torch.no_grad(): for i, (images, labels) in enumerate(test_loader): images = images.reshape(-1, sequence_length, input_size) outputs = model(images) predictions = torch.argmax(outputs,dim=1) acc = torch.eq(predictions,labels).sum().item() print(acc) # folder_path = './mnist_pi' # 替換爲圖片所在的文件夾路徑 # def infer_images_in_folder(folder_path): # with torch.no_grad(): # for file_name in os.listdir(folder_path): # file_path = os.path.join(folder_path, file_name) # if os.path.isfile(file_path) and file_name.endswith(('.jpg', '.jpeg', '.png')): # image = Image.open(file_path) # label = file_name.split(".")[0].split("_")[1] # image = np.array(image)/255.0 # image = np.expand_dims(image,axis=0) # image= torch.tensor(image).to(torch.float32) # logits = model(image) # predicted_class = torch.argmax(logits) # print("file_path:",file_path,"img size:",image.shape,"label:",label,'Predicted class:', predicted_class) # break # infer_images_in_folder(folder_path) # 保存模型參數爲numpy的數組格式 model_params = {} # print(list(model.parameters())) for name, param in model.named_parameters(): model_params[name] = param.detach().numpy() print(name,param.shape) np.savez('model.npz', **model_params)
然後需要自己在dataset裏導出一些圖片:我保存在了mnist_pi文件夾下,“_”後面的是標籤,主要是在pc端導出保存到樹莓派下
樹莓派推理端的代碼,需要numpy手動重新搭建網絡,並且需要手動實現雙層的LSTM神經網絡,然後加載那些保存的矩陣參數,做矩陣乘法和加法
import numpy as np import os from PIL import Image # 加載模型參數 model_data = np.load('model.npz') ''' weight_ih_l[k] : the learnable input-hidden weights of the :math:`\text{k}^{th}` layer `(W_ii|W_if|W_ig|W_io)`, of shape `(4*hidden_size, input_size)` for `k = 0`. Otherwise, the shape is `(4*hidden_size, num_directions * hidden_size)`. If ``proj_size > 0`` was specified, the shape will be `(4*hidden_size, num_directions * proj_size)` for `k > 0` weight_hh_l[k] : the learnable hidden-hidden weights of the :math:`\text{k}^{th}` layer `(W_hi|W_hf|W_hg|W_ho)`, of shape `(4*hidden_size, hidden_size)`. If ``proj_size > 0`` was specified, the shape will be `(4*hidden_size, proj_size)`. bias_ih_l[k] : the learnable input-hidden bias of the :math:`\text{k}^{th}` layer `(b_ii|b_if|b_ig|b_io)`, of shape `(4*hidden_size)` bias_hh_l[k] : the learnable hidden-hidden bias of the :math:`\text{k}^{th}` layer `(b_hi|b_hf|b_hg|b_ho)`, of shape `(4*hidden_size)` ''' # 提取模型參數 lstm_weight_ih_l0 = model_data['lstm.weight_ih_l0'] lstm_weight_hh_l0 = model_data['lstm.weight_hh_l0'] lstm_bias_ih_l0 = model_data['lstm.bias_ih_l0'] lstm_bias_hh_l0 = model_data['lstm.bias_hh_l0'] lstm_weight_ih_l1 = model_data['lstm.weight_ih_l1'] lstm_weight_hh_l1 = model_data['lstm.weight_hh_l1'] lstm_bias_ih_l1 = model_data['lstm.bias_ih_l1'] lstm_bias_hh_l1 = model_data['lstm.bias_hh_l1'] fc_weight = model_data['fc.weight'] fc_bias = model_data['fc.bias'] # print(lstm_weight_ih_l0.shape,lstm_weight_hh_l0.shape) # print(lstm_bias_ih_l0.shape,lstm_bias_hh_l0.shape) # 定義LSTM模型 def lstm_model(inputs): ''' 踩到兩個坑,一個是矩陣形狀都是這種(4*hidden_size, hidden_size)合併的,需要拆分。 另一個坑是,兩層的lstm層需要每個時間步的輸出都輸入到下一層,而不是最後一個時間步的數據給下一層 ''' batch_size, sequence_length, input_size = inputs.shape hidden_size = lstm_weight_hh_l0.shape[1] num_classes = fc_weight.shape[0] h0 = np.zeros((batch_size, hidden_size)) c0 = np.zeros((batch_size, hidden_size)) # 第一層LSTM h_l0, c_l0 = np.zeros_like(h0), np.zeros_like(c0) out_0 = [] for t in range(sequence_length): x = inputs[:, t, :] ''' i_t = \sigma(W_{ii} x_t + b_{ii} + W_{hi} h_{t-1} + b_{hi}) \\ f_t = \sigma(W_{if} x_t + b_{if} + W_{hf} h_{t-1} + b_{hf}) \\ g_t = \tanh(W_{ig} x_t + b_{ig} + W_{hg} h_{t-1} + b_{hg}) \\ o_t = \sigma(W_{io} x_t + b_{io} + W_{ho} h_{t-1} + b_{ho}) \\ c_t = f_t \odot c_{t-1} + i_t \odot g_t \\ h_t = o_t \odot \tanh(c_t) \\ ''' # 輸入門 i_t = sigmoid(np.dot(x, lstm_weight_ih_l0[:128].T) + np.dot(h_l0, lstm_weight_hh_l0[:128].T) + lstm_bias_ih_l0[:128] + lstm_bias_hh_l0[:128]) # 遺忘門 f_t = sigmoid(np.dot(x, lstm_weight_ih_l0[128:256].T) + np.dot(h_l0, lstm_weight_hh_l0[128:256].T) + lstm_bias_ih_l0[128:256] + lstm_bias_hh_l0[128:256]) # 候選向量 g_t = np.tanh(np.dot(x, lstm_weight_ih_l0[256:256+128].T) + np.dot(h_l0, lstm_weight_hh_l0[256:256+128].T) + lstm_bias_ih_l0[256:256+128] + lstm_bias_hh_l0[256:256+128]) # 輸出門 o_t = sigmoid(np.dot(x, lstm_weight_ih_l0[256+128:512].T) + np.dot(h_l0, lstm_weight_hh_l0[256+128:512].T) + lstm_bias_ih_l0[256+128:512] + lstm_bias_hh_l0[256+128:512]) # 細胞狀態 c_l0 = f_t * c_l0 + i_t * g_t # 隱藏狀態 h_l0 = o_t * np.tanh(c_l0) out_0.append(h_l0) # 第二層LSTM h_l1, c_l1 = np.zeros_like(h0), np.zeros_like(c0) out_1 = [] for t in range(sequence_length): x = out_0[t] # 輸入門 i_t = sigmoid(np.dot(x, lstm_weight_ih_l1[:128].T) + np.dot(h_l1, lstm_weight_hh_l1[:128].T) + lstm_bias_ih_l1[:128] + lstm_bias_hh_l1[:128]) # 遺忘門 f_t = sigmoid(np.dot(x, lstm_weight_ih_l1[128:256].T) + np.dot(h_l1, lstm_weight_hh_l1[128:256].T) + lstm_bias_ih_l1[128:256] + lstm_bias_hh_l1[128:256]) # 候選向量 g_t = np.tanh(np.dot(x, lstm_weight_ih_l1[256:256+128].T) + np.dot(h_l1, lstm_weight_hh_l1[256:256+128].T) + lstm_bias_ih_l1[256:256+128] + lstm_bias_hh_l1[256:256+128]) # 輸出門 o_t = sigmoid(np.dot(x, lstm_weight_ih_l1[256+128:512].T) + np.dot(h_l1, lstm_weight_hh_l1[256+128:512].T) + lstm_bias_ih_l1[256+128:512] + lstm_bias_hh_l1[256+128:512]) # 細胞狀態 c_l1 = f_t * c_l1 + i_t * g_t # 隱藏狀態 h_l1 = o_t * np.tanh(c_l1) out_1.append(h_l1) # 全連接層 fc_output = np.dot(h_l1, fc_weight.T) + fc_bias predictions = np.argmax(fc_output, axis=1) return predictions # Sigmoid激活函數 def sigmoid(x): return 1 / (1 + np.exp(-x)) folder_path = './mnist_pi' # 替換爲圖片所在的文件夾路徑 def infer_images_in_folder(folder_path): for file_name in os.listdir(folder_path): file_path = os.path.join(folder_path, file_name) if os.path.isfile(file_path) and file_name.endswith(('.jpg', '.jpeg', '.png')): image = Image.open(file_path) label = file_name.split(".")[0].split("_")[1] image = np.array(image)/255.0 image = np.expand_dims(image,axis=0) predicted_class = lstm_model(image) print("file_path:",file_path,"img size:",image.shape,"label:",label,'Predicted class:', predicted_class) infer_images_in_folder(folder_path)
這代碼完全就是numpy推理,不需要安裝pytorch,樹莓派也裝不動pytorch,太重了,下面是推理結果,比之前的MLP網絡慢很多,主要是手動實現的LSTM網絡全靠循環實現。