在樹莓派上實現numpy的LSTM長短期記憶神經網絡做圖像分類，加載pytorch的模型參數，推理mnist手寫數字識別

這幾天又在玩樹莓派，先是搞了個物聯網，接著又嘗試在樹莓派上跑一些簡單的神經網絡，這次做的是用LSTM識別MNIST手寫數字。

訓練代碼在電腦上運行，用CPU就能訓練，速度很快：

import torch
import torch.nn as nn
import torchvision
import numpy as np
import os
from PIL import Image

# LSTM classifier: stacked LSTM followed by a linear layer on the last time step
class LSTMModel(nn.Module):
    """Sequence classifier built from ``nn.LSTM`` and ``nn.Linear``.

    The LSTM is created with ``batch_first=True``, so ``forward`` expects
    input laid out as (batch, sequence, feature). Only the LSTM output of the
    final time step is passed to the linear layer, which produces one score
    per class.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # The attribute names ``lstm`` and ``fc`` become the prefixes of the
        # parameter names in ``state_dict()`` / ``named_parameters()``.
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the class scores for a batch ``x``."""
        # Zero initial hidden and cell states, one slice per layer.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h0 = torch.zeros(state_shape).to(x.device)
        c0 = torch.zeros(state_shape).to(x.device)

        sequence_out, _ = self.lstm(x, (h0, c0))
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)

# Hyperparameters
input_size = 28  # last axis of the reshape applied to each batch; LSTM input size
sequence_length = 28  # middle axis of the reshape applied to each batch
hidden_size = 128  # LSTM hidden size passed to LSTMModel
num_layers = 2  # number of stacked LSTM layers passed to LSTMModel
num_classes = 10  # output size of the final linear layer
batch_size = 100  # batch size used by both data loaders
num_epochs = 1  # number of passes of the training loop over train_loader
learning_rate = 0.001  # learning rate handed to the Adam optimizer

# MNIST datasets stored under ./data; only the training split asks for a download
train_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)
test_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    transform=torchvision.transforms.ToTensor(),
)

# Batch iterators: training batches are shuffled, test batches keep their order
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

# The LSTM classifier to train
model = LSTMModel(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
)

# Cross-entropy loss, optimised with Adam
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

# Train the model
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Reshape the batch to (N, sequence_length, input_size) for the LSTM.
        images = images.reshape(-1, sequence_length, input_size)
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Standard optimisation step: clear old gradients, backpropagate, update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report the loss of the current batch every 100 steps.
        if (i + 1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))

# Save the trained weights
torch.save(model.state_dict(), 'model.pth')

# Reload the weights and evaluate on the test set
model.load_state_dict(torch.load('model.pth'))
model.eval()  # switch to inference mode before evaluating
correct = 0
total = 0
with torch.no_grad():
    for i, (images, labels) in enumerate(test_loader):
        images = images.reshape(-1, sequence_length, input_size)
        outputs = model(images)
        predictions = torch.argmax(outputs, dim=1)
        # Number of correctly classified samples in this batch.
        acc = torch.eq(predictions, labels).sum().item()
        print(acc)
        correct += acc
        total += labels.size(0)

# Overall accuracy across all test batches (skipped if the loader was empty).
if total:
    print('Test accuracy: {:.2f}%'.format(100.0 * correct / total))

# folder_path = './mnist_pi'  # 替換爲圖片所在的文件夾路徑
# def infer_images_in_folder(folder_path):
#     with torch.no_grad():
#         for file_name in os.listdir(folder_path):
#             file_path = os.path.join(folder_path, file_name)
#             if os.path.isfile(file_path) and file_name.endswith(('.jpg', '.jpeg', '.png')):
#                 image = Image.open(file_path)
#                 label = file_name.split(".")[0].split("_")[1]
#                 image = np.array(image)/255.0
#                 image = np.expand_dims(image,axis=0)
#                 image= torch.tensor(image).to(torch.float32)
#                 logits = model(image)
#                 predicted_class = torch.argmax(logits)
#                 print("file_path:",file_path,"img size:",image.shape,"label:",label,'Predicted class:', predicted_class)
#             break

# infer_images_in_folder(folder_path)



# Export every learnable parameter as a NumPy array, keyed by its PyTorch name
model_params = {}
for name, param in model.named_parameters():
    # detach() drops the autograd graph; cpu() makes the conversion work even
    # when the parameter lives on a GPU (Tensor.numpy() needs a CPU tensor).
    model_params[name] = param.detach().cpu().numpy()
    print(name, param.shape)

# One archive entry per parameter, stored under the parameter's name.
np.savez('model.npz', **model_params)

然後需要自己從dataset裏導出一些圖片：我保存在了mnist_pi文件夾下，文件名中“_”後面的部分是標籤。這些圖片在PC端導出後，再保存到樹莓派上。

下面是樹莓派推理端的代碼：需要用numpy手動重新搭建網絡，手動實現雙層的LSTM神經網絡，然後加載那些保存的矩陣參數，做矩陣乘法和加法。

import numpy as np
import os
from PIL import Image

# Load the model parameters from the exported archive.
#
# Layout of the LSTM parameters (quoted from the torch.nn.LSTM documentation):
#   weight_ih_l[k] : the learnable input-hidden weights of the k-th layer
#       (W_ii|W_if|W_ig|W_io), of shape (4*hidden_size, input_size) for k = 0.
#       Otherwise, the shape is (4*hidden_size, num_directions * hidden_size).
#       If proj_size > 0 was specified, the shape will be
#       (4*hidden_size, num_directions * proj_size) for k > 0.
#   weight_hh_l[k] : the learnable hidden-hidden weights of the k-th layer
#       (W_hi|W_hf|W_hg|W_ho), of shape (4*hidden_size, hidden_size).
#       If proj_size > 0 was specified, the shape will be
#       (4*hidden_size, proj_size).
#   bias_ih_l[k] : the learnable input-hidden bias of the k-th layer
#       (b_ii|b_if|b_ig|b_io), of shape (4*hidden_size).
#   bias_hh_l[k] : the learnable hidden-hidden bias of the k-th layer
#       (b_hi|b_hf|b_hg|b_ho), of shape (4*hidden_size).
#
# The archive is opened with a context manager so its file handle is closed
# once the arrays have been read; indexing an entry loads it as an array.
with np.load('model.npz') as model_data:
    # Extract the individual parameters
    lstm_weight_ih_l0 = model_data['lstm.weight_ih_l0']
    lstm_weight_hh_l0 = model_data['lstm.weight_hh_l0']
    lstm_bias_ih_l0 = model_data['lstm.bias_ih_l0']
    lstm_bias_hh_l0 = model_data['lstm.bias_hh_l0']
    lstm_weight_ih_l1 = model_data['lstm.weight_ih_l1']
    lstm_weight_hh_l1 = model_data['lstm.weight_hh_l1']
    lstm_bias_ih_l1 = model_data['lstm.bias_ih_l1']
    lstm_bias_hh_l1 = model_data['lstm.bias_hh_l1']
    fc_weight = model_data['fc.weight']
    fc_bias = model_data['fc.bias']

# print(lstm_weight_ih_l0.shape,lstm_weight_hh_l0.shape)
# print(lstm_bias_ih_l0.shape,lstm_bias_hh_l0.shape)
# Two-layer LSTM forward pass in plain NumPy
def _lstm_layer(inputs, weight_ih, weight_hh, bias_ih, bias_hh):
    """Run one LSTM layer over every time step, starting from zero state.

    The weights and biases hold the four gates stacked along the first axis
    in the order input, forget, candidate, output, so each gate occupies a
    contiguous slice of ``hidden_size`` rows:

        i_t = sigmoid(W_ii x_t + b_ii + W_hi h_{t-1} + b_hi)
        f_t = sigmoid(W_if x_t + b_if + W_hf h_{t-1} + b_hf)
        g_t = tanh(W_ig x_t + b_ig + W_hg h_{t-1} + b_hg)
        o_t = sigmoid(W_io x_t + b_io + W_ho h_{t-1} + b_ho)
        c_t = f_t * c_{t-1} + i_t * g_t
        h_t = o_t * tanh(c_t)

    Args:
        inputs: array of shape (batch, sequence, features).
        weight_ih: array of shape (4*hidden_size, features).
        weight_hh: array of shape (4*hidden_size, hidden_size).
        bias_ih: array of shape (4*hidden_size,).
        bias_hh: array of shape (4*hidden_size,).

    Returns:
        A tuple ``(outputs, h_last)``: the hidden state of every time step,
        shape (batch, sequence, hidden_size), and the final hidden state,
        shape (batch, hidden_size) (all zeros for an empty sequence).
    """
    batch_size, sequence_length, _ = inputs.shape
    # The gate width comes from the weights instead of a hard-coded constant.
    hidden_size = weight_hh.shape[1]

    # The input projection does not depend on the recurrent state, so it is
    # computed for all time steps at once, with both biases folded in.
    x_proj = np.dot(inputs, weight_ih.T) + (bias_ih + bias_hh)
    weight_hh_t = weight_hh.T

    h_t = np.zeros((batch_size, hidden_size))
    c_t = np.zeros((batch_size, hidden_size))
    outputs = np.empty((batch_size, sequence_length, hidden_size))
    for t in range(sequence_length):
        gates = x_proj[:, t, :] + np.dot(h_t, weight_hh_t)
        i_t = sigmoid(gates[:, :hidden_size])                       # input gate
        f_t = sigmoid(gates[:, hidden_size:2 * hidden_size])        # forget gate
        g_t = np.tanh(gates[:, 2 * hidden_size:3 * hidden_size])    # candidate
        o_t = sigmoid(gates[:, 3 * hidden_size:4 * hidden_size])    # output gate
        c_t = f_t * c_t + i_t * g_t                                 # cell state
        h_t = o_t * np.tanh(c_t)                                    # hidden state
        outputs[:, t, :] = h_t
    return outputs, h_t


def lstm_model(inputs):
    """Classify a batch of sequences with the two-layer LSTM and linear head.

    Uses the module-level parameter arrays (``lstm_*_l0``, ``lstm_*_l1``,
    ``fc_weight``, ``fc_bias``) and the module-level ``sigmoid``.

    Two pitfalls this implementation accounts for:
    * each parameter array packs the four gates into one
      (4*hidden_size, ...) array that has to be split per gate;
    * the second layer consumes the first layer's hidden state at *every*
      time step, not only the last one.

    Args:
        inputs: array of shape (batch, sequence, features).

    Returns:
        Integer array of shape (batch,) holding the predicted class indices.
    """
    # First layer: keep the hidden state of every time step for the next layer.
    first_layer_out, _ = _lstm_layer(
        inputs,
        lstm_weight_ih_l0, lstm_weight_hh_l0,
        lstm_bias_ih_l0, lstm_bias_hh_l0,
    )
    # Second layer: only its final hidden state feeds the classifier.
    _, h_last = _lstm_layer(
        first_layer_out,
        lstm_weight_ih_l1, lstm_weight_hh_l1,
        lstm_bias_ih_l1, lstm_bias_hh_l1,
    )

    # Fully connected layer followed by an arg-max over the class scores.
    fc_output = np.dot(h_last, fc_weight.T) + fc_bias
    predictions = np.argmax(fc_output, axis=1)
    return predictions

# Sigmoid activation function
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``.

    Evaluated through the identity ``sigmoid(x) = 0.5 * (1 + tanh(x / 2))``:
    tanh saturates at -1 and 1, so large-magnitude inputs give exactly 0 or 1
    instead of overflowing inside ``exp``.

    Args:
        x: a number or NumPy array.

    Returns:
        The element-wise sigmoid, with the same shape as ``x``.
    """
    return 0.5 * (1.0 + np.tanh(0.5 * x))


folder_path = './mnist_pi'  # replace with the path of the folder that holds the images
def infer_images_in_folder(folder_path):
    """Classify every image file in ``folder_path`` and print the results.

    Regular files ending in .jpg, .jpeg or .png (any letter case) are loaded,
    scaled to [0, 1], given a leading batch axis and passed to ``lstm_model``.
    The label is the second ``_``-separated token of the file name before its
    first dot; it is ``None`` when the name contains no ``_``.

    Args:
        folder_path: directory to scan (not recursive).
    """
    # os.listdir returns names in arbitrary order; sort for reproducible output.
    for file_name in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file_name)
        is_image = file_name.lower().endswith(('.jpg', '.jpeg', '.png'))
        if not (os.path.isfile(file_path) and is_image):
            continue

        name_parts = file_name.split(".")[0].split("_")
        label = name_parts[1] if len(name_parts) > 1 else None

        # The context manager closes the file handle; convert('L') guarantees a
        # single-channel 2-D array even if the file was saved as RGB or palette.
        with Image.open(file_path) as img:
            image = np.array(img.convert('L'))/255.0
        image = np.expand_dims(image,axis=0)
        predicted_class = lstm_model(image)
        print("file_path:",file_path,"img size:",image.shape,"label:",label,'Predicted class:', predicted_class)
            

infer_images_in_folder(folder_path)

這段代碼完全用numpy推理，不需要安裝pytorch（樹莓派也裝不動pytorch，太重了）。下面是推理結果，比之前的MLP網絡慢很多，主要是因爲手動實現的LSTM網絡要靠Python循環逐個時間步計算。

在樹莓派上實現numpy的LSTM長短期記憶神經網絡做圖像分類，加載pytorch的模型參數，推理mnist手寫數字識別

AI模型 Llama 3體驗筆記

【面試準備】又一次失敗的面試經歷，題目離譜～資深軟件測試工程師

dotnet 8 版本與銀河麒麟V10和UOS系統的 glibc 兼容性

deepspeed 訓練多機多卡報錯 ncclSystemError Last error

如何實現圖像搜索，文搜圖，圖搜圖，CLIP+faiss向量數據庫實現圖像高效搜索

使用單卡qlora混合精度訓練大模型chatGLM2-6b，解決qlora loss變成nan的問題！

我用numpy實現了VIT，手寫vision transformer, 可在樹莓派上運行，在hugging face上訓練模型保存參數成numpy格式，純numpy實現

我用numpy實現了GPT-2，GPT-2源碼，GPT-2模型加速推理，並且可以在樹莓派上運行，讀了不少hungging face源碼，手動實現了numpy的GPT2模型

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結