聽說Apple的M1芯片在機器學習加速上比以往的CPU芯片有15倍的提升,也就是說可以使用Apple Mac訓練深度學習PyTorch模型!驚呆了
安裝apple m1芯片版本的pytorch
然後使用ChatGPT生成一個ResNet-101的訓練代碼。這裏注意,如果網絡特別輕的話是沒有加速效果的,還沒有CPU的計算來得快
這裏要選擇好設備,不再是"cuda"了;"cuda"是NVIDIA深度學習加速的配置
# 設置設備 # device = torch.device("cuda" if torch.cuda.is_available() else "cpu") device = torch.device("mps") #torch.device("cpu")
"""Train ResNet-101 on MNIST, using Apple-Silicon MPS acceleration when available."""
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

# tqdm is purely cosmetic (progress bars); degrade gracefully if missing.
try:
    from tqdm import tqdm
except ImportError:
    def tqdm(iterable, **kwargs):
        return iterable

# Device selection: prefer Apple-Silicon MPS, then CUDA, then CPU.
# The original hard-coded "mps", which crashes on machines without that backend.
if getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")


def train(model, dataloader, optimizer, criterion):
    """Run one training epoch and return the mean per-sample loss.

    Args:
        model: the network to train (already on `device`).
        dataloader: yields (inputs, labels) batches.
        optimizer: optimizer over `model`'s parameters.
        criterion: loss function, e.g. CrossEntropyLoss.
    """
    model.train()
    running_loss = 0.0
    for inputs, labels in tqdm(dataloader, desc="Training"):
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # Weight by batch size so the epoch loss is a true per-sample mean
        # even when the last batch is smaller.
        running_loss += loss.item() * inputs.size(0)
    return running_loss / len(dataloader.dataset)


def evaluate(model, dataloader):
    """Return classification accuracy (in percent) of `model` on `dataloader`."""
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in tqdm(dataloader, desc="Evaluating"):
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct / total * 100


def main():
    """Train ResNet-101 on MNIST and report loss/accuracy per epoch."""
    # torchvision is only needed for the actual training run; importing it
    # lazily keeps this module importable where torchvision is absent.
    from torchvision.datasets import MNIST
    from torchvision.transforms import ToTensor
    from torchvision.models import resnet101

    # Load the MNIST dataset.
    data_root = "/Users/xinyuuliu/Desktop/test_python/"
    train_dataset = MNIST(root=data_root, train=True, transform=ToTensor(), download=True)
    test_dataset = MNIST(root=data_root, train=False, transform=ToTensor())

    # Create the data loaders.
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

    # Define the ResNet-101 model. `weights=None` replaces the deprecated
    # `pretrained=False` keyword.
    model = resnet101(weights=None)
    # ResNet expects 3-channel input; MNIST is 1-channel, so replace conv1.
    model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
    model.fc = nn.Linear(2048, 10)  # replace the final FC layer: 10 digit classes
    model.to(device)

    # Loss function and optimizer.
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Train and evaluate.
    num_epochs = 10
    for epoch in range(num_epochs):
        print(f"Epoch {epoch+1}/{num_epochs}")
        train_loss = train(model, train_loader, optimizer, criterion)
        print(f"Training Loss: {train_loss:.4f}")
        test_acc = evaluate(model, test_loader)
        print(f"Test Accuracy: {test_acc:.2f}%")


if __name__ == "__main__":
    main()
結果:
在mps device上,訓練時間在10分鐘左右
在cpu device上,訓練時間在50分鐘左右,明顯在mps device上速度快了5倍