1 Tensor 基本操作
-
Tensor與Numpy很相似,常用的操作如下
-
隨機生產一個指定行列的矩陣,每個值都會介於0-1之間
import torch

# Uniformly random 5x3 matrix; torch.rand draws from [0, 1),
# matching the heading above. (The old code used torch.randn,
# which samples the standard normal distribution instead.)
x = torch.rand(5, 3)
print(x)
- 生成指定行列的全1矩陣與全0矩陣
# All-ones and all-zeros 4x4 matrices.
x, y = torch.ones(4, 4), torch.zeros(4, 4)
for matrix in (x, y):
    print(matrix)
- 生成與另一個矩陣有相同行數和列數的矩陣
# Random tensor with the same shape (and dtype/device) as x,
# values drawn uniformly from [0, 1).
z = torch.rand_like(x)
print(z)
- 生成與另一個矩陣有相同行數和列數的全1矩陣
# All-ones tensor with the same shape (and dtype/device) as x.
z = torch.ones_like(x)
print(z)
- Tensor的運算
# The dtype can be forced at construction time; torch.long gives integer entries.
x = torch.ones(4, 4, dtype=torch.long)
print(x.dtype)

x = torch.rand(5, 3)
y = torch.randn_like(x)

# Elementwise addition produces a brand-new tensor.
z = torch.add(x, y)
print(z)

# Out-of-place add: the result is returned (and discarded here); y is untouched.
y.add(x)
print(y)

# The trailing underscore marks the in-place variant: y itself changes.
y.add_(x)
print(y)
- Tensor與numpy相互轉換
# Tensor -> NumPy: .numpy() returns an ndarray view of the same storage.
x = torch.ones(2, 2)
y = x.numpy()

# NumPy -> Tensor: from_numpy likewise shares the underlying buffer.
import numpy as np
a = np.ones((2, 2))
b = torch.from_numpy(a)
2 torch構建神經網絡
- 使用torch進行簡單的兩層神經網絡構建
# Two-layer network trained with hand-written gradient descent.
N, D_in, H, D_out = 64, 1000, 100, 10  # N: sample count, D_in: input dim,
                                       # H: hidden-layer dim, D_out: output dim

x = torch.randn(N, D_in)   # random inputs -- data needs no gradient
y = torch.randn(N, D_out)  # random targets (requires_grad removed: the old code
                           # let x.grad / y.grad accumulate for 500 steps, unused)
w1 = torch.randn(D_in, H, requires_grad=True)   # first-layer weights
w2 = torch.randn(H, D_out, requires_grad=True)  # second-layer weights

learning_rate = 1e-6
for t in range(500):
    h = x @ w1                  # hidden pre-activations
    h_relu = h.clamp(min=0)     # clamp(min=0) acts as the ReLU activation
    y_pred = h_relu @ w2        # predictions
    loss = (y_pred - y).pow(2).sum()  # summed squared error
    print(t, loss.item())       # .item() extracts the Python number
    loss.backward()             # fills w1.grad / w2.grad
    with torch.no_grad():       # parameter updates must stay out of autograd
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad
        w1.grad.zero_()         # grads accumulate, so clear them every step
        w2.grad.zero_()
- 使用torch.nn建立同樣的神經網絡
import torch
import torch.nn as nn  # was `import torch.nn`: that form never binds the `nn` alias

# The same two-layer network, built with torch.nn.
N, D_in, H, D_out = 64, 1000, 100, 10  # samples, input dim, hidden dim, output dim
x = torch.randn(N, D_in)   # random inputs (no gradient needed on data)
y = torch.randn(N, D_out)  # random targets

# Layer 1 -> ReLU -> Layer 2
model = nn.Sequential(
    nn.Linear(D_in, H),
    nn.ReLU(),
    nn.Linear(H, D_out),
)
loss_fn = nn.MSELoss(reduction='sum')
learning_rate = 1e-4
for t in range(500):
    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    print(t, loss.item())
    model.zero_grad()   # clear stale grads BEFORE backward (the old code zeroed
                        # them between backward() and the update, wiping them)
    loss.backward()
    with torch.no_grad():
        for param in model.parameters():
            # gradient step; the old code subtracted learning_rate * param,
            # which is weight decay, not gradient descent
            param -= learning_rate * param.grad
- 使用torch.optim提供的優化器(如Adam)自動完成參數更新
import torch
import torch.nn as nn  # the `nn` alias was never imported before this section

# Same network, but torch.optim performs the parameter updates.
N, D_in, H, D_out = 64, 1000, 100, 10
x = torch.rand(N, D_in)   # random inputs (data needs no gradient)
y = torch.rand(N, D_out)  # random targets

model = nn.Sequential(
    nn.Linear(D_in, H),
    nn.ReLU(),
    nn.Linear(H, D_out),
)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
loss_fn = nn.MSELoss(reduction='sum')  # Mean Squared Error (was `MESLoss`, a typo)
for t in range(500):
    y_pred = model(x)  # was `model(X)`: NameError, the input is lowercase x
    loss = loss_fn(y_pred, y)
    print(t, loss.item())
    optimizer.zero_grad()  # clear grads from the previous step
    loss.backward()
    optimizer.step()       # apply the Adam update
- 自己設置框架
import torch
import torch.nn as nn
#1.構建一個模型
class TwoLayerNet(torch.nn.Module):
    """Two-layer fully connected network: Linear -> ReLU -> Linear.

    Every layer that owns learnable parameters is registered in
    __init__; forward() only wires them together.
    """

    def __init__(self, D_in, H, D_out):
        super().__init__()
        self.linear1 = torch.nn.Linear(D_in, H)
        self.linear2 = torch.nn.Linear(H, D_out)

    def forward(self, x):
        # clamp(min=0) applies ReLU to the hidden activations
        hidden = self.linear1(x).clamp(min=0)
        return self.linear2(hidden)
# Instantiate the model skeleton defined above.
model = TwoLayerNet(D_in, H, D_out)

# 2. Pick a loss function: summed squared error.
loss_fn = nn.MSELoss(reduction='sum')
learning_rate = 1e-4

# 3. Hand the parameters to an optimizer.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# 4. Train: forward -> loss -> clear grads -> backward -> step.
for step in range(500):
    prediction = model(x)
    loss = loss_fn(prediction, y)
    print(step, loss.item())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
總結:
- Pytorch構建一個簡單神經網絡的流程如下:
- 首先準備好輸入和輸出
- 定義一個框架
class TwoLayerNet(torch.nn.Module):
    """Linear -> ReLU -> Linear network."""

    def __init__(self, D_in, H, D_out):
        # register every layer that carries learnable parameters here
        super().__init__()
        self.linear1 = torch.nn.Linear(D_in, H)
        self.linear2 = torch.nn.Linear(H, D_out)

    def forward(self, x):
        # forward pass; clamp(min=0) is the ReLU
        return self.linear2(self.linear1(x).clamp(min=0))
# Instantiate the network with the dimensions chosen earlier.
model = TwoLayerNet(D_in,H,D_out)
- 定義一個損失函數
# Step size for the optimizer and a summed-squared-error loss.
learning_rate = 1e-4
loss_fn = nn.MSELoss(reduction='sum')
- 定義一個優化方法
# Adam will own the parameter updates; learning_rate was set above.
optimizer = torch.optim.Adam(model.parameters(),lr=learning_rate)
- 進行訓練(獲取一個y_pred -> 計算損失 -> 獲得所有梯度 -> 優化 ->清除當前梯度)
# Training loop: predict -> loss -> zero grads -> backward -> optimizer step.
for step in range(500):
    prediction = model(x)
    loss = loss_fn(prediction, y)
    print(step, loss.item())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()