PyTorch Introduction and Practice: Tensor and autograd

from __future__ import print_function
import torch as t
from torch.autograd import Variable as V
from torch.autograd import Function

import numpy as np
from matplotlib import pyplot as plt

#########Creating Tensors#############
# a = t.Tensor(2, 3)
#
# #create a Tensor from a Python list
# b = t.Tensor([[1, 2, 3], [4, 5, 6]])    #the argument passed in is a list
# # print(b)
#
# c = b.tolist()  #convert the Tensor to a list
# # print(c)
#
# b_size = b.size()
# b_numel = b.numel()
# # print(b_size, b_numel)
#
# c = t.Tensor(b_size)
# d = t.Tensor((2, 3))    #create a tensor whose elements are 2 and 3; the argument is a tuple
# # print(c, d, c.shape)
#
# # print(t.ones(2, 3), '\n', t.zeros(2, 3), '\n', t.arange(1, 6, 2), '\n', t.linspace(1, 10, 3), \
# #       '\n', t.randn(2, 3), '\n', t.randperm(5), '\n', t.eye(2, 3))

#############Common Tensor operations#################
#tensor.view reshapes a tensor; the total number of elements must stay the same before and after. It does not modify the data itself, and the returned tensor shares memory with the source tensor, so changing one changes the other
# a = t.arange(0, 6)
# b = a.view(2, 3)
# # c = a.view(-1, 2)   #when a dimension is -1, its size is inferred automatically
# # print(b)
# c = b.unsqueeze(1)  #note the shape: insert a size-1 dimension at dim 1 (indexing starts at 0)
# c[0][0][0] = 9
# c = c.unsqueeze(-2) #-2 means the second-to-last dimension
# d = c.squeeze(1)    #squeeze out the size-1 dimension at dim 1
# d = d.squeeze(1)    #squeeze out the size-1 dimension at dim 1
# d[1] = t.Tensor((11, 22, 33))
# print(b.shape, c, c.shape, c.view(1,1,2,3), d)
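#A small extra check (my addition, a sketch assuming any reasonably recent PyTorch): view really does
#share storage with its source, and reshape behaves like view but may copy when the data is not contiguous.
# a = t.arange(0, 6)
# b = a.view(2, 3)
# b[0, 0] = 100
# print(a[0])             #also 100, because a and b share the same storage
# print(a.reshape(3, 2))  #same result as view here, without the contiguity requirement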

#resize_ is another way to adjust the size, and it can actually change the tensor's size. If the new size is larger, new memory is allocated automatically; if it is smaller, the existing data is still preserved
# b.resize_(1, 3)
# print(b, a)
# b.resize_(3, 3)
# print(b, a)

###########Indexing###################
# a = t.randn(3, 4)
# print(a, a[0:1, :2], a[0, :2], a>1)
# # print(a[a>1])   #equivalent to a.masked_select(a>1)
# print(a[t.LongTensor([0, 1])])  #rows 0 and 1

#gather
# a = t.arange(0, 16).view(4, 4)
# index = t.LongTensor([[0, 1, 2, 3]])    #select the elements on the main diagonal
# print(a, a.gather(0, index))
# index = t.LongTensor([[3, 2, 1, 0]]).t()    #select the elements on the anti-diagonal
# print(a.gather(1, index))
# index = t.LongTensor([[3, 2, 1, 0]])    #select the anti-diagonal elements, but in a different way from the line above
# print(a.gather(0, index))
# index = t.LongTensor([[0, 1, 2, 3], [3, 2, 1, 0]]).t()  #select the elements on both diagonals
# b = a.gather(1, index)
# print(b)
# #the inverse of gather is scatter_; scatter_ is an in-place operation
# c = t.zeros(4, 4)
# c.scatter_(1, index, b.float())
# print(c)
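#A hedged side example (my addition): a common use of scatter_ is building one-hot encodings.
#The names labels/num_classes below are made up for illustration.
# labels = t.LongTensor([0, 2, 1, 3])
# num_classes = 4
# one_hot = t.zeros(labels.size(0), num_classes)
# one_hot.scatter_(1, labels.view(-1, 1), 1)   #write a 1 into column `label` of each row
# print(one_hot)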

##############Advanced indexing############
#the result of advanced indexing generally does not share memory with the original Tensor
# x = t.arange(0, 27).view(3, 3, 3)
# print(x)
# # print(x[[1, 2], [1, 2], [2, 0]])    #x[1][1][2] and x[2][2][0]
# # print(x[[2, 1, 0], [0], [1]])   #x[2, 0, 1], x[1, 0, 1], x[0, 0, 1]
# print(x[[0, 2], ...])   #x[0] and x[2]

#############################Tensor types#################
#set the default tensor type; note that the argument is a string
# t.set_default_tensor_type('torch.IntTensor')  #raises an error: only floating-point types are supported as the default type
# a = t.Tensor(3, 3)
# b = a.int()     #equivalent to b = a.type(t.IntTensor)
# c = a.type_as(b)
# d = b.new(2, 3)     #equivalent to torch.IntTensor(2, 3)
# print(d)

#Element-wise operations
# a = t.arange(0, 6).view(2, 3)
# print(t.cos(a.float()), a%3, a**2, t.clamp(a, min=3, max=5))

#Reduction operations
# b = t.ones(2, 3)
# c = b.sum(dim=0, keepdim=True)
# d = b.sum(dim=0, keepdim=False)     #the size-1 dimension is not kept, so the shape differs from the line above
# # print(c, d)
# print(b.sum(dim=1))
# a = t.arange(0, 6).view(2, 3)
# print(a, a.cumsum(dim=1), a.cumsum(dim=0))   #accumulate along each row / accumulate along each column

#Comparison
# a = t.linspace(0, 15, 6).view(2, 3)
# b = t.linspace(15, 0, 6).view(2, 3)
# # print(a, b, a>b, a[a>b], t.max(a))
# print(t.max(b, dim=0), t.max(b, dim=1))     #the first return value holds the maximum elements, the second holds the indices of those maxima within each row or column
# # print(t.max(a, b), t.clamp(a, min=10))

#Linear algebra
# a = t.linspace(0, 15, 6).view(2, 3)
# b = a.t()   #transposing a matrix makes its storage non-contiguous; call .contiguous() to make it contiguous again
# print(b.is_contiguous())
# c = b.contiguous()
# print(c.is_contiguous())
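#A short extra sketch (my addition): view() itself requires contiguous memory, which is why
#.contiguous() matters here; reshape (in recent PyTorch) copies when needed and so works directly.
# a = t.linspace(0, 15, 6).view(2, 3)
# b = a.t()
# # b.view(-1)                      #would raise a RuntimeError because b is not contiguous
# print(b.contiguous().view(-1))    #works after making a contiguous copy
# print(b.reshape(-1))              #also works; reshape falls back to copying when necessary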

#Tensor and Numpy
#Numpy arrays and Tensors can share memory. When you run into an operation that Tensor does not support, convert to a numpy array first, process it, and then convert back to a tensor; the conversion overhead is very small
# a = np.ones([2, 3], dtype=np.float32)
# print(a)
# b = t.from_numpy(a)     #convert a numpy array to a tensor
# c = t.Tensor(a)     #a numpy object can also be passed directly to Tensor; in that case a new copy is made if the numpy dtype is not float32
# print(b, c)
# b[0, 1] = 100
# print(a[0, 1], a, b)
#Automatic and manual broadcasting rules. See P72
# a = t.ones(3, 2)
# b = t.zeros(2, 3, 1)
# print(a+b)  #automatic broadcasting
# c = a.unsqueeze(0).expand(2, 3, 2) + b.expand(2, 3, 2)      #manual broadcasting
# e = a.unsqueeze(0).expand(1000000, 3, 2)    #expand does not allocate extra memory; it only expands on demand, which can save a lot of memory
# print(c)
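#A brief note (my addition) on the rule used above: shapes are aligned from the trailing dimension,
#and a size-1 (or missing leading) dimension is expanded to match. Here (3, 2) is treated as
#(1, 3, 2) and broadcast against (2, 3, 1), giving a (2, 3, 2) result.
# print((t.ones(3, 2) + t.zeros(2, 3, 1)).shape)   #torch.Size([2, 3, 2])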


#Internal structure
# a = t.arange(0, 6)
# b = a.view(2, 3)
# # print(a.storage(), b.storage())
# print(id(b.storage()) == id(a.storage()))      #an object's id can be regarded as its memory address; a and b have the same storage id, i.e. they use the very same storage
# a[1] = 100  #when a changes, b changes too, because they share the same storage (memory)
# print(b)
# c = a[2:]
# print(c.storage(),c.dtype)  #.dtype shows c's exact type
# print(c.data_ptr(), a.data_ptr())   #data_ptr returns the memory address of the tensor's first element
# c[0] = -111 #a and c also share memory; c[0]'s address corresponds to a[2]'s address
# #d = t.Tensor(a.storage())   #build a new tensor from the storage; raises an error because Tensor expects a FloatTensor storage (type 6) rather than a LongTensor storage (data type 4)
# #the operations below assume the values above have been changed to FloatTensor, i.e. append .float() to a = t.arange(0, 6) and rerun from the top
# print(a.storage_offset(), c.storage_offset())   #offset of the first element relative to the start of the storage
# e = b[::2,::2]  #take every other row and column to build e
# print(e, id(e.storage()) ==id(a.storage()))     ##although the values differ, the underlying storage is the same
# print(b.stride(), e.stride())   #get the stride information
# print(b.is_contiguous(), e.is_contiguous())     #check whether the values are contiguous in memory; e only takes part of the storage, so it is not contiguous
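#A worked sketch (my addition): for a strided tensor, element [i, j] lives in the shared storage at
#position storage_offset + i*stride[0] + j*stride[1].
# a = t.arange(0, 6).float()
# b = a.view(2, 3)
# i, j = 1, 2
# pos = b.storage_offset() + i * b.stride()[0] + j * b.stride()[1]
# print(b[i, j].item(), b.storage()[pos])    #both are 5.0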

#Other topics related to Tensor
#Persistence: saving and loading Tensors
# if t.cuda.is_available():
#     a = a.cuda(1)    #move a onto GPU 1
#     t.save(a, 'a.pth')
#     b = t.load('a.pth') #load as b, stored on GPU 1 (because the tensor was on GPU 1 when it was saved)
#     c = t.load('a.pth', map_location=lambda storage, loc: storage)  #load as c, stored on the CPU
#     d = t.load('a.pth', map_location={'cuda:1':'cuda:0'})  #load as d, stored on GPU 0
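#A small aside (my addition, assuming a reasonably recent PyTorch): map_location also accepts a plain
#device string, which is usually the simplest way to force loading onto the CPU.
# t.save(t.ones(3), 'tmp_tensor.pth')
# e = t.load('tmp_tensor.pth', map_location='cpu')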

#Vectorization
# def for_loop_add(x, y):
#     result = []
#     for i,j in zip(x, y):
#         result.append(i + j)
#     return t.Tensor(result)

# x = t.zeros(100)
# y = t.ones(100)
#%timeit -n 10 for_loop_add(x, y)    #IPython magic command
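#Outside IPython, a rough equivalent (my addition) is to time both versions with time.perf_counter;
#the vectorized x + y should be far faster than the Python-level loop.
# import time
# start = time.perf_counter()
# for _ in range(10):
#     for_loop_add(x, y)
# print('loop version:      ', time.perf_counter() - start)
# start = time.perf_counter()
# for _ in range(10):
#     z = x + y                    #vectorized addition
# print('vectorized version:', time.perf_counter() - start)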


########A first try: linear regression##########
# t.manual_seed(1000) #set the random seed so the output below is identical across different machines
#
# def get_fake_data(batch_size=8):
#     x = t.rand(batch_size, 1)*20
#     y = x*2+(1+t.randn(batch_size, 1))*3
#     return x,y
#
# # x, y = get_fake_data()
# # plt.scatter(x.squeeze().numpy(), y.squeeze().numpy())
# # plt.show()
#
# #randomly initialize the parameters
# w = t.rand(1, 1)
# b = t.zeros(1, 1)
# lr = 0.001  #learning rate
# print(w, b)
#
# for ii in range(20000):
#     x, y = get_fake_data()
#
#     #forward: compute the loss
#     y_pred = x.mm(w) + b.expand_as(y)
#     loss = 0.5 * (y_pred - y) **2   #squared error
#     loss = loss.sum()
#
#     #backward: compute the gradients by hand
#     dloss = 1
#     dy_pred = dloss * (y_pred - y)
#
#     dw = x.t().mm(dy_pred)
#     db = dy_pred.sum()
#
#     #update the parameters
#     w.sub_(lr*dw)
#     b.sub_(lr*db)
#
#     if(ii%1000 == 0):
#         #plot
#         x = t.arange(0, 20).view(-1, 1).float()
#         y = x.mm(w) + b.expand_as(x)
#         plt.plot(x.numpy(), y.numpy())  #predicted
#
#         x2, y2 = get_fake_data(batch_size = 20)
#         plt.scatter(x2.numpy(), y2.numpy())     #true data
#
#         plt.xlim(0, 20)
#         plt.ylim(0, 41)
#         plt.show()
#         plt.pause(0.5)
#         print(w.squeeze(), b.squeeze())


############################autograd################################
#torch.autograd is an automatic differentiation engine built for convenience: it builds the computation graph automatically from the inputs and the forward pass, and then runs backpropagation
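#A minimal sketch (my addition, assuming PyTorch >= 0.4, where Variable has been merged into Tensor):
#the same autograd machinery is available by setting requires_grad directly on a tensor.
# x = t.ones(2, 2, requires_grad=True)
# y = (x * 3).sum()
# y.backward()
# print(x.grad)    #a 2x2 tensor filled with 3s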

#Variable
# a = V(t.ones(3, 4), requires_grad = True)
# b = V(t.zeros(3, 4))
# # print(a, b)
# c = a.add(b)    #functions are used just like with tensors; this can also be written as c = a + b
# # print(c)
# d = c.sum()
# d.backward()    #backpropagation
# # print(c.data.sum().dtype, c.sum().dtype)
# # print(a.grad, a.requires_grad, b.requires_grad, c.requires_grad,)
# # print(a.is_leaf, b.is_leaf, c.is_leaf)  #variables created by the user are leaf nodes; their grad_fn is None
# # print(c.grad is None)   #c's gradient is released as soon as it has been computed and used

# def f(x):
#     y = x**2 *t.exp(x)
#     return y
#
# def gradf(x):
#     #manually derived gradient
#     dx = 2*x*t.exp(x) + x**2*t.exp(x)
#     return dx
#
# x = V(t.randn(3, 4), requires_grad = True)
# y = f(x)
# y.backward(t.ones(y.size()))    #grad_variables must have the same shape as y
# print(y, x.grad, gradf(x))
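#A hedged extra check (my addition): torch.autograd.gradcheck compares the analytic gradient of f
#against numerical differences; it expects double-precision inputs (assumes PyTorch >= 0.4).
# from torch.autograd import gradcheck
# x = t.randn(3, 4, dtype=t.double, requires_grad=True)
# print(gradcheck(f, (x,), eps=1e-6, atol=1e-4))   #True when the autograd gradient of f is numerically correct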


#Computation graph
# x = V(t.ones(1))
# b = V(t.rand(1), requires_grad = True)
# w = V(t.rand(1), requires_grad = True)
# y = w * x   #equivalent to y = w.mul(x)
# z = y + b   #equivalent to z = y.add(b)
# # print(x.requires_grad, b.requires_grad, w.requires_grad, y.requires_grad, z.requires_grad)
# # print(x.is_leaf, w.is_leaf, b.is_leaf, y.is_leaf, z.is_leaf)
# # print(z.grad_fn)    #z is the output of an add, so its backward function is AddBackward
# # print(z.grad_fn.next_functions)     #see P88
# # print(z.grad_fn.next_functions[0][0] == y.grad_fn)
# # print(y.grad_fn.next_functions, w.grad_fn, x.grad_fn)
# # z.grad_fn.saved_variable    #this raises an error, but why?
# z.backward(retain_graph=True)
# z.backward()
# print(w.grad)

# def abs(x):
#     if x.data[0]>0: return x
#     else: return -x
# x = V(t.ones(1), requires_grad=True)
# y = abs(x)
# y.backward()
# print(x.grad)
# x = V(-1*t.ones(1), requires_grad=True)
# y = abs(x)
# y.backward()
# print(x.grad)

# def f(x):
#     result = 1
#     for ii in x:
#         if ii.data>0: result = ii*result
#     return result
# x = V(t.arange(-2,4).float(),requires_grad=True)
# y = f(x)
# y.backward()
# print(x.grad)

# x = V(t.ones(3), requires_grad=True)
# w = V(t.rand(3), requires_grad=True)
# y = x * w
# z = y.sum()
# # print(x.requires_grad, w.requires_grad, y.requires_grad)
# z.backward()
# # print((x.grad, w.grad, y.grad))     #the grad of non-leaf nodes is cleared automatically once backward finishes, so y.grad is None; the results can be printed as a tuple

#How to inspect the gradient of a non-leaf variable
#Method 1: use autograd.grad to obtain the gradient of an intermediate variable
# print(t.autograd.grad(z, y))   #gradient of z w.r.t. y; backward() is called implicitly

#Method 2: use a hook.  A hook is a function whose input is the gradient; it should not return anything
# def variable_hook(grad):
#     print('gradient of y:\r\n', grad)
# x = V(t.ones(3), requires_grad=True)
# w = V(t.rand(3), requires_grad=True)
# y = x * w
# hook_handle = y.register_hook(variable_hook)    #register the hook
# z = y.sum()
# z.backward()
# hook_handle.remove()    #remember to remove the hook once you are done with it


#The meaning of a variable's grad attribute and of the grad_variables argument of backward
# x = V(t.arange(0, 3).float(), requires_grad = True)
# y = x**2 + x*2
# z = y.sum()
# # z.backward()
# # print(x.grad)
# y_grad_variables = V(t.Tensor([1,1,1])) #dz/dy
# y.backward(y_grad_variables)    #start backpropagation from y
# print(x.grad)
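#A quick cross-check (my addition): with dz/dy = [1, 1, 1] passed in above, x.grad equals
#grad_variables * dy/dx = 2*x + 2, which is exactly what z.backward() would have produced.
# print(2 * x.data + 2)    #should match the x.grad printed above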

#Extending autograd (this part is confusing!)
# class MultiplyAdd(Function):
#     @staticmethod
#     def forward(ctx, w, x, b, x_requires_grad = True):
#         print('type in forward', type(x))
#         # ctx.x_requires_grad = x_requires_grad
#         ctx.save_for_backward(w, x)
#         output = w * x + b
#         return output
#
#     @staticmethod
#     def backward(ctx, grad_output):
#         w, x = ctx.saved_variables
#         print('type in backward', type(x))
#         grad_w = grad_output * x
#         grad_x = grad_output * w
#         grad_b = grad_output * 1
#         return grad_w, grad_x, grad_b
#
# x = V(t.ones(1))
# w = V(t.rand(1), requires_grad = True)
# b = V(t.rand(1), requires_grad = True)
# print('starting the forward pass')
# z = MultiplyAdd.apply(w, x, b)
# print('starting the backward pass')
# z.backward()
# print(x.grad, w.grad, b.grad)
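#A hedged modernized sketch (my addition, assuming PyTorch >= 0.4): the same Function written against
#the current API uses ctx.saved_tensors instead of ctx.saved_variables and plain tensors with
#requires_grad=True instead of Variable. MultiplyAddModern is just an illustrative name.
# class MultiplyAddModern(Function):
#     @staticmethod
#     def forward(ctx, w, x, b):
#         ctx.save_for_backward(w, x)   #stash the tensors needed by backward
#         return w * x + b
#
#     @staticmethod
#     def backward(ctx, grad_output):
#         w, x = ctx.saved_tensors
#         return grad_output * x, grad_output * w, grad_output   #one gradient per forward input
#
# w = t.rand(1, requires_grad=True)
# x = t.ones(1)
# b = t.rand(1, requires_grad=True)
# z = MultiplyAddModern.apply(w, x, b)
# z.backward()
# print(x.grad, w.grad, b.grad)    #x.grad is None because x does not require grad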


##############Linear regression implemented with Variable###############
t.manual_seed(1000)

def get_fake_data(batch_size=8):
    x = t.rand(batch_size, 1)*20
    y = x * 2 + (1 + t.randn(batch_size, 1))*3
    return x, y

# x, y = get_fake_data()
# plt.scatter(x.squeeze().numpy(), y.squeeze().numpy())
# plt.show()

#randomly initialize the parameters
w = V(t.rand(1, 1), requires_grad=True)
b = V(t.zeros(1, 1), requires_grad=True)
lr = 0.001  #learning rate

for ii in range(8000):
    x, y = get_fake_data()
    x, y = V(x), V(y)

    #forward: compute the loss
    y_pred = x.mm(w) + b.expand_as(y)
    loss = 0.5 * (y_pred - y) ** 2
    loss = loss.sum()

    #backward: compute the gradients automatically
    loss.backward()

    #update the parameters
    w.data.sub_(lr*w.grad.data)
    b.data.sub_(lr*b.grad.data)

    #zero the gradients
    w.grad.data.zero_()
    b.grad.data.zero_()

    if ii%1000 == 0:
        #plot
        x = t.arange(0, 20).view(-1, 1).float()
        y = x.mm(w.data) + b.data.expand_as(x)
        plt.plot(x.numpy(), y.numpy())  #predicted

        x2, y2 = get_fake_data(batch_size=20)
        plt.scatter(x2.numpy(), y2.numpy()) #true data

        plt.xlim(0, 20)
        plt.ylim(0, 41)
        # plt.show()
        # plt.pause(0.5)
print(w.data.squeeze(), b.data.squeeze())