Pytorch相似度計算

1 餘弦相似度
餘弦相似度是一種我們較爲常用的計算向量相似度的方法。下面就是計算餘弦相似度的公式:
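$$\cos(x, y) = \frac{x \cdot y}{\lVert x \rVert \, \lVert y \rVert} = \frac{\sum_i x_i y_i}{\sqrt{\sum_i x_i^2}\,\sqrt{\sum_i y_i^2}}$$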

import torch
import torch.nn as nn
import math
class ConineSimilarity(nn.Module):
    """Cosine similarity between two tensors along their last dimension.

    Each input is divided by its L2 norm along ``dim=-1`` and the element-wise
    product of the two normalised tensors is summed over that same dimension,
    so inputs of shape ``(..., d)`` produce a result of shape ``(...)``.

    The class keeps its original spelling ("Conine") because the rest of the
    file instantiates it under that name.

    Args:
        eps: Lower bound applied to each norm before dividing. Without it an
            all-zero vector yields ``0 / 0 = nan``; with it such a vector gets
            similarity 0. Defaults to ``1e-8``.
    """

    def __init__(self, eps=1e-8):
        super(ConineSimilarity, self).__init__()
        self.eps = eps

    def forward(self, tensor_1, tensor_2):
        """Return the cosine similarity of ``tensor_1`` and ``tensor_2``.

        Args:
            tensor_1: First operand; compared along its last dimension.
            tensor_2: Second operand; must broadcast against ``tensor_1``.

        Returns:
            A tensor with the last dimension reduced away.
        """
        # Clamp the norms so that zero vectors do not cause a division by zero.
        norm_1 = tensor_1.norm(dim=-1, keepdim=True).clamp_min(self.eps)
        norm_2 = tensor_2.norm(dim=-1, keepdim=True).clamp_min(self.eps)
        return ((tensor_1 / norm_1) * (tensor_2 / norm_2)).sum(dim=-1)
        
# Demo: score two random (3, 5) batches with the hand-written module and with
# torch's built-in cosine similarity so the two results can be compared.
input_1, input_2 = (
    torch.randn(3, 5, requires_grad=True),
    torch.randn(3, 5, requires_grad=True),
)
print(input_1, input_2, sep="\n")

con = ConineSimilarity()
CS = con(input_1, input_2)  # hand-written implementation
CS2 = torch.cosine_similarity(input_1, input_2)  # built-in; reduces over dim=1 by default
print(CS, CS2, sep="\n")

輸出:

tensor([[ 0.5973,  0.1187, -0.8476,  1.1966,  0.8405],
        [ 0.4578, -0.1856, -0.7641,  0.1625, -2.2154],
        [-0.2436, -1.2263,  2.2282,  0.5284,  0.4729]], requires_grad=True)
tensor([[ 0.6518, -0.3185, -0.7898,  0.0890,  0.5226],
        [-1.8873,  0.7438,  0.5145, -1.0182,  2.0877],
        [ 0.5931, -0.5107, -1.3426,  1.4966, -1.9130]], requires_grad=True)
tensor([ 0.7290, -0.8242, -0.3432], grad_fn=<SumBackward2>)
tensor([ 0.7290, -0.8242, -0.3432], grad_fn=<DivBackward0>)

2 點乘相似度
計算公式如下:
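$$\mathrm{dot}(x, y) = x \cdot y = \sum_{i=1}^{d} x_i y_i$$
(若啓用 scale_output,結果會再除以 $\sqrt{d}$,其中 $d$ 爲向量的維度。)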

class DotProductSimilarity(nn.Module):
    """Dot-product similarity between two tensors along their last dimension.

    Args:
        scale_output: When true, the dot product is divided by ``sqrt(d)``,
            where ``d`` is the size of the last dimension of ``tensor_1``.
            Defaults to ``False``.
    """

    def __init__(self, scale_output=False):
        super(DotProductSimilarity, self).__init__()
        self.scale_output = scale_output

    def forward(self, tensor_1, tensor_2):
        """Return ``sum(tensor_1 * tensor_2)`` over the last dimension.

        Args:
            tensor_1: First operand.
            tensor_2: Second operand; must broadcast against ``tensor_1``.

        Returns:
            A tensor with the last dimension reduced away, optionally scaled
            by ``1 / sqrt(d)``.
        """
        result = (tensor_1 * tensor_2).sum(dim=-1)
        if self.scale_output:
            # Divide out of place: an in-place ``/=`` cannot write the
            # floating-point quotient back into an integer-typed result and
            # raises a RuntimeError for integer inputs.
            result = result / math.sqrt(tensor_1.size(-1))
        return result
# Demo: cosine vs. dot-product similarity on the same pair of random batches.
input_1, input_2 = (
    torch.randn(3, 5, requires_grad=True),
    torch.randn(3, 5, requires_grad=True),
)
print(input_1, input_2, sep="\n")

con = ConineSimilarity()
dot = DotProductSimilarity()
CS = con(input_1, input_2)
DS = dot(input_1, input_2)
print(CS, DS, sep="\n")

輸出:

tensor([[ 0.2148,  0.2645,  0.8717, -1.4637,  1.0932],
        [-0.2975, -0.1585,  0.1567, -1.8238, -0.1437],
        [-0.8243, -0.0170, -0.7533, -1.7643,  2.2571]], requires_grad=True)
tensor([[-1.8424, -1.4204, -1.5639,  0.2739,  0.7922],
        [-0.3378, -2.3861, -1.7581, -0.8220,  0.3456],
        [ 0.5536,  0.8324, -1.0632, -1.5567,  0.6374]], requires_grad=True)
tensor([-0.2780,  0.2843,  0.6595], grad_fn=<SumBackward2>)
tensor([-1.6696,  1.6526,  4.5157], grad_fn=<SumBackward2>)

餘弦相似度的取值固定在 [-1, 1] 之間,而點乘相似度的取值則沒有範圍限制。
3 雙線性相似度

計算公式如下:
$$s = x^{T} W y + b$$

class BiLinearSimilarity(nn.Module):
    """Bilinear similarity ``x^T W y + b`` with an optional activation.

    Args:
        tensor_1_dim: Size of the last dimension of the first input
            (number of rows of the weight matrix).
        tensor_2_dim: Size of the last dimension of the second input
            (number of columns of the weight matrix).
        activation: Optional callable applied to the biased score. ``None``
            (the default) returns the score unchanged.
    """

    def __init__(self, tensor_1_dim, tensor_2_dim, activation=None):
        super().__init__()
        # Parameters are allocated uninitialised; reset_parameters() fills them.
        self.weight_matrix = nn.Parameter(torch.empty(tensor_1_dim, tensor_2_dim))
        self.bias = nn.Parameter(torch.empty(1))
        self.activation = activation
        self.reset_parameters()

    def reset_parameters(self):
        """Xavier-uniform initialise the weight matrix and zero the bias."""
        nn.init.xavier_uniform_(self.weight_matrix)
        nn.init.zeros_(self.bias)

    def forward(self, tensor_1, tensor_2):
        """Return the bilinear score of ``tensor_1`` and ``tensor_2``.

        ``tensor_1`` is first multiplied by the weight matrix; the result is
        multiplied element-wise with ``tensor_2``, summed over the last
        dimension, and offset by the bias.
        """
        projected = tensor_1 @ self.weight_matrix
        score = (projected * tensor_2).sum(dim=-1) + self.bias
        if self.activation is None:
            return score
        return self.activation(score)
# Demo: cosine, dot-product and bilinear similarity on the same random batches.
input_1, input_2 = (
    torch.randn(3, 5, requires_grad=True),
    torch.randn(3, 5, requires_grad=True),
)
print(input_1, input_2, sep="\n")

con = ConineSimilarity()
dot = DotProductSimilarity()
CS = con(input_1, input_2)
DS = dot(input_1, input_2)
print(CS, DS, sep="\n")

# The bilinear module is built and applied twice; each instance draws its own
# random weight matrix.
bilinear = BiLinearSimilarity(5, 5)
BS = bilinear(input_1, input_2)
print(BS)

bilinear = BiLinearSimilarity(5, 5)
BS = bilinear(input_1, input_2)
print(BS)

輸出:

tensor([[ 0.1465, -0.8367,  0.5901, -0.7421,  0.7775],
        [ 0.2198,  1.5482,  1.5923,  1.0962, -1.6559],
        [-1.8222, -0.7740, -0.4819, -1.8031,  1.6413]], requires_grad=True)
tensor([[ 0.0053,  2.1358,  0.9435,  0.2379,  0.0689],
        [-0.3503,  1.3089,  0.5033, -2.2339,  1.7092],
        [-0.0255,  1.0277,  1.4797,  0.2870, -0.4866]], requires_grad=True)
tensor([-0.3862, -0.2676, -0.4630], grad_fn=<SumBackward2>)
tensor([-1.3525, -2.5285, -2.7782], grad_fn=<SumBackward2>)
tensor([ 0.0766,  3.2140, -2.2844], grad_fn=<AddBackward0>)

從上可見,雙線性也並沒有取值範圍的限定。
以下是我根據自己對上面函數的理解寫出的公式,感覺跟官方的一些寫法對不上。
$$s = x^{T} W y + b = \sum_{i}\sum_{j} x_i W_{ij} y_j + b$$
4 歐式距離

import torch.nn.functional as F

# Demo: every measure defined so far, plus the pairwise distance, on the same
# pair of random batches.
input_1, input_2 = (
    torch.randn(3, 5, requires_grad=True),
    torch.randn(3, 5, requires_grad=True),
)
print(input_1, input_2, sep="\n")

con = ConineSimilarity()
dot = DotProductSimilarity()
CS = con(input_1, input_2)
DS = dot(input_1, input_2)
print(CS, DS, sep="\n")

# The bilinear module is built and applied twice; each instance draws its own
# random weight matrix.
bilinear = BiLinearSimilarity(5, 5)
BS = bilinear(input_1, input_2)
print(BS)

bilinear = BiLinearSimilarity(5, 5)
BS = bilinear(input_1, input_2)
print(BS)

# Distance between corresponding rows (2-norm with the default arguments).
e = F.pairwise_distance(input_1, input_2)
print(e)

輸出:

tensor([[-1.0758,  1.2539, -1.4611, -0.5173, -0.3719],
        [-0.3430,  1.0673,  0.3724,  0.2630, -0.0029],
        [ 0.0570,  0.9101,  0.3809,  0.9970,  0.1919]], requires_grad=True)
tensor([[ 0.0319, -0.7283, -1.2269,  0.0922,  1.3412],
        [-1.9286,  0.4859, -1.2347,  2.1949, -0.3295],
        [-0.3811,  0.1348, -0.4247, -1.1917,  0.6632]], requires_grad=True)
tensor([ 0.0663,  0.3326, -0.5333], grad_fn=<SumBackward2>)
tensor([ 0.2986,  1.2986, -1.1216], grad_fn=<SumBackward2>)
tensor([ 0.4084, -0.2896, -0.4954], grad_fn=<AddBackward0>)
tensor([ 0.8389,  0.9224, -0.4486], grad_fn=<AddBackward0>)
tensor([2.9184, 3.0453, 2.5405], grad_fn=<NormBackward1>)

歐式距離的輸出恆爲非負,但沒有上界,因此取值範圍同樣不固定。

5 皮爾遜相關係數
計算公式如下:
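$$r = \frac{\sum_i (x_i - \bar{x})(y_i - \bar{y})}{\sqrt{\sum_i (x_i - \bar{x})^2}\,\sqrt{\sum_i (y_i - \bar{y})^2}}$$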

class PearsonCorrelation(nn.Module):
    """Pearson correlation coefficient between two tensors.

    Args:
        dim: Dimension to correlate along. ``None`` (the default) centres each
            input by the mean of all its elements and reduces over every
            element, returning a single scalar. An integer such as ``-1``
            centres and reduces along that dimension only, giving one
            coefficient per remaining index (for example one per row of a
            ``(batch, d)`` input), like the other similarity modules in this
            file.
    """

    def __init__(self, dim=None):
        super(PearsonCorrelation, self).__init__()
        self.dim = dim

    def forward(self, tensor_1, tensor_2):
        """Return the Pearson correlation of ``tensor_1`` and ``tensor_2``.

        Args:
            tensor_1: First operand.
            tensor_2: Second operand; must broadcast against ``tensor_1``.

        Returns:
            A scalar tensor when ``dim`` is ``None``; otherwise a tensor with
            dimension ``dim`` reduced away. The result is ``nan`` where either
            input has zero variance, because the denominator is then zero.
        """
        if self.dim is None:
            centered_1 = tensor_1 - tensor_1.mean()
            centered_2 = tensor_2 - tensor_2.mean()
            cross = (centered_1 * centered_2).sum()
            spread = centered_1.pow(2).sum().sqrt() * centered_2.pow(2).sum().sqrt()
        else:
            centered_1 = tensor_1 - tensor_1.mean(dim=self.dim, keepdim=True)
            centered_2 = tensor_2 - tensor_2.mean(dim=self.dim, keepdim=True)
            cross = (centered_1 * centered_2).sum(dim=self.dim)
            spread = (
                centered_1.pow(2).sum(dim=self.dim).sqrt()
                * centered_2.pow(2).sum(dim=self.dim).sqrt()
            )
        return cross / spread
# Demo: Pearson correlation of two random (3, 5) batches.
input_1, input_2 = (
    torch.randn(3, 5, requires_grad=True),
    torch.randn(3, 5, requires_grad=True),
)
print(input_1, input_2, sep="\n")

pearson = PearsonCorrelation()
PC = pearson(input_1, input_2)
print(PC)

輸出:

tensor([[-0.3064,  0.5541,  0.5944,  1.2654,  0.0281],
        [ 1.0192, -0.8178,  0.2624, -1.4695,  0.3750],
        [-1.7883,  0.5552, -0.1381, -0.6302,  0.6420]], requires_grad=True)
tensor([[ 0.8128, -0.3573, -0.8618,  1.2499,  0.5793],
        [ 1.7729,  0.1705, -1.6051, -2.5970,  1.2015],
        [-0.4328,  0.4294,  0.7364,  1.7886,  0.4203]], requires_grad=True)
tensor(0.4113, grad_fn=<DivBackward0>)

皮爾遜相關係數的值也限定在【-1,1】之間。

參考文獻:
常用的計算向量相似度的函數(pytorch版本)
Pytorch歐式距離euclidean distance實現
皮爾遜在pytorch的應用問題

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章