# 01標籤
import torch
import torch.nn as nn
# Margin ranking loss on a small hand-written batch.
#
# Each row of x is one pair of scores: column 0 is the score of the first
# item and column 1 is the score of the second item of that pair.
x = torch.tensor(
    [
        [0.5, 0.7],
        [0.9, 0.8],
        [0.6, 0.4],
        [0.3, 0.6],
        [0.8, 0.7],
        [0.4, 0.5],
    ]
)
# One target per pair: 1 means the first score should rank higher, -1 means
# the second score should rank higher. Stored as floats so the targets share
# the dtype of the scores.
y = torch.tensor([1.0, -1.0, 1.0, -1.0, -1.0, 1.0])
# Loss object with a margin of 0.2.
loss_fn = nn.MarginRankingLoss(margin=0.2)
# torch.combinations only accepts a 1-D tensor, and enumerating every pair of
# the 6 rows would give 15 pairs that cannot be matched to the 6 targets in y.
# Each row of x is therefore used directly as one (first, second) pair.
pairs = x
# Split the pairs into the first and the second score of every pair.
x1 = pairs[:, 0]
x2 = pairs[:, 1]
# Mean over the pairs of max(0, -y * (x1 - x2) + margin).
loss = loss_fn(x1, x2, y)
print(loss)  # prints tensor(0.2167)
# 多標籤
import torch
import torch.nn as nn
# Pairwise margin ranking loss over a randomly generated batch.
#
# Size of the batch and dimensionality of every feature vector.
batch_size = 16
input_dim = 128
# Random feature vectors, one row per item in the batch.
x = torch.randn(batch_size, input_dim)
# Random integer label per item, drawn from {0, 1, 2, 3, 4}.
y = torch.randint(0, 5, (batch_size,))
# Scoring model: a single linear layer producing one score per item.
model = nn.Linear(input_dim, 1)
# Predicted scores, shape (batch_size, 1).
scores = model(x)
# Pairwise loss: the positive score should exceed the negative one by 1.0.
loss_fn = nn.MarginRankingLoss(margin=1.0)

# Work on a flat (batch_size,) view. Squeezing only the last dimension keeps
# a 1-D tensor even when a mask selects exactly one item; a bare squeeze()
# would collapse that case to a 0-d tensor that cannot be indexed.
flat_scores = scores.squeeze(-1)
# Target 1 means "the first argument should rank higher"; it is the same for
# every pair, so it is built once outside the loop.
target = torch.tensor([1.0])
# Loop-invariant parts of the candidate masks.
is_positive = y > 0
is_negative = y == 0

# Loss of every (positive, negative) pair that could actually be formed.
pair_losses = []
for i in range(batch_size):
    # Candidates are restricted to items whose label differs from the label
    # of query i.
    # NOTE(review): when y[i] is 0 the negative mask is always empty, so such
    # a query is always skipped - confirm this is intended.
    differs_from_query = y != y[i]
    pos_idx = is_positive & differs_from_query
    neg_idx = is_negative & differs_from_query
    # Skip the query when no positive or no negative candidate exists.
    if not pos_idx.any() or not neg_idx.any():
        continue
    # Draw one positive and one negative candidate uniformly at random.
    pos_candidates = flat_scores[pos_idx]
    neg_candidates = flat_scores[neg_idx]
    pos_score = pos_candidates[torch.randint(0, int(pos_idx.sum()), (1,))]
    neg_score = neg_candidates[torch.randint(0, int(neg_idx.sum()), (1,))]
    # Margin loss between the chosen positive and negative score.
    pair_losses.append(loss_fn(pos_score, neg_score, target))

# Average over the pairs that were formed. Dividing by batch_size would also
# count the skipped queries and underestimate the mean. With no pair at all
# the loss is a zero tensor.
if pair_losses:
    loss = torch.stack(pair_losses).mean()
else:
    loss = flat_scores.new_zeros(())
print(loss)