前言
在上面一篇文章中,我們對訓練代碼中的inputs和outputs獲得做了簡單分析。有了inputs和outpus後,就可以開始計算loss值了。這也是本文重點。
圖片預測函數的實現
主要講解下面這個代碼。中文註釋添加在裏面。
def generate_images_pred(self, inputs, outputs):
    """Generate the warped (reprojected) color images for a minibatch.

    For every scale in ``self.opt.scales`` the predicted disparity is
    converted to depth, back-projected to 3D points, projected into each
    source frame, and the resulting pixel coordinates are used to sample
    that frame's color image.

    Args:
        inputs: Batch dictionary. Keys read here:
            ``("color", frame_id, scale)``, ``("K", scale)``,
            ``("inv_K", scale)`` and, for the stereo frame ``"s"``,
            ``"stereo_T"``.
        outputs: Prediction dictionary. Keys read here:
            ``("disp", scale)``, ``("cam_T_cam", 0, frame_id)`` and, when
            ``self.opt.pose_model_type == "posecnn"``,
            ``("axisangle", 0, frame_id)`` / ``("translation", 0, frame_id)``.

    Returns:
        None. Results are stored in ``outputs`` under
        ``("depth", 0, scale)``, ``("sample", frame_id, scale)``,
        ``("color", frame_id, scale)`` and, unless
        ``self.opt.disable_automasking`` is set,
        ``("color_identity", frame_id, scale)``.
    """
    # outputs[("disp", scale)] holds the predicted disparity map, one per
    # scale listed in self.opt.scales.
    for scale in self.opt.scales:
        disp = outputs[("disp", scale)]
        if self.opt.v1_multiscale:
            source_scale = scale
        else:
            # Resize the disparity to (opt.height, opt.width) so that all
            # warping below is done against the scale-0 images.
            disp = F.interpolate(
                disp, [self.opt.height, self.opt.width],
                mode="bilinear", align_corners=False)
            source_scale = 0

        # Convert disparity to depth using the configured depth range.
        # (Per the original annotation the disparity is rescaled to
        # [0.01, 10] and inverted -- that range depends on min_depth /
        # max_depth; confirm against disp_to_depth.)
        _, depth = disp_to_depth(disp, self.opt.min_depth, self.opt.max_depth)

        # Keep the depth map so later stages can read it from outputs.
        outputs[("depth", 0, scale)] = depth

        # With stereo-only training the only source frame id is "s".
        for frame_id in self.opt.frame_ids[1:]:
            if frame_id == "s":
                T = inputs["stereo_T"]
            else:
                T = outputs[("cam_T_cam", 0, frame_id)]

            # from the authors of https://arxiv.org/abs/1712.00175
            if self.opt.pose_model_type == "posecnn":
                axisangle = outputs[("axisangle", 0, frame_id)]
                translation = outputs[("translation", 0, frame_id)]

                # Scale the translation by the per-image mean inverse depth.
                inv_depth = 1 / depth
                mean_inv_depth = inv_depth.mean(3, True).mean(2, True)

                # NOTE(review): `frame_id < 0` assumes an integer frame id;
                # with frame_id == "s" this comparison raises TypeError on
                # Python 3. The third argument is presumably an "invert"
                # flag -- confirm against transformation_from_parameters.
                T = transformation_from_parameters(
                    axisangle[:, 0],
                    translation[:, 0] * mean_inv_depth[:, 0],
                    frame_id < 0)

            # Back-project the depth map into a 3D point cloud.
            cam_points = self.backproject_depth[source_scale](
                depth, inputs[("inv_K", source_scale)])

            # Project the 3D points into the source view to get 2D
            # sampling coordinates.
            pix_coords = self.project_3d[source_scale](
                cam_points, inputs[("K", source_scale)], T)

            # Store the sampling grid.
            outputs[("sample", frame_id, scale)] = pix_coords

            # Each output pixel (x, y) takes its channel values from the
            # source image at (x', y'), where x' and y' are components [0]
            # and [1] of the last dimension of the sampling grid at (x, y).
            # NOTE(review): align_corners is left at the library default,
            # which differs between PyTorch versions -- confirm which
            # convention project_3d's normalisation expects.
            outputs[("color", frame_id, scale)] = F.grid_sample(
                inputs[("color", frame_id, source_scale)],
                outputs[("sample", frame_id, scale)],
                padding_mode="border")

            if not self.opt.disable_automasking:
                # The un-warped source image is kept alongside the warped one.
                outputs[("color_identity", frame_id, scale)] = \
                    inputs[("color", frame_id, source_scale)]
loss值計算
loss值由下面這個函數來獲取。
def compute_losses(self, inputs, outputs):
    """Compute the reprojection and smoothness losses for a minibatch.

    Args:
        inputs: Batch dictionary; reads ``("color", frame_id, scale)``.
        outputs: Prediction dictionary; reads ``("disp", scale)``,
            ``("color", frame_id, scale)`` and, when the predictive mask is
            used, ``outputs["predictive_mask"]["disp", scale]``.

    Returns:
        dict: ``"loss/{scale}"`` for every scale in ``self.opt.scales`` plus
        ``"loss"``, the sum of the per-scale losses divided by
        ``self.num_scales``.

    Side effects:
        Unless ``self.opt.disable_automasking`` is set,
        ``outputs["identity_selection/{scale}"]`` is written for every scale.
    """
    losses = {}
    total_loss = 0

    # The loss is accumulated scale by scale.
    for scale in self.opt.scales:
        loss = 0
        reprojection_losses = []

        if self.opt.v1_multiscale:
            source_scale = scale
        else:
            source_scale = 0

        # Disparity predicted at this scale.
        disp = outputs[("disp", scale)]
        # Target-frame image at this scale (used by the smoothness term).
        color = inputs[("color", 0, scale)]
        # Target-frame image at the source scale (scale 0 unless
        # v1_multiscale is enabled).
        target = inputs[("color", 0, source_scale)]

        # With stereo-only training the only source frame id is "s".
        for frame_id in self.opt.frame_ids[1:]:
            # Warped source image produced for this scale (depth ->
            # point cloud -> 2D coordinates -> sampled color image).
            pred = outputs[("color", frame_id, scale)]
            # Photometric error between the warped image and the target.
            reprojection_losses.append(self.compute_reprojection_loss(pred, target))

        reprojection_losses = torch.cat(reprojection_losses, 1)

        if not self.opt.disable_automasking:
            # Identity loss: compare the *un-warped* source images directly
            # with the target image.
            identity_reprojection_losses = []
            for frame_id in self.opt.frame_ids[1:]:
                pred = inputs[("color", frame_id, source_scale)]
                identity_reprojection_losses.append(
                    self.compute_reprojection_loss(pred, target))

            identity_reprojection_losses = torch.cat(identity_reprojection_losses, 1)

            if self.opt.avg_reprojection:
                identity_reprojection_loss = identity_reprojection_losses.mean(
                    1, keepdim=True)
            else:
                # save both images, and do min all at once below
                identity_reprojection_loss = identity_reprojection_losses

        elif self.opt.predictive_mask:
            # use the predicted mask (only reached when automasking is off)
            mask = outputs["predictive_mask"]["disp", scale]
            if not self.opt.v1_multiscale:
                mask = F.interpolate(
                    mask, [self.opt.height, self.opt.width],
                    mode="bilinear", align_corners=False)

            reprojection_losses *= mask

            # add a loss pushing mask to 1 (using nn.BCELoss for stability).
            # ones_like keeps the target on the mask's device and dtype
            # instead of forcing CUDA.
            weighting_loss = 0.2 * nn.BCELoss()(mask, torch.ones_like(mask))
            loss += weighting_loss.mean()

        if self.opt.avg_reprojection:
            reprojection_loss = reprojection_losses.mean(1, keepdim=True)
        else:
            reprojection_loss = reprojection_losses

        if not self.opt.disable_automasking:
            # add random numbers to break ties; the noise is created on the
            # loss tensor's own device so this also runs without CUDA.
            identity_reprojection_loss += torch.randn_like(
                identity_reprojection_loss) * 0.00001

            # Identity channels come first, reprojection channels after.
            combined = torch.cat((identity_reprojection_loss, reprojection_loss), dim=1)
        else:
            combined = reprojection_loss

        if combined.shape[1] == 1:
            to_optimise = combined
        else:
            # Per-pixel minimum over all candidate channels.
            to_optimise, idxs = torch.min(combined, dim=1)

        if not self.opt.disable_automasking:
            # 1.0 where the minimum came from a reprojection channel (index
            # past the identity channels), 0.0 where an identity channel won.
            outputs["identity_selection/{}".format(scale)] = (
                idxs > identity_reprojection_loss.shape[1] - 1).float()

        loss += to_optimise.mean()

        # Normalise the disparity by its per-image mean before the
        # smoothness term; 1e-7 guards against division by zero.
        mean_disp = disp.mean(2, True).mean(3, True)
        norm_disp = disp / (mean_disp + 1e-7)
        smooth_loss = get_smooth_loss(norm_disp, color)

        # The smoothness weight is halved for every coarser scale.
        loss += self.opt.disparity_smoothness * smooth_loss / (2 ** scale)
        total_loss += loss
        losses["loss/{}".format(scale)] = loss

    total_loss /= self.num_scales
    losses["loss"] = total_loss
    return losses
結論
關於圖片預測和損失值計算,本文只是在代碼裏面做了一些簡單的註釋,其實還有很多細節沒有深入研究,比如說SSIM值得計算等,留待後面文章的深入。