1.基本流程
pytorch的訓練過程有一個很基本的流程,正常情況下按照這個流程就能夠訓練模型:
1.加載模型,2.初始化數據,3.預定義優化器,4.訓練
# Build the network from its configuration file.
model = Darknet(opt.model_config_path)
# Module.apply calls the given function on every sub-module, which is used
# here to initialise the weights of all layers.
# https://pytorch.org/docs/stable/nn.html?highlight=apply#torch.nn.Module.apply
model.apply(weights_init_normal)

# Choose the device once so the model and every batch land on the same one.
# Moving the batches to CUDA unconditionally fails on CPU-only machines and
# whenever opt.use_cuda is switched off.
use_cuda = torch.cuda.is_available() and opt.use_cuda
device = torch.device("cuda" if use_cuda else "cpu")
model = model.to(device)

# Optimiser: only parameters that require gradients are handed to Adam.
optimizer = torch.optim.Adam(
    filter(lambda p: p.requires_grad, model.parameters()))

for epoch in range(opt.epochs):
    for ii, (_, imgs, targets) in tqdm(enumerate(dataloader)):
        imgs = imgs.to(device)
        targets = targets.to(device=device, dtype=torch.float32)

        optimizer.zero_grad()
        # Called with targets, the model's return value is used as the loss.
        loss = model(imgs, targets)
        loss.backward()
        optimizer.step()
2.模型
本圖引用:https://blog.csdn.net/leviopku/article/details/82660381
yolov3採用配置文件來構建網絡層.由於yolov3的網絡層數目較多,用配置文件來描述網絡層可以使代碼量少很多.配置文件中每個網絡塊都有一個題名,按照題名劃分來構建網絡層.一共有六種模塊,對應於yolov3網絡的不同結構.
def create_modules(module_defs):
    """Build the network's layer blocks from parsed configuration entries.

    The first entry of ``module_defs`` holds the network hyper-parameters. It
    is popped off the list, so the caller's list is modified in place and
    afterwards contains exactly one entry per returned module.

    Args:
        module_defs: list of dicts, one per configuration block. Every block
            has a ``"type"`` key; the other keys are read as strings and
            depend on the type (``convolutional``, ``maxpool``, ``upsample``,
            ``route``, ``shortcut`` or ``yolo``).

    Returns:
        A ``(hyperparams, module_list)`` tuple: the popped hyper-parameter
        dict and an ``nn.ModuleList`` with one ``nn.Sequential`` per
        remaining block.
    """
    hyperparams = module_defs.pop(0)
    # output_filters[0] is the channel count of the network input and
    # output_filters[k + 1] is the channel count produced by layer k.
    output_filters = [int(hyperparams["channels"])]
    # Blocks that keep the channel count (maxpool, upsample, yolo) reuse the
    # previous value. Seeding it with the input channels keeps a config that
    # starts with such a block from failing on an unbound local.
    filters = output_filters[-1]
    module_list = nn.ModuleList()

    for i, module_def in enumerate(module_defs):
        modules = nn.Sequential()  # one sub-structure of the network
        layer_type = module_def["type"]

        if layer_type == "convolutional":  # conv (+ batch norm) (+ leaky ReLU)
            bn = int(module_def["batch_normalize"])
            filters = int(module_def["filters"])
            kernel_size = int(module_def["size"])
            pad = (kernel_size - 1) // 2 if int(module_def["pad"]) else 0
            modules.add_module(
                "conv_%d" % i,
                nn.Conv2d(
                    in_channels=output_filters[-1],
                    out_channels=filters,
                    kernel_size=kernel_size,
                    stride=int(module_def["stride"]),
                    padding=pad,
                    # No conv bias when batch norm follows.
                    bias=not bn,
                ),
            )
            if bn:
                modules.add_module("batch_norm_%d" % i, nn.BatchNorm2d(filters))
            if module_def["activation"] == "leaky":
                modules.add_module("leaky_%d" % i, nn.LeakyReLU(0.1))

        elif layer_type == "maxpool":
            kernel_size = int(module_def["size"])
            stride = int(module_def["stride"])
            if kernel_size == 2 and stride == 1:
                # Pad one pixel on the right and bottom before pooling.
                padding = nn.ZeroPad2d((0, 1, 0, 1))
                modules.add_module("_debug_padding_%d" % i, padding)
            maxpool = nn.MaxPool2d(
                kernel_size=kernel_size,
                stride=stride,
                padding=int((kernel_size - 1) // 2),
            )
            modules.add_module("maxpool_%d" % i, maxpool)

        elif layer_type == "upsample":
            upsample = nn.Upsample(
                scale_factor=int(module_def["stride"]), mode="nearest")
            modules.add_module("upsample_%d" % i, upsample)

        elif layer_type == "route":  # placeholder layer
            layers = [int(x) for x in module_def["layers"].split(",")]
            # Index the per-layer outputs only (skip the input entry) so that
            # a non-negative index k means "layer k", the same way the
            # forward pass indexes layer_outputs. Negative indices are
            # unaffected.
            layer_channels = output_filters[1:]
            filters = sum(layer_channels[layer_i] for layer_i in layers)
            modules.add_module("route_%d" % i, EmptyLayer())

        elif layer_type == "shortcut":  # placeholder layer
            # Same indexing rule as for route blocks.
            filters = output_filters[1:][int(module_def["from"])]
            modules.add_module("shortcut_%d" % i, EmptyLayer())

        elif layer_type == "yolo":  # detection layer
            anchor_idxs = [int(x) for x in module_def["mask"].split(",")]
            # The anchors are a flat "w,h,w,h,..." list: pair them up, then
            # keep only the pairs selected by the mask.
            values = [int(x) for x in module_def["anchors"].split(",")]
            pairs = [(values[k], values[k + 1])
                     for k in range(0, len(values), 2)]
            anchors = [pairs[k] for k in anchor_idxs]
            num_classes = int(module_def["classes"])
            img_height = int(hyperparams["height"])
            yolo_layer = YOLOLayer(anchors, num_classes, img_height)
            modules.add_module("yolo_%d" % i, yolo_layer)

        # Register the block and the number of channels it outputs.
        module_list.append(modules)
        output_filters.append(filters)

    return hyperparams, module_list
3.YOLO層
該層對應的是網絡的最後一層(y1,y2,y3).首先獲得預測結果prediction(x,y,w,h,conf,cls).
# Split the channel axis into (anchor, box attribute) and move the box
# attributes to the last axis; contiguous() stores the permuted result in
# contiguous memory.
prediction = (
    x.view(nB, nA, self.bbox_attrs, nG, nG)
    .permute(0, 1, 3, 4, 2)
    .contiguous()
)
# e.g. prediction.shape: (1, 3, 13, 13, 85)
# x and y are the predicted box centre expressed relative to the top-left
# corner of the grid cell.
x = prediction[..., 0].sigmoid()  # centre x, e.g. (1, 3, 13, 13)
y = prediction[..., 1].sigmoid()  # centre y
w = prediction[..., 2]  # width
h = prediction[..., 3]  # height
pred_conf = prediction[..., 4].sigmoid()  # box confidence
pred_cls = prediction[..., 5:].sigmoid()  # per-class probability
再計算網格單元左上角座標,以及錨框(anchor)按stride縮放到網格尺度後的大小.錨框的大小是事先通過聚類計算得到的,大小固定,所以可以直接使用.
# cell_index[r, c] == c; its transpose therefore holds the row index.
cell_index = torch.arange(nG).repeat(nG, 1)
grid_x = cell_index.view([1, 1, nG, nG]).type(FloatTensor)
# Top-left corner coordinates of the grid cells.
grid_y = cell_index.t().view([1, 1, nG, nG]).type(FloatTensor)

# Anchor sizes divided by the stride.
scaled_anchors = FloatTensor(
    [(width / stride, height / stride) for width, height in self.anchors]
)
# Priors, shaped to broadcast over the batch and both grid axes.
anchor_w = scaled_anchors[:, 0:1].view(1, nA, 1, 1)
anchor_h = scaled_anchors[:, 1:2].view(1, nA, 1, 1)
再通過相對座標和偏移量計算實際座標.
# Decode the raw predictions into box coordinates: the centre is the cell
# corner plus the offset, the size is the anchor scaled by exp() of the raw
# value. `.data` is used so these writes are not tracked by autograd.
pred_boxes = FloatTensor(prediction[..., :4].shape)
pred_boxes[..., 0] = grid_x + x.data
pred_boxes[..., 1] = grid_y + y.data
pred_boxes[..., 2] = anchor_w * w.data.exp()
pred_boxes[..., 3] = anchor_h * h.data.exp()
再把真值標籤轉換成相對於網格(grid)的真值標籤.
最後計算損失
# Coordinate terms: MSE between prediction and target on the masked entries.
loss_x, loss_y, loss_w, loss_h = (
    self.mse_loss(pred[mask], target[mask])
    for pred, target in ((x, tx), (y, ty), (w, tw), (h, th))
)

# Confidence term: BCE over the conf_mask_false entries plus BCE over the
# conf_mask_true entries.
conf_loss_false = self.bce_loss(
    pred_conf[conf_mask_false], tconf[conf_mask_false])
conf_loss_true = self.bce_loss(
    pred_conf[conf_mask_true], tconf[conf_mask_true])
loss_conf = conf_loss_false + conf_loss_true

# Class term: the target class is the arg-max of the target class vector;
# the result is scaled by 1 / nB.
target_class = torch.argmax(tcls[mask], 1)
loss_cls = (1 / nB) * self.ce_loss(pred_cls[mask], target_class)

loss = (
    loss_x + loss_y + loss_w + loss_h
    + loss_conf + loss_cls
)
4.總網絡
總網絡編寫了卷積層,cat連接層,點加層,輸出層.
def forward(self, x, targets=None):
    """Run the network and return the summed loss or the detections.

    Args:
        x: input tensor fed to the first layer.
        targets: ground-truth targets. When given, the call is treated as a
            training pass; when ``None``, as an inference pass.

    Returns:
        With ``targets``: the sum of the total losses returned by the YOLO
        layers. Without ``targets``: the YOLO layer outputs concatenated
        along dimension 1.

    Side effects:
        ``self.losses`` is reset on every call. During training it
        accumulates the per-component values returned by each YOLO layer
        under the names in ``self.loss_names``; ``"recall"`` and
        ``"precision"`` are then averaged over the YOLO layers.
    """
    is_training = targets is not None
    output = []  # one entry per YOLO layer
    self.losses = defaultdict(float)
    layer_outputs = []  # output of every layer, read by route/shortcut

    for module_def, module in zip(self.module_defs, self.module_list):
        layer_type = module_def["type"]
        if layer_type in ["convolutional", "upsample", "maxpool"]:
            x = module(x)
        elif layer_type == "route":
            # Concatenate the outputs of the referenced layers along dim 1.
            source_idxs = [int(idx) for idx in module_def["layers"].split(",")]
            x = torch.cat([layer_outputs[idx] for idx in source_idxs], 1)
        elif layer_type == "shortcut":
            # Element-wise add of the previous output and the referenced one.
            source_idx = int(module_def["from"])
            x = layer_outputs[-1] + layer_outputs[source_idx]
        elif layer_type == "yolo":
            if is_training:
                # The YOLO layer returns the total loss first, followed by
                # the individual components in self.loss_names order.
                x, *losses = module[0](x, targets)
                for name, loss in zip(self.loss_names, losses):
                    self.losses[name] += loss
            else:
                x = module(x)
            output.append(x)
        layer_outputs.append(x)

    # Average over the YOLO layers actually present rather than assuming
    # three, so configs with a different number of heads report correct
    # values. max(..., 1) avoids dividing by zero when there is none.
    num_yolo_layers = max(len(output), 1)
    self.losses["recall"] /= num_yolo_layers
    self.losses["precision"] /= num_yolo_layers
    return sum(output) if is_training else torch.cat(output, 1)