pytorch 從頭開始YOLOV3(二):訓練模型

1.基本流程

pytorch在訓練過程中有一個很基本的流程,正常情況下按照這個流程就能夠訓練模型:

1.加載模型,2.初始化數據,3.預定義優化器,4.訓練

    # Build the network from the configuration file.
    model = Darknet(opt.model_config_path)
    # Module.apply runs the given function on every submodule; it is used
    # here to initialise the weights.
    # https://pytorch.org/docs/stable/nn.html?highlight=apply#torch.nn.Module.apply
    model.apply(weights_init_normal)
    # Pick the device once so that the model and every batch end up on the
    # same device; the loop no longer assumes CUDA when it is unavailable
    # or disabled through opt.use_cuda.
    use_cuda = torch.cuda.is_available() and opt.use_cuda
    device = torch.device("cuda" if use_cuda else "cpu")
    model = model.to(device)

    # Optimizer: only parameters that require gradients are updated.
    optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, model.parameters()))

    for epoch in range(opt.epochs):
        # Wrapping the dataloader itself (rather than enumerate(...)) lets
        # tqdm read its length and display the total.
        for _, imgs, targets in tqdm(dataloader):
            imgs = imgs.to(device)
            targets = targets.to(device=device, dtype=torch.float32)
            optimizer.zero_grad()
            # Called with targets, the model returns the training loss.
            loss = model(imgs, targets)
            loss.backward()
            optimizer.step()

2.模型

本圖引用:https://blog.csdn.net/leviopku/article/details/82660381

yolov3採用配置文件來構建網絡層.由於yolov3的網絡層數目較多,用配置文件來描述網絡層可以使代碼量少很多.配置文件中每個網絡塊都有一個題名(type),按照題名來構建對應的網絡層.一共有六種模塊,分別對應於yolov3網絡的不同結構.

def create_modules(module_defs):
    """Build the list of network layers described by parsed config blocks.

    Args:
        module_defs: list of dicts, one per config block. The first entry
            holds the network hyper-parameters (``channels`` and ``height``
            are read here); every following entry describes one layer and
            carries a ``type`` key. The list is modified in place: the
            hyper-parameter block is popped off, so afterwards it contains
            only layer blocks (behaviour kept from the original; callers
            may rely on it).

    Returns:
        ``(hyperparams, module_list)`` where ``module_list`` is an
        ``nn.ModuleList`` holding one ``nn.Sequential`` per layer block.
    """
    hyperparams = module_defs.pop(0)
    # output_filters[0] is the input channel count; output_filters[k + 1] is
    # the output channel count of layer k.
    output_filters = [int(hyperparams["channels"])]
    module_list = nn.ModuleList()
    for i, module_def in enumerate(module_defs):
        modules = nn.Sequential()  # sub-sequence for this config block
        layer_type = module_def["type"]
        # Layers that do not change the channel count (maxpool, upsample,
        # yolo) inherit it from the previous layer. Setting it here also
        # avoids an unbound `filters` when such a layer comes first.
        filters = output_filters[-1]

        if layer_type == "convolutional":  # conv (+ batch norm) (+ leaky)
            bn = int(module_def["batch_normalize"])
            filters = int(module_def["filters"])
            kernel_size = int(module_def["size"])
            pad = (kernel_size - 1) // 2 if int(module_def["pad"]) else 0
            modules.add_module(
                "conv_%d" % i,
                nn.Conv2d(
                    in_channels=output_filters[-1],
                    out_channels=filters,
                    kernel_size=kernel_size,
                    stride=int(module_def["stride"]),
                    padding=pad,
                    # The conv bias is dropped when batch norm follows.
                    bias=not bn,
                ),
            )
            if bn:
                modules.add_module("batch_norm_%d" % i,
                                   nn.BatchNorm2d(filters))
            if module_def["activation"] == "leaky":
                modules.add_module("leaky_%d" % i, nn.LeakyReLU(0.1))

        elif layer_type == "maxpool":
            kernel_size = int(module_def["size"])
            stride = int(module_def["stride"])
            if kernel_size == 2 and stride == 1:
                # Pad one pixel on the right and bottom so that the 2x2,
                # stride-1 pool keeps the spatial size.
                modules.add_module("_debug_padding_%d" % i,
                                   nn.ZeroPad2d((0, 1, 0, 1)))
            modules.add_module(
                "maxpool_%d" % i,
                nn.MaxPool2d(
                    kernel_size=kernel_size,
                    stride=stride,
                    padding=(kernel_size - 1) // 2,
                ),
            )

        elif layer_type == "upsample":
            upsample = nn.Upsample(scale_factor=int(module_def["stride"]),
                                   mode="nearest")
            modules.add_module("upsample_%d" % i, upsample)

        elif layer_type == "route":  # placeholder layer
            layers = [int(x) for x in module_def["layers"].split(",")]
            # Index relative to the layer outputs only (skip the input
            # entry) so that absolute, non-negative layer indices resolve
            # to the right layer; negative indices are unaffected.
            layer_filters = output_filters[1:]
            filters = sum(layer_filters[layer_i] for layer_i in layers)
            modules.add_module("route_%d" % i, EmptyLayer())

        elif layer_type == "shortcut":  # placeholder layer
            filters = output_filters[1:][int(module_def["from"])]
            modules.add_module("shortcut_%d" % i, EmptyLayer())

        elif layer_type == "yolo":  # detection layer
            anchor_idxs = [int(x) for x in module_def["mask"].split(",")]
            # The flat anchor list is w0,h0,w1,h1,...; pair the values up
            # and keep only the anchors selected by the mask.
            flat = [int(x) for x in module_def["anchors"].split(",")]
            anchor_pairs = [(flat[k], flat[k + 1])
                            for k in range(0, len(flat), 2)]
            anchors = [anchor_pairs[k] for k in anchor_idxs]
            num_classes = int(module_def["classes"])
            img_height = int(hyperparams["height"])
            yolo_layer = YOLOLayer(anchors, num_classes, img_height)
            modules.add_module("yolo_%d" % i, yolo_layer)

        # Register the block and record its output channel count.
        module_list.append(modules)
        output_filters.append(filters)

    return hyperparams, module_list

3.YOLO層

該層對應的是網絡的最後一層(y1,y2,y3).首先獲得預測結果prediction(x,y,w,h,conf,cls).

# Split the channel axis into (anchor, attribute) and move the attribute
# axis last, so that prediction[..., k] selects attribute k for every
# anchor and grid cell.
prediction = x.view(nB, nA, self.bbox_attrs, nG, nG).permute(
     0, 1, 3, 4, 2).contiguous()  # reorder dims; contiguous() lays the permuted tensor out contiguously in memory
# e.g. prediction.shape: (1, 3, 13, 13, 85)
# Decode the raw outputs. x and y are the predicted box-centre offsets
# relative to the top-left corner of the grid cell.
x = torch.sigmoid(prediction[..., 0])  # Center x, e.g. (1, 3, 13, 13)
y = torch.sigmoid(prediction[..., 1])  # Center y
w = prediction[..., 2]  # Width (raw value; exponentiated later)
h = prediction[..., 3]  # Height (raw value; exponentiated later)
pred_conf = torch.sigmoid(prediction[..., 4])  # bbox confidence
pred_cls = torch.sigmoid(prediction[..., 5:])  # per-class probability

再計算網格單元左上角座標和錨節點對應比例,這個錨節點是聚類計算過的大小,大小固定,所以直接可以使用.

        # grid_x holds the column index and grid_y the row index of every
        # grid cell, both shaped (1, 1, nG, nG).
        grid_x = torch.arange(nG).repeat(nG, 1).view(
            [1, 1, nG, nG]).type(FloatTensor)
        grid_y = torch.arange(nG).repeat(nG, 1).t().view(
            [1, 1, nG, nG]).type(FloatTensor)  # top-left coordinates of each grid cell
        # Divide the anchors by the stride to express them in grid units.
        scaled_anchors = FloatTensor(
            [(a_w / stride, a_h / stride) for a_w, a_h in self.anchors])
        anchor_w = scaled_anchors[:, 0:1].view((1, nA, 1, 1))
        anchor_h = scaled_anchors[:, 1:2].view((1, nA, 1, 1))  # anchor priors

再通過相對座標和偏移量計算實際座標.

        # Buffer with the same shape as the four box attributes; every
        # channel is filled in below.
        # NOTE(review): FloatTensor is presumably a torch (cuda) FloatTensor
        # alias, in which case the buffer starts uninitialised - confirm.
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        # .data is used, so pred_boxes does not take part in autograd.
        pred_boxes[..., 0] = x.data + grid_x
        pred_boxes[..., 1] = y.data + grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * anchor_h  # resulting box coordinates

再將真值標籤轉換爲相對於 grid 的真值標籤.

最後計算損失

            # Box regression terms, evaluated only at the positions selected
            # by `mask` (presumably the cells/anchors responsible for a
            # ground-truth box - defined outside this excerpt).
            loss_x = self.mse_loss(x[mask], tx[mask])
            loss_y = self.mse_loss(y[mask], ty[mask])
            loss_w = self.mse_loss(w[mask], tw[mask])
            loss_h = self.mse_loss(h[mask], th[mask])
            # Confidence term: the positions selected by conf_mask_false and
            # by conf_mask_true are scored separately and added together.
            loss_conf = self.bce_loss(pred_conf[conf_mask_false],tconf[conf_mask_false]) + self.bce_loss(
                pred_conf[conf_mask_true], tconf[conf_mask_true]
            )
            # Classification term, scaled by 1 / nB; argmax turns the tcls
            # rows into class indices.
            # NOTE(review): if ce_loss is nn.CrossEntropyLoss it expects raw
            # logits, while pred_cls has already been passed through
            # sigmoid - confirm this is intended.
            loss_cls = (1 / nB) * \
                self.ce_loss(pred_cls[mask], torch.argmax(tcls[mask], 1))
            loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

4.總網絡

總網絡編寫了卷積層,cat連接層,點加層,輸出層.

    def forward(self, x, targets=None):
        """Run the network layer by layer.

        Args:
            x: input tensor fed to the first layer.
            targets: ground-truth targets. When given, the pass runs in
                training mode and every ``yolo`` layer is called with them.

        Returns:
            In training mode, the sum of the values returned first by each
            ``yolo`` layer (the total loss). Otherwise, the ``yolo`` layer
            outputs concatenated along dimension 1.

        Side effects:
            ``self.losses`` is reset on every call. In training mode it
            accumulates the extra values returned by each ``yolo`` layer
            under the names in ``self.loss_names``; ``recall`` and
            ``precision`` are then averaged over the ``yolo`` layers.
        """
        is_training = targets is not None
        output = []
        self.losses = defaultdict(float)
        layer_outputs = []  # output of every layer, indexed by layer number
        for module_def, module in zip(self.module_defs, self.module_list):
            layer_type = module_def["type"]
            if layer_type in ("convolutional", "upsample", "maxpool"):
                x = module(x)
            elif layer_type == "route":  # concatenate earlier outputs
                # Distinct comprehension names keep the activation `x`
                # from being shadowed.
                route_idxs = [int(idx)
                              for idx in module_def["layers"].split(",")]
                x = torch.cat([layer_outputs[idx] for idx in route_idxs], 1)
            elif layer_type == "shortcut":  # element-wise residual add
                from_idx = int(module_def["from"])
                x = layer_outputs[-1] + layer_outputs[from_idx]
            elif layer_type == "yolo":
                if is_training:
                    # Training: call the detection layer (element 0 of the
                    # block) directly so it can receive the targets.
                    yolo_out = module[0](x, targets)
                    x = yolo_out[0]  # total loss of this detection layer
                    for name, loss in zip(self.loss_names, yolo_out[1:]):
                        self.losses[name] += loss
                else:
                    # Inference: the block returns the detections.
                    x = module(x)
                output.append(x)
            layer_outputs.append(x)

        # Average recall/precision over the detection layers that actually
        # ran instead of assuming exactly three of them.
        num_yolo_layers = max(len(output), 1)
        self.losses["recall"] /= num_yolo_layers
        self.losses["precision"] /= num_yolo_layers
        return sum(output) if is_training else torch.cat(output, 1)

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章