base model Part 4: DarkNet and VovNet, designed for object detection


DarkNet

The DarkNet series contains only two networks, DarkNet19 and DarkNet53, proposed in the YOLOv2 and YOLOv3 papers respectively. Note that the author trains and evaluates DarkNet on ImageNet at a resolution of 256x256.
The DarkNet network structure is as follows:

import torch
import torch.nn as nn

root_path = 'empty'

__all__ = [
    'darknet19',
    'darknet53',
]

model_urls = {
    'darknet19':
    '{}/darknet/darknet19-input256-epoch100-acc73.868.pth'.format(root_path),
    'darknet53':
    '{}/darknet/darknet53-input256-epoch100-acc77.008.pth'.format(root_path),
}


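# Basic conv -> (optional) BatchNorm -> (optional) LeakyReLU(0.1) unit shared by
# DarkNet19 and DarkNet53.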
class ConvBnActBlock(nn.Module):
    def __init__(self,
                 inplanes,
                 planes,
                 kernel_size,
                 stride,
                 padding=1,
                 groups=1,
                 has_bn=True,
                 has_act=True):
        super().__init__()
        self.has_bn = has_bn
        self.has_act = has_act
        self.conv = nn.Conv2d(inplanes,
                              planes,
                              kernel_size,
                              stride=stride,
                              padding=padding,
                              groups=groups,
                              bias=False)
        if self.has_bn:
            self.bn = nn.BatchNorm2d(planes)
        if self.has_act:
            self.act = nn.LeakyReLU(0.1, inplace=True)

    def forward(self, x):
        x = self.conv(x)
        if self.has_bn:
            x = self.bn(x)
        if self.has_act:
            x = self.act(x)

        return x


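# DarkNet19 stage: alternates 3x3 convs (inplanes -> planes) with 1x1 convs
# (planes -> inplanes); an optional 2x2 max pool halves the spatial resolution.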
class Darknet19Block(nn.Module):
    def __init__(self, inplanes, planes, layer_num, use_maxpool=False):
        super(Darknet19Block, self).__init__()
        self.use_maxpool = use_maxpool
        layers = []
        for i in range(0, layer_num):
            if i % 2 == 0:
                layers.append(
                    ConvBnActBlock(inplanes,
                                   planes,
                                   kernel_size=3,
                                   stride=1,
                                   padding=1,
                                   groups=1,
                                   has_bn=True,
                                   has_act=True))
            else:
                layers.append(
                    ConvBnActBlock(planes,
                                   inplanes,
                                   kernel_size=1,
                                   stride=1,
                                   padding=0,
                                   groups=1,
                                   has_bn=True,
                                   has_act=True))

        self.Darknet19Block = nn.Sequential(*layers)
        self.MaxPool = nn.MaxPool2d(kernel_size=2, stride=2)

    def forward(self, x):
        x = self.Darknet19Block(x)

        if self.use_maxpool:
            x = self.MaxPool(x)

        return x


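# DarkNet53 residual block: a 1x1 conv halves the channels, a 3x3 conv restores
# them, and the block input is added back as a shortcut.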
class Darknet53Block(nn.Module):
    def __init__(self, inplanes):
        super(Darknet53Block, self).__init__()
        reduced_planes = int(inplanes * 0.5)
        self.conv1 = ConvBnActBlock(inplanes,
                                    reduced_planes,
                                    kernel_size=1,
                                    stride=1,
                                    padding=0,
                                    groups=1,
                                    has_bn=True,
                                    has_act=True)
        self.conv2 = ConvBnActBlock(reduced_planes,
                                    inplanes,
                                    kernel_size=3,
                                    stride=1,
                                    padding=1,
                                    groups=1,
                                    has_bn=True,
                                    has_act=True)

    def forward(self, inputs):
        x = self.conv1(inputs)
        x = self.conv2(x)
        x = x + inputs

        return x


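# DarkNet19 backbone (YOLOv2): five 2x2 max-pool down-samplings (total stride 32),
# then a 1x1 classification conv and global average pooling instead of a fc layer.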
class Darknet19(nn.Module):
    def __init__(self, num_classes=1000):
        super(Darknet19, self).__init__()
        self.layer1 = ConvBnActBlock(3,
                                     32,
                                     kernel_size=3,
                                     stride=1,
                                     padding=1,
                                     groups=1,
                                     has_bn=True,
                                     has_act=True)
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.layer2 = Darknet19Block(32, 64, 1, use_maxpool=True)
        self.layer3 = Darknet19Block(64, 128, 3, use_maxpool=True)
        self.layer4 = Darknet19Block(128, 256, 3, use_maxpool=True)
        self.layer5 = Darknet19Block(256, 512, 5, use_maxpool=True)
        self.layer6 = Darknet19Block(512, 1024, 5, use_maxpool=False)
        self.layer7 = ConvBnActBlock(1024,
                                     num_classes,
                                     kernel_size=1,
                                     stride=1,
                                     padding=0,
                                     groups=1,
                                     has_bn=False,
                                     has_act=False)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight,
                                        mode='fan_out',
                                        nonlinearity='leaky_relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        x = self.layer1(x)
        x = self.maxpool1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = self.layer6(x)
        x = self.layer7(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)

        return x


class Darknet53(nn.Module):
    def __init__(self, num_classes=1000):
        super(Darknet53, self).__init__()
        self.conv1 = ConvBnActBlock(3,
                                    32,
                                    kernel_size=3,
                                    stride=1,
                                    padding=1,
                                    groups=1,
                                    has_bn=True,
                                    has_act=True)
        self.conv2 = ConvBnActBlock(32,
                                    64,
                                    kernel_size=3,
                                    stride=2,
                                    padding=1,
                                    groups=1,
                                    has_bn=True,
                                    has_act=True)
        self.block1 = self.make_layer(inplanes=64, num_blocks=1)
        self.conv3 = ConvBnActBlock(64,
                                    128,
                                    kernel_size=3,
                                    stride=2,
                                    padding=1,
                                    groups=1,
                                    has_bn=True,
                                    has_act=True)
        self.block2 = self.make_layer(inplanes=128, num_blocks=2)
        self.conv4 = ConvBnActBlock(128,
                                    256,
                                    kernel_size=3,
                                    stride=2,
                                    padding=1,
                                    groups=1,
                                    has_bn=True,
                                    has_act=True)
        self.block3 = self.make_layer(inplanes=256, num_blocks=8)
        self.conv5 = ConvBnActBlock(256,
                                    512,
                                    kernel_size=3,
                                    stride=2,
                                    padding=1,
                                    groups=1,
                                    has_bn=True,
                                    has_act=True)
        self.block4 = self.make_layer(inplanes=512, num_blocks=8)
        self.conv6 = ConvBnActBlock(512,
                                    1024,
                                    kernel_size=3,
                                    stride=2,
                                    padding=1,
                                    groups=1,
                                    has_bn=True,
                                    has_act=True)
        self.block5 = self.make_layer(inplanes=1024, num_blocks=4)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(1024, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight,
                                        mode='fan_out',
                                        nonlinearity='leaky_relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.block1(x)
        x = self.conv3(x)
        x = self.block2(x)
        x = self.conv4(x)
        x = self.block3(x)
        x = self.conv5(x)
        x = self.block4(x)
        x = self.conv6(x)
        x = self.block5(x)
        x = self.avgpool(x)
        x = x.view(-1, 1024)
        x = self.fc(x)

        return x

    def make_layer(self, inplanes, num_blocks):
        layers = []
        for _ in range(0, num_blocks):
            layers.append(Darknet53Block(inplanes))
        return nn.Sequential(*layers)


def darknet19(pretrained=False, progress=True, **kwargs):
    model = Darknet19(**kwargs)
    # only load state_dict()
    if pretrained:
        model.load_state_dict(
            torch.load(model_urls['darknet19'],
                       map_location=torch.device('cpu')))
    return model


def darknet53(pretrained=False, progress=True, **kwargs):
    model = Darknet53(**kwargs)
    # only load state_dict()
    if pretrained:
        model.load_state_dict(
            torch.load(model_urls['darknet53'],
                       map_location=torch.device('cpu')))
    return model

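As a quick sanity check, both backbones can be instantiated and run on a 256x256 input as in the following minimal sketch (it assumes the code above is saved as darknet.py; that filename is only an illustration):

import torch

# a minimal sanity check; assumes the code above is saved as darknet.py
from darknet import darknet19, darknet53

net19 = darknet19()
net53 = darknet53()
x = torch.randn(1, 3, 256, 256)
print(net19(x).shape)  # torch.Size([1, 1000])
print(net53(x).shape)  # torch.Size([1, 1000])
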
I used the same hyperparameters and training procedure as ResNet in Part 1 of this series, changing only the input size to 256x256. The results are as follows:

Network      Top-1 error (%)
DarkNet-19   26.132
DarkNet-53   22.992

In the YOLOv3 paper, the author reports Top-1 errors of 25.9 for DarkNet19 and 22.8 for DarkNet53 when pretrained on ImageNet, which is close to my results.

All DarkNet pretrained models have been uploaded to Baidu Cloud and are free to download. Link: https://pan.baidu.com/s/18E8LMwwQhcjQBbnYFlet4A , extraction code: hcpp.

VovNet

VovNet comes from the paper "An Energy and GPU-Computation Efficient Backbone Network for Real-Time Object Detection". The author has also open-sourced the code: https://github.com/youngwanLEE/vovnet-detectron2/blob/master/vovnet/vovnet.py . Based on that code, I reimplemented VoVNetV2, which extends the original VoVNet with identity (residual) connections between OSA blocks and an effective SE (eSE) channel-attention module. The network structure is as follows:

import torch
import torch.nn as nn
import torch.nn.functional as F

root_path = 'empty'

__all__ = [
    'VoVNet19_slim_depthwise_se',
    'VoVNet19_depthwise_se',
    'VoVNet19_slim_se',
    'VoVNet19_se',
    'VoVNet39_se',
    'VoVNet57_se',
    'VoVNet99_se',
]

model_urls = {
    'VoVNet19_slim_depthwise_se':
    '{}/vovnet/VoVNet19_slim_depthwise_se-epoch100-acc66.724.pth'.format(
        root_path),
    'VoVNet19_depthwise_se':
    '{}/vovnet/VoVNet19_depthwise_se-epoch100-acc73.042.pth'.format(root_path),
    'VoVNet19_slim_se':
    '{}/vovnet/VoVNet19_slim_se-epoch100-acc69.354.pth'.format(root_path),
    'VoVNet19_se':
    '{}/vovnet/VoVNet19_se-epoch100-acc74.636.pth'.format(root_path),
    'VoVNet39_se':
    '{}/vovnet/VoVNet39_se-epoch100-acc77.338.pth'.format(root_path),
    'VoVNet57_se':
    '{}/vovnet/VoVNet57_se-epoch100-acc77.986.pth'.format(root_path),
    'VoVNet99_se':
    '{}/vovnet/VoVNet99_se-epoch100-acc78.392.pth'.format(root_path),
}

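# Per-variant configuration:
#   stem_channel             : output channels of the three stem convs
#   per_stage_inter_channels : channels of the 3x3 convs inside an OSA block, per stage
#   per_stage_inout_channels : output channels of each stage (after the 1x1 concat conv)
#   per_block_layer_nums     : number of 3x3 convs aggregated in one OSA block
#   per_stage_block_nums     : number of OSA blocks stacked in each of the four stages
#   has_se / has_depthwise   : enable eSE attention / depthwise separable 3x3 convs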
vovnet_configs = {
    'VoVNet19_slim_depthwise_se': {
        'stem_channel': [64, 64, 64],
        'per_stage_inter_channels': [64, 80, 96, 112],
        'per_stage_inout_channels': [112, 256, 384, 512],
        'per_block_layer_nums': 3,
        'per_stage_block_nums': [1, 1, 1, 1],
        "has_se": True,
        'has_depthwise': True,
    },
    'VoVNet19_depthwise_se': {
        'stem_channel': [64, 64, 64],
        'per_stage_inter_channels': [128, 160, 192, 224],
        'per_stage_inout_channels': [256, 512, 768, 1024],
        'per_block_layer_nums': 3,
        'per_stage_block_nums': [1, 1, 1, 1],
        "has_se": True,
        'has_depthwise': True,
    },
    'VoVNet19_slim_se': {
        'stem_channel': [64, 64, 128],
        'per_stage_inter_channels': [64, 80, 96, 112],
        'per_stage_inout_channels': [112, 256, 384, 512],
        'per_block_layer_nums': 3,
        'per_stage_block_nums': [1, 1, 1, 1],
        "has_se": True,
        'has_depthwise': False,
    },
    'VoVNet19_se': {
        'stem_channel': [64, 64, 128],
        'per_stage_inter_channels': [128, 160, 192, 224],
        'per_stage_inout_channels': [256, 512, 768, 1024],
        'per_block_layer_nums': 3,
        'per_stage_block_nums': [1, 1, 1, 1],
        "has_se": True,
        'has_depthwise': False,
    },
    'VoVNet39_se': {
        'stem_channel': [64, 64, 128],
        'per_stage_inter_channels': [128, 160, 192, 224],
        'per_stage_inout_channels': [256, 512, 768, 1024],
        'per_block_layer_nums': 5,
        'per_stage_block_nums': [1, 1, 2, 2],
        "has_se": True,
        'has_depthwise': False,
    },
    'VoVNet57_se': {
        'stem_channel': [64, 64, 128],
        'per_stage_inter_channels': [128, 160, 192, 224],
        'per_stage_inout_channels': [256, 512, 768, 1024],
        'per_block_layer_nums': 5,
        'per_stage_block_nums': [1, 1, 4, 3],
        "has_se": True,
        'has_depthwise': False,
    },
    'VoVNet99_se': {
        'stem_channel': [64, 64, 128],
        'per_stage_inter_channels': [128, 160, 192, 224],
        'per_stage_inout_channels': [256, 512, 768, 1024],
        'per_block_layer_nums': 5,
        'per_stage_block_nums': [1, 3, 9, 3],
        "has_se": True,
        'has_depthwise': False,
    },
}


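# 3x3 conv -> (optional) BatchNorm -> (optional) ReLU; when has_depthwise=True the
# 3x3 conv is replaced by a depthwise 3x3 conv followed by a 1x1 pointwise conv.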
class Conv3x3Block(nn.Module):
    def __init__(self,
                 inplanes,
                 planes,
                 kernel_size=3,
                 stride=1,
                 padding=1,
                 has_bn=True,
                 has_act=True,
                 has_depthwise=False):
        super().__init__()
        self.has_bn = has_bn
        self.has_act = has_act
        if has_depthwise:
            self.conv = nn.Sequential(
                nn.Conv2d(inplanes,
                          planes,
                          kernel_size,
                          stride=stride,
                          padding=padding,
                          groups=planes,
                          bias=False),
                nn.Conv2d(planes,
                          planes,
                          kernel_size=1,
                          stride=1,
                          padding=0,
                          groups=1,
                          bias=False))
        else:
            self.conv = nn.Conv2d(inplanes,
                                  planes,
                                  kernel_size,
                                  stride=stride,
                                  padding=padding,
                                  groups=1,
                                  bias=False)
        if self.has_bn:
            self.bn = nn.BatchNorm2d(planes)
        if self.has_act:
            self.act = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.conv(x)
        if self.has_bn:
            x = self.bn(x)
        if self.has_act:
            x = self.act(x)

        return x


class Conv1x1Block(nn.Module):
    def __init__(self,
                 inplanes,
                 planes,
                 kernel_size=1,
                 stride=1,
                 padding=0,
                 has_bn=True,
                 has_act=True):
        super().__init__()
        self.has_bn = has_bn
        self.has_act = has_act
        self.conv = nn.Conv2d(inplanes,
                              planes,
                              kernel_size,
                              stride=stride,
                              padding=padding,
                              groups=1,
                              bias=False)
        if self.has_bn:
            self.bn = nn.BatchNorm2d(planes)
        if self.has_act:
            self.act = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.conv(x)
        if self.has_bn:
            x = self.bn(x)
        if self.has_act:
            x = self.act(x)

        return x


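# Hard sigmoid: relu6(x + 3) / 6, a cheap piecewise-linear approximation of sigmoid.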
class HardSigmoid(nn.Module):
    def __init__(self, inplace=True):
        super(HardSigmoid, self).__init__()
        self.inplace = inplace

    def forward(self, x):
        return F.relu6(x + 3.0, inplace=self.inplace) / 6.0


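# Effective SE (eSE) from VoVNetV2: global average pooling + a single 1x1 conv with
# no channel reduction + hard sigmoid, used to re-weight the channels of the input.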
class eSEBlock(nn.Module):
    def __init__(self, inplanes, planes):
        super(eSEBlock, self).__init__()
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.conv = nn.Conv2d(inplanes,
                              planes,
                              kernel_size=1,
                              stride=1,
                              padding=0,
                              bias=True)
        self.hardsigmoid = HardSigmoid()

    def forward(self, inputs):
        x = self.avgpool(inputs)
        x = self.conv(x)
        x = self.hardsigmoid(x)
        x = inputs * x

        return x


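# One-Shot Aggregation (OSA) block: a chain of 3x3 convs whose intermediate outputs,
# together with the block input, are concatenated once and fused by a 1x1 conv;
# optionally followed by eSE attention and an identity shortcut (VoVNetV2).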
class OSABlock(nn.Module):
    def __init__(self,
                 inplanes,
                 interplanes,
                 planes,
                 per_block_layer_nums,
                 has_se=False,
                 has_depthwise=False,
                 has_identity=False):
        super(OSABlock, self).__init__()
        self.inplanes = inplanes
        self.interplanes = interplanes
        self.has_se = has_se
        self.has_depthwise = has_depthwise
        self.has_identity = has_identity

        if self.has_depthwise and self.inplanes != self.interplanes:
            self.reduce_conv = Conv1x1Block(inplanes,
                                            interplanes,
                                            kernel_size=1,
                                            stride=1,
                                            padding=0,
                                            has_bn=True,
                                            has_act=True)
        self.OSABlocklayers = nn.ModuleList()

        input_planes = inplanes
        for _ in range(per_block_layer_nums):
            if self.has_depthwise:
                input_planes = interplanes
            self.OSABlocklayers.append(
                Conv3x3Block(input_planes,
                             interplanes,
                             kernel_size=3,
                             stride=1,
                             padding=1,
                             has_bn=True,
                             has_act=True,
                             has_depthwise=self.has_depthwise))
            input_planes = interplanes

        concatplanes = inplanes + per_block_layer_nums * interplanes

        self.concat_conv = Conv1x1Block(concatplanes,
                                        planes,
                                        kernel_size=1,
                                        stride=1,
                                        padding=0,
                                        has_bn=True,
                                        has_act=True)
        if self.has_se:
            self.ese = eSEBlock(planes, planes)

    def forward(self, inputs):
        outputs = []
        outputs.append(inputs)
        if self.has_depthwise and self.inplanes != self.interplanes:
            x = self.reduce_conv(inputs)
        else:
            x = inputs
        for layer in self.OSABlocklayers:
            x = layer(x)
            outputs.append(x)

        x = torch.cat(outputs, dim=1)
        x = self.concat_conv(x)

        if self.has_se:
            x = self.ese(x)

        if self.has_identity:
            x = x + inputs

        return x


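# OSA stage: a stride-2 max pool (skipped for the first stage) followed by one or
# more OSA blocks; blocks after the first keep the stage output width and use an
# identity shortcut.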
class OSAStage(nn.Module):
    def __init__(self,
                 inplanes,
                 interplanes,
                 planes,
                 per_stage_block_nums,
                 per_block_layer_nums,
                 has_se=False,
                 has_depthwise=False,
                 first_stage=False):
        super(OSAStage, self).__init__()
        self.first_stage = first_stage
        if not self.first_stage:
            self.pool = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

        self.has_depthwise = has_depthwise
        self.has_se = has_se
        if per_stage_block_nums > 1:
            self.has_se = False

        identity = False
        input_planes = inplanes
        layers = []
        for i in range(per_stage_block_nums):
            if i > 0:
                input_planes = planes
                identity = True
            if i != per_stage_block_nums - 1:
                self.has_se = False
            layers.append(
                OSABlock(input_planes,
                         interplanes,
                         planes,
                         per_block_layer_nums,
                         has_se=self.has_se,
                         has_depthwise=self.has_depthwise,
                         has_identity=identity))

        self.OSAStageblocks = nn.Sequential(*layers)

    def forward(self, x):
        if not self.first_stage:
            x = self.pool(x)

        x = self.OSAStageblocks(x)

        return x


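# VoVNet backbone: a three-conv stem (total stride 4) followed by four OSA stages,
# global average pooling and a fully connected classifier.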
class VoVNet(nn.Module):
    def __init__(self, vovnet_type, num_classes=1000):
        super(VoVNet, self).__init__()
        vovnet_type_config = vovnet_configs[vovnet_type]
        self.stem_channel = vovnet_type_config['stem_channel']
        self.per_stage_inter_channels = vovnet_type_config[
            'per_stage_inter_channels']
        self.per_stage_inout_channels = vovnet_type_config[
            'per_stage_inout_channels']
        self.per_block_layer_nums = vovnet_type_config['per_block_layer_nums']
        self.per_stage_block_nums = vovnet_type_config['per_stage_block_nums']
        self.has_se = vovnet_type_config['has_se']
        self.has_depthwise = vovnet_type_config['has_depthwise']

        self.stem = nn.Sequential(
            Conv3x3Block(3,
                         self.stem_channel[0],
                         kernel_size=3,
                         stride=2,
                         padding=1,
                         has_bn=True,
                         has_act=True,
                         has_depthwise=False),
            Conv3x3Block(self.stem_channel[0],
                         self.stem_channel[1],
                         kernel_size=3,
                         stride=1,
                         padding=1,
                         has_bn=True,
                         has_act=True,
                         has_depthwise=self.has_depthwise),
            Conv3x3Block(self.stem_channel[1],
                         self.stem_channel[2],
                         kernel_size=3,
                         stride=2,
                         padding=1,
                         has_bn=True,
                         has_act=True,
                         has_depthwise=self.has_depthwise))

        input_planes = self.stem_channel[2]
        first_stage = True
        self.stages = nn.ModuleList([])
        for i in range(4):
            if i > 0:
                input_planes = self.per_stage_inout_channels[i - 1]
                first_stage = False
            self.stages.append(
                OSAStage(input_planes,
                         self.per_stage_inter_channels[i],
                         self.per_stage_inout_channels[i],
                         self.per_stage_block_nums[i],
                         self.per_block_layer_nums,
                         has_se=self.has_se,
                         has_depthwise=self.has_depthwise,
                         first_stage=first_stage))

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(self.per_stage_inout_channels[3], num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight,
                                        mode='fan_out',
                                        nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        x = self.stem(x)
        for stage in self.stages:
            x = stage(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x


def _vovnet(arch, pretrained, progress, **kwargs):
    model = VoVNet(arch, **kwargs)
    # only load state_dict()
    if pretrained:
        model.load_state_dict(
            torch.load(model_urls[arch], map_location=torch.device('cpu')))

    return model


def VoVNet19_slim_depthwise_se(pretrained=False, progress=True, **kwargs):
    return _vovnet('VoVNet19_slim_depthwise_se', pretrained, progress,
                   **kwargs)


def VoVNet19_depthwise_se(pretrained=False, progress=True, **kwargs):
    return _vovnet('VoVNet19_depthwise_se', pretrained, progress, **kwargs)


def VoVNet19_slim_se(pretrained=False, progress=True, **kwargs):
    return _vovnet('VoVNet19_slim_se', pretrained, progress, **kwargs)


def VoVNet19_se(pretrained=False, progress=True, **kwargs):
    return _vovnet('VoVNet19_se', pretrained, progress, **kwargs)


def VoVNet39_se(pretrained=False, progress=True, **kwargs):
    return _vovnet('VoVNet39_se', pretrained, progress, **kwargs)


def VoVNet57_se(pretrained=False, progress=True, **kwargs):
    return _vovnet('VoVNet57_se', pretrained, progress, **kwargs)


def VoVNet99_se(pretrained=False, progress=True, **kwargs):
    return _vovnet('VoVNet99_se', pretrained, progress, **kwargs)

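Similarly, a VoVNet variant can be built by name and run on a 224x224 input, as in this minimal sketch (again assuming the code above is saved as vovnet.py; the filename is only an illustration):

import torch

# a minimal sanity check; assumes the code above is saved as vovnet.py
from vovnet import VoVNet39_se

net = VoVNet39_se()
x = torch.randn(1, 3, 224, 224)
print(net(x).shape)  # torch.Size([1, 1000])
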
I used the same ResNet hyperparameters and training procedure (224x224 input). The results are as follows:

Network                        Top-1 error (%)
VovNet-19-depthwise-se         26.958
VovNet-19-slim-depthwise-se    33.276
VovNet-19-slim-se              30.646
VovNet-19-se                   25.364
VovNet-39-se                   22.662
VovNet-57-se                   22.014
VovNet-99-se                   21.608

Since the VovNet paper does not report its ImageNet pretraining accuracy, there is nothing to compare these results against directly.
