DarkNet
DarkNet系列網絡只有兩個網絡:DarkNet19和DarkNet53,分別在YOLOv2和YOLOv3論文中提出。需要說明的是,作者使用256x256的分辨率在ImageNet上訓練和測試DarkNet網絡。
DarkNet網絡結構如下:
import torch
import torch.nn as nn
# Directory prefix for the checkpoint files below (presumably a placeholder
# that the user replaces with a real location -- confirm before use).
root_path = 'empty'

# Public factory functions exported by this module.
__all__ = ['darknet19', 'darknet53']

# Checkpoint path per architecture. The file names encode the training input
# size, the epoch count and the reported top-1 accuracy.
model_urls = {
    arch: '{}/darknet/{}-input256-epoch100-acc{}.pth'.format(
        root_path, arch, accuracy)
    for arch, accuracy in (
        ('darknet19', '73.868'),
        ('darknet53', '77.008'),
    )
}
class ConvBnActBlock(nn.Module):
    """Bias-free 2D convolution with optional BatchNorm and LeakyReLU(0.1).

    Args:
        inplanes: number of input channels of the convolution.
        planes: number of output channels of the convolution.
        kernel_size: convolution kernel size.
        stride: convolution stride.
        padding: convolution padding.
        groups: number of convolution groups.
        has_bn: when true, a ``BatchNorm2d`` is applied after the convolution.
        has_act: when true, an in-place ``LeakyReLU(0.1)`` is applied last.
    """

    def __init__(self,
                 inplanes,
                 planes,
                 kernel_size,
                 stride,
                 padding=1,
                 groups=1,
                 has_bn=True,
                 has_act=True):
        super().__init__()
        self.has_bn = has_bn
        self.has_act = has_act

        conv_options = dict(stride=stride,
                            padding=padding,
                            groups=groups,
                            bias=False)
        self.conv = nn.Conv2d(inplanes, planes, kernel_size, **conv_options)

        # The optional sub-modules are only registered when enabled, so the
        # state_dict contains no entries for disabled parts.
        if self.has_bn:
            self.bn = nn.BatchNorm2d(planes)
        if self.has_act:
            self.act = nn.LeakyReLU(0.1, inplace=True)

    def forward(self, x):
        """Apply conv, then BatchNorm and activation when they are enabled."""
        out = self.conv(x)
        out = self.bn(out) if self.has_bn else out
        out = self.act(out) if self.has_act else out
        return out
class Darknet19Block(nn.Module):
    """Stack of alternating 3x3 and 1x1 conv blocks with an optional max-pool.

    Even-indexed layers are 3x3 convolutions mapping ``inplanes`` to
    ``planes``; odd-indexed layers are 1x1 convolutions mapping ``planes``
    back to ``inplanes``.

    Args:
        inplanes: channel count entering the block.
        planes: channel count produced by the 3x3 convolutions.
        layer_num: number of conv layers in the stack.
        use_maxpool: when true, a 2x2 stride-2 max-pool follows the stack.
    """

    def __init__(self, inplanes, planes, layer_num, use_maxpool=False):
        super(Darknet19Block, self).__init__()
        self.use_maxpool = use_maxpool

        convs = []
        for idx in range(layer_num):
            if idx % 2 == 0:
                # 3x3 conv: inplanes -> planes
                in_ch, out_ch, kernel, pad = inplanes, planes, 3, 1
            else:
                # 1x1 conv: planes -> inplanes
                in_ch, out_ch, kernel, pad = planes, inplanes, 1, 0
            convs.append(
                ConvBnActBlock(in_ch,
                               out_ch,
                               kernel_size=kernel,
                               stride=1,
                               padding=pad,
                               groups=1,
                               has_bn=True,
                               has_act=True))

        # Attribute names are kept as-is; they are part of the state_dict keys.
        self.Darknet19Block = nn.Sequential(*convs)
        self.MaxPool = nn.MaxPool2d(kernel_size=2, stride=2)

    def forward(self, x):
        """Run the conv stack and, if enabled, the trailing max-pool."""
        out = self.Darknet19Block(x)
        return self.MaxPool(out) if self.use_maxpool else out
class Darknet53Block(nn.Module):
    """Residual unit: 1x1 conv to half the channels, 3x3 conv back, plus skip.

    Args:
        inplanes: channel count of both the input and the output.
    """

    def __init__(self, inplanes):
        super(Darknet53Block, self).__init__()
        bottleneck_planes = int(inplanes * 0.5)

        # Settings shared by both convolutions.
        shared = dict(stride=1, groups=1, has_bn=True, has_act=True)
        self.conv1 = ConvBnActBlock(inplanes,
                                    bottleneck_planes,
                                    kernel_size=1,
                                    padding=0,
                                    **shared)
        self.conv2 = ConvBnActBlock(bottleneck_planes,
                                    inplanes,
                                    kernel_size=3,
                                    padding=1,
                                    **shared)

    def forward(self, inputs):
        """Return ``conv2(conv1(inputs)) + inputs``."""
        residual = self.conv2(self.conv1(inputs))
        return residual + inputs
class Darknet19(nn.Module):
    """Darknet-19 image classifier.

    The network is a 3x3 stem convolution, five ``Darknet19Block`` stages
    separated by 2x2 max-pools, a 1x1 classification convolution and a global
    average pool.

    Args:
        num_classes: number of output classes, i.e. the channel count of the
            final 1x1 convolution. The default of 1000 keeps the layer shapes
            identical to the previous hard-coded value.
    """

    def __init__(self, num_classes=1000):
        super(Darknet19, self).__init__()
        self.layer1 = ConvBnActBlock(3,
                                     32,
                                     kernel_size=3,
                                     stride=1,
                                     padding=1,
                                     groups=1,
                                     has_bn=True,
                                     has_act=True)
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.layer2 = Darknet19Block(32, 64, 1, use_maxpool=True)
        self.layer3 = Darknet19Block(64, 128, 3, use_maxpool=True)
        self.layer4 = Darknet19Block(128, 256, 3, use_maxpool=True)
        self.layer5 = Darknet19Block(256, 512, 5, use_maxpool=True)
        self.layer6 = Darknet19Block(512, 1024, 5, use_maxpool=False)
        # Classification head: a plain 1x1 conv (no BN, no activation) whose
        # output channels are the class logits. This used to be hard-coded to
        # 1000, which silently ignored the num_classes argument.
        self.layer7 = ConvBnActBlock(1024,
                                     num_classes,
                                     kernel_size=1,
                                     stride=1,
                                     padding=0,
                                     groups=1,
                                     has_bn=False,
                                     has_act=False)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

        # Weight initialisation for every conv and normalisation layer.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight,
                                        mode='fan_out',
                                        nonlinearity='leaky_relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``.

        Args:
            x: input batch; the first convolution expects 3 channels.
        """
        x = self.layer1(x)
        x = self.maxpool1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = self.layer6(x)
        x = self.layer7(x)
        x = self.avgpool(x)
        # Drop the 1x1 spatial dimensions left by the global average pool.
        x = x.view(x.size(0), -1)
        return x
class Darknet53(nn.Module):
    """Darknet-53 image classifier.

    A 3x3 stem convolution is followed by five stride-2 3x3 convolutions, each
    followed by a group of ``Darknet53Block`` residual units (1, 2, 8, 8 and 4
    units), then a global average pool and a linear classifier.

    Args:
        num_classes: number of outputs of the final linear layer.
    """

    def __init__(self, num_classes=1000):
        super(Darknet53, self).__init__()
        # Attribute names and creation order are kept unchanged; the former
        # defines the state_dict keys, the latter the order of random init.
        self.conv1 = self._conv3x3(3, 32, stride=1)
        self.conv2 = self._conv3x3(32, 64, stride=2)
        self.block1 = self.make_layer(inplanes=64, num_blocks=1)
        self.conv3 = self._conv3x3(64, 128, stride=2)
        self.block2 = self.make_layer(inplanes=128, num_blocks=2)
        self.conv4 = self._conv3x3(128, 256, stride=2)
        self.block3 = self.make_layer(inplanes=256, num_blocks=8)
        self.conv5 = self._conv3x3(256, 512, stride=2)
        self.block4 = self.make_layer(inplanes=512, num_blocks=8)
        self.conv6 = self._conv3x3(512, 1024, stride=2)
        self.block5 = self.make_layer(inplanes=1024, num_blocks=4)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(1024, num_classes)

        # Weight initialisation for every conv and normalisation layer.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight,
                                        mode='fan_out',
                                        nonlinearity='leaky_relu')
            elif isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)

    @staticmethod
    def _conv3x3(inplanes, planes, stride):
        """Build a 3x3 conv + BatchNorm + LeakyReLU block with padding 1."""
        return ConvBnActBlock(inplanes,
                              planes,
                              kernel_size=3,
                              stride=stride,
                              padding=1,
                              groups=1,
                              has_bn=True,
                              has_act=True)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``.

        Args:
            x: input batch; the first convolution expects 3 channels.
        """
        feature_stages = (
            self.conv1,
            self.conv2,
            self.block1,
            self.conv3,
            self.block2,
            self.conv4,
            self.block3,
            self.conv5,
            self.block4,
            self.conv6,
            self.block5,
        )
        for stage in feature_stages:
            x = stage(x)
        x = self.avgpool(x)
        # Collapse the pooled 1024-channel map to (batch, 1024) for the FC.
        x = x.view(-1, 1024)
        return self.fc(x)

    def make_layer(self, inplanes, num_blocks):
        """Return ``num_blocks`` chained ``Darknet53Block(inplanes)`` units."""
        units = [Darknet53Block(inplanes) for _ in range(0, num_blocks)]
        return nn.Sequential(*units)
def darknet19(pretrained=False, progress=True, **kwargs):
    """Build a ``Darknet19`` model, optionally loading pretrained weights.

    Args:
        pretrained: when true, load the state_dict stored at
            ``model_urls['darknet19']`` onto the CPU and apply it.
        progress: accepted for signature compatibility; not used here.
        **kwargs: forwarded to the ``Darknet19`` constructor.

    Returns:
        The constructed model.
    """
    model = Darknet19(**kwargs)
    if not pretrained:
        return model

    # The checkpoint holds only a state_dict. torch.load unpickles the file,
    # so only load checkpoints from a trusted source.
    state_dict = torch.load(model_urls['darknet19'],
                            map_location=torch.device('cpu'))
    model.load_state_dict(state_dict)
    return model
def darknet53(pretrained=False, progress=True, **kwargs):
    """Build a ``Darknet53`` model, optionally loading pretrained weights.

    Args:
        pretrained: when true, load the state_dict stored at
            ``model_urls['darknet53']`` onto the CPU and apply it.
        progress: accepted for signature compatibility; not used here.
        **kwargs: forwarded to the ``Darknet53`` constructor.

    Returns:
        The constructed model.
    """
    model = Darknet53(**kwargs)
    if not pretrained:
        return model

    # The checkpoint holds only a state_dict. torch.load unpickles the file,
    # so only load checkpoints from a trusted source.
    state_dict = torch.load(model_urls['darknet53'],
                            map_location=torch.device('cpu'))
    model.load_state_dict(state_dict)
    return model
我使用第一彈中ResNet的超參數和訓練方式,僅將輸入尺寸改爲256x256,結果如下:
Network | Top-1 error |
---|---|
DarkNet-19 | 26.132 |
DarkNet-53 | 22.992 |
在YOLOv3論文中,作者列出了自己在ImageNet上預訓練的DarkNet19爲25.9,DarkNet53爲22.8,和我的結果相差不大。
所有DarkNet預訓練模型已上傳到百度雲,歡迎下載:鏈接:https://pan.baidu.com/s/18E8LMwwQhcjQBbnYFlet4A 提取碼:hcpp 。
VovNet
VovNet來自這篇論文:An Energy and GPU-Computation Efficient Backbone Network for Real-Time Object Detection 。同時作者還開源了代碼:https://github.com/youngwanLEE/vovnet-detectron2/blob/master/vovnet/vovnet.py 。根據開源代碼,我復現了VovNetv2,網絡結構如下:
import torch
import torch.nn as nn
import torch.nn.functional as F
# Directory prefix for the checkpoint files below (presumably a placeholder
# that the user replaces with a real location -- confirm before use).
root_path = 'empty'

# Public factory functions exported by this module.
__all__ = [
    'VoVNet19_slim_depthwise_se',
    'VoVNet19_depthwise_se',
    'VoVNet19_slim_se',
    'VoVNet19_se',
    'VoVNet39_se',
    'VoVNet57_se',
    'VoVNet99_se',
]

# Checkpoint path per architecture. The file names encode the epoch count and
# the reported top-1 accuracy.
model_urls = {
    arch: '{}/vovnet/{}-epoch100-acc{}.pth'.format(root_path, arch, accuracy)
    for arch, accuracy in (
        ('VoVNet19_slim_depthwise_se', '66.724'),
        ('VoVNet19_depthwise_se', '73.042'),
        ('VoVNet19_slim_se', '69.354'),
        ('VoVNet19_se', '74.636'),
        ('VoVNet39_se', '77.338'),
        ('VoVNet57_se', '77.986'),
        ('VoVNet99_se', '78.392'),
    )
}
# Architecture table consumed by VoVNet.__init__, keyed by variant name.
# Each row below is (name, slim, depthwise, layers per block, blocks per stage):
#   * slim       -> narrower inter/in-out channel widths per stage
#   * depthwise  -> depthwise 3x3 convs and a 64-channel final stem conv
# Every entry gets freshly built lists, so no list is shared between variants.
vovnet_configs = {
    name: {
        'stem_channel': [64, 64, 64 if depthwise else 128],
        'per_stage_inter_channels':
        [64, 80, 96, 112] if slim else [128, 160, 192, 224],
        'per_stage_inout_channels':
        [112, 256, 384, 512] if slim else [256, 512, 768, 1024],
        'per_block_layer_nums': layer_nums,
        'per_stage_block_nums': list(block_nums),
        'has_se': True,
        'has_depthwise': depthwise,
    }
    for name, slim, depthwise, layer_nums, block_nums in (
        ('VoVNet19_slim_depthwise_se', True, True, 3, (1, 1, 1, 1)),
        ('VoVNet19_depthwise_se', False, True, 3, (1, 1, 1, 1)),
        ('VoVNet19_slim_se', True, False, 3, (1, 1, 1, 1)),
        ('VoVNet19_se', False, False, 3, (1, 1, 1, 1)),
        ('VoVNet39_se', False, False, 5, (1, 1, 2, 2)),
        ('VoVNet57_se', False, False, 5, (1, 1, 4, 3)),
        ('VoVNet99_se', False, False, 5, (1, 3, 9, 3)),
    )
}
class Conv3x3Block(nn.Module):
    """Bias-free convolution (plain or depthwise + pointwise), BN and ReLU.

    Args:
        inplanes: number of input channels.
        planes: number of output channels.
        kernel_size: kernel size of the (first) convolution.
        stride: stride of the (first) convolution.
        padding: padding of the (first) convolution.
        has_bn: when true, a ``BatchNorm2d`` follows the convolution.
        has_act: when true, an in-place ReLU is applied last.
        has_depthwise: when true, ``self.conv`` is a two-layer ``Sequential``:
            a grouped conv with ``groups=planes`` followed by a 1x1 conv.
            ``nn.Conv2d`` requires ``inplanes`` to be divisible by ``groups``,
            so this path needs ``inplanes`` to be a multiple of ``planes``.
    """

    def __init__(self,
                 inplanes,
                 planes,
                 kernel_size=3,
                 stride=1,
                 padding=1,
                 has_bn=True,
                 has_act=True,
                 has_depthwise=False):
        super().__init__()
        self.has_bn = has_bn
        self.has_act = has_act

        if not has_depthwise:
            self.conv = nn.Conv2d(inplanes,
                                  planes,
                                  kernel_size,
                                  stride=stride,
                                  padding=padding,
                                  groups=1,
                                  bias=False)
        else:
            grouped_conv = nn.Conv2d(inplanes,
                                     planes,
                                     kernel_size,
                                     stride=stride,
                                     padding=padding,
                                     groups=planes,
                                     bias=False)
            pointwise_conv = nn.Conv2d(planes,
                                       planes,
                                       kernel_size=1,
                                       stride=1,
                                       padding=0,
                                       groups=1,
                                       bias=False)
            self.conv = nn.Sequential(grouped_conv, pointwise_conv)

        # The optional sub-modules are only registered when enabled.
        if self.has_bn:
            self.bn = nn.BatchNorm2d(planes)
        if self.has_act:
            self.act = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply conv, then BatchNorm and ReLU when they are enabled."""
        out = self.conv(x)
        out = self.bn(out) if self.has_bn else out
        out = self.act(out) if self.has_act else out
        return out
class Conv1x1Block(nn.Module):
    """Bias-free convolution (1x1 by default) with optional BN and ReLU.

    Args:
        inplanes: number of input channels.
        planes: number of output channels.
        kernel_size: convolution kernel size.
        stride: convolution stride.
        padding: convolution padding.
        has_bn: when true, a ``BatchNorm2d`` follows the convolution.
        has_act: when true, an in-place ReLU is applied last.
    """

    def __init__(self,
                 inplanes,
                 planes,
                 kernel_size=1,
                 stride=1,
                 padding=0,
                 has_bn=True,
                 has_act=True):
        super().__init__()
        self.has_bn = has_bn
        self.has_act = has_act

        conv_options = dict(stride=stride,
                            padding=padding,
                            groups=1,
                            bias=False)
        self.conv = nn.Conv2d(inplanes, planes, kernel_size, **conv_options)

        # The optional sub-modules are only registered when enabled.
        if self.has_bn:
            self.bn = nn.BatchNorm2d(planes)
        if self.has_act:
            self.act = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply conv, then BatchNorm and ReLU when they are enabled."""
        out = self.conv(x)
        out = self.bn(out) if self.has_bn else out
        out = self.act(out) if self.has_act else out
        return out
class HardSigmoid(nn.Module):
    """Piecewise-linear sigmoid: ``relu6(x + 3) / 6``.

    Args:
        inplace: forwarded to ``F.relu6``. The ReLU6 runs on the temporary
            ``x + 3.0`` tensor, not on the caller's ``x``.
    """

    def __init__(self, inplace=True):
        super(HardSigmoid, self).__init__()
        self.inplace = inplace

    def forward(self, x):
        """Return ``relu6(x + 3) / 6`` element-wise."""
        shifted = x + 3.0
        clipped = F.relu6(shifted, inplace=self.inplace)
        return clipped / 6.0
class eSEBlock(nn.Module):
    """Channel gate: global avg-pool -> 1x1 conv -> hard sigmoid -> rescale.

    Args:
        inplanes: input channels of the gating 1x1 convolution.
        planes: output channels of the gating 1x1 convolution. The gate is
            multiplied with the input, so it must broadcast against it.
    """

    def __init__(self, inplanes, planes):
        super(eSEBlock, self).__init__()
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.conv = nn.Conv2d(inplanes,
                              planes,
                              kernel_size=1,
                              stride=1,
                              padding=0,
                              bias=True)
        self.hardsigmoid = HardSigmoid()

    def forward(self, inputs):
        """Return ``inputs`` scaled by the per-channel gate."""
        gate = self.hardsigmoid(self.conv(self.avgpool(inputs)))
        return inputs * gate
class OSABlock(nn.Module):
    """Chain of 3x3 convs whose outputs are concatenated and fused by a 1x1.

    The block input and the output of every 3x3 layer are concatenated along
    the channel axis and reduced to ``planes`` channels by a 1x1 conv block.

    Args:
        inplanes: channels of the block input.
        interplanes: channels produced by each 3x3 layer.
        planes: channels produced by the fusing 1x1 conv.
        per_block_layer_nums: number of 3x3 layers in the chain.
        has_se: when true, an ``eSEBlock`` gates the fused output.
        has_depthwise: when true, the 3x3 layers are depthwise, and a 1x1
            ``reduce_conv`` first maps the input to ``interplanes`` channels
            if the two differ.
        has_identity: when true, the block input is added to the output.
    """

    def __init__(self,
                 inplanes,
                 interplanes,
                 planes,
                 per_block_layer_nums,
                 has_se=False,
                 has_depthwise=False,
                 has_identity=False):
        super(OSABlock, self).__init__()
        self.inplanes = inplanes
        self.interplanes = interplanes
        self.has_se = has_se
        self.has_depthwise = has_depthwise
        self.has_identity = has_identity

        if self.has_depthwise and self.inplanes != self.interplanes:
            self.reduce_conv = Conv1x1Block(inplanes,
                                            interplanes,
                                            kernel_size=1,
                                            stride=1,
                                            padding=0,
                                            has_bn=True,
                                            has_act=True)

        # Only the first layer of a non-depthwise chain consumes `inplanes`;
        # every other layer takes the previous layer's `interplanes` output.
        layer_inputs = [
            inplanes if (idx == 0 and not self.has_depthwise) else interplanes
            for idx in range(per_block_layer_nums)
        ]
        self.OSABlocklayers = nn.ModuleList([
            Conv3x3Block(in_ch,
                         interplanes,
                         kernel_size=3,
                         stride=1,
                         padding=1,
                         has_bn=True,
                         has_act=True,
                         has_depthwise=self.has_depthwise)
            for in_ch in layer_inputs
        ])

        # Channels after concatenating the input with all layer outputs.
        fused_inputs = inplanes + per_block_layer_nums * interplanes
        self.concat_conv = Conv1x1Block(fused_inputs,
                                        planes,
                                        kernel_size=1,
                                        stride=1,
                                        padding=0,
                                        has_bn=True,
                                        has_act=True)

        if self.has_se:
            self.ese = eSEBlock(planes, planes)

    def forward(self, inputs):
        """Run the chain, fuse the collected features and apply SE / skip."""
        features = [inputs]

        x = inputs
        if self.has_depthwise and self.inplanes != self.interplanes:
            x = self.reduce_conv(inputs)

        for layer in self.OSABlocklayers:
            x = layer(x)
            features.append(x)

        out = self.concat_conv(torch.cat(features, dim=1))
        if self.has_se:
            out = self.ese(out)
        if self.has_identity:
            out = out + inputs
        return out
class OSAStage(nn.Module):
    """One VoVNet stage: optional down-sampling pool plus a run of OSABlocks.

    Args:
        inplanes: channels entering the stage (input of the first block).
        interplanes: channels of the 3x3 layers inside every block.
        planes: channels produced by every block.
        per_stage_block_nums: number of ``OSABlock`` modules in the stage.
        per_block_layer_nums: number of 3x3 layers in every block.
        has_se: requests the eSE gate. It is only honoured when the stage
            holds a single block; stages with more blocks get no SE at all.
        has_depthwise: forwarded to every block.
        first_stage: when true, the leading max-pool is omitted.
    """

    def __init__(self,
                 inplanes,
                 interplanes,
                 planes,
                 per_stage_block_nums,
                 per_block_layer_nums,
                 has_se=False,
                 has_depthwise=False,
                 first_stage=False):
        super(OSAStage, self).__init__()
        self.first_stage = first_stage
        if not self.first_stage:
            self.pool = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

        self.has_depthwise = has_depthwise
        # SE is disabled for every block of a multi-block stage. The previous
        # code additionally reset the flag inside the block loop, but that
        # check could never change the value (it only fired when the flag was
        # already False), so it is expressed once here. The constructed
        # modules, and therefore the state_dict layout, are unchanged.
        self.has_se = has_se if per_stage_block_nums <= 1 else False

        blocks = []
        for idx in range(per_stage_block_nums):
            is_first = idx == 0
            blocks.append(
                OSABlock(
                    # Later blocks consume the previous block's output.
                    inplanes if is_first else planes,
                    interplanes,
                    planes,
                    per_block_layer_nums,
                    has_se=self.has_se,
                    has_depthwise=self.has_depthwise,
                    # The skip connection needs equal in/out channels, which
                    # only holds from the second block onwards.
                    has_identity=not is_first))
        self.OSAStageblocks = nn.Sequential(*blocks)

    def forward(self, x):
        """Down-sample (unless this is the first stage) and run the blocks."""
        if not self.first_stage:
            x = self.pool(x)
        return self.OSAStageblocks(x)
class VoVNet(nn.Module):
    """VoVNet classifier assembled from an entry of ``vovnet_configs``.

    The network is a three-conv stem, four ``OSAStage`` modules, a global
    average pool and a linear classifier.

    Args:
        vovnet_type: key into ``vovnet_configs`` selecting the variant.
        num_classes: number of outputs of the final linear layer.
    """

    def __init__(self, vovnet_type, num_classes=1000):
        super(VoVNet, self).__init__()
        config = vovnet_configs[vovnet_type]
        self.stem_channel = config['stem_channel']
        self.per_stage_inter_channels = config['per_stage_inter_channels']
        self.per_stage_inout_channels = config['per_stage_inout_channels']
        self.per_block_layer_nums = config['per_block_layer_nums']
        self.per_stage_block_nums = config['per_stage_block_nums']
        self.has_se = config['has_se']
        self.has_depthwise = config['has_depthwise']

        # (in channels, out channels, stride, depthwise) for the stem convs.
        # The first conv is never depthwise; the stem down-samples twice.
        stem_specs = (
            (3, self.stem_channel[0], 2, False),
            (self.stem_channel[0], self.stem_channel[1], 1,
             self.has_depthwise),
            (self.stem_channel[1], self.stem_channel[2], 2,
             self.has_depthwise),
        )
        self.stem = nn.Sequential(*[
            Conv3x3Block(in_ch,
                         out_ch,
                         kernel_size=3,
                         stride=stride,
                         padding=1,
                         has_bn=True,
                         has_act=True,
                         has_depthwise=depthwise)
            for in_ch, out_ch, stride, depthwise in stem_specs
        ])

        self.stages = nn.ModuleList([])
        for idx in range(4):
            is_first = idx == 0
            # The first stage follows the stem; later stages follow the
            # previous stage's output width.
            if is_first:
                stage_inplanes = self.stem_channel[2]
            else:
                stage_inplanes = self.per_stage_inout_channels[idx - 1]
            self.stages.append(
                OSAStage(stage_inplanes,
                         self.per_stage_inter_channels[idx],
                         self.per_stage_inout_channels[idx],
                         self.per_stage_block_nums[idx],
                         self.per_block_layer_nums,
                         has_se=self.has_se,
                         has_depthwise=self.has_depthwise,
                         first_stage=is_first))

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(self.per_stage_inout_channels[3], num_classes)

        # Weight initialisation for every conv and normalisation layer.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight,
                                        mode='fan_out',
                                        nonlinearity='relu')
            elif isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``.

        Args:
            x: input batch; the first stem convolution expects 3 channels.
        """
        out = self.stem(x)
        for stage in self.stages:
            out = stage(out)
        out = torch.flatten(self.avgpool(out), 1)
        return self.fc(out)
def _vovnet(arch, pretrained, progress, **kwargs):
    """Build the ``VoVNet`` variant ``arch``, optionally loading weights.

    Args:
        arch: variant name; used for the ``VoVNet`` constructor and as the
            key into ``model_urls``.
        pretrained: when true, load the state_dict stored at
            ``model_urls[arch]`` onto the CPU and apply it.
        progress: accepted for signature compatibility; not used here.
        **kwargs: forwarded to the ``VoVNet`` constructor.

    Returns:
        The constructed model.
    """
    model = VoVNet(arch, **kwargs)
    if not pretrained:
        return model

    # The checkpoint holds only a state_dict. torch.load unpickles the file,
    # so only load checkpoints from a trusted source.
    state_dict = torch.load(model_urls[arch], map_location=torch.device('cpu'))
    model.load_state_dict(state_dict)
    return model
def VoVNet19_slim_depthwise_se(pretrained=False, progress=True, **kwargs):
    """Build the 'VoVNet19_slim_depthwise_se' variant via ``_vovnet``.

    All arguments are passed through unchanged; ``**kwargs`` reaches the
    ``VoVNet`` constructor.
    """
    arch = 'VoVNet19_slim_depthwise_se'
    return _vovnet(arch, pretrained, progress, **kwargs)
def VoVNet19_depthwise_se(pretrained=False, progress=True, **kwargs):
    """Build the 'VoVNet19_depthwise_se' variant via ``_vovnet``.

    All arguments are passed through unchanged; ``**kwargs`` reaches the
    ``VoVNet`` constructor.
    """
    arch = 'VoVNet19_depthwise_se'
    return _vovnet(arch, pretrained, progress, **kwargs)
def VoVNet19_slim_se(pretrained=False, progress=True, **kwargs):
    """Build the 'VoVNet19_slim_se' variant via ``_vovnet``.

    All arguments are passed through unchanged; ``**kwargs`` reaches the
    ``VoVNet`` constructor.
    """
    arch = 'VoVNet19_slim_se'
    return _vovnet(arch, pretrained, progress, **kwargs)
def VoVNet19_se(pretrained=False, progress=True, **kwargs):
    """Build the 'VoVNet19_se' variant via ``_vovnet``.

    All arguments are passed through unchanged; ``**kwargs`` reaches the
    ``VoVNet`` constructor.
    """
    arch = 'VoVNet19_se'
    return _vovnet(arch, pretrained, progress, **kwargs)
def VoVNet39_se(pretrained=False, progress=True, **kwargs):
    """Build the 'VoVNet39_se' variant via ``_vovnet``.

    All arguments are passed through unchanged; ``**kwargs`` reaches the
    ``VoVNet`` constructor.
    """
    arch = 'VoVNet39_se'
    return _vovnet(arch, pretrained, progress, **kwargs)
def VoVNet57_se(pretrained=False, progress=True, **kwargs):
    """Build the 'VoVNet57_se' variant via ``_vovnet``.

    All arguments are passed through unchanged; ``**kwargs`` reaches the
    ``VoVNet`` constructor.
    """
    arch = 'VoVNet57_se'
    return _vovnet(arch, pretrained, progress, **kwargs)
def VoVNet99_se(pretrained=False, progress=True, **kwargs):
    """Build the 'VoVNet99_se' variant via ``_vovnet``.

    All arguments are passed through unchanged; ``**kwargs`` reaches the
    ``VoVNet`` constructor.
    """
    arch = 'VoVNet99_se'
    return _vovnet(arch, pretrained, progress, **kwargs)
我使用ResNet的超參數和訓練方式(輸入爲224x224分辨率),訓練結果如下:
Network | Top-1 error |
---|---|
VovNet-19-depthwise-se | 26.958 |
VovNet-19-slim-depthwise-se | 33.276 |
VovNet-19-slim-se | 30.646 |
VovNet-19-se | 25.364 |
VovNet-39-se | 22.662 |
VovNet-57-se | 22.014 |
VovNet-99-se | 21.608 |
由於VovNet論文中並沒有列出其在ImageNet上的預訓練點數,所以無法比較。