A ResNet50 implementation in MXNet

The detailed network structure is described at https://blog.csdn.net/qq_21046135/article/details/81674605 and https://blog.csdn.net/seven_year_promise/article/details/69360488

from mxnet.gluon import nn
from mxnet import nd, init, gluon, autograd
import mxnet as mx
import gluonbook as gb
from mxnet.gluon.data.vision import transforms


lr = 0.1
num_epochs = 100
batch_size = 128
ctx = mx.gpu()
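# Optional fallback so the script also runs without a GPU
# (assumption: mx.context.num_gpus() is available, i.e. MXNet >= 1.3):
# ctx = mx.gpu() if mx.context.num_gpus() > 0 else mx.cpu()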

transformer = transforms.Compose([
    transforms.ToTensor()  # HWC uint8 image -> CHW float32 in [0, 1]
])

# Load the data
train_data = gluon.data.vision.ImageFolderDataset("/home/user/cf/st/train")
test_data = gluon.data.vision.ImageFolderDataset("/home/user/cf/st/test")
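
# ImageFolderDataset expects one subdirectory per class under each root,
# e.g. (subdirectory names hypothetical; the Dense(2) head below implies 2 classes):
#   /home/user/cf/st/train/class_a/...
#   /home/user/cf/st/train/class_b/...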

train_iter = gluon.data.DataLoader(train_data.transform_first(transformer), shuffle=True, batch_size=batch_size)
test_iter = gluon.data.DataLoader(test_data.transform_first(transformer), shuffle=False, batch_size=batch_size)  # no need to shuffle for evaluation

class Residual(nn.Block):
    def __init__(self, num_channels, use_1x1conv=False, strides=1, **kwargs):
        super(Residual, self).__init__(**kwargs)
        # Bottleneck: 1x1 conv to reduce channels, 3x3 conv, then 1x1 conv
        # to expand back to 4x the base channel count
        self.conv1 = nn.Conv2D(num_channels, kernel_size=1, strides=strides)
        self.conv2 = nn.Conv2D(num_channels, kernel_size=3, strides=1, padding=1)
        self.conv3 = nn.Conv2D(num_channels*4, kernel_size=1, strides=1)
        if use_1x1conv:
            # Projection shortcut: matches the channel count (and stride)
            # of the main branch
            self.conv4 = nn.Conv2D(num_channels*4, kernel_size=1, strides=strides)
            self.bn4 = nn.BatchNorm()
        else:
            # Assign conv4/bn4 in both branches; see the note below the code
            self.conv4 = None
            self.bn4 = None
        self.bn1 = nn.BatchNorm()
        self.bn2 = nn.BatchNorm()
        self.bn3 = nn.BatchNorm()

    def forward(self, x):
        y = nd.relu(self.bn1(self.conv1(x)))
        y = nd.relu(self.bn2(self.conv2(y)))
        y = self.bn3(self.conv3(y))
        if self.conv4 is not None:  # bring the shortcut to y's shape
            x = self.bn4(self.conv4(x))
        return nd.relu(x + y)
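
# Optional shape check: a bottleneck block expands the channel count 4x;
# at stride 1 the spatial size is unchanged
blk = Residual(64, use_1x1conv=True)
blk.initialize()
print(blk(nd.random.uniform(shape=(1, 64, 56, 56))).shape)  # (1, 256, 56, 56)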

def resnet_block(num_channels, num_residuals, stride=1):
    blk = nn.Sequential()
    for i in range(num_residuals):
        if i == 0:
            # The first block of a stage changes the channel count (and
            # possibly the spatial size), so it needs the projection shortcut
            blk.add(Residual(num_channels, use_1x1conv=True, strides=stride))
        else:
            blk.add(Residual(num_channels))
    return blk
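
# Optional shape check: the first block of a stage halves the spatial size
# via strides=2 while the channel count moves to num_channels*4
stage = resnet_block(128, 4, stride=2)
stage.initialize()
print(stage(nd.random.uniform(shape=(1, 256, 56, 56))).shape)  # (1, 512, 28, 28)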

net = nn.Sequential()
# Stem: 7x7 stride-2 convolution, BN, ReLU, and 3x3 stride-2 max pooling
net.add(
    nn.Conv2D(64, kernel_size=7, strides=2, padding=3),
    nn.BatchNorm(),
    nn.Activation('relu'),
    nn.MaxPool2D(pool_size=3, strides=2)
)
net.add(
    # The 3-4-6-3 stage configuration is what makes this ResNet50:
    # (3+4+6+3) bottleneck blocks x 3 convs + stem conv + final dense = 50 layers
    resnet_block(64, 3, stride=1),
    resnet_block(128, 4, stride=2),
    resnet_block(256, 6, stride=2),
    resnet_block(512, 3, stride=2)
)
# Global average pooling followed by a 2-way classifier head
net.add(nn.GlobalAvgPool2D(),
        nn.Dense(2))
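
# Optional: trace the output shape of each top-level block with a dummy input
# (assuming 3-channel 224x224 inputs, the usual ResNet input size; the Xavier
# re-initialization below uses force_reinit=True, so this extra pass is harmless)
net.initialize()
X = nd.random.uniform(shape=(1, 3, 224, 224))
for layer in net:
    X = layer(X)
    print(layer.name, 'output shape:', X.shape)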

net.initialize(init=init.Xavier(), force_reinit=True, ctx=ctx)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate':lr})
loss = gluon.loss.SoftmaxCrossEntropyLoss()
gb.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)
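
gluonbook's train_ch5 runs the training loop and prints per-epoch loss and accuracy. If gluonbook is not available, a minimal equivalent loop is sketched below, using only the objects defined above (training metrics only, no test evaluation):

for epoch in range(num_epochs):
    train_loss, train_acc, n = 0.0, 0.0, 0
    for X, y in train_iter:
        X, y = X.as_in_context(ctx), y.as_in_context(ctx)
        with autograd.record():
            y_hat = net(X)
            l = loss(y_hat, y)
        l.backward()
        trainer.step(batch_size)
        train_loss += l.sum().asscalar()
        train_acc += (y_hat.argmax(axis=1) == y.astype('float32')).sum().asscalar()
        n += y.size
    print('epoch %d, loss %.4f, train acc %.3f' % (epoch + 1, train_loss / n, train_acc / n))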

If self.bn4 is instead defined after self.bn3 (i.e., created unconditionally rather than assigned in both branches of the if/else), training fails with: mxnet.gluon.parameter.DeferredInitializationError: Parameter 'batchnorm8_gamma' has not been initialized yet because initialization was deferred. Actual initialization happens during the first forward pass. Please pass one batch of data through the network before accessing Parameters. You can also avoid deferred initialization by specifying in_units, num_features, etc., for network layers.

The reason: when use_1x1conv is False, a bn4 created this way is never used in the forward pass, so deferred initialization never infers its parameter shapes, and any later access to those parameters raises DeferredInitializationError. self.bn4 therefore has to be assigned in both branches (set to None when unused), as in the code above.
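
A minimal sketch of this failure mode (hypothetical class, not from the original network): a child layer that is constructed but never called in forward never has its shapes inferred, so touching its parameters raises the error:

class Broken(nn.Block):
    def __init__(self, **kwargs):
        super(Broken, self).__init__(**kwargs)
        self.conv = nn.Conv2D(8, kernel_size=1)
        self.bn4 = nn.BatchNorm()  # constructed, but never used in forward

    def forward(self, x):
        return self.conv(x)        # bn4's input shape is never inferred

net2 = Broken()
net2.initialize()
net2(nd.ones((1, 3, 8, 8)))        # the forward pass itself succeeds
net2.bn4.gamma.data()              # raises DeferredInitializationError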
