动态图搭建 shuffle-net-v2 训练完成后验证准确率低且不稳定,怀疑是参数加载问题
Created by: xiangyubo
动态图搭建 shuffle-net-v2 训练完成后验证准确率低且不稳定,怀疑是参数加载问题。使用的是 paddle 1.5.1,shuffle-net 是我自己写的。之前在写其他网络的时候碰到过类似问题,当时是因为有层不在 sublayer 中,这次特别注意了。网络结构见代码:
# -*- coding: UTF-8 -*-
"""
动态图构建的 shuffle-net-v2
"""
import paddle.fluid as fluid
import numpy as np
from paddle.fluid import ParamAttr
from paddle.fluid.initializer import MSRA
def channel_shuffle(x, groups):
    """
    Interleave the channels of a feature map across ``groups`` groups.

    The first four entries of ``x.shape`` are read as batch size, channel
    count, height and width. The channel axis is viewed as
    ``(groups, channels // groups)``, those two axes are swapped, and the
    result is flattened back to the original four-entry shape.

    :param x: input feature map
    :param groups: number of channel groups to shuffle across
    :return: feature map with the same shape as ``x`` and shuffled channels
    """
    dims = x.shape
    n, c, h, w = dims[0], dims[1], dims[2], dims[3]

    # Expose the group axis, then swap it with the per-group channel axis.
    grouped = fluid.layers.reshape(x=x, shape=[n, groups, c // groups, h, w])
    swapped = fluid.layers.transpose(x=grouped, perm=[0, 2, 1, 3, 4])

    # Collapse the two channel axes back into one.
    return fluid.layers.reshape(x=swapped, shape=[n, c, h, w])
class ConvBnLayer(fluid.dygraph.Layer):
    """
    2-D convolution followed by batch normalization.

    The convolution weights are always MSRA-initialized and the convolution
    has no bias. The optional activation ``act`` is handed to the batch-norm
    layer, not to the convolution.

    :param name_scope: name scope forwarded to ``fluid.dygraph.Layer``
    :param num_filters: number of convolution filters; also used as the
        channel count of the batch-norm layer
    :param filter_size: convolution kernel size
    :param stride: convolution stride
    :param padding: convolution padding
    :param dilation: convolution dilation
    :param groups: number of convolution groups
    :param act: activation name passed to the batch-norm layer, or ``None``
    :param use_cudnn: forwarded to the convolution
    :param param_attr: accepted for interface compatibility but not used
    :param bias_attr: accepted for interface compatibility but not used
    """

    def __init__(self, name_scope, num_filters, filter_size, stride=1,
                 padding=0, dilation=1, groups=1, act="relu", use_cudnn=True,
                 param_attr=None, bias_attr=None):
        super(ConvBnLayer, self).__init__(name_scope)
        scope = self.full_name()

        conv_options = dict(
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            param_attr=ParamAttr(initializer=MSRA()),
            bias_attr=False,
            use_cudnn=use_cudnn,
        )
        self._conv2d = fluid.dygraph.Conv2D(scope, **conv_options)

        norm_options = dict(
            num_channels=num_filters,
            act=act,
            param_attr=ParamAttr(),
            bias_attr=ParamAttr(),
        )
        self._batch_normal = fluid.dygraph.BatchNorm(scope, **norm_options)

    def forward(self, inputs):
        """Apply the convolution and then batch normalization to ``inputs``."""
        return self._batch_normal(self._conv2d(inputs))
class InvertedResidualUnitA(fluid.dygraph.Layer):
    """
    ShuffleNet-V2 basic unit that splits the input channels in two halves.

    The first half is passed through untouched; the second half goes through
    a 1x1 conv, a 3x3 grouped conv (``groups`` equal to its filter count) and
    another 1x1 conv. Both halves are concatenated on the channel axis and
    shuffled with ``channel_shuffle`` using 2 groups.

    :param name_scope: name scope forwarded to ``fluid.dygraph.Layer``
    :param num_filters: nominal output channels; each conv in the processed
        branch produces ``num_filters // 2`` channels
    :param stride: stride of the 3x3 grouped convolution
    """

    def __init__(self, name_scope, num_filters, stride):
        super(InvertedResidualUnitA, self).__init__(name_scope)
        branch_channels = num_filters // 2
        scope = self.full_name()

        # Sublayers are created in the order they are applied in forward().
        self._conv_pw = ConvBnLayer(
            scope, num_filters=branch_channels, filter_size=1,
            stride=1, padding=0, act='relu')
        self._conv_dw = ConvBnLayer(
            scope, num_filters=branch_channels, filter_size=3,
            stride=stride, padding=1, groups=branch_channels, act=None)
        self._conv_liner = ConvBnLayer(
            scope, num_filters=branch_channels, filter_size=1,
            stride=1, padding=0, act='relu')

    def forward(self, inputs):
        """Split, transform the second half, concatenate and shuffle."""
        half = inputs.shape[1] // 2
        shortcut, branch = fluid.layers.split(
            inputs, num_or_sections=[half, half], dim=1)

        branch = self._conv_liner(self._conv_dw(self._conv_pw(branch)))

        merged = fluid.layers.concat([shortcut, branch], axis=1)
        return channel_shuffle(merged, 2)
class InvertedResidualUnitB(fluid.dygraph.Layer):
    """
    ShuffleNet-V2 unit with two convolutional branches and no channel split.

    Both branches read the full input:

    * branch 1: 3x3 grouped conv (``groups`` equal to the input channel
      count) followed by a 1x1 conv;
    * branch 2: 1x1 conv, 3x3 grouped conv, 1x1 conv.

    Each branch ends with ``num_filters // 2`` channels; the two results are
    concatenated on the channel axis and shuffled with ``channel_shuffle``
    using 2 groups.

    :param name_scope: name scope forwarded to ``fluid.dygraph.Layer``
    :param num_filters: total number of output channels of the unit
    :param stride: stride of the two 3x3 grouped convolutions
    :param num_channels: channel count of the input feature map. When given,
        the branch-1 grouped convolution is created in ``__init__`` so that
        it is a registered sublayer before the first forward pass. When
        ``None`` (legacy behavior) it is created lazily in ``_build_once``
        from the first input's shape.
    """

    def __init__(self, name_scope, num_filters, stride, num_channels=None):
        super(InvertedResidualUnitB, self).__init__(name_scope)
        _oup_inc = num_filters // 2
        self.num_filters = num_filters
        self.stride = stride
        self._in_channels = num_channels

        # branch 1 (second half; the leading grouped conv is created below)
        self._conv_liner_1 = ConvBnLayer(
            self.full_name(), num_filters=_oup_inc, filter_size=1,
            stride=1, padding=0, act='relu'
        )
        # branch 2
        self._conv_pw_2 = ConvBnLayer(
            self.full_name(), num_filters=_oup_inc, filter_size=1,
            stride=1, padding=0, act='relu')
        self._conv_dw_2 = ConvBnLayer(
            self.full_name(), num_filters=_oup_inc, filter_size=3,
            stride=stride, padding=1, groups=_oup_inc, act=None
        )
        self._conv_liner_2 = ConvBnLayer(
            self.full_name(), num_filters=_oup_inc, filter_size=1,
            stride=1, padding=0, act='relu'
        )

        # NOTE(review): a sublayer that only appears in _build_once does not
        # exist yet when load_dict() is called before the first forward pass,
        # so its weights presumably stay at their random initialization --
        # which matches the "low and unstable validation accuracy" symptom.
        # Building it here avoids that. It is deliberately created after the
        # other four sublayers, i.e. in the same order as the lazy path, which
        # should keep auto-generated parameter names unchanged -- confirm
        # against the installed Paddle version.
        if num_channels is not None:
            self._conv_dw_1 = self._make_branch1_depthwise(num_channels)

    def _make_branch1_depthwise(self, num_channels):
        """Create the 3x3 grouped conv that opens branch 1."""
        return ConvBnLayer(
            self.full_name(), num_filters=num_channels, filter_size=3,
            stride=self.stride, padding=1, groups=num_channels, act=None
        )

    def _build_once(self, *args):
        """Lazily create the branch-1 grouped conv if it was not built eagerly."""
        if self._in_channels is not None:
            # Already created in __init__; do not replace it.
            return
        # Legacy path: infer the channel count from the first input.
        self._conv_dw_1 = self._make_branch1_depthwise(args[0].shape[1])

    def forward(self, inputs):
        """Run both branches on ``inputs``, concatenate and shuffle."""
        # branch 1
        out1 = self._conv_dw_1(inputs)
        out1 = self._conv_liner_1(out1)
        # branch 2
        out2 = self._conv_pw_2(inputs)
        out2 = self._conv_dw_2(out2)
        out2 = self._conv_liner_2(out2)
        out = fluid.layers.concat([out1, out2], axis=1)
        out = channel_shuffle(out, 2)
        return out


class ShuffleNetV2(fluid.dygraph.Layer):
    """
    ShuffleNet-V2 classifier built from dygraph layers.

    Structure: 3x3 stride-2 conv, 3x3 stride-2 pooling, three stages of
    4 / 8 / 4 units (each stage starts with an ``InvertedResidualUnitB`` at
    stride 2 followed by ``InvertedResidualUnitA`` units at stride 1), a
    1x1 conv, a 7x7 average pooling and a softmax fully-connected layer.

    :param name_scope: name scope forwarded to ``fluid.dygraph.Layer``
    :param class_dim: number of output classes
    :param scale: width multiplier; one of 0.25, 0.33, 0.5, 1.0, 1.5, 2.0
    :raises ValueError: if ``scale`` is not one of the supported values
    """

    def __init__(self, name_scope, class_dim, scale=1.0):
        super(ShuffleNetV2, self).__init__(name_scope)
        stage_repeats = [4, 8, 4]
        # Per-scale channel widths: [unused, stem, stage 1, stage 2, stage 3,
        # last conv].
        # NOTE(review): the 2.0x entry uses 224 for the first stage; the
        # ShuffleNet V2 paper appears to list 244. Kept as is to preserve
        # existing behavior -- confirm which one is intended.
        channels_by_scale = {
            0.25: [-1, 24, 24, 48, 96, 512],
            0.33: [-1, 24, 32, 64, 128, 512],
            0.5: [-1, 24, 48, 96, 192, 1024],
            1.0: [-1, 24, 116, 232, 464, 1024],
            1.5: [-1, 24, 176, 352, 704, 1024],
            2.0: [-1, 24, 224, 488, 976, 2048],
        }
        if scale not in channels_by_scale:
            raise ValueError(
                "scale {} is not supported; expected one of "
                "0.25, 0.33, 0.5, 1.0, 1.5, 2.0".format(scale))
        stage_out_channels = channels_by_scale[scale]

        input_channel = stage_out_channels[1]
        self._conv1 = ConvBnLayer(self.full_name(), num_filters=input_channel,
                                  filter_size=3, stride=2, padding=1)
        self._pool1 = fluid.dygraph.Pool2D(self.full_name(), pool_size=3,
                                           pool_stride=2, pool_padding=1)

        # Plain list used only for ordered iteration in forward(); every unit
        # is also registered through add_sublayer.
        self._conv_list = []
        for idxstage, numrepeat in enumerate(stage_repeats):
            output_channel = stage_out_channels[idxstage + 2]
            for i in range(numrepeat):
                if i == 0:
                    # Pass the input width so the unit builds all of its
                    # sublayers up front instead of on the first forward.
                    conv = InvertedResidualUnitB(
                        self.full_name(), num_filters=output_channel,
                        stride=2, num_channels=input_channel)
                    self.add_sublayer(
                        "InvertedResidualUnitB_{}_{}".format(idxstage, i), conv)
                else:
                    conv = InvertedResidualUnitA(
                        self.full_name(), num_filters=output_channel, stride=1)
                    self.add_sublayer(
                        "InvertedResidualUnitA_{}_{}".format(idxstage, i), conv)
                self._conv_list.append(conv)
            # The next stage consumes what this stage produced.
            input_channel = output_channel

        self._conv_last = ConvBnLayer(self.full_name(),
                                      num_filters=stage_out_channels[-1],
                                      filter_size=1,
                                      stride=1,
                                      padding=0)
        # NOTE(review): the fixed 7x7 window presumably assumes 224x224
        # inputs -- confirm before feeding other resolutions.
        self._pool_last = fluid.dygraph.Pool2D(self.full_name(),
                                               pool_type='avg',
                                               pool_stride=1,
                                               pool_size=7,
                                               pool_padding=0)
        self.fc = fluid.dygraph.FC(self.full_name(),
                                   size=class_dim,
                                   param_attr=ParamAttr(initializer=MSRA()),
                                   act='softmax')

    def forward(self, inputs, label=None):
        """
        Run the network on ``inputs``.

        :param inputs: input image batch
        :param label: optional labels; when given, accuracy is also computed
        :return: the softmax output, or ``(output, accuracy)`` when ``label``
            is not ``None``
        """
        out = self._conv1(inputs)
        out = self._pool1(out)
        for conv in self._conv_list:
            out = conv(out)
        out = self._conv_last(out)
        out = self._pool_last(out)
        out = self.fc(out)
        if label is not None:
            acc = fluid.layers.accuracy(input=out, label=label)
            return out, acc
        return out
if __name__ == '__main__':
    # Smoke test: one forward/backward pass on an all-zero batch.
    with fluid.dygraph.guard():
        shuffle_net_v2 = ShuffleNetV2('shuffle-net-v2', 3)

        img = fluid.dygraph.to_variable(
            np.zeros([2, 3, 224, 224]).astype('float32'))
        outs = shuffle_net_v2(img)

        label = fluid.dygraph.to_variable(np.zeros([2, 1]).astype('int64'))
        label.stop_gradient = True

        avg_loss = fluid.layers.mean(fluid.layers.cross_entropy(outs, label))
        # Run the backward pass starting from the averaged loss.
        avg_loss.backward()
        print(outs.numpy())

        s = "InvertedResidualUnitA_{}_{}".format(4, 5)
        print(s)