动态图搭建 shuffle-net-v2 训练完成后验证准确率低且不稳定，怀疑是参数加载问题 (#20934) · Issue · PaddlePaddle / Paddle

动态图搭建 shuffle-net-v2 训练完成后验证准确率低且不稳定，怀疑是参数加载问题

Created by: xiangyubo
动态图搭建 shuffle-net-v2 训练完成后验证准确率低且不稳定，怀疑是参数加载问题。使用的是 paddle 1.5.1，shuffle-net 是我自己写的。之前在写其他网络的时候碰到过类似问题，当时是因为有层不在 sublayer 中，这次特别注意了。网络结构见代码：
# -*- coding: UTF-8 -*-
"""
动态图构建的 shuffle-net-v2
"""
import paddle.fluid as fluid
import numpy as np
from paddle.fluid import ParamAttr
from paddle.fluid.initializer import MSRA


def channel_shuffle(x, groups):
    """
    Interleave the channels of a 4-D feature map across ``groups`` groups.

    The channel axis is viewed as ``(groups, channels_per_group)``, the two
    sub-axes are swapped, and the result is flattened back to the original
    4-D shape.

    :param x: input feature map; its first four shape entries are read as
        (batch, channels, height, width)
    :param groups: number of channel groups to shuffle between
    :return: feature map with the same shape as ``x`` and shuffled channels
    """
    shape = x.shape
    n, c, h, w = shape[0], shape[1], shape[2], shape[3]

    # Split the channel axis into (groups, channels_per_group).
    grouped = fluid.layers.reshape(
        x=x, shape=[n, groups, c // groups, h, w])

    # Swap the two channel sub-axes; this is the actual shuffle.
    swapped = fluid.layers.transpose(x=grouped, perm=[0, 2, 1, 3, 4])

    # Merge the sub-axes back into a single channel axis.
    return fluid.layers.reshape(x=swapped, shape=[n, c, h, w])


class ConvBnLayer(fluid.dygraph.Layer):
    """
    Convolution followed by batch normalization.

    The activation, if any, is applied by the batch-norm layer, not by the
    convolution.
    """
    def __init__(self, name_scope, num_filters, filter_size, stride=1,
                 padding=0, dilation=1, groups=1, act="relu", use_cudnn=True,
                 param_attr=None, bias_attr=None):
        """
        :param name_scope: name scope forwarded to the base ``Layer``
        :param num_filters: number of output channels of the convolution
            (also used as ``num_channels`` of the batch norm)
        :param filter_size: convolution kernel size
        :param stride: convolution stride
        :param padding: convolution padding
        :param dilation: convolution dilation
        :param groups: convolution groups
        :param act: activation name passed to the batch-norm layer, or None
        :param use_cudnn: forwarded to the convolution
        :param param_attr: attribute for the convolution weights; when None
            an MSRA-initialised ``ParamAttr`` is used
        :param bias_attr: attribute for the convolution bias; when None the
            convolution is created without a bias (``bias_attr=False``)
        """
        super(ConvBnLayer, self).__init__(name_scope)

        # These two arguments used to be accepted but silently ignored.
        # Honour them when given; the previous hard-coded values stay the
        # defaults so existing callers see no change.
        if param_attr is None:
            param_attr = ParamAttr(initializer=MSRA())
        if bias_attr is None:
            # The batch norm below carries its own bias parameter.
            bias_attr = False

        self._conv2d = fluid.dygraph.Conv2D(
            self.full_name(),
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            param_attr=param_attr,
            bias_attr=bias_attr,
            use_cudnn=use_cudnn)

        self._batch_normal = fluid.dygraph.BatchNorm(
            self.full_name(),
            num_channels=num_filters,
            act=act,
            param_attr=ParamAttr(),
            bias_attr=ParamAttr()
        )

    def forward(self, inputs):
        """Apply the convolution, then batch norm (with its activation)."""
        x = self._conv2d(inputs)
        x = self._batch_normal(x)
        return x


class InvertedResidualUnitA(fluid.dygraph.Layer):
    """
    Basic shuffle-net unit that splits the input channels in two halves.

    The first half is passed through untouched; the second half goes through
    a 1x1 conv, a 3x3 depthwise conv and another 1x1 conv. Both halves are
    then concatenated and channel-shuffled with two groups.
    """
    def __init__(self, name_scope, num_filters, stride):
        """
        :param name_scope: name scope forwarded to the base ``Layer``
        :param num_filters: total output channels; each conv in the branch
            produces ``num_filters // 2`` channels
        :param stride: stride of the 3x3 depthwise convolution
        """
        super(InvertedResidualUnitA, self).__init__(name_scope)

        branch_channels = num_filters // 2
        scope = self.full_name()

        # Sub-layers are created in the same order as before.
        self._conv_pw = ConvBnLayer(
            scope,
            num_filters=branch_channels,
            filter_size=1,
            stride=1,
            padding=0,
            act='relu')
        self._conv_dw = ConvBnLayer(
            scope,
            num_filters=branch_channels,
            filter_size=3,
            stride=stride,
            padding=1,
            groups=branch_channels,
            act=None)
        self._conv_liner = ConvBnLayer(
            scope,
            num_filters=branch_channels,
            filter_size=1,
            stride=1,
            padding=0,
            act='relu')

    def forward(self, inputs):
        """Run the unit on ``inputs`` and return the shuffled feature map."""
        half = inputs.shape[1] // 2
        identity, branch = fluid.layers.split(
            inputs, num_or_sections=[half, half], dim=1)

        for layer in (self._conv_pw, self._conv_dw, self._conv_liner):
            branch = layer(branch)

        merged = fluid.layers.concat([identity, branch], axis=1)
        return channel_shuffle(merged, 2)


class InvertedResidualUnitB(fluid.dygraph.Layer):
    """
    Two-branch shuffle-net unit used as the first unit of each stage.

    Both branches consume the full input. Branch 1 is a 3x3 depthwise conv
    followed by a 1x1 conv; branch 2 is 1x1 conv, 3x3 depthwise conv, 1x1
    conv. The branch outputs are concatenated and channel-shuffled with two
    groups.
    """
    def __init__(self, name_scope, num_filters, stride, num_channels=None):
        """
        :param name_scope: name scope forwarded to the base ``Layer``
        :param num_filters: total output channels; each branch produces
            ``num_filters // 2`` channels
        :param stride: stride of the 3x3 depthwise convolutions
        :param num_channels: number of input channels. When given, the
            branch-1 depthwise conv is created here so that it is a
            registered sub-layer from the start. When None, it is created
            lazily on the first call (the previous behaviour).
        """
        super(InvertedResidualUnitB, self).__init__(name_scope)

        _oup_inc = num_filters // 2
        self.num_filters = num_filters
        self.stride = stride
        self._num_channels = num_channels

        # branch 1
        # A sub-layer that only appears in _build_once is not registered
        # until the first forward pass, so it is absent from state_dict()
        # and, it appears, from a load_dict() issued before that pass.
        # Building it eagerly avoids that.
        # NOTE(review): parameter names seem to depend on creation order, so
        # checkpoints saved by the lazily-built version may not match and
        # may need to be re-saved - confirm.
        if num_channels is not None:
            self._conv_dw_1 = self._make_branch1_depthwise(num_channels)

        self._conv_liner_1 = ConvBnLayer(
            self.full_name(), num_filters=_oup_inc, filter_size=1,
            stride=1, padding=0, act='relu'
        )

        # branch 2
        self._conv_pw_2 = ConvBnLayer(
            self.full_name(), num_filters=_oup_inc, filter_size=1,
            stride=1, padding=0, act='relu')

        self._conv_dw_2 = ConvBnLayer(
            self.full_name(), num_filters=_oup_inc, filter_size=3,
            stride=stride, padding=1, groups=_oup_inc, act=None
        )

        self._conv_liner_2 = ConvBnLayer(
            self.full_name(), num_filters=_oup_inc, filter_size=1,
            stride=1, padding=0, act='relu'
        )

    def _make_branch1_depthwise(self, channels):
        """Create the branch-1 3x3 depthwise conv for ``channels`` inputs."""
        return ConvBnLayer(
            self.full_name(), num_filters=channels, filter_size=3,
            stride=self.stride, padding=1, groups=channels, act=None
        )

    def _build_once(self, *args):
        """
        Lazily create the branch-1 depthwise conv from the first input.

        Only used when ``num_channels`` was not supplied to ``__init__``.
        """
        if self._num_channels is not None:
            # Already built eagerly in __init__.
            return
        in_channels = args[0].shape[1]
        self._conv_dw_1 = self._make_branch1_depthwise(in_channels)

    def forward(self, inputs):
        """Run both branches on ``inputs`` and return the shuffled concat."""
        # branch 1
        out1 = self._conv_dw_1(inputs)
        out1 = self._conv_liner_1(out1)

        # branch 2
        out2 = self._conv_pw_2(inputs)
        out2 = self._conv_dw_2(out2)
        out2 = self._conv_liner_2(out2)

        out = fluid.layers.concat([out1, out2], axis=1)
        out = channel_shuffle(out, 2)
        return out


class ShuffleNetV2(fluid.dygraph.Layer):
    """
    shuffle-net-v2 classifier built from dygraph layers.

    Structure: 3x3 conv + pool, three stages of shuffle units (4, 8 and 4
    units; the first unit of each stage has stride 2), a 1x1 conv, average
    pooling and a softmax fully-connected layer.
    """
    def __init__(self, name_scope, class_dim, scale=1.0):
        """
        :param name_scope: name scope forwarded to the base ``Layer``
        :param class_dim: number of classes (size of the final FC layer)
        :param scale: width multiplier; one of 0.25, 0.33, 0.5, 1.0, 1.5, 2.0
        :raises ValueError: if ``scale`` is not one of the supported values
        """
        super(ShuffleNetV2, self).__init__(name_scope)

        stage_repeats = [4, 8, 4]
        if scale == 0.25:
            stage_out_channels = [-1, 24, 24, 48, 96, 512]
        elif scale == 0.33:
            stage_out_channels = [-1, 24, 32, 64, 128, 512]
        elif scale == 0.5:
            stage_out_channels = [-1, 24, 48, 96, 192, 1024]
        elif scale == 1.0:
            stage_out_channels = [-1, 24, 116, 232, 464, 1024]
        elif scale == 1.5:
            stage_out_channels = [-1, 24, 176, 352, 704, 1024]
        elif scale == 2.0:
            # NOTE(review): the ShuffleNet V2 paper appears to list 244 for
            # this stage width; 224 is kept so the model is unchanged -
            # confirm which value is intended.
            stage_out_channels = [-1, 24, 224, 488, 976, 2048]
        else:
            # The old message talked about "groups", but the rejected value
            # is the width multiplier.
            raise ValueError(
                "unsupported scale {}; expected one of "
                "0.25, 0.33, 0.5, 1.0, 1.5, 2.0".format(scale))

        input_channel = stage_out_channels[1]
        self._conv1 = ConvBnLayer(self.full_name(), num_filters=input_channel, filter_size=3, stride=2, padding=1)
        self._pool1 = fluid.dygraph.Pool2D(self.full_name(), pool_size=3, pool_stride=2, pool_padding=1)

        # Every unit is registered through add_sublayer; this plain list only
        # records the order in which forward() applies them.
        self._conv_list = []
        for idxstage, numrepeat in enumerate(stage_repeats):
            output_channel = stage_out_channels[idxstage + 2]
            for i in range(numrepeat):
                if i == 0:
                    # Pass the known input width so the unit can create all
                    # of its sub-layers up front instead of on first call.
                    conv = InvertedResidualUnitB(
                        self.full_name(), num_filters=output_channel,
                        stride=2, num_channels=input_channel)
                    layer_name = "InvertedResidualUnitB_{}_{}".format(idxstage, i)
                else:
                    conv = InvertedResidualUnitA(
                        self.full_name(), num_filters=output_channel,
                        stride=1)
                    layer_name = "InvertedResidualUnitA_{}_{}".format(idxstage, i)
                self.add_sublayer(layer_name, conv)
                self._conv_list.append(conv)
            # The next stage consumes this stage's output.
            input_channel = output_channel

        self._conv_last = ConvBnLayer(self.full_name(),
                                      num_filters=stage_out_channels[-1],
                                      filter_size=1,
                                      stride=1,
                                      padding=0)
        # With a 224x224 input the feature map is 7x7 here (five stride-2
        # steps), so this pooling reduces it to 1x1.
        self._pool_last = fluid.dygraph.Pool2D(self.full_name(),
                                               pool_type='avg',
                                               pool_stride=1,
                                               pool_size=7,
                                               pool_padding=0)
        self.fc = fluid.dygraph.FC(self.full_name(),
                                   size=class_dim,
                                   param_attr=ParamAttr(initializer=MSRA()),
                                   act='softmax')

    def forward(self, inputs, label=None):
        """
        :param inputs: input image batch
        :param label: optional labels; when given, accuracy is also computed
        :return: the softmax output, or ``(output, accuracy)`` when ``label``
            is not None
        """
        out = self._conv1(inputs)
        out = self._pool1(out)
        for conv in self._conv_list:
            out = conv(out)
        out = self._conv_last(out)
        out = self._pool_last(out)
        out = self.fc(out)
        if label is not None:
            acc = fluid.layers.accuracy(input=out, label=label)
            return out, acc
        else:
            return out


if __name__ == '__main__':
    # Smoke test: one forward/backward pass on an all-zero batch of two images.
    with fluid.dygraph.guard():
        shuffle_net_v2 = ShuffleNetV2('shuffle-net-v2', 3)

        img = fluid.dygraph.to_variable(
            np.zeros([2, 3, 224, 224]).astype('float32'))
        outs = shuffle_net_v2(img)

        label = fluid.dygraph.to_variable(
            np.zeros([2, 1]).astype('int64'))
        # Labels are constants; no gradient should flow into them.
        label.stop_gradient = True

        avg_loss = fluid.layers.mean(fluid.layers.cross_entropy(outs, label))

        # This call computes the gradient of the loss with respect to the
        # network output.
        avg_loss.backward()

        print(outs.numpy())
        print("InvertedResidualUnitA_{}_{}".format(4, 5))
PaddlePaddle / Paddle 1 年多前同步成功

动态图搭建 shuffle-net-v2 训练完成后验证准确率低且不稳定，怀疑是参数加载问题

PaddlePaddle / Paddle
1 年多前同步成功