Skip to content

  • 体验新版
    • 正在加载...
  • 登录
  • PaddlePaddle
  • Paddle
  • Issue
  • #20934

P
Paddle
  • 项目概览

PaddlePaddle / Paddle
大约 2 年 前同步成功

通知 2325
Star 20933
Fork 5424
  • 代码
    • 文件
    • 提交
    • 分支
    • Tags
    • 贡献者
    • 分支图
    • Diff
  • Issue 1423
    • 列表
    • 看板
    • 标记
    • 里程碑
  • 合并请求 543
  • Wiki 0
    • Wiki
  • 分析
    • 仓库
    • DevOps
  • 项目成员
  • Pages
P
Paddle
  • 项目概览
    • 项目概览
    • 详情
    • 发布
  • 仓库
    • 仓库
    • 文件
    • 提交
    • 分支
    • 标签
    • 贡献者
    • 分支图
    • 比较
  • Issue 1,423
    • Issue 1,423
    • 列表
    • 看板
    • 标记
    • 里程碑
  • 合并请求 543
    • 合并请求 543
  • Pages
  • 分析
    • 分析
    • 仓库分析
    • DevOps
  • Wiki 0
    • Wiki
  • 成员
    • 成员
  • 收起侧边栏
  • 动态
  • 分支图
  • 创建新Issue
  • 提交
  • Issue看板
已关闭
开放中
Opened 10月 31, 2019 by saxon_zh@saxon_zhGuest

动态图搭建 shuffle-net-v2,训练完成后验证准确率低且不稳定,怀疑是参数加载问题

Created by: xiangyubo

动态图搭建 shuffle-net-v2,训练完成后验证准确率低且不稳定,怀疑是参数加载问题。使用的是 paddle 1.5.1,shuffle-net 是我自己写的。之前在写其他网络的时候碰到过类似问题,当时是因为有层不在 sublayer 中,这次特别注意了。网络结构见代码:

# -*- coding: UTF-8 -*-
"""
动态图构建的 shuffle-net-v2
"""
import paddle.fluid as fluid
import numpy as np
from paddle.fluid import ParamAttr
from paddle.fluid.initializer import MSRA


def channel_shuffle(x, groups):
    """
    Interleave the channels of a feature map across ``groups`` groups.

    The channel axis is viewed as ``(groups, channels_per_group)``, those two
    sub-axes are swapped, and the result is flattened back to four dimensions.

    :param x: input feature map; the first four entries of ``x.shape`` are
        read as batch size, channels, height and width
    :param groups: shuffle group count
    :return: the reshaped variable with its channels shuffled
    """
    n, c, h, w = (x.shape[axis] for axis in range(4))
    per_group = c // groups

    # Split the channel axis into (groups, per_group).
    grouped = fluid.layers.reshape(x=x, shape=[n, groups, per_group, h, w])

    # Swap the two channel sub-axes so channels from different groups interleave.
    swapped = fluid.layers.transpose(x=grouped, perm=[0, 2, 1, 3, 4])

    # Merge the sub-axes back into a single channel axis.
    return fluid.layers.reshape(x=swapped, shape=[n, c, h, w])


class ConvBnLayer(fluid.dygraph.Layer):
    """
    Convolution followed by batch normalization.

    :param name_scope: name scope passed to the base ``Layer``
    :param num_filters: number of convolution filters; also used as the
        channel count of the batch-norm layer
    :param filter_size: convolution kernel size
    :param stride: convolution stride
    :param padding: convolution padding
    :param dilation: convolution dilation
    :param groups: convolution group count
    :param act: activation applied by the batch-norm layer (``None`` for none)
    :param use_cudnn: forwarded to the convolution
    :param param_attr: attribute of the convolution weight; when ``None`` an
        MSRA-initialized ``ParamAttr`` is used
    :param bias_attr: attribute of the convolution bias; when ``None`` the
        convolution has no bias (``False``)
    """
    def __init__(self, name_scope, num_filters, filter_size, stride=1,
                 padding=0, dilation=1, groups=1, act="relu", use_cudnn=True,
                 param_attr=None, bias_attr=None):
        super(ConvBnLayer, self).__init__(name_scope)

        # These two arguments used to be accepted and then ignored. They are
        # honoured now; the defaults reproduce the previous hard-coded values.
        if param_attr is None:
            param_attr = ParamAttr(initializer=MSRA())
        if bias_attr is None:
            bias_attr = False

        self._conv2d = fluid.dygraph.Conv2D(
            self.full_name(),
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            param_attr=param_attr,
            bias_attr=bias_attr,
            use_cudnn=use_cudnn)

        # The batch norm always uses default attributes for its scale/bias.
        self._batch_normal = fluid.dygraph.BatchNorm(
            self.full_name(),
            num_channels=num_filters,
            act=act,
            param_attr=ParamAttr(),
            bias_attr=ParamAttr()
        )

    def forward(self, inputs):
        """Apply the convolution, then batch norm (with its activation)."""
        x = self._conv2d(inputs)
        x = self._batch_normal(x)
        return x


class InvertedResidualUnitA(fluid.dygraph.Layer):
    """
    Basic ShuffleNet unit: the input is split in two along the channel axis,
    one half passes through untouched and the other goes through
    1x1 conv -> depthwise 3x3 conv -> 1x1 conv; the halves are then
    concatenated and channel-shuffled.

    :param name_scope: name scope passed to the base ``Layer``
    :param num_filters: each conv in the branch produces ``num_filters // 2``
        channels
    :param stride: stride of the depthwise 3x3 conv
    """
    def __init__(self, name_scope, num_filters, stride):
        super(InvertedResidualUnitA, self).__init__(name_scope)

        half = num_filters // 2
        # Both 1x1 convs share the same configuration.
        pointwise = dict(num_filters=half, filter_size=1,
                         stride=1, padding=0, act='relu')

        self._conv_pw = ConvBnLayer(self.full_name(), **pointwise)

        # Depthwise: one group per channel, no activation.
        self._conv_dw = ConvBnLayer(
            self.full_name(), num_filters=half, filter_size=3,
            stride=stride, padding=1, groups=half, act=None)

        self._conv_liner = ConvBnLayer(self.full_name(), **pointwise)

    def forward(self, inputs):
        """Run the unit on ``inputs`` and return the shuffled feature map."""
        half = inputs.shape[1] // 2
        shortcut, branch = fluid.layers.split(
            inputs, num_or_sections=[half, half], dim=1)

        branch = self._conv_pw(branch)
        branch = self._conv_dw(branch)
        branch = self._conv_liner(branch)

        merged = fluid.layers.concat([shortcut, branch], axis=1)
        return channel_shuffle(merged, 2)


class InvertedResidualUnitB(fluid.dygraph.Layer):
    """
    Two-branch ShuffleNet unit: both branches consume the whole input, and
    their outputs are concatenated and channel-shuffled.

    Branch 1: depthwise 3x3 conv -> 1x1 conv.
    Branch 2: 1x1 conv -> depthwise 3x3 conv -> 1x1 conv.

    :param name_scope: name scope passed to the base ``Layer``
    :param num_filters: each branch produces ``num_filters // 2`` channels
    :param stride: stride of the depthwise 3x3 conv in both branches
    :param num_channels: optional input channel count. When given, the
        branch-1 depthwise conv is created in ``__init__``; when ``None`` it
        is created lazily in ``_build_once`` from the first input's shape
        (the previous behavior).
    """
    def __init__(self, name_scope, num_filters, stride, num_channels=None):
        super(InvertedResidualUnitB, self).__init__(name_scope)

        _oup_inc = num_filters // 2
        self.num_filters = num_filters
        self.stride = stride
        # Remembered so _build_once knows whether branch 1 still has to be
        # completed. Deliberately NOT pre-assigning self._conv_dw_1 = None:
        # a plain attribute of that name could hide the sub-layer set later.
        self._num_channels = num_channels

        # branch 1
        if num_channels is not None:
            # Creating the layer here means the sub-layer and its parameters
            # exist before the first forward pass, so code that walks the
            # sub-layers right after construction (e.g. restoring saved
            # parameters) can see them.
            # NOTE(review): the lazy creation is the suspected cause of the
            # unstable validation accuracy reported for this model - confirm
            # against the Paddle version in use. Auto-generated parameter
            # names may differ from checkpoints saved with the lazy variant.
            self._conv_dw_1 = self._make_conv_dw_1(num_channels)

        self._conv_liner_1 = ConvBnLayer(
            self.full_name(), num_filters=_oup_inc, filter_size=1,
            stride=1, padding=0, act='relu'
        )

        # branch 2
        self._conv_pw_2 = ConvBnLayer(
            self.full_name(), num_filters=_oup_inc, filter_size=1,
            stride=1, padding=0, act='relu')

        self._conv_dw_2 = ConvBnLayer(
            self.full_name(), num_filters=_oup_inc, filter_size=3,
            stride=stride, padding=1, groups=_oup_inc, act=None
        )

        self._conv_liner_2 = ConvBnLayer(
            self.full_name(), num_filters=_oup_inc, filter_size=1,
            stride=1, padding=0, act='relu'
        )

    def _make_conv_dw_1(self, channels):
        """Build the branch-1 depthwise 3x3 conv (one group per input channel)."""
        return ConvBnLayer(
            self.full_name(), num_filters=channels, filter_size=3,
            stride=self.stride, padding=1, groups=channels, act=None
        )

    def _build_once(self, *args):
        """
        Lazily create the branch-1 depthwise conv when ``num_channels`` was
        not supplied, reading the channel count from the first input.
        """
        if self._num_channels is None:
            first_input = args[0]
            self._conv_dw_1 = self._make_conv_dw_1(first_input.shape[1])

    def forward(self, inputs):
        """Run both branches on ``inputs`` and return the shuffled concat."""
        # branch 1
        out1 = self._conv_dw_1(inputs)
        out1 = self._conv_liner_1(out1)

        # branch 2
        out2 = self._conv_pw_2(inputs)
        out2 = self._conv_dw_2(out2)
        out2 = self._conv_liner_2(out2)

        out = fluid.layers.concat([out1, out2], axis=1)
        out = channel_shuffle(out, 2)
        return out


class ShuffleNetV2(fluid.dygraph.Layer):
    """
    ShuffleNet V2 classifier built from dygraph layers.

    :param name_scope: name scope passed to the base ``Layer``
    :param class_dim: output size of the final fully connected layer
    :param scale: width multiplier; must equal one of 0.25, 0.33, 0.5, 1.0,
        1.5 or 2.0
    :raises ValueError: if ``scale`` is not one of the supported values
    """

    # (scale, channel table) pairs. Index 1 is the stem conv width, indices
    # 2-4 the three stage widths, and the last entry the final 1x1 conv width.
    # NOTE(review): the ShuffleNet V2 paper lists 244 (not 224) for the first
    # stage at 2.0x; the value is kept as-is to preserve behavior - confirm.
    _STAGE_OUT_CHANNELS = (
        (0.25, [-1, 24, 24, 48, 96, 512]),
        (0.33, [-1, 24, 32, 64, 128, 512]),
        (0.5, [-1, 24, 48, 96, 192, 1024]),
        (1.0, [-1, 24, 116, 232, 464, 1024]),
        (1.5, [-1, 24, 176, 352, 704, 1024]),
        (2.0, [-1, 24, 224, 488, 976, 2048]),
    )

    def __init__(self, name_scope, class_dim, scale=1.0):
        super(ShuffleNetV2, self).__init__(name_scope)

        stage_repeats = [4, 8, 4]

        stage_out_channels = None
        for supported_scale, channels in ShuffleNetV2._STAGE_OUT_CHANNELS:
            if scale == supported_scale:
                stage_out_channels = channels
                break
        if stage_out_channels is None:
            # The previous message talked about "groups" although the rejected
            # value is the scale.
            raise ValueError(
                "scale {} is not supported; expected one of "
                "0.25, 0.33, 0.5, 1.0, 1.5, 2.0".format(scale))

        input_channel = stage_out_channels[1]
        self._conv1 = ConvBnLayer(self.full_name(), num_filters=input_channel, filter_size=3, stride=2, padding=1)
        self._pool1 = fluid.dygraph.Pool2D(self.full_name(), pool_size=3, pool_stride=2, pool_padding=1)

        # Units are registered via add_sublayer and also kept in a plain list
        # so forward() can run them in order.
        self._conv_list = []
        for idxstage, numrepeat in enumerate(stage_repeats):
            output_channel = stage_out_channels[idxstage + 2]
            for i in range(numrepeat):
                if i == 0:
                    # The first unit of a stage is the stride-2 two-branch
                    # unit. Its input width is known here, so it is passed on
                    # to let the unit create all of its layers up front.
                    conv = InvertedResidualUnitB(
                        self.full_name(), num_filters=output_channel,
                        stride=2, num_channels=input_channel)
                    sublayer_name = "InvertedResidualUnitB_{}_{}".format(idxstage, i)
                else:
                    conv = InvertedResidualUnitA(
                        self.full_name(), num_filters=output_channel, stride=1)
                    sublayer_name = "InvertedResidualUnitA_{}_{}".format(idxstage, i)
                self.add_sublayer(sublayer_name, conv)
                self._conv_list.append(conv)
            # Each unit emits output_channel channels (the tables hold even
            # widths), which is what the next stage receives.
            input_channel = output_channel

        self._conv_last = ConvBnLayer(self.full_name(),
                                      num_filters=stage_out_channels[-1],
                                      filter_size=1,
                                      stride=1,
                                      padding=0)
        # A 224x224 input is halved five times before this point (stem conv,
        # max pool, three stages), i.e. 7x7, so a 7x7 average pool reduces it
        # to 1x1. NOTE(review): other input sizes are not handled - confirm.
        self._pool_last = fluid.dygraph.Pool2D(self.full_name(),
                                               pool_type='avg',
                                               pool_stride=1,
                                               pool_size=7,
                                               pool_padding=0)
        self.fc = fluid.dygraph.FC(self.full_name(),
                                   size=class_dim,
                                   param_attr=ParamAttr(initializer=MSRA()),
                                   act='softmax')

    def forward(self, inputs, label=None):
        """
        Run the network.

        :param inputs: input image batch
        :param label: optional labels; when given, accuracy is also computed
        :return: the softmax output, or ``(output, accuracy)`` when ``label``
            is not ``None``
        """
        out = self._conv1(inputs)
        out = self._pool1(out)
        for conv in self._conv_list:
            out = conv(out)
        out = self._conv_last(out)
        out = self._pool_last(out)
        out = self.fc(out)
        if label is None:
            return out
        acc = fluid.layers.accuracy(input=out, label=label)
        return out, acc


if __name__ == '__main__':
    # Smoke test: one forward and backward pass on an all-zero batch.
    with fluid.dygraph.guard():
        net = ShuffleNetV2('shuffle-net-v2', 3)

        zero_images = np.zeros([2, 3, 224, 224], dtype='float32')
        zero_labels = np.zeros([2, 1], dtype='int64')

        images = fluid.dygraph.to_variable(zero_images)
        predictions = net(images)

        targets = fluid.dygraph.to_variable(zero_labels)
        targets.stop_gradient = True

        per_sample_loss = fluid.layers.cross_entropy(predictions, targets)
        mean_loss = fluid.layers.mean(per_sample_loss)

        # Back-propagate from the mean loss to obtain the gradients.
        mean_loss.backward()

        print(predictions.numpy())
        print("InvertedResidualUnitA_{}_{}".format(4, 5))
指派人
分配到
无
里程碑
无
分配里程碑
工时统计
无
截止日期
无
标识: paddlepaddle/Paddle#20934
渝ICP备2023009037号

京公网安备11010502055752号

网络110报警服务 Powered by GitLab CE v13.7
开源知识
Git 入门 Pro Git 电子书 在线学 Git
Markdown 基础入门 IT 技术知识开源图谱
帮助
使用手册 反馈建议 博客
《GitCode 隐私声明》 《GitCode 服务条款》 关于GitCode
Powered by GitLab CE v13.7