shufflenet_v2.py 11.1 KB
Newer Older
1
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
W
WuHaobo 已提交
14 15 16 17 18

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

19 20
import numpy as np
import paddle
21 22 23 24 25 26
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import Conv2d, BatchNorm, Linear, Dropout
from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d
from paddle.nn.initializer import MSRA
27
import math
W
WuHaobo 已提交
28 29

# Names exported by ``from <module> import *``: the model factory functions.
__all__ = [
    "ShuffleNetV2_x0_25",
    "ShuffleNetV2_x0_33",
    "ShuffleNetV2_x0_5",
    "ShuffleNetV2",
    "ShuffleNetV2_x1_5",
    "ShuffleNetV2_x2_0",
    "ShuffleNetV2_swish",
]


36 37 38 39 40 41
def channel_shuffle(x, groups):
    """Interleave the channels of ``x`` across ``groups`` groups.

    Axis 1 is viewed as ``[groups, channels_per_group]``, those two axes are
    swapped, and the result is flattened back to the original 4-D shape.

    Args:
        x: Tensor whose first four ``shape`` entries are read as batch size,
            channel count, height and width.
        groups: Number of groups the channel axis is divided into. The
            channel count is floor-divided by this value.

    Returns:
        The tensor reshaped back to ``[batch, channels, height, width]`` with
        its channels reordered.
    """
    n, c, h, w = (x.shape[axis] for axis in range(4))
    per_group = c // groups

    # [N, C, H, W] -> [N, groups, C // groups, H, W]
    grouped = paddle.reshape(x, [n, groups, per_group, h, w])
    # Swap the group axis with the per-group channel axis.
    swapped = paddle.transpose(grouped, [0, 2, 1, 3, 4])
    # Collapse the two channel axes back into one.
    return paddle.reshape(swapped, [n, c, h, w])


51
class ConvBNLayer(nn.Layer):
    """Bias-free convolution, then batch norm, then an optional activation.

    Args:
        num_channels: Number of input channels of the convolution.
        filter_size: Kernel size of the convolution.
        num_filters: Number of output channels of the convolution; also the
            channel count given to the batch-norm layer.
        stride: Stride of the convolution.
        padding: Padding of the convolution.
        channels: Unused; kept so existing callers keep working.
        num_groups: Number of groups of the convolution.
        if_act: Whether the activation is applied after batch norm.
        act: Activation applied when ``if_act`` is true; ``'relu'`` or
            ``'swish'``.
        name: Prefix used to build the parameter names (``<name>_weights``,
            ``<name>_bn_scale``, ``<name>_bn_offset``, ``<name>_bn_mean``,
            ``<name>_bn_variance``). When ``None`` the names are passed as
            ``None`` as well instead of failing on string concatenation.

    Raises:
        AssertionError: If ``act`` is neither ``'relu'`` nor ``'swish'``.
    """

    def __init__(self,
                 num_channels,
                 filter_size,
                 num_filters,
                 stride,
                 padding,
                 channels=None,
                 num_groups=1,
                 if_act=True,
                 act='relu',
                 name=None):
        super(ConvBNLayer, self).__init__()
        self._if_act = if_act
        assert act in ['relu', 'swish'], \
            "supported act are {} but your act is {}".format(
                ['relu', 'swish'], act)
        self._act = act

        def scoped(suffix):
            # ``name`` defaults to None; concatenating onto None raised
            # TypeError, so the declared default was unusable. Pass None
            # through in that case.
            return None if name is None else name + suffix

        self._conv = Conv2d(
            in_channels=num_channels,
            out_channels=num_filters,
            kernel_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            weight_attr=ParamAttr(
                initializer=MSRA(), name=scoped("_weights")),
            bias_attr=False)

        self._batch_norm = BatchNorm(
            num_filters,
            param_attr=ParamAttr(name=scoped("_bn_scale")),
            bias_attr=ParamAttr(name=scoped("_bn_offset")),
            moving_mean_name=scoped("_bn_mean"),
            moving_variance_name=scoped("_bn_variance"))

    def forward(self, inputs, if_act=True):
        """Run conv -> batch norm -> optional activation on ``inputs``.

        Args:
            inputs: Input tensor for the convolution.
            if_act: Ignored; the ``if_act`` value given to the constructor
                decides whether the activation runs. Kept for compatibility.

        Returns:
            The output tensor.
        """
        y = self._conv(inputs)
        y = self._batch_norm(y)
        if self._if_act:
            y = F.relu(y) if self._act == 'relu' else F.swish(y)
        return y


95
class InvertedResidualUnit(nn.Layer):
    """Two-branch ShuffleNet unit whose output is channel-shuffled.

    With ``benchmodel == 1`` the input is split in half along axis 1: the
    first half is passed through untouched and the second half goes through
    a 1x1 -> depthwise 3x3 -> 1x1 stack. With any other ``benchmodel`` both
    branches consume the full input (depthwise 3x3 -> 1x1, and
    1x1 -> depthwise 3x3 -> 1x1). The two branch outputs are concatenated
    along axis 1 and shuffled with two groups.

    Args:
        num_channels: Number of input channels.
        num_filters: Number of output channels; each branch that is built
            produces ``num_filters // 2`` of them.
        stride: Stride of the depthwise convolutions; must be 1 or 2.
        benchmodel: Selects the unit layout as described above.
        act: Activation name forwarded to every ``ConvBNLayer``.
        name: Unit identifier; parameter names are prefixed with
            ``'stage_' + name``. It is concatenated to a string, so the
            ``None`` default cannot actually be used.

    Raises:
        AssertionError: If ``stride`` is not 1 or 2.
    """

    def __init__(self,
                 num_channels,
                 num_filters,
                 stride,
                 benchmodel,
                 act='relu',
                 name=None):
        super(InvertedResidualUnit, self).__init__()
        assert stride in [1, 2], \
            "supported stride are {} but your stride is {}".format([1, 2],
                                                                   stride)
        self.benchmodel = benchmodel
        half_out = num_filters // 2
        prefix = 'stage_' + name

        def pointwise(in_width, suffix):
            # 1x1 conv + BN + activation producing ``half_out`` channels.
            return ConvBNLayer(
                num_channels=in_width,
                num_filters=half_out,
                filter_size=1,
                stride=1,
                padding=0,
                num_groups=1,
                if_act=True,
                act=act,
                name=prefix + suffix)

        def depthwise(width, suffix):
            # 3x3 depthwise conv + BN without activation; keeps the width.
            return ConvBNLayer(
                num_channels=width,
                num_filters=width,
                filter_size=3,
                stride=stride,
                padding=1,
                num_groups=width,
                if_act=False,
                act=act,
                name=prefix + suffix)

        if benchmodel == 1:
            # Only the second half of the input is transformed.
            self._conv_pw = pointwise(num_channels // 2, '_conv1')
            self._conv_dw = depthwise(half_out, '_conv2')
            self._conv_linear = pointwise(half_out, '_conv3')
        else:
            # branch1
            self._conv_dw_1 = depthwise(num_channels, '_conv4')
            self._conv_linear_1 = pointwise(num_channels, '_conv5')
            # branch2
            self._conv_pw_2 = pointwise(num_channels, '_conv1')
            self._conv_dw_2 = depthwise(half_out, '_conv2')
            self._conv_linear_2 = pointwise(half_out, '_conv3')

    def forward(self, inputs):
        """Apply the unit to ``inputs`` and return the shuffled result."""
        if self.benchmodel == 1:
            half = inputs.shape[1] // 2
            x1, x2 = paddle.split(
                inputs, num_or_sections=[half, half], axis=1)
            x2 = self._conv_linear(self._conv_dw(self._conv_pw(x2)))
        else:
            x1 = self._conv_linear_1(self._conv_dw_1(inputs))
            x2 = self._conv_linear_2(
                self._conv_dw_2(self._conv_pw_2(inputs)))

        merged = paddle.concat([x1, x2], axis=1)
        return channel_shuffle(merged, 2)


217
class ShuffleNet(nn.Layer):
    """ShuffleNet classifier assembled from ``InvertedResidualUnit`` stages.

    Layout: stem conv + max pool, three stages of 4, 8 and 4 units (the
    first unit of each stage uses stride 2), a 1x1 conv, global average
    pooling and a fully connected layer.

    Args:
        class_dim: Output size of the final fully connected layer.
        scale: Width multiplier; one of 0.25, 0.33, 0.5, 1.0, 1.5, 2.0.
        act: Activation name forwarded to every ``ConvBNLayer``.

    Raises:
        NotImplementedError: If ``scale`` is not one of the supported values.
    """

    def __init__(self, class_dim=1000, scale=1.0, act='relu'):
        super(ShuffleNet, self).__init__()
        self.scale = scale
        self.class_dim = class_dim
        stage_repeats = [4, 8, 4]

        # Channel widths per supported scale, checked in this order. Index 0
        # is a placeholder, 1 is the stem, 2-4 are the stages, 5 is the
        # final 1x1 conv.
        # NOTE(review): the 2.0 entry uses 224 for the first stage while the
        # ShuffleNetV2 paper appears to list 244 -- confirm before changing,
        # since this value fixes parameter shapes.
        widths_by_scale = (
            (0.25, [-1, 24, 24, 48, 96, 512]),
            (0.33, [-1, 24, 32, 64, 128, 512]),
            (0.5, [-1, 24, 48, 96, 192, 1024]),
            (1.0, [-1, 24, 116, 232, 464, 1024]),
            (1.5, [-1, 24, 176, 352, 704, 1024]),
            (2.0, [-1, 24, 224, 488, 976, 2048]),
        )
        for supported_scale, widths in widths_by_scale:
            if scale == supported_scale:
                stage_out_channels = widths
                break
        else:
            raise NotImplementedError("This scale size:[" + str(scale) +
                                      "] is not implemented!")

        # 1. conv1
        self._conv1 = ConvBNLayer(
            num_channels=3,
            num_filters=stage_out_channels[1],
            filter_size=3,
            stride=2,
            padding=1,
            if_act=True,
            act=act,
            name='stage1_conv')
        self._max_pool = MaxPool2d(kernel_size=3, stride=2, padding=1)

        # 2. bottleneck sequences
        self._block_list = []
        for stage_idx, repeats in enumerate(stage_repeats):
            stage_width = stage_out_channels[stage_idx + 2]
            for unit_idx in range(repeats):
                # The first unit of a stage takes the previous stage's width
                # and uses stride 2 with the two-branch layout; the rest keep
                # the width and use the split layout.
                opens_stage = unit_idx == 0
                unit_name = str(stage_idx + 2) + '_' + str(unit_idx + 1)
                unit = InvertedResidualUnit(
                    num_channels=(stage_out_channels[stage_idx + 1]
                                  if opens_stage else stage_width),
                    num_filters=stage_width,
                    stride=2 if opens_stage else 1,
                    benchmodel=2 if opens_stage else 1,
                    act=act,
                    name=unit_name)
                self._block_list.append(self.add_sublayer(unit_name, unit))

        # 3. last_conv
        self._last_conv = ConvBNLayer(
            num_channels=stage_out_channels[-2],
            num_filters=stage_out_channels[-1],
            filter_size=1,
            stride=1,
            padding=0,
            if_act=True,
            act=act,
            name='conv5')

        # 4. pool
        self._pool2d_avg = AdaptiveAvgPool2d(1)
        self._out_c = stage_out_channels[-1]

        # 5. fc
        self._fc = Linear(
            stage_out_channels[-1],
            class_dim,
            weight_attr=ParamAttr(name='fc6_weights'),
            bias_attr=ParamAttr(name='fc6_offset'))

    def forward(self, inputs):
        """Run the network on ``inputs`` and return the output of the fc layer."""
        feat = self._max_pool(self._conv1(inputs))
        for unit in self._block_list:
            feat = unit(feat)
        feat = self._pool2d_avg(self._last_conv(feat))
        # Flatten the pooled features to [-1, channels] for the fc layer.
        flat = paddle.reshape(feat, shape=[-1, self._out_c])
        return self._fc(flat)


def ShuffleNetV2_x0_25(**args):
    """Build a ``ShuffleNet`` with ``scale=0.25``.

    Args:
        **args: Extra keyword arguments forwarded to ``ShuffleNet``.

    Returns:
        The constructed ``ShuffleNet`` instance.
    """
    # Construct ShuffleNet directly, as the other factories in this file do.
    # The previous call went through the ShuffleNetV2() factory, which
    # already passes scale=1.0, so ``scale`` was supplied twice and every
    # call raised TypeError.
    return ShuffleNet(scale=0.25, **args)
W
WuHaobo 已提交
318 319


320 321
def ShuffleNetV2_x0_33(**args):
    """Build a ``ShuffleNet`` with ``scale=0.33``.

    Args:
        **args: Extra keyword arguments forwarded to ``ShuffleNet``.

    Returns:
        The constructed ``ShuffleNet`` instance.
    """
    return ShuffleNet(scale=0.33, **args)


325 326
def ShuffleNetV2_x0_5(**args):
    """Build a ``ShuffleNet`` with ``scale=0.5``.

    Args:
        **args: Extra keyword arguments forwarded to ``ShuffleNet``.

    Returns:
        The constructed ``ShuffleNet`` instance.
    """
    return ShuffleNet(scale=0.5, **args)


330 331
def ShuffleNetV2(**args):
    """Build a ``ShuffleNet`` with ``scale=1.0``.

    Args:
        **args: Extra keyword arguments forwarded to ``ShuffleNet``.

    Returns:
        The constructed ``ShuffleNet`` instance.
    """
    return ShuffleNet(scale=1.0, **args)


335 336
def ShuffleNetV2_x1_5(**args):
    """Build a ``ShuffleNet`` with ``scale=1.5``.

    Args:
        **args: Extra keyword arguments forwarded to ``ShuffleNet``.

    Returns:
        The constructed ``ShuffleNet`` instance.
    """
    return ShuffleNet(scale=1.5, **args)


340 341
def ShuffleNetV2_x2_0(**args):
    """Build a ``ShuffleNet`` with ``scale=2.0``.

    Args:
        **args: Extra keyword arguments forwarded to ``ShuffleNet``.

    Returns:
        The constructed ``ShuffleNet`` instance.
    """
    return ShuffleNet(scale=2.0, **args)


345 346
def ShuffleNetV2_swish(**args):
    """Build a ``ShuffleNet`` with ``scale=1.0`` and ``act='swish'``.

    Args:
        **args: Extra keyword arguments forwarded to ``ShuffleNet``.

    Returns:
        The constructed ``ShuffleNet`` instance.
    """
    return ShuffleNet(scale=1.0, act='swish', **args)