mobilenet_v3.py 11.0 KB
Newer Older
littletomatodonkey's avatar
littletomatodonkey 已提交
1
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
W
WuHaobo 已提交
2
#
littletomatodonkey's avatar
littletomatodonkey 已提交
3 4 5
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
W
WuHaobo 已提交
6 7 8
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
littletomatodonkey's avatar
littletomatodonkey 已提交
9 10 11 12 13
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
W
WuHaobo 已提交
14 15 16 17 18

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

19 20
import numpy as np
import paddle
littletomatodonkey's avatar
littletomatodonkey 已提交
21 22 23
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
littletomatodonkey's avatar
littletomatodonkey 已提交
24 25
from paddle.nn import Conv2d, BatchNorm, Linear, Dropout
from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d
littletomatodonkey's avatar
littletomatodonkey 已提交
26 27
# TODO: need to be removed later!
from paddle.fluid.regularizer import L2Decay
28 29

import math
W
WuHaobo 已提交
30 31

# Public factory functions exported by this module, one per
# (model size, width scale) combination.
__all__ = [
    "MobileNetV3_small_x0_35",
    "MobileNetV3_small_x0_5",
    "MobileNetV3_small_x0_75",
    "MobileNetV3_small_x1_0",
    "MobileNetV3_small_x1_25",
    "MobileNetV3_large_x0_35",
    "MobileNetV3_large_x0_5",
    "MobileNetV3_large_x0_75",
    "MobileNetV3_large_x1_0",
    "MobileNetV3_large_x1_25",
]


40 41 42 43 44 45 46 47
def make_divisible(v, divisor=8, min_value=None):
    """Round ``v`` to a multiple of ``divisor``, never going below a floor.

    Args:
        v: The value to round (e.g. a scaled channel count).
        divisor: The result is built from whole multiples of this value.
        min_value: Lower bound for the result; defaults to ``divisor``
            when ``None``.

    Returns:
        The rounded value. If rounding lands below 90% of ``v``, one extra
        ``divisor`` is added so the result does not shrink too much.
    """
    lower_bound = divisor if min_value is None else min_value
    # Add half a divisor before flooring so the result is the nearest multiple.
    nearest_multiple = int(v + divisor / 2) // divisor * divisor
    rounded = max(lower_bound, nearest_multiple)
    if rounded < 0.9 * v:
        rounded += divisor
    return rounded

littletomatodonkey's avatar
littletomatodonkey 已提交
48

littletomatodonkey's avatar
littletomatodonkey 已提交
49
class MobileNetV3(nn.Layer):
    """MobileNetV3 image classification network.

    The network is a stem ``ConvBNLayer``, a stack of ``ResidualUnit`` blocks
    described by ``self.cfg``, a 1x1 ``ConvBNLayer``, adaptive average
    pooling, a 1x1 convolution with hard-swish, dropout and a final
    ``Linear`` classifier.

    Args:
        scale: Width multiplier applied to the channel counts in the config
            table; each scaled value is passed through ``make_divisible``.
        model_name: ``"large"`` or ``"small"``; selects the block table.
        class_dim: Number of output units of the final ``Linear`` layer.

    Raises:
        NotImplementedError: If ``model_name`` is neither ``"large"`` nor
            ``"small"``.
    """

    def __init__(self, scale=1.0, model_name="small", class_dim=1000):
        super(MobileNetV3, self).__init__()

        inplanes = 16
        # Each row configures one ResidualUnit:
        #   k   -> filter_size of the depthwise conv
        #   exp -> mid_c (before scaling)
        #   c   -> out_c (before scaling)
        #   se  -> whether the unit uses an SEModule
        #   nl  -> activation name ("relu" or "hard_swish")
        #   s   -> stride of the depthwise conv
        if model_name == "large":
            self.cfg = [
                # k, exp, c,  se,     nl,  s,
                [3, 16, 16, False, "relu", 1],
                [3, 64, 24, False, "relu", 2],
                [3, 72, 24, False, "relu", 1],
                [5, 72, 40, True, "relu", 2],
                [5, 120, 40, True, "relu", 1],
                [5, 120, 40, True, "relu", 1],
                [3, 240, 80, False, "hard_swish", 2],
                [3, 200, 80, False, "hard_swish", 1],
                [3, 184, 80, False, "hard_swish", 1],
                [3, 184, 80, False, "hard_swish", 1],
                [3, 480, 112, True, "hard_swish", 1],
                [3, 672, 112, True, "hard_swish", 1],
                [5, 672, 160, True, "hard_swish", 2],
                [5, 960, 160, True, "hard_swish", 1],
                [5, 960, 160, True, "hard_swish", 1],
            ]
            self.cls_ch_squeeze = 960
            self.cls_ch_expand = 1280
        elif model_name == "small":
            self.cfg = [
                # k, exp, c,  se,     nl,  s,
                [3, 16, 16, True, "relu", 2],
                [3, 72, 24, False, "relu", 2],
                [3, 88, 24, False, "relu", 1],
                [5, 96, 40, True, "hard_swish", 2],
                [5, 240, 40, True, "hard_swish", 1],
                [5, 240, 40, True, "hard_swish", 1],
                [5, 120, 48, True, "hard_swish", 1],
                [5, 144, 48, True, "hard_swish", 1],
                [5, 288, 96, True, "hard_swish", 2],
                [5, 576, 96, True, "hard_swish", 1],
                [5, 576, 96, True, "hard_swish", 1],
            ]
            self.cls_ch_squeeze = 576
            self.cls_ch_expand = 1280
        else:
            raise NotImplementedError(
                "mode[{}_model] is not implemented!".format(model_name))

        # Scaled stem width; also the input width of the first block.
        inplanes = make_divisible(inplanes * scale)

        self.conv1 = ConvBNLayer(
            in_c=3,
            out_c=inplanes,
            filter_size=3,
            stride=2,
            padding=1,
            num_groups=1,
            if_act=True,
            act="hard_swish",
            name="conv1")

        # A plain Python list does not register sublayers by itself, so each
        # block is also registered through add_sublayer under "conv<N>"
        # (numbering starts at 2 because the stem is "conv1").
        self.block_list = []
        for i, (k, exp, c, se, nl, s) in enumerate(self.cfg):
            block_name = "conv" + str(i + 2)
            block_out_c = make_divisible(scale * c)
            block = ResidualUnit(
                in_c=inplanes,
                mid_c=make_divisible(scale * exp),
                out_c=block_out_c,
                filter_size=k,
                stride=s,
                use_se=se,
                act=nl,
                name=block_name)
            self.block_list.append(block)
            self.add_sublayer(sublayer=block, name=block_name)
            inplanes = block_out_c

        squeeze_c = make_divisible(scale * self.cls_ch_squeeze)

        self.last_second_conv = ConvBNLayer(
            in_c=inplanes,
            out_c=squeeze_c,
            filter_size=1,
            stride=1,
            padding=0,
            num_groups=1,
            if_act=True,
            act="hard_swish",
            name="conv_last")

        self.pool = AdaptiveAvgPool2d(1)

        self.last_conv = Conv2d(
            in_channels=squeeze_c,
            out_channels=self.cls_ch_expand,
            kernel_size=1,
            stride=1,
            padding=0,
            weight_attr=ParamAttr(name="last_1x1_conv_weights"),
            bias_attr=False)

        self.out = Linear(
            self.cls_ch_expand,
            class_dim,
            weight_attr=ParamAttr("fc_weights"),
            bias_attr=ParamAttr(name="fc_offset"))

    def forward(self, inputs, label=None, dropout_prob=0.2):
        """Run the network on ``inputs`` and return the classifier output.

        Args:
            inputs: Input tensor fed to the stem convolution.
            label: Unused; kept so existing callers that pass it still work.
            dropout_prob: Dropout probability applied before the classifier.

        Returns:
            The output of the final ``Linear`` layer.
        """
        x = self.conv1(inputs)
        for block in self.block_list:
            x = block(x)
        x = self.last_second_conv(x)
        x = self.pool(x)
        x = self.last_conv(x)
        x = F.hard_swish(x)
        # Follow the layer's train/eval mode. Without ``training=`` the
        # functional default keeps dropout active after ``eval()``, which
        # makes inference non-deterministic.
        x = F.dropout(x=x, p=dropout_prob, training=self.training)
        # Keep only the first two dimensions of the pooled tensor.
        x = paddle.reshape(x, shape=[x.shape[0], x.shape[1]])
        x = self.out(x)

        return x


littletomatodonkey's avatar
littletomatodonkey 已提交
169
class ConvBNLayer(nn.Layer):
    """Bias-free ``Conv2d`` followed by ``BatchNorm`` and an optional activation.

    Args:
        in_c: Number of input channels of the convolution.
        out_c: Number of output channels of the convolution and batch norm.
        filter_size: Convolution kernel size.
        stride: Convolution stride.
        padding: Convolution padding.
        num_groups: Convolution ``groups`` value.
        if_act: Whether ``forward`` applies an activation after batch norm.
        act: Activation name, ``"relu"`` or ``"hard_swish"``; only consulted
            when ``if_act`` is true.
        use_cudnn: Accepted for compatibility; not used by this layer.
        name: Prefix for the parameter names created by this layer.
    """

    def __init__(self,
                 in_c,
                 out_c,
                 filter_size,
                 stride,
                 padding,
                 num_groups=1,
                 if_act=True,
                 act=None,
                 use_cudnn=True,
                 name=""):
        super(ConvBNLayer, self).__init__()
        self.if_act = if_act
        self.act = act
        self.conv = Conv2d(
            in_channels=in_c,
            out_channels=out_c,
            kernel_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            weight_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
        # The batch-norm scale and offset use an L2Decay coefficient of 0.0.
        self.bn = BatchNorm(
            num_channels=out_c,
            act=None,
            param_attr=ParamAttr(
                name=name + "_bn_scale",
                regularizer=L2Decay(regularization_coeff=0.0)),
            bias_attr=ParamAttr(
                name=name + "_bn_offset",
                regularizer=L2Decay(regularization_coeff=0.0)),
            moving_mean_name=name + "_bn_mean",
            moving_variance_name=name + "_bn_variance")

    def forward(self, x):
        """Apply conv, batch norm and, if enabled, the configured activation.

        Raises:
            NotImplementedError: If ``if_act`` is true and ``act`` is neither
                ``"relu"`` nor ``"hard_swish"``.
        """
        x = self.conv(x)
        x = self.bn(x)
        if self.if_act:
            if self.act == "relu":
                x = F.relu(x)
            elif self.act == "hard_swish":
                x = F.hard_swish(x)
            else:
                # Raise instead of print + exit(): library code must not
                # terminate the host process, and callers need a traceback.
                raise NotImplementedError(
                    "The activation function is selected incorrectly: "
                    "{!r}".format(self.act))
        return x


littletomatodonkey's avatar
littletomatodonkey 已提交
219
class ResidualUnit(nn.Layer):
    """One block: 1x1 expand conv, grouped conv, optional SE, 1x1 linear conv.

    The input is added back to the output only when ``stride == 1`` and
    ``in_c == out_c``.

    Args:
        in_c: Input channel count.
        mid_c: Channel count produced by the expand conv and kept by the
            grouped conv (which uses ``mid_c`` groups).
        out_c: Output channel count of the final 1x1 conv.
        filter_size: Kernel size of the grouped conv.
        stride: Stride of the grouped conv.
        use_se: Whether to insert an ``SEModule`` after the grouped conv.
        act: Activation name used by the expand and grouped convs.
        name: Prefix for the sublayers' parameter names.
    """

    def __init__(self,
                 in_c,
                 mid_c,
                 out_c,
                 filter_size,
                 stride,
                 use_se,
                 act=None,
                 name=''):
        super(ResidualUnit, self).__init__()
        self.if_shortcut = (stride == 1) and (in_c == out_c)
        self.if_se = use_se

        # Padding derived from the kernel size as (filter_size - 1) // 2.
        depthwise_padding = int((filter_size - 1) // 2)

        self.expand_conv = ConvBNLayer(
            in_c=in_c,
            out_c=mid_c,
            filter_size=1,
            stride=1,
            padding=0,
            if_act=True,
            act=act,
            name=name + "_expand")
        self.bottleneck_conv = ConvBNLayer(
            in_c=mid_c,
            out_c=mid_c,
            filter_size=filter_size,
            stride=stride,
            padding=depthwise_padding,
            num_groups=mid_c,
            if_act=True,
            act=act,
            name=name + "_depthwise")
        if self.if_se:
            self.mid_se = SEModule(mid_c, name=name + "_se")
        # The final projection has no activation.
        self.linear_conv = ConvBNLayer(
            in_c=mid_c,
            out_c=out_c,
            filter_size=1,
            stride=1,
            padding=0,
            if_act=False,
            act=None,
            name=name + "_linear")

    def forward(self, inputs):
        """Run the block; add ``inputs`` to the result when the shortcut applies."""
        out = self.bottleneck_conv(self.expand_conv(inputs))
        if self.if_se:
            out = self.mid_se(out)
        out = self.linear_conv(out)
        if not self.if_shortcut:
            return out
        return paddle.elementwise_add(inputs, out)


littletomatodonkey's avatar
littletomatodonkey 已提交
275
class SEModule(nn.Layer):
    """Channel re-weighting module used inside ``ResidualUnit``.

    ``forward`` pools the input with ``AdaptiveAvgPool2d(1)``, passes it
    through two 1x1 convolutions (ReLU after the first, hard-sigmoid after
    the second) and multiplies the result with the original input.

    Args:
        channel: Channel count of the input and of the second conv's output.
        reduction: The first conv outputs ``channel // reduction`` channels.
        name: Prefix for the parameter names of both convolutions.
    """

    def __init__(self, channel, reduction=4, name=""):
        super(SEModule, self).__init__()
        reduced_channel = channel // reduction

        self.avg_pool = AdaptiveAvgPool2d(1)
        self.conv1 = Conv2d(
            in_channels=channel,
            out_channels=reduced_channel,
            kernel_size=1,
            stride=1,
            padding=0,
            weight_attr=ParamAttr(name=name + "_1_weights"),
            bias_attr=ParamAttr(name=name + "_1_offset"))
        self.conv2 = Conv2d(
            in_channels=reduced_channel,
            out_channels=channel,
            kernel_size=1,
            stride=1,
            padding=0,
            weight_attr=ParamAttr(name=name + "_2_weights"),
            bias_attr=ParamAttr(name=name + "_2_offset"))

    def forward(self, inputs):
        """Return ``inputs`` multiplied by the gate computed from them."""
        gate = self.avg_pool(inputs)
        gate = F.relu(self.conv1(gate))
        gate = F.hard_sigmoid(self.conv2(gate))
        return paddle.multiply(x=inputs, y=gate, axis=0)
W
WuHaobo 已提交
303 304


305 306
def MobileNetV3_small_x0_35(**args):
    """Build a "small" MobileNetV3 with scale 0.35; ``args`` go to ``MobileNetV3``."""
    return MobileNetV3(model_name="small", scale=0.35, **args)


310 311
def MobileNetV3_small_x0_5(**args):
    """Build a "small" MobileNetV3 with scale 0.5; ``args`` go to ``MobileNetV3``."""
    return MobileNetV3(model_name="small", scale=0.5, **args)


315 316
def MobileNetV3_small_x0_75(**args):
    """Build a "small" MobileNetV3 with scale 0.75; ``args`` go to ``MobileNetV3``."""
    return MobileNetV3(model_name="small", scale=0.75, **args)


littletomatodonkey's avatar
littletomatodonkey 已提交
320
def MobileNetV3_small_x1_0(**args):
    """Build a "small" MobileNetV3 with scale 1.0; ``args`` go to ``MobileNetV3``."""
    return MobileNetV3(model_name="small", scale=1.0, **args)


325 326
def MobileNetV3_small_x1_25(**args):
    """Build a "small" MobileNetV3 with scale 1.25; ``args`` go to ``MobileNetV3``."""
    return MobileNetV3(model_name="small", scale=1.25, **args)


330 331
def MobileNetV3_large_x0_35(**args):
    """Build a "large" MobileNetV3 with scale 0.35; ``args`` go to ``MobileNetV3``."""
    return MobileNetV3(model_name="large", scale=0.35, **args)


335 336
def MobileNetV3_large_x0_5(**args):
    """Build a "large" MobileNetV3 with scale 0.5; ``args`` go to ``MobileNetV3``."""
    return MobileNetV3(model_name="large", scale=0.5, **args)


340 341
def MobileNetV3_large_x0_75(**args):
    """Build a "large" MobileNetV3 with scale 0.75; ``args`` go to ``MobileNetV3``."""
    return MobileNetV3(model_name="large", scale=0.75, **args)


littletomatodonkey's avatar
littletomatodonkey 已提交
345
def MobileNetV3_large_x1_0(**args):
    """Build a "large" MobileNetV3 with scale 1.0; ``args`` go to ``MobileNetV3``."""
    return MobileNetV3(model_name="large", scale=1.0, **args)


350 351
def MobileNetV3_large_x1_25(**args):
    """Build a "large" MobileNetV3 with scale 1.25; ``args`` go to ``MobileNetV3``."""
    return MobileNetV3(model_name="large", scale=1.25, **args)