# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle import ParamAttr

__all__ = ['MobileNetV3']


def make_divisible(v, divisor=8, min_value=None):
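    """Round `v` to the nearest multiple of `divisor` (default 8), never going
    below `min_value` or roughly 90% of the original value,
    e.g. make_divisible(46) -> 48."""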
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v


class MobileNetV3(nn.Layer):
    def __init__(self,
                 in_channels=3,
                 model_name='large',
                 scale=0.5,
                 disable_se=False,
                 **kwargs):
        """
        the MobilenetV3 backbone network for detection module.
        Args:
            params(dict): the super parameters for build network
        """
        super(MobileNetV3, self).__init__()

        self.disable_se = disable_se

        if model_name == "large":
            cfg = [
                # k: kernel, exp: expansion channels, c: output channels, se: use SE, nl: activation, s: stride
                [3, 16, 16, False, 'relu', 1],
                [3, 64, 24, False, 'relu', 2],
                [3, 72, 24, False, 'relu', 1],
                [5, 72, 40, True, 'relu', 2],
                [5, 120, 40, True, 'relu', 1],
                [5, 120, 40, True, 'relu', 1],
                [3, 240, 80, False, 'hardswish', 2],
                [3, 200, 80, False, 'hardswish', 1],
                [3, 184, 80, False, 'hardswish', 1],
                [3, 184, 80, False, 'hardswish', 1],
                [3, 480, 112, True, 'hardswish', 1],
                [3, 672, 112, True, 'hardswish', 1],
                [5, 672, 160, True, 'hardswish', 2],
                [5, 960, 160, True, 'hardswish', 1],
                [5, 960, 160, True, 'hardswish', 1],
            ]
            cls_ch_squeeze = 960
        elif model_name == "small":
            cfg = [
                # k: kernel, exp: expansion channels, c: output channels, se: use SE, nl: activation, s: stride
                [3, 16, 16, True, 'relu', 2],
                [3, 72, 24, False, 'relu', 2],
                [3, 88, 24, False, 'relu', 1],
                [5, 96, 40, True, 'hardswish', 2],
                [5, 240, 40, True, 'hardswish', 1],
                [5, 240, 40, True, 'hardswish', 1],
                [5, 120, 48, True, 'hardswish', 1],
                [5, 144, 48, True, 'hardswish', 1],
                [5, 288, 96, True, 'hardswish', 2],
                [5, 576, 96, True, 'hardswish', 1],
                [5, 576, 96, True, 'hardswish', 1],
            ]
            cls_ch_squeeze = 576
        else:
            raise NotImplementedError(
                "model_name [{}] is not supported; expected 'large' or 'small'.".
                format(model_name))

        supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25]
        assert scale in supported_scale, \
            "supported scales are {} but the input scale is {}".format(supported_scale, scale)
        inplanes = 16
        # conv1
        self.conv = ConvBNLayer(
            in_channels=in_channels,
            out_channels=make_divisible(inplanes * scale),
            kernel_size=3,
            stride=2,
            padding=1,
            groups=1,
            if_act=True,
            act='hardswish')

        self.stages = []
        self.out_channels = []
        block_list = []
        i = 0
        inplanes = make_divisible(inplanes * scale)
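        # Blocks are grouped into stages: a new stage is cut at each stride-2
        # block (past the first few layers), so the backbone returns one
        # feature map per downsampling level.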
        for (k, exp, c, se, nl, s) in cfg:
            se = se and not self.disable_se
            start_idx = 2 if model_name == 'large' else 0
            if s == 2 and i > start_idx:
                self.out_channels.append(inplanes)
                self.stages.append(nn.Sequential(*block_list))
                block_list = []
            block_list.append(
                ResidualUnit(
                    in_channels=inplanes,
                    mid_channels=make_divisible(scale * exp),
                    out_channels=make_divisible(scale * c),
                    kernel_size=k,
                    stride=s,
                    use_se=se,
                    act=nl))
            inplanes = make_divisible(scale * c)
            i += 1
        block_list.append(
            ConvBNLayer(
                in_channels=inplanes,
                out_channels=make_divisible(scale * cls_ch_squeeze),
                kernel_size=1,
                stride=1,
                padding=0,
                groups=1,
                if_act=True,
                act='hardswish'))
        self.stages.append(nn.Sequential(*block_list))
        self.out_channels.append(make_divisible(scale * cls_ch_squeeze))
        for i, stage in enumerate(self.stages):
            self.add_sublayer(sublayer=stage, name="stage{}".format(i))

    def forward(self, x):
        x = self.conv(x)
        out_list = []
        for stage in self.stages:
            x = stage(x)
            out_list.append(x)
        return out_list


class ConvBNLayer(nn.Layer):
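    """Conv2D followed by BatchNorm and, optionally, a ReLU or hardswish activation."""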
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 padding,
                 groups=1,
                 if_act=True,
                 act=None):
        super(ConvBNLayer, self).__init__()
        self.if_act = if_act
        self.act = act
        self.conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            bias_attr=False)

        self.bn = nn.BatchNorm(num_channels=out_channels, act=None)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        if self.if_act:
            if self.act == "relu":
                x = F.relu(x)
            elif self.act == "hardswish":
                x = F.hardswish(x)
            else:
                print("The activation function({}) is selected incorrectly.".
                      format(self.act))
W
WenmuZhou 已提交
189 190 191 192 193 194 195 196 197 198 199 200
                exit()
        return x


class ResidualUnit(nn.Layer):
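    """Inverted residual block: 1x1 expansion, depthwise convolution, optional
    squeeze-and-excitation, and a 1x1 linear projection, with an identity
    shortcut when stride == 1 and in_channels == out_channels."""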
    def __init__(self,
                 in_channels,
                 mid_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 use_se,
                 act=None):
        super(ResidualUnit, self).__init__()
        self.if_shortcut = stride == 1 and in_channels == out_channels
        self.if_se = use_se

        self.expand_conv = ConvBNLayer(
            in_channels=in_channels,
            out_channels=mid_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            if_act=True,
            act=act)
        self.bottleneck_conv = ConvBNLayer(
            in_channels=mid_channels,
            out_channels=mid_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=int((kernel_size - 1) // 2),
            groups=mid_channels,
            if_act=True,
            act=act)
        if self.if_se:
            self.mid_se = SEModule(mid_channels)
        self.linear_conv = ConvBNLayer(
            in_channels=mid_channels,
            out_channels=out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            if_act=False,
            act=None)

    def forward(self, inputs):
        x = self.expand_conv(inputs)
        x = self.bottleneck_conv(x)
        if self.if_se:
            x = self.mid_se(x)
        x = self.linear_conv(x)
        if self.if_shortcut:
            x = paddle.add(inputs, x)
        return x


class SEModule(nn.Layer):
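    """Squeeze-and-Excitation block: global average pooling, two 1x1 convolutions,
    and a hard-sigmoid gate that rescales each channel of the input."""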
    def __init__(self, in_channels, reduction=4):
        super(SEModule, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2D(1)
        self.conv1 = nn.Conv2D(
            in_channels=in_channels,
            out_channels=in_channels // reduction,
            kernel_size=1,
            stride=1,
            padding=0)
        self.conv2 = nn.Conv2D(
            in_channels=in_channels // reduction,
            out_channels=in_channels,
            kernel_size=1,
            stride=1,
            padding=0)

    def forward(self, inputs):
        outputs = self.avg_pool(inputs)
        outputs = self.conv1(outputs)
        outputs = F.relu(outputs)
        outputs = self.conv2(outputs)
        outputs = F.hardsigmoid(outputs, slope=0.2, offset=0.5)
        return inputs * outputs
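

# A minimal usage sketch, added for illustration only (not part of the original
# PaddleOCR module): build the backbone and run a random image through it. The
# 640x640 input size is an arbitrary assumption; any detection-style input works.
if __name__ == "__main__":
    model = MobileNetV3(in_channels=3, model_name="large", scale=0.5)
    model.eval()
    dummy = paddle.rand([1, 3, 640, 640])
    feats = model(dummy)
    # One feature map per stage; channel counts match model.out_channels.
    print(model.out_channels)
    print([f.shape for f in feats])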