mobilenetv2.py 14.2 KB
Newer Older
C
ceci3 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
# Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
C
ceci3 已提交
20 21
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
22
from .search_space_base import SearchSpaceBase
C
ceci3 已提交
23
from .base_layer import conv_bn_layer
24
from .search_space_registry import SEARCHSPACE
C
ceci3 已提交
25

26 27
__all__ = ["MobileNetV2Space"]

28 29

@SEARCHSPACE.register
class MobileNetV2Space(SearchSpaceBase):
    def __init__(self,
                 input_size,
                 output_size,
                 block_num,
                 block_mask=None,
                 scale=1.0,
                 class_dim=1000):
        """Search space built from the original MobileNetV2 topology.

        Args:
            input_size(int): input image size.
            output_size(int): expected output feature-map size (1 means a
                classification head with an FC layer is appended).
            block_num(int): number of searched blocks; must be less than 7.
            block_mask: not supported by this space; must be None.
            scale(float): channel multiplier applied to every layer.
            class_dim(int): number of classes for the final FC layer.
        """
        super(MobileNetV2Space, self).__init__(input_size, output_size,
                                               block_num, block_mask)
        # Fix: identity comparison with `is None` instead of `== None` (PEP 8).
        assert self.block_mask is None, 'MobileNetV2Space will use origin MobileNetV2 as search space, so use input_size, output_size and block_num to search'
        # self.head_num means the first convolution channel
        self.head_num = np.array([3, 4, 8, 12, 16, 24, 32])  #7
        # self.filter_num1 ~ self.filter_num6 means following convolution channels
        self.filter_num1 = np.array([3, 4, 8, 12, 16, 24, 32, 48])  #8
        self.filter_num2 = np.array([8, 12, 16, 24, 32, 48, 64, 80])  #8
        self.filter_num3 = np.array([16, 24, 32, 48, 64, 80, 96, 128])  #8
        self.filter_num4 = np.array(
            [24, 32, 48, 64, 80, 96, 128, 144, 160, 192])  #10
        self.filter_num5 = np.array(
            [32, 48, 64, 80, 96, 128, 144, 160, 192, 224])  #10
        self.filter_num6 = np.array(
            [64, 80, 96, 128, 144, 160, 192, 224, 256, 320, 384, 512])  #12
        # self.k_size means kernel size
        self.k_size = np.array([3, 5])  #2
        # self.multiply means expansion_factor of each _inverted_residual_unit
        self.multiply = np.array([1, 2, 3, 4, 6])  #5
        # self.repeat means repeat_num of _inverted_residual_unit in each _invresi_blocks
        self.repeat = np.array([1, 2, 3, 4, 5, 6])  #6
        self.scale = scale
        self.class_dim = class_dim

        assert self.block_num < 7, 'MobileNetV2: block number must less than 7, but receive block number is {}'.format(
            self.block_num)

C
ceci3 已提交
65 66
    def init_tokens(self):
        """
C
ceci3 已提交
67
        The initial token.
C
ceci3 已提交
68 69
        The first one is the index of the first layers' channel in self.head_num,
        each line in the following represent the index of the [expansion_factor, filter_num, repeat_num, kernel_size]
C
ceci3 已提交
70 71
        """
        # original MobileNetV2
W
wanghaoshuang 已提交
72
        # yapf: disable
C
ceci3 已提交
73
        init_token_base =  [4,          # 1, 16, 1
W
wanghaoshuang 已提交
74 75 76 77 78 79 80 81
                4, 5, 1, 0, # 6, 24, 1
                4, 5, 1, 0, # 6, 24, 2
                4, 4, 2, 0, # 6, 32, 3
                4, 4, 3, 0, # 6, 64, 4
                4, 5, 2, 0, # 6, 96, 3
                4, 7, 2, 0, # 6, 160, 3
                4, 9, 0, 0] # 6, 320, 1
        # yapf: enable
C
ceci3 已提交
82

W
wanghaoshuang 已提交
83
        if self.block_num < 5:
C
ceci3 已提交
84 85
            self.token_len = 1 + (self.block_num - 1) * 4
        else:
W
wanghaoshuang 已提交
86 87
            self.token_len = 1 + (self.block_num + 2 *
                                  (self.block_num - 5)) * 4
C
ceci3 已提交
88 89

        return init_token_base[:self.token_len]
C
ceci3 已提交
90 91 92

    def range_table(self):
        """
C
ceci3 已提交
93
        Get range table of current search space, constrains the range of tokens. 
C
ceci3 已提交
94 95
        """
        # head_num + 7 * [multiple(expansion_factor), filter_num, repeat, kernel_size]
W
wanghaoshuang 已提交
96
        # yapf: disable
C
ceci3 已提交
97 98 99 100 101 102 103 104
        range_table_base =  [len(self.head_num),
                len(self.multiply), len(self.filter_num1), len(self.repeat), len(self.k_size),
                len(self.multiply), len(self.filter_num1), len(self.repeat), len(self.k_size),
                len(self.multiply), len(self.filter_num2), len(self.repeat), len(self.k_size),
                len(self.multiply), len(self.filter_num3), len(self.repeat), len(self.k_size),
                len(self.multiply), len(self.filter_num4), len(self.repeat), len(self.k_size),
                len(self.multiply), len(self.filter_num5), len(self.repeat), len(self.k_size),
                len(self.multiply), len(self.filter_num6), len(self.repeat), len(self.k_size)]
W
wanghaoshuang 已提交
105
        range_table_base = list(np.array(range_table_base) - 1)
W
wanghaoshuang 已提交
106
        # yapf: enable
C
ceci3 已提交
107
        return range_table_base[:self.token_len]
C
ceci3 已提交
108 109 110

    def token2arch(self, tokens=None):
        """Convert a token list into a network-building function.

        Args:
            tokens(list|None): tokens produced by the controller; defaults to
                ``init_tokens()`` (the original MobileNetV2) when None.
        Returns:
            callable: ``net_arch(input, end_points, decode_points,
            output_stride)`` that builds the network with paddle.fluid ops.
        """

        if tokens is None:
            tokens = self.init_tokens()

        # Each entry is (t, c, n, s, k) = (expansion factor, output channels,
        # repeat count, stride of the first unit, kernel size).
        self.bottleneck_params_list = []
        if self.block_num >= 1:
            self.bottleneck_params_list.append(
                (1, self.head_num[tokens[0]], 1, 1, 3))
        if self.block_num >= 2:
            self.bottleneck_params_list.append(
                (self.multiply[tokens[1]], self.filter_num1[tokens[2]],
                 self.repeat[tokens[3]], 2, self.k_size[tokens[4]]))
        if self.block_num >= 3:
            self.bottleneck_params_list.append(
                (self.multiply[tokens[5]], self.filter_num1[tokens[6]],
                 self.repeat[tokens[7]], 2, self.k_size[tokens[8]]))
        if self.block_num >= 4:
            self.bottleneck_params_list.append(
                (self.multiply[tokens[9]], self.filter_num2[tokens[10]],
                 self.repeat[tokens[11]], 2, self.k_size[tokens[12]]))
        # block_num >= 5 and >= 6 each contribute TWO stages, matching the
        # token_len formula in init_tokens().
        if self.block_num >= 5:
            self.bottleneck_params_list.append(
                (self.multiply[tokens[13]], self.filter_num3[tokens[14]],
                 self.repeat[tokens[15]], 2, self.k_size[tokens[16]]))
            self.bottleneck_params_list.append(
                (self.multiply[tokens[17]], self.filter_num4[tokens[18]],
                 self.repeat[tokens[19]], 1, self.k_size[tokens[20]]))
        if self.block_num >= 6:
            self.bottleneck_params_list.append(
                (self.multiply[tokens[21]], self.filter_num5[tokens[22]],
                 self.repeat[tokens[23]], 2, self.k_size[tokens[24]]))
            self.bottleneck_params_list.append(
                (self.multiply[tokens[25]], self.filter_num6[tokens[26]],
                 self.repeat[tokens[27]], 1, self.k_size[tokens[28]]))

        def _modify_bottle_params(output_stride=None):
            # Clamp strides so the cumulative downsampling (stem stride 2
            # included) never exceeds output_stride.
            if output_stride is not None and output_stride % 2 != 0:
                raise Exception("output stride must to be even number")
            if output_stride is None:
                return
            else:
                stride = 2
                for i, layer_setting in enumerate(self.bottleneck_params_list):
                    t, c, n, s, ks = layer_setting
                    stride = stride * s
                    if stride > output_stride:
                        s = 1
                    self.bottleneck_params_list[i] = (t, c, n, s, ks)

        def net_arch(input,
                     end_points=None,
                     decode_points=None,
                     output_stride=None):
            # Build the network; may return early with (tensor, decode_ends)
            # when an end point is reached.
            _modify_bottle_params(output_stride)

            decode_ends = dict()

            def check_points(count, points):
                # points may be a single block index or a list of indices.
                if points is None:
                    return False
                else:
                    if isinstance(points, list):
                        return (True if count in points else False)
                    else:
                        return (True if count == points else False)

            #conv1
            # all padding is 'SAME' in the conv2d, can compute the actual padding automatic. 
            input = conv_bn_layer(
                input,
                num_filters=int(32 * self.scale),
                filter_size=3,
                stride=2,
                padding='SAME',
                act='relu6',
                name='mobilenetv2_conv1_1')
            layer_count = 1
            if check_points(layer_count, decode_points):
                decode_ends[layer_count] = input

            if check_points(layer_count, end_points):
                return input, decode_ends

            # bottleneck sequences
            i = 1
            in_c = int(32 * self.scale)
            for layer_setting in self.bottleneck_params_list:
                t, c, n, s, k = layer_setting
                i += 1
                input, depthwise_output = self._invresi_blocks(
                    input=input,
                    in_c=in_c,
                    t=t,
                    c=int(c * self.scale),
                    n=n,
                    s=s,
                    k=k,
                    name='mobilenetv2_conv' + str(i))
                in_c = int(c * self.scale)
                layer_count += 1

                ### decode_points and end_points means block num
                if check_points(layer_count, decode_points):
                    decode_ends[layer_count] = depthwise_output

                if check_points(layer_count, end_points):
                    return input, decode_ends

            # last conv
            input = conv_bn_layer(
                input=input,
                num_filters=int(1280 * self.scale)
                if self.scale > 1.0 else 1280,
                filter_size=1,
                stride=1,
                padding='SAME',
                act='relu6',
                name='mobilenetv2_conv' + str(i + 1))

            input = fluid.layers.pool2d(
                input=input,
                pool_size=7,
                pool_stride=1,
                pool_type='avg',
                global_pooling=True,
                name='mobilenetv2_last_pool')

            # if output_size is 1, add fc layer in the end
            if self.output_size == 1:
                input = fluid.layers.fc(
                    input=input,
                    size=self.class_dim,
                    param_attr=ParamAttr(name='mobilenetv2_fc_weights'),
                    bias_attr=ParamAttr(name='mobilenetv2_fc_offset'))
            else:
                assert self.output_size == input.shape[2], \
                          ("output_size must EQUAL to input_size / (2^block_num)."
                          "But receive input_size={}, output_size={}, block_num={}".format(
                          self.input_size, self.output_size, self.block_num))

            return input

        return net_arch
C
ceci3 已提交
257

C
ceci3 已提交
258
    def _shortcut(self, input, data_residual):
        """Add the residual branch onto the main branch.
        Args:
            input(Variable): main-branch tensor.
            data_residual(Variable): residual-branch tensor.
        Returns:
            Variable, elementwise sum of the two inputs.
        """
        summed = fluid.layers.elementwise_add(input, data_residual)
        return summed

C
ceci3 已提交
268
    def _inverted_residual_unit(self,
                                input,
                                num_in_filter,
                                num_filters,
                                ifshortcut,
                                stride,
                                filter_size,
                                expansion_factor,
                                reduction_ratio=4,
                                name=None):
        """Build one inverted residual unit: expand -> depthwise -> project.
        Args:
            input(Variable), input tensor.
            num_in_filter(int), number of input channels.
            num_filters(int), number of output channels.
            ifshortcut(bool), whether to add an identity shortcut.
            stride(int), stride of the depthwise convolution.
            filter_size(int), kernel size of the depthwise convolution.
            expansion_factor(float), channel expansion ratio.
            reduction_ratio(int), unused here; kept for interface
                compatibility.
            name(str), layer name prefix.
        Returns:
            tuple of Variables, (unit output, depthwise convolution output).
        """
        expanded_channels = int(round(num_in_filter * expansion_factor))

        # 1x1 pointwise convolution expanding the channel count.
        expand = conv_bn_layer(
            input=input,
            num_filters=expanded_channels,
            filter_size=1,
            stride=1,
            padding='SAME',
            num_groups=1,
            act='relu6',
            name=name + '_expand')

        # Depthwise convolution: one group per channel. use_cudnn=False is
        # preserved from the original — presumably a depthwise-conv
        # performance choice; confirm before changing.
        depthwise = conv_bn_layer(
            input=expand,
            num_filters=expanded_channels,
            filter_size=filter_size,
            stride=stride,
            padding='SAME',
            num_groups=expanded_channels,
            act='relu6',
            name=name + '_dwise',
            use_cudnn=False)

        # 1x1 linear projection (no activation) back to num_filters.
        projected = conv_bn_layer(
            input=depthwise,
            num_filters=num_filters,
            filter_size=1,
            stride=1,
            padding='SAME',
            num_groups=1,
            act=None,
            name=name + '_linear')

        out = projected
        if ifshortcut:
            out = self._shortcut(input=input, data_residual=out)
        return out, depthwise
C
ceci3 已提交
329

C
ceci3 已提交
330
    def _invresi_blocks(self, input, in_c, t, c, n, s, k, name=None):
        """Stack ``n`` inverted residual units into one block.
        Args:
            input: Variable, input tensor.
            in_c: int, number of input channels.
            t: float, expansion factor.
            c: int, number of output channels.
            n: int, how many units to stack.
            s: int, stride of the first unit (remaining units use stride 1).
            k: int, kernel size.
            name: str, layer name prefix.
        Returns:
            tuple of Variables, (block output, last depthwise output).
        """
        # The first unit carries the stride and changes the channel count,
        # so it cannot use a shortcut.
        block, depthwise_output = self._inverted_residual_unit(
            input=input,
            num_in_filter=in_c,
            num_filters=c,
            ifshortcut=False,
            stride=s,
            filter_size=k,
            expansion_factor=t,
            name=name + '_1')

        # Remaining units keep shape constant and add identity shortcuts.
        for idx in range(1, n):
            block, depthwise_output = self._inverted_residual_unit(
                input=block,
                num_in_filter=c,
                num_filters=c,
                ifshortcut=True,
                stride=1,
                filter_size=k,
                expansion_factor=t,
                name=name + '_' + str(idx + 1))
        return block, depthwise_output