# Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from .search_space_base import SearchSpaceBase
from .base_layer import conv_bn_layer
from .search_space_registry import SEARCHSPACE
from .utils import check_points

# Names exported by a star-import of this module.
__all__ = ["MobileNetV1Space"]


@SEARCHSPACE.register
class MobileNetV1Space(SearchSpaceBase):
    """Search space describing MobileNetV1-style networks.

    An architecture is encoded as a flat list of 29 integer tokens. Each
    token is an index into one of the candidate arrays defined in
    ``__init__``:

    * token 0 selects the channel number of the first convolution
      (``head_num``);
    * tokens 1-18 are six ``[in_filters, out_filters, kernel_size]``
      triples, one per depthwise-separable block;
    * token 19 selects how many times the next block is repeated
      (``repeat``);
    * tokens 20-28 are three more ``[in_filters, out_filters, kernel_size]``
      triples (the first of them is the repeated block).
    """

    def __init__(self, input_size, output_size, block_num, block_mask):
        """Forward the arguments to the base class and define the candidates.

        Args:
            input_size, output_size, block_num, block_mask: passed unchanged
                to ``SearchSpaceBase.__init__``; they are not used directly
                in this class.
        """
        super(MobileNetV1Space, self).__init__(input_size, output_size,
                                               block_num, block_mask)
        # self.head_num holds the candidate channel numbers of the first
        # convolution.
        self.head_num = np.array([3, 4, 8, 12, 16, 24, 32])  # 7
        # self.filter_num1 ~ self.filter_num9 hold the candidate channel
        # numbers of the following convolutions.
        self.filter_num1 = np.array([3, 4, 8, 12, 16, 24, 32, 48])  # 8
        self.filter_num2 = np.array([8, 12, 16, 24, 32, 48, 64, 80])  # 8
        self.filter_num3 = np.array(
            [16, 24, 32, 48, 64, 80, 96, 128, 144, 160])  # 10
        self.filter_num4 = np.array(
            [24, 32, 48, 64, 80, 96, 128, 144, 160, 192])  # 10
        self.filter_num5 = np.array(
            [32, 48, 64, 80, 96, 128, 144, 160, 192, 224, 256, 320])  # 12
        self.filter_num6 = np.array(
            [64, 80, 96, 128, 144, 160, 192, 224, 256, 320, 384])  # 11
        self.filter_num7 = np.array([
            64, 80, 96, 128, 144, 160, 192, 224, 256, 320, 384, 512, 1024, 1048
        ])  # 14
        self.filter_num8 = np.array(
            [128, 144, 160, 192, 224, 256, 320, 384, 512, 576, 640, 704,
             768])  # 13
        self.filter_num9 = np.array(
            [160, 192, 224, 256, 320, 384, 512, 640, 768, 832, 1024,
             1048])  # 12
        # self.k_size holds the candidate kernel sizes.
        self.k_size = np.array([3, 5])  # 2
        # self.repeat holds the candidate repeat counts of the block that
        # follows the fourth downsample.
        self.repeat = np.array([1, 2, 3, 4, 5, 6])  # 6

    def init_tokens(self):
        """
        Return the initial tokens.

        The first token is the index of the first layer's channel number in
        self.head_num. Each following line holds the indices of
        [filter_num1, filter_num2, kernel_size] for one block, and the depth
        token is the index into self.repeat (repeat times of the block after
        the fourth downsample). The trailing comments give the values the
        indices resolve to.
        """
        # yapf: disable
        base_init_tokens = [6,  # 32
            6, 6, 0,  # 32, 64, 3
            6, 7, 0,  # 64, 128, 3
            7, 6, 0,  # 128, 128, 3
            6, 10, 0,  # 128, 256, 3
            10, 8, 0,  # 256, 256, 3
            8, 11, 0,  # 256, 512, 3
            4,  # depth 5
            11, 8, 0,  # 512, 512, 3
            8, 10, 0,  # 512, 1024, 3
            10, 10, 0]  # 1024, 1024, 3
        # yapf: enable
        return base_init_tokens

    def range_table(self):
        """
        Get the range table of the current search space.

        Entry ``i`` is the number of candidates available for token ``i``,
        i.e. the exclusive upper bound of that token's value.
        """
        # yapf: disable
        base_range_table = [len(self.head_num),
            len(self.filter_num1), len(self.filter_num2), len(self.k_size),
            len(self.filter_num2), len(self.filter_num3), len(self.k_size),
            len(self.filter_num3), len(self.filter_num4), len(self.k_size),
            len(self.filter_num4), len(self.filter_num5), len(self.k_size),
            len(self.filter_num5), len(self.filter_num6), len(self.k_size),
            len(self.filter_num6), len(self.filter_num7), len(self.k_size),
            len(self.repeat),
            len(self.filter_num7), len(self.filter_num8), len(self.k_size),
            len(self.filter_num8), len(self.filter_num9), len(self.k_size),
            len(self.filter_num9), len(self.filter_num9), len(self.k_size)]
        # yapf: enable
        return base_range_table

    def token2arch(self, tokens=None):
        """Translate tokens into a function that builds the network.

        Args:
            tokens: list of 29 indices laid out as described in the class
                docstring. When ``None``, ``self.tokens()`` is used
                (NOTE(review): ``tokens`` is not defined in this class;
                presumably it is provided by ``SearchSpaceBase``).

        Returns:
            The ``net_arch`` callable defined below.

        Side effects:
            Rebuilds ``self.bottleneck_param_list``, a list of
            ``(in_filters, out_filters, stride, kernel_size)`` tuples, one
            per depthwise-separable block.
        """

        if tokens is None:
            tokens = self.tokens()

        self.bottleneck_param_list = []

        # tokens[0] = 32
        # 32, 64
        self.bottleneck_param_list.append(
            (self.filter_num1[tokens[1]], self.filter_num2[tokens[2]], 1,
             self.k_size[tokens[3]]))
        # 64 128 128 128
        self.bottleneck_param_list.append(
            (self.filter_num2[tokens[4]], self.filter_num3[tokens[5]], 2,
             self.k_size[tokens[6]]))
        self.bottleneck_param_list.append(
            (self.filter_num3[tokens[7]], self.filter_num4[tokens[8]], 1,
             self.k_size[tokens[9]]))
        # 128 256 256 256
        self.bottleneck_param_list.append(
            (self.filter_num4[tokens[10]], self.filter_num5[tokens[11]], 2,
             self.k_size[tokens[12]]))
        self.bottleneck_param_list.append(
            (self.filter_num5[tokens[13]], self.filter_num6[tokens[14]], 1,
             self.k_size[tokens[15]]))
        # 256 512 (512 512) *  5
        self.bottleneck_param_list.append(
            (self.filter_num6[tokens[16]], self.filter_num7[tokens[17]], 2,
             self.k_size[tokens[18]]))
        # The same block setting is appended repeat[tokens[19]] times.
        for _ in range(self.repeat[tokens[19]]):
            self.bottleneck_param_list.append(
                (self.filter_num7[tokens[20]], self.filter_num8[tokens[21]], 1,
                 self.k_size[tokens[22]]))
        # 512 1024 1024 1024
        self.bottleneck_param_list.append(
            (self.filter_num8[tokens[23]], self.filter_num9[tokens[24]], 2,
             self.k_size[tokens[25]]))
        self.bottleneck_param_list.append(
            (self.filter_num9[tokens[26]], self.filter_num9[tokens[27]], 1,
             self.k_size[tokens[28]]))

        def _modify_bottle_params(output_stride=None):
            """Cap the accumulated stride of the network at ``output_stride``.

            Walks ``self.bottleneck_param_list`` in order and rewrites, in
            place, the stride of every block that would push the accumulated
            stride above ``output_stride`` to 1. Does nothing when
            ``output_stride`` is ``None``.

            Raises:
                ValueError: if ``output_stride`` is an odd number.
            """
            if output_stride is None:
                return
            if output_stride % 2 != 0:
                raise ValueError("output stride must to be even number")

            # The first convolution in net_arch already has stride 2.
            stride = 2
            # The attribute is ``bottleneck_param_list`` (the list built
            # above); the previous ``bottleneck_params_list`` spelling did
            # not exist and raised AttributeError.
            for i, layer_setting in enumerate(self.bottleneck_param_list):
                f1, f2, s, ks = layer_setting
                stride = stride * s
                if stride > output_stride:
                    s = 1
                self.bottleneck_param_list[i] = (f1, f2, s, ks)

        def net_arch(input,
                     scale=1.0,
                     return_block=None,
                     end_points=None,
                     output_stride=None):
            """Build the network described by ``tokens`` on top of ``input``.

            Args:
                input: input variable of the network.
                scale: multiplier applied to the filter numbers of every
                    depthwise-separable block; also stored in ``self.scale``.
                return_block: block numbers whose input feature should be
                    recorded in the returned ``decode_ends`` dict
                    (interpreted by ``check_points``).
                end_points: block number at which building stops early
                    (interpreted by ``check_points``).
                output_stride: optional even number; when given, block
                    strides are rewritten so the accumulated stride does not
                    exceed it.

            Returns:
                ``(input, decode_ends)`` when ``end_points`` is matched,
                otherwise only the globally average-pooled output.
            """
            self.scale = scale
            _modify_bottle_params(output_stride)

            decode_ends = dict()

            input = conv_bn_layer(
                input=input,
                filter_size=3,
                num_filters=self.head_num[tokens[0]],
                stride=2,
                name='mobilenetv1_conv1')

            # Counts downsample stages: starts at 1 for the stride-2 head
            # convolution and grows by one per stride-2 block.
            layer_count = 1
            for i, layer_setting in enumerate(self.bottleneck_param_list):
                filter_num1, filter_num2, stride, kernel_size = layer_setting
                if stride == 2:
                    layer_count += 1
                ### return_block and end_points means block num
                if check_points((layer_count - 1), return_block):
                    decode_ends[layer_count - 1] = input

                if check_points((layer_count - 1), end_points):
                    return input, decode_ends
                input = self._depthwise_separable(
                    input=input,
                    num_filters1=filter_num1,
                    num_filters2=filter_num2,
                    num_groups=filter_num1,
                    stride=stride,
                    scale=self.scale,
                    kernel_size=kernel_size,
                    name='mobilenetv1_{}'.format(str(i + 1)))

            ### return_block and end_points means block num
            if check_points(layer_count, end_points):
                return input, decode_ends

            input = fluid.layers.pool2d(
                input=input,
                pool_type='avg',
                global_pooling=True,
                name='mobilenetv1_last_pool')

            return input

        return net_arch

    def _depthwise_separable(self,
                             input,
                             num_filters1,
                             num_filters2,
                             num_groups,
                             stride,
                             scale,
                             kernel_size,
                             name=None):
        """Build one depthwise-separable block (depthwise + pointwise conv).

        Args:
            input: input variable of the block.
            num_filters1: requested filter number of the depthwise
                convolution, before scaling and rounding.
            num_filters2: filter number of the pointwise convolution, before
                scaling.
            num_groups: ignored; the group number is always taken from
                ``input.shape[1]``.
            stride: stride of the depthwise convolution.
            scale: multiplier applied to both filter numbers.
            kernel_size: kernel size of the depthwise convolution.
            name: name prefix of the two layers; must be a string because
                suffixes are appended with ``+``.

        Returns:
            The output of the pointwise convolution.
        """
        # NOTE(review): assumes dimension 1 of ``input`` is the channel
        # dimension (NCHW layout) - confirm against callers.
        num_groups = input.shape[1]

        # Round the scaled filter number down to a multiple of the group
        # number, and never go below the group number itself.
        s_oc = int(num_filters1 * scale)
        if s_oc > num_groups:
            output_channel = s_oc - (s_oc % num_groups)
        else:
            output_channel = num_groups

        depthwise_conv = conv_bn_layer(
            input=input,
            filter_size=kernel_size,
            num_filters=output_channel,
            stride=stride,
            num_groups=num_groups,
            use_cudnn=False,
            name=name + '_dw')
        pointwise_conv = conv_bn_layer(
            input=depthwise_conv,
            filter_size=1,
            num_filters=int(num_filters2 * scale),
            stride=1,
            name=name + '_sep')

        return pointwise_conv