mobilenetv1.py 9.8 KB
Newer Older
C
ceci3 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
# Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from .search_space_base import SearchSpaceBase
from .base_layer import conv_bn_layer
from .search_space_registry import SEARCHSPACE

__all__ = ["MobileNetV1Space"]


@SEARCHSPACE.register
class MobileNetV1Space(SearchSpaceBase):
C
update  
ceci3 已提交
31
    def __init__(self, input_size, output_size, block_num, block_mask):
        """Create the candidate tables that the search tokens index into.

        The four arguments are passed unchanged to the parent initializer;
        this class only adds the candidate arrays defined below.
        """
        super(MobileNetV1Space, self).__init__(input_size, output_size,
                                               block_num, block_mask)

        # Candidate channel counts for the first convolution.
        self.head_num = np.array((3, 4, 8, 12, 16, 24, 32))  # 7 choices

        # filter_num1 .. filter_num9: candidate channel counts for the
        # convolutions that follow the first one.
        self.filter_num1 = np.array((3, 4, 8, 12, 16, 24, 32, 48))  # 8 choices
        self.filter_num2 = np.array(
            (8, 12, 16, 24, 32, 48, 64, 80))  # 8 choices
        self.filter_num3 = np.array(
            (16, 24, 32, 48, 64, 80, 96, 128, 144, 160))  # 10 choices
        self.filter_num4 = np.array(
            (24, 32, 48, 64, 80, 96, 128, 144, 160, 192))  # 10 choices
        self.filter_num5 = np.array(
            (32, 48, 64, 80, 96, 128, 144, 160, 192, 224, 256,
             320))  # 12 choices
        self.filter_num6 = np.array(
            (64, 80, 96, 128, 144, 160, 192, 224, 256, 320,
             384))  # 11 choices
        self.filter_num7 = np.array(
            (64, 80, 96, 128, 144, 160, 192, 224, 256, 320, 384, 512, 1024,
             1048))  # 14 choices
        self.filter_num8 = np.array(
            (128, 144, 160, 192, 224, 256, 320, 384, 512, 576, 640, 704,
             768))  # 13 choices
        self.filter_num9 = np.array(
            (160, 192, 224, 256, 320, 384, 512, 640, 768, 832, 1024,
             1048))  # 12 choices

        # Candidate kernel sizes.
        self.k_size = np.array((3, 5))  # 2 choices

        # Candidate repeat counts used in the fourth downsample.
        self.repeat = np.array((1, 2, 3, 4, 5, 6))  # 6 choices

    def init_tokens(self):
        """Return the initial token list of this search space.

        Layout of the returned list:
          * the first token is the index of the first layer's channel count
            in ``self.head_num``;
          * every following row of three tokens holds the indices of
            ``[filter_num1, filter_num2, kernel_size]`` for one block;
          * the single ``depth`` token is the index of the repeat count for
            the fourth downsample.

        The comment at the end of each row lists the values selected by the
        indices in that row.
        """
        # yapf: disable
        head_token = [6]  # 32
        blocks_before_depth = [
            (6, 6, 0),    # 32, 64, 3
            (6, 7, 0),    # 64, 128, 3
            (7, 6, 0),    # 128, 128, 3
            (6, 10, 0),   # 128, 256, 3
            (10, 8, 0),   # 256, 256, 3
            (8, 11, 0),   # 256, 512, 3
        ]
        depth_token = [4]  # depth 5
        blocks_after_depth = [
            (11, 8, 0),   # 512, 512, 3
            (8, 10, 0),   # 512, 1024, 3
            (10, 10, 0),  # 1024, 1024, 3
        ]
        # yapf: enable

        # Flatten the pieces into a fresh list on every call.
        flat_tokens = list(head_token)
        for triple in blocks_before_depth:
            flat_tokens.extend(triple)
        flat_tokens.extend(depth_token)
        for triple in blocks_after_depth:
            flat_tokens.extend(triple)
        return flat_tokens
C
ceci3 已提交
82 83 84 85 86

    def range_table(self):
        """Return the range table that constrains each token.

        Each entry is the length of one candidate array: first
        ``self.head_num``, then one ``[input filters, output filters,
        kernel size]`` triple per block, with the length of ``self.repeat``
        inserted after the sixth triple.
        """
        kernel_choices = len(self.k_size)

        def _triple(first_filters, second_filters):
            # Sizes for one block: two filter tables plus the kernel table.
            return [len(first_filters), len(second_filters), kernel_choices]

        table = [len(self.head_num)]
        table += _triple(self.filter_num1, self.filter_num2)
        table += _triple(self.filter_num2, self.filter_num3)
        table += _triple(self.filter_num3, self.filter_num4)
        table += _triple(self.filter_num4, self.filter_num5)
        table += _triple(self.filter_num5, self.filter_num6)
        table += _triple(self.filter_num6, self.filter_num7)
        table.append(len(self.repeat))
        table += _triple(self.filter_num7, self.filter_num8)
        table += _triple(self.filter_num8, self.filter_num9)
        table += _triple(self.filter_num9, self.filter_num9)
        return table
C
ceci3 已提交
101 102 103 104 105 106

    def token2arch(self, tokens=None):
        """Decode a token list and return a function that builds the network.

        Args:
            tokens (list|None): indices into the candidate arrays, laid out
                as in ``init_tokens``. When None, ``self.tokens()`` is used.

        Returns:
            function: ``net_arch(input, scale=1.0, return_block=None,
            end_points=None, output_stride=None)``.

        Side effects:
            ``self.bottleneck_param_list`` is rebuilt as a list of
            ``(filter_num1, filter_num2, stride, kernel_size)`` tuples, one
            per depthwise-separable block.
        """
        if tokens is None:
            tokens = self.tokens()

        def _block(first_choices, second_choices, first_token, stride):
            # Decode the three consecutive tokens starting at `first_token`
            # into one (filter_num1, filter_num2, stride, kernel_size) tuple.
            return (first_choices[tokens[first_token]],
                    second_choices[tokens[first_token + 1]], stride,
                    self.k_size[tokens[first_token + 2]])

        # tokens[0] picks the head convolution channels; it is read in
        # net_arch. The trailing comments show the default channel layout.
        self.bottleneck_param_list = [
            _block(self.filter_num1, self.filter_num2, 1, 1),  # 32 64
            _block(self.filter_num2, self.filter_num3, 4, 2),  # 64 128
            _block(self.filter_num3, self.filter_num4, 7, 1),  # 128 128
            _block(self.filter_num4, self.filter_num5, 10, 2),  # 128 256
            _block(self.filter_num5, self.filter_num6, 13, 1),  # 256 256
            _block(self.filter_num6, self.filter_num7, 16, 2),  # 256 512
        ]
        # tokens[19] selects how many times the next block is repeated.
        # A plain loop is used because `list * numpy_int` would be turned
        # into an array multiplication by numpy.
        repeated_block = _block(self.filter_num7, self.filter_num8, 20, 1)
        for _ in range(self.repeat[tokens[19]]):
            self.bottleneck_param_list.append(repeated_block)  # 512 512
        self.bottleneck_param_list.append(
            _block(self.filter_num8, self.filter_num9, 23, 2))  # 512 1024
        self.bottleneck_param_list.append(
            _block(self.filter_num9, self.filter_num9, 26, 1))  # 1024 1024

        def _modify_bottle_params(output_stride=None):
            # Cap the accumulated stride at `output_stride` by turning later
            # stride-2 blocks into stride-1 blocks.
            if output_stride is None:
                return
            if output_stride % 2 != 0:
                raise Exception("output stride must to be even number")
            # Start from 2: the head convolution in net_arch uses stride 2.
            stride = 2
            # NOTE: the list is updated in place, so the capped strides
            # persist on self until token2arch is called again.
            for i, layer_setting in enumerate(self.bottleneck_param_list):
                f1, f2, s, ks = layer_setting
                stride = stride * s
                if stride > output_stride:
                    s = 1
                self.bottleneck_param_list[i] = (f1, f2, s, ks)

        def net_arch(input,
                     scale=1.0,
                     return_block=None,
                     end_points=None,
                     output_stride=None):
            """Build the network described by the decoded tokens.

            Args:
                input: input passed to the first ``conv_bn_layer``.
                scale (float): multiplier forwarded to every
                    depthwise-separable block; also stored on ``self.scale``.
                return_block (list|int|None): block number(s) whose input is
                    recorded in the returned ``decode_ends`` dict.
                end_points (list|int|None): block number(s) at which the
                    network is cut and returned early.
                output_stride (int|None): if given, must be even; the
                    accumulated stride is capped at this value.

            Returns:
                ``(output, decode_ends)`` when an ``end_points`` entry is
                reached; otherwise only the globally average-pooled output.
            """
            self.scale = scale
            _modify_bottle_params(output_stride)

            decode_ends = dict()

            def check_points(count, points):
                # `points` may be None, a list of block numbers, or a single
                # block number.
                if points is None:
                    return False
                if isinstance(points, list):
                    return count in points
                return count == points

            input = conv_bn_layer(
                input=input,
                filter_size=3,
                num_filters=self.head_num[tokens[0]],
                stride=2,
                name='mobilenetv1_conv1')

            layer_count = 1
            for i, layer_setting in enumerate(self.bottleneck_param_list):
                filter_num1, filter_num2, stride, kernel_size = layer_setting
                # A stride-2 layer starts a new block.
                if stride == 2:
                    layer_count += 1
                # return_block and end_points are expressed as block numbers
                if check_points((layer_count - 1), return_block):
                    decode_ends[layer_count - 1] = input

                if check_points((layer_count - 1), end_points):
                    return input, decode_ends

                input = self._depthwise_separable(
                    input=input,
                    num_filters1=filter_num1,
                    num_filters2=filter_num2,
                    num_groups=filter_num1,
                    stride=stride,
                    scale=self.scale,
                    kernel_size=kernel_size,
                    name='mobilenetv1_{}'.format(str(i + 1)))

            # return_block and end_points are expressed as block numbers
            if check_points(layer_count, end_points):
                return input, decode_ends

            input = fluid.layers.pool2d(
                input=input,
                pool_type='avg',
                global_pooling=True,
                name='mobilenetv1_last_pool')

            return input

        return net_arch

    def _depthwise_separable(self,
                             input,
                             num_filters1,
                             num_filters2,
                             num_groups,
                             stride,
                             scale,
                             kernel_size,
                             name=None):
        """Apply a grouped ``conv_bn_layer`` followed by a 1x1 one.

        Args:
            input: input of the first layer; ``input.shape[1]`` is used as
                the group count (presumably the channel axis -- confirm
                against the data layout).
            num_filters1: requested filter count of the grouped layer before
                scaling.
            num_filters2: filter count of the 1x1 layer before scaling.
            num_groups: accepted for interface compatibility only; the value
                passed in is ignored in favour of ``input.shape[1]``.
            stride: stride of the grouped layer (the 1x1 layer uses 1).
            scale: multiplier applied to both filter counts.
            kernel_size: filter size of the grouped layer.
            name: prefix for the layer names. ``'_dw'`` and ``'_sep'`` are
                appended to it, so it has to be a string even though the
                default is None.

        Returns:
            The output of the 1x1 ``conv_bn_layer``.
        """
        group_count = input.shape[1]

        # Scaled filter count, rounded down to a multiple of the group count
        # and never smaller than the group count itself.
        scaled_filters = int(num_filters1 * scale)
        if scaled_filters <= group_count:
            dw_filters = group_count
        else:
            dw_filters = scaled_filters - (scaled_filters % group_count)

        dw_output = conv_bn_layer(
            input=input,
            filter_size=kernel_size,
            num_filters=dw_filters,
            stride=stride,
            num_groups=group_count,
            use_cudnn=False,
            name=name + '_dw')

        return conv_bn_layer(
            input=dw_output,
            filter_size=1,
            num_filters=int(num_filters2 * scale),
            stride=1,
            name=name + '_sep')