from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math

import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr

__all__ = ["GhostNet", "GhostNet_x0_5", "GhostNet_x1_0", "GhostNet_x1_3"]


class GhostNet():
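    """GhostNet, from "GhostNet: More Features from Cheap Operations"
    (Han et al., CVPR 2020), implemented with the PaddlePaddle fluid API.
    `scale` is a width multiplier applied to every channel count in `cfgs`.
    """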
    def __init__(self, scale):
        cfgs = [
            # k: kernel size, t: hidden (expansion) channels, c: output channels,
            # SE: whether to use an SE block, s: stride
            [3, 16, 16, 0, 1],
            [3, 48, 24, 0, 2],
            [3, 72, 24, 0, 1],
            [5, 72, 40, 1, 2],
            [5, 120, 40, 1, 1],
            [3, 240, 80, 0, 2],
            [3, 200, 80, 0, 1],
            [3, 184, 80, 0, 1],
            [3, 184, 80, 0, 1],
            [3, 480, 112, 1, 1],
            [3, 672, 112, 1, 1],
            [5, 672, 160, 1, 2],
            [5, 960, 160, 0, 1],
            [5, 960, 160, 1, 1],
            [5, 960, 160, 0, 1],
            [5, 960, 160, 1, 1]
        ]
        self.cfgs = cfgs
        self.scale = scale

    def net(self, input, class_dim=1000):
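        """Build the full network on `input` and return the class logits."""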
        # build first layer:
        output_channel = int(self._make_divisible(16 * self.scale, 4))
        x = self.conv_bn_layer(input=input,
                               num_filters=output_channel,
                               filter_size=3,
                               stride=2,
                               groups=1,
                               act="relu",
                               name="conv1")
        # build inverted residual blocks
        idx = 0
        for k, exp_size, c, use_se, s in self.cfgs:
            output_channel = int(self._make_divisible(c * self.scale, 4))
            hidden_channel = int(self._make_divisible(exp_size * self.scale, 4))
            x = self.ghost_bottleneck(input=x,
                                      hidden_dim=hidden_channel,
                                      output=output_channel,
                                      kernel_size=k,
                                      stride=s,
                                      use_se=use_se,
                                      name="_ghostbottleneck_" + str(idx))
            idx += 1
        # build last several layers
        output_channel = int(self._make_divisible(exp_size * self.scale, 4))
        x = self.conv_bn_layer(input=x,
                               num_filters=output_channel,
                               filter_size=1,
                               stride=1,
                               groups=1,
                               act="relu",
                               name="conv_last")
        x = fluid.layers.pool2d(input=x, pool_type='avg', global_pooling=True)
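        # head: a 1x1 conv expands the pooled features to 1280 channels,
        # followed by dropout and the final FC classifier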
        output_channel = 1280

        out = self.conv_bn_layer(input=x,
                                 num_filters=output_channel,
                                 filter_size=1,
                                 stride=1,
                                 act="relu",
                                 name="fc_0")
        out = fluid.layers.dropout(x=out, dropout_prob=0.2)
        stdv = 1.0 / math.sqrt(out.shape[1] * 1.0)
        out = fluid.layers.fc(input=out,
                              size=class_dim,
                              param_attr=ParamAttr(
                                  name="fc_1_weights",
                                  initializer=fluid.initializer.Uniform(-stdv, stdv)),
                              bias_attr=ParamAttr(name="fc_1_offset"))

        return out

    def _make_divisible(self, v, divisor, min_value=None):
        """
        This function is taken from the original tf repo.
        It ensures that all layers have a channel number that is divisible by 8
        It can be seen here:
        https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
        """
        if min_value is None:
            min_value = divisor
        new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
        # Make sure that rounding down does not reduce the value by more than 10%.
        if new_v < 0.9 * v:
            new_v += divisor
        return new_v

    def conv_bn_layer(self,
                      input,
                      num_filters,
                      filter_size,
                      stride=1,
                      groups=1,
                      act=None,
                      name=None):
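        """Conv2D (bias-free) followed by batch norm; `act` is applied by the BN layer."""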
        x = fluid.layers.conv2d(input=input,
                                num_filters=num_filters,
                                filter_size=filter_size,
                                stride=stride,
                                padding=(filter_size - 1) // 2,
                                groups=groups,
                                act=None,
                                param_attr=ParamAttr(
                                    initializer=fluid.initializer.MSRA(), name=name + "_weights"),
                                bias_attr=False)
        bn_name = name + "_bn"
        x = fluid.layers.batch_norm(input=x,
                                    act=act,
                                    param_attr=ParamAttr(
                                        name=bn_name + "_scale",
                                        regularizer=fluid.regularizer.L2DecayRegularizer(
                                            regularization_coeff=0.0)),
                                    bias_attr=ParamAttr(
                                        name=bn_name + "_offset",
                                        regularizer=fluid.regularizer.L2DecayRegularizer(
                                            regularization_coeff=0.0)),
                                    moving_mean_name=bn_name + "_mean",
                                    moving_variance_name=bn_name + "_variance")
        return x

    def se_block(self, input, num_channels, reduction_ratio=4, name=None):
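        """Squeeze-and-Excitation block: global average pool, an FC squeeze
        (by `reduction_ratio`), an FC excitation with hard_sigmoid, then
        channel-wise rescaling of the input."""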
        pool = fluid.layers.pool2d(input=input, pool_type='avg', global_pooling=True, use_cudnn=False)
        stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
        squeeze = fluid.layers.fc(input=pool,
                                  size=num_channels // reduction_ratio,
                                  act='relu',
                                  param_attr=fluid.param_attr.ParamAttr(
                                      initializer=fluid.initializer.Uniform(-stdv, stdv),
                                      name=name + '_1_weights'),
                                  bias_attr=ParamAttr(name=name + '_1_offset'))
        stdv = 1.0 / math.sqrt(squeeze.shape[1] * 1.0)
        excitation = fluid.layers.fc(input=squeeze,
                                    size=num_channels,
                                    act="hard_sigmoid",
                                    param_attr=fluid.param_attr.ParamAttr(
                                        initializer=fluid.initializer.Uniform(-stdv, stdv),
                                        name=name + '_2_weights'),
                                    bias_attr=ParamAttr(name=name + '_2_offset'))
        se_scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0)
        return se_scale

    def depthwise_conv(self,
                       input,
                       output,
                       kernel_size,
                       stride=1,
                       relu=False,
                       name=None):
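        """Depthwise conv + BN: `groups` is set to the input channel count."""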
        return self.conv_bn_layer(input=input,
                                  num_filters=output,
                                  filter_size=kernel_size,
                                  stride=stride,
                                  groups=input.shape[1],
                                  act="relu" if relu else None,
                                  name=name + "_depthwise")

    def ghost_module(self,
                    input,
                    output,
                    kernel_size=1,
                    ratio=2,
                    dw_size=3,
                    stride=1,
                    relu=True,
                    name=None):
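        """Ghost module: a primary conv produces `output / ratio` (rounded up)
        intrinsic feature maps; a cheap depthwise conv derives the remaining
        "ghost" maps, and the two sets are concatenated along the channel axis."""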
        init_channels = int(math.ceil(output / ratio))
        new_channels = int(init_channels * (ratio - 1))
        primary_conv = self.conv_bn_layer(input=input,
                                          num_filters=init_channels,
                                          filter_size=kernel_size,
                                          stride=stride,
                                          groups=1,
                                          act="relu" if relu else None,
                                          name=name + "_primary_conv")
        cheap_operation = self.conv_bn_layer(input=primary_conv,
                                             num_filters=new_channels,
                                             filter_size=dw_size,
                                             stride=1,
                                             groups=init_channels,
                                             act="relu" if relu else None,
                                             name=name + "_cheap_operation")
        out = fluid.layers.concat([primary_conv, cheap_operation], axis=1)
        return out

    def ghost_bottleneck(self,
                        input,
                        hidden_dim,
                        output,
                        kernel_size,
                        stride,
                        use_se,
                        name=None):
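        """Ghost bottleneck: ghost module (expand) -> depthwise conv when
        stride is 2 -> optional SE block -> ghost module (project), plus a
        residual shortcut (identity, or depthwise + 1x1 conv projection)."""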
        inp_channels = input.shape[1]
        x = self.ghost_module(input=input,
                              output=hidden_dim,
                              kernel_size=1,
                              stride=1,
                              relu=True,
                              name=name + "_ghost_module_1")
        if stride == 2:
            x = self.depthwise_conv(input=x,
                                    output=hidden_dim,
                                    kernel_size=kernel_size,
                                    stride=stride,
                                    relu=False,
                                    name=name + "_depthwise")
        if use_se:
            x = self.se_block(input=x, num_channels=hidden_dim, name=name + "_se")
        x = self.ghost_module(input=x,
                              output=output,
                              kernel_size=1,
                              relu=False,
                              name=name + "_ghost_module_2")
        if stride == 1 and inp_channels == output:
            shortcut = input
        else:
            shortcut = self.depthwise_conv(input=input,
                                           output=inp_channels,
                                           kernel_size=kernel_size,
                                           stride=stride,
                                           relu=False,
                                           name=name + "_shortcut_depthwise")
            shortcut = self.conv_bn_layer(input=shortcut,
                                          num_filters=output,
                                          filter_size=1,
                                          stride=1,
                                          groups=1,
                                          act=None,
                                          name=name + "_shortcut_conv")
        return fluid.layers.elementwise_add(x=x,
                                            y=shortcut,
                                            axis=-1)


def GhostNet_x0_5():
    model = GhostNet(scale=0.5)
    return model


def GhostNet_x1_0():
    model = GhostNet(scale=1.0)
    return model


def GhostNet_x1_3():
    model = GhostNet(scale=1.3)
    return model

if __name__ == "__main__":
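    # smoke test: build the x1.0 network and save the inference program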
    image = fluid.data(name='image', shape=[16, 3, 224, 224], dtype='float32')

    model = GhostNet_x1_0()
    out = model.net(input=image, class_dim=1000)
    test_program = fluid.default_main_program().clone(for_test=True)

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    fluid.save(test_program, "ghostnet_x1_0")
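    # the saved parameters can be restored later with, e.g.:
    #   fluid.load(test_program, "ghostnet_x1_0", exe)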