# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Code was heavily based on https://github.com/liyunsheng13/micronet
# reference: https://arxiv.org/pdf/2108.05894

import math

import paddle
import paddle.nn as nn

from ....utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url

MODEL_URLS = {
    "micronet_m0": "",  # TODO
    "micronet_m1": "",  # TODO
    "micronet_m2": "",  # TODO
    "micronet_m3": "",  # TODO
}

__all__ = list(MODEL_URLS.keys())

NET_CONFIG = {
    "msnx_dy6_exp4_4M_221": [
        # s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r
        [2, 1, 8, 3, 2, 2, 0, 4, 8, 2, 2, 2, 0, 1, 1],  # 6 ->12(0,0) ->24 ->8(4,2) ->8
        [2, 1, 12, 3, 2, 2, 0, 8, 12, 4, 4, 2, 2, 1, 1],  # 8 ->16(0,0) ->32 ->16(4,4) ->12
        [2, 1, 16, 5, 2, 2, 0, 12, 16, 4, 4, 2, 2, 1, 1],  # 16 ->32(0,0) ->64 ->16(8,2) ->16
        [1, 1, 32, 5, 1, 4, 4, 4, 32, 4, 4, 2, 2, 1, 1],  # 16 ->16(2,8) ->96 ->32(8,4) ->32
        [2, 1, 64, 5, 1, 4, 8, 8, 64, 8, 8, 2, 2, 1, 1],  # 32 ->32(2,16) ->192 ->64(12,4) ->64
        [1, 1, 96, 3, 1, 4, 8, 8, 96, 8, 8, 2, 2, 1, 2],  # 64 ->64(3,16) ->384 ->96(16,6) ->96
        [1, 1, 384, 3, 1, 4, 12, 12, 0, 0, 0, 2, 2, 1, 2],  # 96 ->96(4,24) ->384
    ],
    "msnx_dy6_exp6_6M_221": [
        # s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r
        [2, 1, 8, 3, 2, 2, 0, 6, 8, 2, 2, 2, 0, 1, 1],  # 6 ->12(0,0) ->24 ->8(4,2) ->8
        [2, 1, 16, 3, 2, 2, 0, 8, 16, 4, 4, 2, 2, 1, 1],  # 8 ->16(0,0) ->32 ->16(4,4) ->16
        [2, 1, 16, 5, 2, 2, 0, 16, 16, 4, 4, 2, 2, 1, 1],  # 16 ->32(0,0) ->64 ->16(8,2) ->16
        [1, 1, 32, 5, 1, 6, 4, 4, 32, 4, 4, 2, 2, 1, 1],  # 16 ->16(2,8) ->96 ->32(8,4) ->32
        [2, 1, 64, 5, 1, 6, 8, 8, 64, 8, 8, 2, 2, 1, 1],  # 32 ->32(2,16) ->192 ->64(12,4) ->64
        [1, 1, 96, 3, 1, 6, 8, 8, 96, 8, 8, 2, 2, 1, 2],  # 64 ->64(3,16) ->384 ->96(16,6) ->96
        [1, 1, 576, 3, 1, 6, 12, 12, 0, 0, 0, 2, 2, 1, 2],  # 96 ->96(4,24) ->576
    ],
    "msnx_dy9_exp6_12M_221": [
        # s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r
        [2, 1, 12, 3, 2, 2, 0, 8, 12, 4, 4, 2, 0, 1, 1],  # 8 ->16(0,0) ->32 ->12(4,3) ->12
        [2, 1, 16, 3, 2, 2, 0, 12, 16, 4, 4, 2, 2, 1, 1],  # 12 ->24(0,0) ->48 ->16(8,2) ->16
        [1, 1, 24, 3, 2, 2, 0, 16, 24, 4, 4, 2, 2, 1, 1],  # 16 ->16(0,0) ->64 ->24(8,3) ->24
        [2, 1, 32, 5, 1, 6, 6, 6, 32, 4, 4, 2, 2, 1, 1],  # 24 ->24(2,12) ->144 ->32(16,2) ->32
        [1, 1, 32, 5, 1, 6, 8, 8, 32, 4, 4, 2, 2, 1, 2],  # 32 ->32(2,16) ->192 ->32(16,2) ->32
        [1, 1, 64, 5, 1, 6, 8, 8, 64, 8, 8, 2, 2, 1, 2],  # 32 ->32(2,16) ->192 ->64(12,4) ->64
        [2, 1, 96, 5, 1, 6, 8, 8, 96, 8, 8, 2, 2, 1, 2],  # 64 ->64(4,12) ->384 ->96(16,5) ->96
        [1, 1, 128, 3, 1, 6, 12, 12, 128, 8, 8, 2, 2, 1, 2],  # 96 ->96(5,16) ->576 ->128(16,8) ->128
        [1, 1, 768, 3, 1, 6, 16, 16, 0, 0, 0, 2, 2, 1, 2],  # 128 ->128(4,32) ->768
    ],
    "msnx_dy12_exp6_20M_020": [
        # s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r
        [2, 1, 16, 3, 2, 2, 0, 12, 16, 4, 4, 0, 2, 0, 1],  # 12 ->24(0,0) ->48 ->16(8,2) ->16
        [2, 1, 24, 3, 2, 2, 0, 16, 24, 4, 4, 0, 2, 0, 1],  # 16 ->32(0,0) ->64 ->24(8,3) ->24
        [1, 1, 24, 3, 2, 2, 0, 24, 24, 4, 4, 0, 2, 0, 1],  # 24 ->48(0,0) ->96 ->24(8,3) ->24
        [2, 1, 32, 5, 1, 6, 6, 6, 32, 4, 4, 0, 2, 0, 1],  # 24 ->24(2,12) ->144 ->32(16,2) ->32
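
# How each NET_CONFIG row is consumed (see MicroNet.__init__ below): a row
# unpacks as
#     s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r
# with s the first-block stride, n the repeat count, c the output channels,
# ks the depthwise kernel size, (c1, c2) the channel-expansion pair,
# (g1, g2) the group sizes, (c3, g3, g4) the 1x1 group-conv settings,
# (y1, y2, y3) the DYShiftMax switches, and r the activation-reduction
# ratio. The trailing comments sketch the channel flow through each block.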
        [1, 1, 32, 5, 1, 6, 8, 8, 32, 4, 4, 0, 2, 0, 2],  # 32 ->32(2,16) ->192 ->32(16,2) ->32
        [1, 1, 64, 5, 1, 6, 8, 8, 48, 8, 8, 0, 2, 0, 2],  # 32 ->32(2,16) ->192 ->48(12,4) ->64
        [1, 1, 80, 5, 1, 6, 8, 8, 80, 8, 8, 0, 2, 0, 2],  # 48 ->48(3,16) ->288 ->80(16,5) ->80
        [1, 1, 80, 5, 1, 6, 10, 10, 80, 8, 8, 0, 2, 0, 2],  # 80 ->80(4,20) ->480 ->80(20,4) ->80
        [2, 1, 120, 5, 1, 6, 10, 10, 120, 10, 10, 0, 2, 0, 2],  # 80 ->80(4,20) ->480 ->128(16,8) ->120
        [1, 1, 120, 5, 1, 6, 12, 12, 120, 10, 10, 0, 2, 0, 2],  # 120 ->128(4,32) ->720 ->128(32,4) ->120
        [1, 1, 144, 3, 1, 6, 12, 12, 144, 12, 12, 0, 2, 0, 2],  # 120 ->128(4,32) ->720 ->160(32,5) ->144
        [1, 1, 864, 3, 1, 6, 12, 12, 0, 0, 0, 0, 2, 0, 2],  # 144 ->144(5,32) ->864
    ],
}

ACTIVATION_CONFIG = {
    "msnx_dy6_exp4_4M_221": {
        "act_max": 2.0,
        "reduction": 8,
        "init_ab3": [1.0, 0.0],
        "init_a": [1.0, 1.0],
        "init_b": [0.0, 0.0],
    },
    "msnx_dy6_exp6_6M_221": {
        "act_max": 2.0,
        "reduction": 8,
        "init_ab3": [1.0, 0.0],
        "init_a": [1.0, 1.0],
        "init_b": [0.0, 0.0],
    },
    "msnx_dy9_exp6_12M_221": {
        "act_max": 2.0,
        "reduction": 8,
        "init_ab3": [1.0, 0.0],
        "init_a": [1.0, 1.0],
        "init_b": [0.0, 0.0],
    },
    "msnx_dy12_exp6_20M_020": {
        "act_max": 2.0,
        "reduction": 8,
        "init_ab3": [1.0, 0.0],
        "init_a": [1.0, 0.5],
        "init_b": [0.0, 0.5],
    },
}


def _make_divisible(v, divisor=8, min_value=None):
    """Round v to the nearest multiple of divisor, never going below
    min_value and never dropping more than 10% below v."""
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v


class MaxGroupPooling(nn.Layer):
    def __init__(self, channel_per_group=2):
        super().__init__()
        self.channel_per_group = channel_per_group

    def forward(self, x):
        if self.channel_per_group == 1:
            return x
        b, c, h, w = x.shape
        # group the channels, then take the per-group max
        # (paddle.max returns a tensor, not a (values, indices) pair)
        y = x.reshape([b, c // self.channel_per_group, -1, h, w])
        out = paddle.max(y, axis=2)
        return out


class SwishLinear(nn.Layer):
    def __init__(self, inp, oup):
        super().__init__()
        self.linear = nn.Sequential(
            nn.Linear(inp, oup), nn.BatchNorm1D(oup), nn.Hardswish())

    def forward(self, x):
        return self.linear(x)


class StemLayer(nn.Layer):
    def __init__(self, inp, oup, stride, groups=(4, 4)):
        super().__init__()
        g1, g2 = groups
        self.stem = nn.Sequential(
            SpatialSepConvSF(inp, groups, 3, stride),
            MaxGroupPooling(2) if g1 * g2 == 2 * oup else nn.ReLU6())

    def forward(self, x):
        out = self.stem(x)
        return out


class GroupConv(nn.Layer):
    def __init__(self, inp, oup, groups=(2, 2)):
        super().__init__()
        self.inp = inp
        self.oup = oup
        # groups is a (g1, g2) pair; only the first element sets the
        # convolution's group count
        self.groups = groups
        self.conv = nn.Sequential(
            nn.Conv2D(
                inp, oup, 1, groups=self.groups[0], bias_attr=False),
            nn.BatchNorm2D(oup))

    def forward(self, x):
        x = self.conv(x)
        return x


class ChannelShuffle(nn.Layer):
    def __init__(self, groups):
        super().__init__()
        self.groups = groups

    def forward(self, x):
        b, c, h, w = x.shape
        channels_per_group = c // self.groups
        # reshape to (b, groups, channels_per_group, h, w), swap the two
        # channel axes, then flatten back
        x = x.reshape([b, self.groups, channels_per_group, h, w])
        x = x.transpose([0, 2, 1, 3, 4])
        out = x.reshape([b, c, h, w])
        return out


class ChannelShuffle2(ChannelShuffle):
    pass
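
# A toy illustration of the shuffle above (not executed at import time):
# with groups=2 and four channels ordered [0, 1, 2, 3], the
# reshape/transpose/reshape sequence reorders them to [0, 2, 1, 3],
# interleaving the two groups.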

class SpatialSepConvSF(nn.Layer):
    def __init__(self, inp, oups, kernel_size, stride):
        super().__init__()
        oup1, oup2 = oups
        self.conv = nn.Sequential(
            nn.Conv2D(
                inp,
                oup1, (kernel_size, 1), (stride, 1), (kernel_size // 2, 0),
                groups=1,
                bias_attr=False),
            nn.BatchNorm2D(oup1),
            nn.Conv2D(
                oup1,
                oup1 * oup2, (1, kernel_size), (1, stride),
                (0, kernel_size // 2),
                groups=oup1,
                bias_attr=False),
            nn.BatchNorm2D(oup1 * oup2),
            ChannelShuffle(oup1), )

    def forward(self, x):
        out = self.conv(x)
        return out


class DepthSpatialSepConv(nn.Layer):
    def __init__(self, inp, expand, kernel_size, stride):
        super().__init__()
        exp1, exp2 = expand
        hidden_dim = inp * exp1
        oup = inp * exp1 * exp2
        self.conv = nn.Sequential(
            nn.Conv2D(
                inp,
                inp * exp1, (kernel_size, 1), (stride, 1),
                (kernel_size // 2, 0),
                groups=inp,
                bias_attr=False),
            nn.BatchNorm2D(inp * exp1),
            nn.Conv2D(
                hidden_dim,
                oup, (1, kernel_size), (1, stride), (0, kernel_size // 2),
                groups=hidden_dim,
                bias_attr=False),
            nn.BatchNorm2D(oup))

    def forward(self, x):
        out = self.conv(x)
        return out


class DYShiftMax(nn.Layer):
    def __init__(self,
                 inp,
                 oup,
                 reduction=4,
                 act_max=1.0,
                 act_relu=True,
                 init_a=[0.0, 0.0],
                 init_b=[0.0, 0.0],
                 relu_before_pool=False,
                 g=None,
                 expansion=False):
        super().__init__()
        self.oup = oup
        self.act_max = act_max * 2
        self.act_relu = act_relu
        self.avg_pool = nn.Sequential(
            nn.ReLU() if relu_before_pool else nn.Identity(),
            nn.AdaptiveAvgPool2D(1))
        # predict four coefficients (a1, b1, a2, b2) for the max of two
        # affine branches, or two (a1, b1) for a single branch
        self.exp = 4 if act_relu else 2
        self.init_a = init_a
        self.init_b = init_b

        # determine squeeze
        squeeze = _make_divisible(inp // reduction, 4)
        if squeeze < 4:
            squeeze = 4

        self.fc = nn.Sequential(
            nn.Linear(inp, squeeze),
            nn.ReLU(), nn.Linear(squeeze, oup * self.exp), nn.Hardsigmoid())

        # g is expected as a (g1, g2) pair; only the second element is used
        if g is None:
            g = (1, 1)
        self.g = g[1]
        if self.g != 1 and expansion:
            self.g = inp // self.g
        self.gc = inp // self.g

        # precompute a channel permutation that rotates the groups (and the
        # channels within each group slot) for the shifted branch
        index = paddle.to_tensor(list(range(inp))).reshape([1, inp, 1, 1])
        index = index.reshape([1, self.g, self.gc, 1, 1])
        indexgs = paddle.split(index, [1, self.g - 1], axis=1)
        indexgs = paddle.concat((indexgs[1], indexgs[0]), axis=1)
        indexs = paddle.split(indexgs, [1, self.gc - 1], axis=2)
        indexs = paddle.concat((indexs[1], indexs[0]), axis=2)
        self.index = indexs.reshape([inp]).astype(paddle.int64)
        self.expansion = expansion

    def forward(self, x):
        x_in = x
        x_out = x

        b, c, _, _ = x_in.shape
        y = self.avg_pool(x_in).reshape([b, c])
        y = self.fc(y).reshape([b, self.oup * self.exp, 1, 1])
        y = (y - 0.5) * self.act_max

        x2 = paddle.index_select(x_out, self.index, axis=1)

        if self.exp == 4:
            a1, b1, a2, b2 = paddle.split(y, 4, axis=1)
            a1 = a1 + self.init_a[0]
            a2 = a2 + self.init_a[1]
            b1 = b1 + self.init_b[0]
            b2 = b2 + self.init_b[1]
            z1 = x_out * a1 + x2 * b1
            z2 = x_out * a2 + x2 * b2
            out = paddle.maximum(z1, z2)
        else:  # self.exp == 2
            a1, b1 = paddle.split(y, 2, axis=1)
            a1 = a1 + self.init_a[0]
            b1 = b1 + self.init_b[0]
            out = x_out * a1 + x2 * b1

        return out
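
# DYShiftMax in short: given input x and its group-shifted copy x_shift
# (built with the precomputed index), the layer returns
#     max(a1 * x + b1 * x_shift, a2 * x + b2 * x_shift)
# when act_relu is True (exp == 4), or the single affine branch
# a1 * x + b1 * x_shift otherwise, with the coefficients predicted per
# sample by the squeeze-style FC above.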

class DYMicroBlock(nn.Layer):
    def __init__(self,
                 inp,
                 oup,
                 kernel_size=3,
                 stride=1,
                 ch_exp=(2, 2),
                 ch_per_group=(4, 4),
                 groups_1x1=(1, 1),
                 dy=[0, 0, 0],
                 ratio=1.0,
                 activation_cfg=None):
        super().__init__()

        self.identity = stride == 1 and inp == oup

        y1, y2, y3 = dy
        act_max = activation_cfg["act_max"]
        act_reduction = activation_cfg["reduction"] * ratio
        init_a = activation_cfg["init_a"]
        init_b = activation_cfg["init_b"]
        init_ab3 = activation_cfg["init_ab3"]

        t1 = ch_exp
        gs1 = ch_per_group
        hidden_fft, g1, g2 = groups_1x1
        hidden_dim1 = inp * t1[0]
        hidden_dim2 = inp * t1[0] * t1[1]

        if gs1[0] == 0:
            self.layers = nn.Sequential(
                DepthSpatialSepConv(inp, t1, kernel_size, stride),
                DYShiftMax(
                    hidden_dim2,
                    hidden_dim2,
                    act_max=act_max,
                    act_relu=True if y2 == 2 else False,
                    init_a=init_a,
                    reduction=act_reduction,
                    init_b=init_b,
                    g=gs1,
                    expansion=False) if y2 > 0 else nn.ReLU6(),
                ChannelShuffle(gs1[1]),
                ChannelShuffle2(hidden_dim2 // 2)
                if y2 != 0 else nn.Identity(),
                GroupConv(hidden_dim2, oup, (g1, g2)),
                DYShiftMax(
                    oup,
                    oup,
                    act_max=act_max,
                    act_relu=False,
                    init_a=[init_ab3[0], 0.0],
                    reduction=act_reduction // 2,
                    init_b=[init_ab3[1], 0.0],
                    g=(g1, g2),
                    expansion=False) if y3 > 0 else nn.Identity(),
                ChannelShuffle(g2),
                ChannelShuffle2(oup // 2)
                if oup % 2 == 0 and y3 != 0 else nn.Identity(), )
        elif g2 == 0:
            self.layers = nn.Sequential(
                GroupConv(inp, hidden_dim2, gs1),
                DYShiftMax(
                    hidden_dim2,
                    hidden_dim2,
                    act_max=act_max,
                    act_relu=False,
                    init_a=[init_ab3[0], 0.0],
                    reduction=act_reduction,
                    init_b=[init_ab3[1], 0.0],
                    g=gs1,
                    expansion=False) if y3 > 0 else nn.Identity(), )
        else:
            self.layers = nn.Sequential(
                GroupConv(inp, hidden_dim2, gs1),
                DYShiftMax(
                    hidden_dim2,
                    hidden_dim2,
                    act_max=act_max,
                    act_relu=True if y1 == 2 else False,
                    init_a=init_a,
                    reduction=act_reduction,
                    init_b=init_b,
                    g=gs1,
                    expansion=False) if y1 > 0 else nn.ReLU6(),
                ChannelShuffle(gs1[1]),
                DepthSpatialSepConv(hidden_dim2, (1, 1), kernel_size, stride),
                nn.Identity(),
                DYShiftMax(
                    hidden_dim2,
                    hidden_dim2,
                    act_max=act_max,
                    act_relu=True if y2 == 2 else False,
                    init_a=init_a,
                    reduction=act_reduction,
                    init_b=init_b,
                    g=gs1,
                    expansion=True) if y2 > 0 else nn.ReLU6(),
                # pick the shuffle granularity according to which dynamic
                # activations (y1, y2) are enabled
                ChannelShuffle2(hidden_dim2 // 4)
                if y1 != 0 and y2 != 0 else nn.Identity()
                if y1 == 0 and y2 == 0 else ChannelShuffle2(hidden_dim2 // 2),
                GroupConv(hidden_dim2, oup, (g1, g2)),
                DYShiftMax(
                    oup,
                    oup,
                    act_max=act_max,
                    act_relu=False,
                    init_a=[init_ab3[0], 0.0],
                    reduction=act_reduction // 2
                    if oup < hidden_dim2 else act_reduction,
                    init_b=[init_ab3[1], 0.0],
                    g=(g1, g2),
                    expansion=False) if y3 > 0 else nn.Identity(),
                ChannelShuffle(g2),
                ChannelShuffle2(oup // 2) if y3 != 0 else nn.Identity(), )

    def forward(self, x):
        out = self.layers(x)
        if self.identity:
            out = out + x
        return out


class MicroNet(nn.Layer):
    def __init__(self,
                 net_cfg,
                 activation_cfg,
                 input_size=224,
                 class_num=1000,
                 stem_ch=16,
                 stem_groups=[4, 8],
                 out_ch=1024,
                 dropout_rate=0.0):
        super().__init__()
        assert input_size % 32 == 0

        # building first layer
        input_channel = stem_ch
        layers = [StemLayer(3, input_channel, stride=2, groups=stem_groups)]

        for s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r in net_cfg:
            for i in range(n):
                layers.append(
                    DYMicroBlock(
                        input_channel,
                        c,
                        kernel_size=ks,
                        stride=s if i == 0 else 1,
                        ch_exp=(c1, c2),
                        ch_per_group=(g1, g2),
                        groups_1x1=(c3, g3, g4),
                        dy=[y1, y2, y3],
                        ratio=r,
                        activation_cfg=activation_cfg, ))
                input_channel = c
        self.features = nn.Sequential(*layers)

        self.avgpool = nn.Sequential(
            nn.ReLU6(), nn.AdaptiveAvgPool2D(1), nn.Hardswish())

        # building last several layers
        self.classifier = nn.Sequential(
            SwishLinear(input_channel, out_ch),
            nn.Dropout(dropout_rate), SwishLinear(out_ch, class_num))

        self.apply(self._initialize_weights)

    def _initialize_weights(self, m):
        if isinstance(m, nn.Conv2D):
            n = m._kernel_size[0] * m._kernel_size[1] * m._out_channels
            nn.initializer.Normal(std=math.sqrt(2. / n))(m.weight)
        elif isinstance(m, nn.Linear):
            nn.initializer.Normal(std=0.01)(m.weight)

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = self.classifier(x.flatten(1))
        return x


def _load_pretrained(pretrained, model, model_url, use_ssld):
    if pretrained is False:
        pass
    elif pretrained is True:
        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
    elif isinstance(pretrained, str):
        load_dygraph_pretrain(model, pretrained)
    else:
        raise RuntimeError(
            "pretrained type is not available. Please use `string` or `boolean` type."
        )
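
# `pretrained` accepts False (random init), True (download from MODEL_URLS,
# still TODO above), or a local weights path, e.g. (hypothetical path):
#     model = micronet_m1(pretrained="./micronet_m1.pdparams")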

def micronet_m0(pretrained=False, use_ssld=False, **kwargs):
    model = MicroNet(
        NET_CONFIG["msnx_dy6_exp4_4M_221"],
        ACTIVATION_CONFIG["msnx_dy6_exp4_4M_221"],
        stem_ch=4,
        stem_groups=[2, 2],
        out_ch=640,
        dropout_rate=0.05,
        **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["micronet_m0"], use_ssld)
    return model


def micronet_m1(pretrained=False, use_ssld=False, **kwargs):
    model = MicroNet(
        NET_CONFIG["msnx_dy6_exp6_6M_221"],
        ACTIVATION_CONFIG["msnx_dy6_exp6_6M_221"],
        stem_ch=6,
        stem_groups=[3, 2],
        out_ch=960,
        dropout_rate=0.05,
        **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["micronet_m1"], use_ssld)
    return model


def micronet_m2(pretrained=False, use_ssld=False, **kwargs):
    model = MicroNet(
        NET_CONFIG["msnx_dy9_exp6_12M_221"],
        ACTIVATION_CONFIG["msnx_dy9_exp6_12M_221"],
        stem_ch=8,
        stem_groups=[4, 2],
        out_ch=1024,
        dropout_rate=0.1,
        **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["micronet_m2"], use_ssld)
    return model


def micronet_m3(pretrained=False, use_ssld=False, **kwargs):
    model = MicroNet(
        NET_CONFIG["msnx_dy12_exp6_20M_020"],
        ACTIVATION_CONFIG["msnx_dy12_exp6_20M_020"],
        stem_ch=12,
        stem_groups=[4, 3],
        out_ch=1024,
        dropout_rate=0.1,
        **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["micronet_m3"], use_ssld)
    return model
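
# A minimal usage sketch (assumes a working paddle install; this module uses
# relative imports, so import it through the package rather than running the
# file directly):
#
#     model = micronet_m0(pretrained=False)  # MODEL_URLS are still TODO
#     x = paddle.rand([1, 3, 224, 224])      # input size must be a multiple of 32
#     logits = model(x)                      # shape: [1, 1000]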