Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleOCR
提交
f2d6f541
P
PaddleOCR
项目概览
PaddlePaddle
/
PaddleOCR
大约 1 年 前同步成功
通知
1528
Star
32962
Fork
6643
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
108
列表
看板
标记
里程碑
合并请求
7
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleOCR
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
108
Issue
108
列表
看板
标记
里程碑
合并请求
7
合并请求
7
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
f2d6f541
编写于
1月 17, 2022
作者:
B
bupt906
提交者:
GitHub
1月 17, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add micronet (#5251)
上级
107a316f
变更
2
隐藏空白更改
内联
并排
Showing
2 changed files
with
530 additions
and
1 deletion
+530
-1
ppocr/modeling/backbones/__init__.py
ppocr/modeling/backbones/__init__.py
+2
-1
ppocr/modeling/backbones/rec_micronet.py
ppocr/modeling/backbones/rec_micronet.py
+528
-0
未找到文件。
ppocr/modeling/backbones/__init__.py
浏览文件 @
f2d6f541
...
...
@@ -29,9 +29,10 @@ def build_backbone(config, model_type):
from
.rec_nrtr_mtb
import
MTB
from
.rec_resnet_31
import
ResNet31
from
.rec_resnet_aster
import
ResNet_ASTER
from
.rec_micronet
import
MicroNet
support_dict
=
[
'MobileNetV1Enhance'
,
'MobileNetV3'
,
'ResNet'
,
'ResNetFPN'
,
'MTB'
,
"ResNet31"
,
"ResNet_ASTER"
"ResNet31"
,
"ResNet_ASTER"
,
'MicroNet'
]
elif
model_type
==
"e2e"
:
from
.e2e_resnet_vd_pg
import
ResNet
...
...
ppocr/modeling/backbones/rec_micronet.py
0 → 100644
浏览文件 @
f2d6f541
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is refer from:
https://github.com/liyunsheng13/micronet/blob/main/backbone/micronet.py
https://github.com/liyunsheng13/micronet/blob/main/backbone/activation.py
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
paddle
import
paddle.nn
as
nn
from
ppocr.modeling.backbones.det_mobilenet_v3
import
make_divisible
# Per-stage block configuration for the MicroNet-M0 variant.
# Each row is unpacked in MicroNet.__init__ as:
#   s  - stride of the first block in the stage
#   n  - number of blocks in the stage
#   c  - output channels of the stage
#   ks - depthwise kernel size
#   c1, c2 - channel expansion pair (-> ch_exp)
#   g1, g2 - group pair for the grouped convs (-> ch_per_group)
#   c3, g3, g4 - groups_1x1 triple for the projection conv
#   y1, y2, y3 - DYShiftMax switches (0 disables; 2 selects act_relu=True)
#   r  - ratio multiplier for the activation's squeeze reduction
M0_cfgs = [
    # s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r
    [2, 1, 8, 3, 2, 2, 0, 4, 8, 2, 2, 2, 0, 1, 1],
    [2, 1, 12, 3, 2, 2, 0, 8, 12, 4, 4, 2, 2, 1, 1],
    [2, 1, 16, 5, 2, 2, 0, 12, 16, 4, 4, 2, 2, 1, 1],
    [1, 1, 32, 5, 1, 4, 4, 4, 32, 4, 4, 2, 2, 1, 1],
    [2, 1, 64, 5, 1, 4, 8, 8, 64, 8, 8, 2, 2, 1, 1],
    [1, 1, 96, 3, 1, 4, 8, 8, 96, 8, 8, 2, 2, 1, 2],
    [1, 1, 384, 3, 1, 4, 12, 12, 0, 0, 0, 2, 2, 1, 2],
]
# Per-stage block configuration for the MicroNet-M1 variant.
# Column layout matches the 15-value unpacking in MicroNet.__init__.
M1_cfgs = [
    # s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r
    [2, 1, 8, 3, 2, 2, 0, 6, 8, 2, 2, 2, 0, 1, 1],
    [2, 1, 16, 3, 2, 2, 0, 8, 16, 4, 4, 2, 2, 1, 1],
    [2, 1, 16, 5, 2, 2, 0, 16, 16, 4, 4, 2, 2, 1, 1],
    [1, 1, 32, 5, 1, 6, 4, 4, 32, 4, 4, 2, 2, 1, 1],
    [2, 1, 64, 5, 1, 6, 8, 8, 64, 8, 8, 2, 2, 1, 1],
    [1, 1, 96, 3, 1, 6, 8, 8, 96, 8, 8, 2, 2, 1, 2],
    [1, 1, 576, 3, 1, 6, 12, 12, 0, 0, 0, 2, 2, 1, 2],
]
# Per-stage block configuration for the MicroNet-M2 variant.
# Column layout matches the 15-value unpacking in MicroNet.__init__.
M2_cfgs = [
    # s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r
    [2, 1, 12, 3, 2, 2, 0, 8, 12, 4, 4, 2, 0, 1, 1],
    [2, 1, 16, 3, 2, 2, 0, 12, 16, 4, 4, 2, 2, 1, 1],
    [1, 1, 24, 3, 2, 2, 0, 16, 24, 4, 4, 2, 2, 1, 1],
    [2, 1, 32, 5, 1, 6, 6, 6, 32, 4, 4, 2, 2, 1, 1],
    [1, 1, 32, 5, 1, 6, 8, 8, 32, 4, 4, 2, 2, 1, 2],
    [1, 1, 64, 5, 1, 6, 8, 8, 64, 8, 8, 2, 2, 1, 2],
    [2, 1, 96, 5, 1, 6, 8, 8, 96, 8, 8, 2, 2, 1, 2],
    [1, 1, 128, 3, 1, 6, 12, 12, 128, 8, 8, 2, 2, 1, 2],
    [1, 1, 768, 3, 1, 6, 16, 16, 0, 0, 0, 2, 2, 1, 2],
]
# Per-stage block configuration for the MicroNet-M3 variant (the default).
# Column layout matches the 15-value unpacking in MicroNet.__init__.
# Note that y1 and y3 are 0 throughout, so M3 stages only enable the
# second DYShiftMax activation (y2).
M3_cfgs = [
    # s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r
    [2, 1, 16, 3, 2, 2, 0, 12, 16, 4, 4, 0, 2, 0, 1],
    [2, 1, 24, 3, 2, 2, 0, 16, 24, 4, 4, 0, 2, 0, 1],
    [1, 1, 24, 3, 2, 2, 0, 24, 24, 4, 4, 0, 2, 0, 1],
    [2, 1, 32, 5, 1, 6, 6, 6, 32, 4, 4, 0, 2, 0, 1],
    [1, 1, 32, 5, 1, 6, 8, 8, 32, 4, 4, 0, 2, 0, 2],
    [1, 1, 64, 5, 1, 6, 8, 8, 48, 8, 8, 0, 2, 0, 2],
    [1, 1, 80, 5, 1, 6, 8, 8, 80, 8, 8, 0, 2, 0, 2],
    [1, 1, 80, 5, 1, 6, 10, 10, 80, 8, 8, 0, 2, 0, 2],
    [1, 1, 120, 5, 1, 6, 10, 10, 120, 10, 10, 0, 2, 0, 2],
    [1, 1, 120, 5, 1, 6, 12, 12, 120, 10, 10, 0, 2, 0, 2],
    [1, 1, 144, 3, 1, 6, 12, 12, 144, 12, 12, 0, 2, 0, 2],
    [1, 1, 432, 3, 1, 3, 12, 12, 0, 0, 0, 0, 2, 0, 2],
]
def get_micronet_config(mode):
    """Return the per-stage configuration table for a MicroNet variant.

    Args:
        mode (str): one of 'M0', 'M1', 'M2', 'M3'.

    Returns:
        list: the matching ``M*_cfgs`` table defined above.

    Raises:
        KeyError: if `mode` is not a known variant.
    """
    # Explicit lookup instead of eval(mode + '_cfgs'): `mode` comes from a
    # user-editable YAML config, and eval() would execute arbitrary code for
    # a crafted value. The mapping also fails fast with a clear KeyError.
    cfgs = {
        'M0': M0_cfgs,
        'M1': M1_cfgs,
        'M2': M2_cfgs,
        'M3': M3_cfgs,
    }
    return cfgs[mode]
class MaxGroupPooling(nn.Layer):
    """Halve (or reduce) the channel count by taking an element-wise max
    over consecutive groups of `channel_per_group` channels."""

    def __init__(self, channel_per_group=2):
        super(MaxGroupPooling, self).__init__()
        self.channel_per_group = channel_per_group

    def forward(self, x):
        # A group size of one makes the max a no-op; return the input as-is.
        if self.channel_per_group == 1:
            return x
        batch, channels, height, width = x.shape
        # Fold each group of channels into its own axis, then max over it.
        grouped = paddle.reshape(
            x, [batch, channels // self.channel_per_group, -1, height, width])
        return paddle.max(grouped, axis=2)
class SpatialSepConvSF(nn.Layer):
    """Spatially separated stem convolution.

    Factorises a k x k convolution into a (k, 1) conv producing `oup1`
    channels, followed by a grouped (1, k) conv expanding each of the
    `oup1` groups by a factor of `oup2`, then shuffles channels so the
    groups are interleaved. Final output has ``oup1 * oup2`` channels.
    """

    def __init__(self, inp, oups, kernel_size, stride):
        super(SpatialSepConvSF, self).__init__()

        # `oups` is an (oup1, oup2) pair, not a single channel count.
        oup1, oup2 = oups
        self.conv = nn.Sequential(
            # Vertical (kernel_size, 1) conv; strides along height only.
            nn.Conv2D(
                inp,
                oup1, (kernel_size, 1), (stride, 1), (kernel_size // 2, 0),
                bias_attr=False,
                groups=1),
            nn.BatchNorm2D(oup1),
            # Horizontal (1, kernel_size) grouped conv; strides along width
            # and expands each of the oup1 groups by oup2.
            nn.Conv2D(
                oup1,
                oup1 * oup2, (1, kernel_size), (1, stride),
                (0, kernel_size // 2),
                bias_attr=False,
                groups=oup1),
            nn.BatchNorm2D(oup1 * oup2),
            # Interleave the oup1 channel groups for the next layer.
            ChannelShuffle(oup1), )

    def forward(self, x):
        out = self.conv(x)
        return out
class ChannelShuffle(nn.Layer):
    """ShuffleNet-style channel shuffle: interleaves channels across
    `groups` groups so grouped convolutions can exchange information."""

    def __init__(self, groups):
        super(ChannelShuffle, self).__init__()
        self.groups = groups

    def forward(self, x):
        batch, channels, height, width = x.shape
        per_group = channels // self.groups
        # Split channels into (groups, per_group), swap those two axes,
        # then flatten back; this interleaves channels from every group.
        shuffled = paddle.reshape(
            x, [batch, self.groups, per_group, height, width])
        shuffled = paddle.transpose(shuffled, (0, 2, 1, 3, 4))
        return paddle.reshape(shuffled, [batch, -1, height, width])
class StemLayer(nn.Layer):
    """Network stem: a spatially separated conv followed by either
    max-group pooling or ReLU6, depending on the channel arithmetic."""

    def __init__(self, inp, oup, stride, groups=(4, 4)):
        super(StemLayer, self).__init__()
        g1, g2 = groups
        # SpatialSepConvSF emits g1 * g2 channels; when that is exactly
        # twice `oup`, halve them with a max over channel pairs, otherwise
        # just apply an activation.
        reduce_or_act = (MaxGroupPooling(2)
                         if g1 * g2 == 2 * oup else nn.ReLU6())
        self.stem = nn.Sequential(
            SpatialSepConvSF(inp, groups, 3, stride), reduce_or_act)

    def forward(self, x):
        return self.stem(x)
class DepthSpatialSepConv(nn.Layer):
    """Depthwise spatially separated convolution.

    Factorises a depthwise k x k conv into a (k, 1) depthwise conv that
    expands channels by `exp1`, then a (1, k) depthwise conv that expands
    by `exp2`, yielding ``inp * exp1 * exp2`` output channels.
    """

    def __init__(self, inp, expand, kernel_size, stride):
        super(DepthSpatialSepConv, self).__init__()

        # `expand` is an (exp1, exp2) expansion pair.
        exp1, exp2 = expand
        hidden_dim = inp * exp1
        oup = inp * exp1 * exp2

        self.conv = nn.Sequential(
            # Vertical (kernel_size, 1) depthwise conv with vertical stride.
            nn.Conv2D(
                inp,
                inp * exp1, (kernel_size, 1), (stride, 1),
                (kernel_size // 2, 0),
                bias_attr=False,
                groups=inp),
            nn.BatchNorm2D(inp * exp1),
            # Horizontal (1, kernel_size) depthwise conv.
            # NOTE(review): stride here is the scalar 1, so `stride` is only
            # applied vertically (unlike SpatialSepConvSF, which strides both
            # axes) — presumably intentional for text-recognition feature
            # maps, but confirm against the reference implementation.
            nn.Conv2D(
                hidden_dim,
                oup, (1, kernel_size),
                1, (0, kernel_size // 2),
                bias_attr=False,
                groups=hidden_dim),
            nn.BatchNorm2D(oup))

    def forward(self, x):
        x = self.conv(x)
        return x
class GroupConv(nn.Layer):
    """1x1 grouped (pointwise) convolution followed by batch norm."""

    def __init__(self, inp, oup, groups=2):
        super(GroupConv, self).__init__()
        self.inp = inp
        self.oup = oup
        # NOTE(review): `groups` is indexed with [0] below, so callers must
        # pass a tuple/list such as (g1, g2) — which DYMicroBlock does. The
        # int default of 2 would raise TypeError here; confirm whether the
        # default should be (2, 2).
        self.groups = groups
        self.conv = nn.Sequential(
            nn.Conv2D(
                inp, oup, 1, 1, 0, bias_attr=False, groups=self.groups[0]),
            nn.BatchNorm2D(oup))

    def forward(self, x):
        x = self.conv(x)
        return x
class DepthConv(nn.Layer):
    """Plain depthwise convolution (groups == input channels) + batch norm."""

    def __init__(self, inp, oup, kernel_size, stride):
        super(DepthConv, self).__init__()
        depthwise = nn.Conv2D(
            inp,
            oup,
            kernel_size,
            stride,
            kernel_size // 2,  # "same" padding for odd kernel sizes
            bias_attr=False,
            groups=inp)
        self.conv = nn.Sequential(depthwise, nn.BatchNorm2D(oup))

    def forward(self, x):
        return self.conv(x)
class DYShiftMax(nn.Layer):
    """Dynamic Shift-Max activation from MicroNet.

    Produces input-dependent affine coefficients via a squeeze-and-excite
    style FC branch, then fuses each channel with a group-shifted copy of
    the input: out = max(x*a1 + shifted(x)*b1, x*a2 + shifted(x)*b2)
    (the second term only when ``act_relu`` is True, i.e. ``exp == 4``).
    """

    def __init__(self,
                 inp,
                 oup,
                 reduction=4,
                 act_max=1.0,
                 act_relu=True,
                 init_a=[0.0, 0.0],
                 init_b=[0.0, 0.0],
                 relu_before_pool=False,
                 g=None,
                 expansion=False):
        super(DYShiftMax, self).__init__()
        self.oup = oup
        # Coefficients are later mapped from [0,1] to [-act_max, act_max]
        # via (y - 0.5) * self.act_max, hence the doubling here.
        self.act_max = act_max * 2
        self.act_relu = act_relu
        self.avg_pool = nn.Sequential(
            nn.ReLU() if relu_before_pool == True else nn.Sequential(),
            nn.AdaptiveAvgPool2D(1))
        # 4 coefficient sets (a1, b1, a2, b2) with the max branch,
        # 2 (a1, b1) without.
        self.exp = 4 if act_relu else 2
        self.init_a = init_a
        self.init_b = init_b

        # determine squeeze: bottleneck width of the FC branch, floored at 4.
        squeeze = make_divisible(inp // reduction, 4)
        if squeeze < 4:
            squeeze = 4

        self.fc = nn.Sequential(
            nn.Linear(inp, squeeze),
            nn.ReLU(), nn.Linear(squeeze, oup * self.exp), nn.Hardsigmoid())

        # NOTE(review): if g is None the next line sets g = 1 (an int), and
        # g[1] below would raise TypeError — callers always pass a tuple, so
        # the None default appears to be dead/broken; confirm upstream.
        if g is None:
            g = 1
        self.g = g[1]
        if self.g != 1 and expansion:
            self.g = inp // self.g

        self.gc = inp // self.g
        # Build a fixed channel-permutation index that rotates both the
        # group axis and the within-group axis by one position, so each
        # channel is paired with a channel from a different group.
        index = paddle.to_tensor([range(inp)])
        index = paddle.reshape(index, [1, inp, 1, 1])
        index = paddle.reshape(index, [1, self.g, self.gc, 1, 1])
        indexgs = paddle.split(index, [1, self.g - 1], axis=1)
        indexgs = paddle.concat((indexgs[1], indexgs[0]), axis=1)
        indexs = paddle.split(indexgs, [1, self.gc - 1], axis=2)
        indexs = paddle.concat((indexs[1], indexs[0]), axis=2)
        self.index = paddle.reshape(indexs, [inp])
        self.expansion = expansion

    def forward(self, x):
        x_in = x
        x_out = x

        # Squeeze branch: global average pool -> FC -> coefficients in [0,1].
        b, c, _, _ = x_in.shape
        y = self.avg_pool(x_in)
        y = paddle.reshape(y, [b, c])
        y = self.fc(y)
        y = paddle.reshape(y, [b, self.oup * self.exp, 1, 1])
        # Map Hardsigmoid output [0,1] to [-act_max, act_max].
        y = (y - 0.5) * self.act_max

        n2, c2, h2, w2 = x_out.shape
        # Group-shifted copy of the input, gathered via the fixed index.
        # NOTE(review): the .numpy() round-trip forces a device sync and
        # detaches this gather from autograd — presumably acceptable for
        # inference but worth confirming for training.
        x2 = paddle.to_tensor(x_out.numpy()[:, self.index.numpy(), :, :])

        if self.exp == 4:
            temp = y.shape
            a1, b1, a2, b2 = paddle.split(y, temp[1] // self.oup, axis=1)

            # Shift the learned coefficients by their configured baselines.
            a1 = a1 + self.init_a[0]
            a2 = a2 + self.init_a[1]
            b1 = b1 + self.init_b[0]
            b2 = b2 + self.init_b[1]
            z1 = x_out * a1 + x2 * b1
            z2 = x_out * a2 + x2 * b2
            out = paddle.maximum(z1, z2)
        elif self.exp == 2:
            temp = y.shape
            a1, b1 = paddle.split(y, temp[1] // self.oup, axis=1)
            a1 = a1 + self.init_a[0]
            b1 = b1 + self.init_b[0]
            out = x_out * a1 + x2 * b1

        return out
class DYMicroBlock(nn.Layer):
    """Micro-Block: the basic building block of MicroNet.

    Depending on the group configuration it builds one of three variants:
    - ``gs1[0] == 0``: depthwise spatially separated conv -> DYShiftMax ->
      grouped 1x1 projection (stem-side blocks);
    - ``g2 == 0``: a single grouped 1x1 conv + optional DYShiftMax
      (final expansion block);
    - otherwise: full Micro-Block — grouped 1x1 expansion, depthwise conv,
      grouped 1x1 projection, each with optional DYShiftMax / shuffle.
    A residual connection is added when stride == 1 and inp == oup.
    """

    def __init__(self,
                 inp,
                 oup,
                 kernel_size=3,
                 stride=1,
                 ch_exp=(2, 2),
                 ch_per_group=4,
                 groups_1x1=(1, 1),
                 depthsep=True,
                 shuffle=False,
                 activation_cfg=None):
        super(DYMicroBlock, self).__init__()

        # Residual connection only when shapes are preserved.
        self.identity = stride == 1 and inp == oup

        # y1/y2/y3 switch the three DYShiftMax activations (0 disables,
        # 2 additionally selects act_relu=True); r scales the SE reduction.
        y1, y2, y3 = activation_cfg['dy']
        act_reduction = 8 * activation_cfg['ratio']
        init_a = activation_cfg['init_a']
        init_b = activation_cfg['init_b']

        t1 = ch_exp
        gs1 = ch_per_group
        hidden_fft, g1, g2 = groups_1x1
        # Expanded width after the (t1[0], t1[1]) channel expansion.
        hidden_dim2 = inp * t1[0] * t1[1]

        if gs1[0] == 0:
            self.layers = nn.Sequential(
                DepthSpatialSepConv(inp, t1, kernel_size, stride),
                DYShiftMax(
                    hidden_dim2,
                    hidden_dim2,
                    act_max=2.0,
                    act_relu=True if y2 == 2 else False,
                    init_a=init_a,
                    reduction=act_reduction,
                    init_b=init_b,
                    g=gs1,
                    expansion=False) if y2 > 0 else nn.ReLU6(),
                ChannelShuffle(gs1[1]) if shuffle else nn.Sequential(),
                ChannelShuffle(hidden_dim2 // 2)
                if shuffle and y2 != 0 else nn.Sequential(),
                GroupConv(hidden_dim2, oup, (g1, g2)),
                DYShiftMax(
                    oup,
                    oup,
                    act_max=2.0,
                    act_relu=False,
                    init_a=[1.0, 0.0],
                    reduction=act_reduction // 2,
                    init_b=[0.0, 0.0],
                    g=(g1, g2),
                    expansion=False) if y3 > 0 else nn.Sequential(),
                ChannelShuffle(g2) if shuffle else nn.Sequential(),
                ChannelShuffle(oup // 2)
                if shuffle and oup % 2 == 0 and y3 != 0 else nn.Sequential(), )
        elif g2 == 0:
            self.layers = nn.Sequential(
                GroupConv(inp, hidden_dim2, gs1),
                DYShiftMax(
                    hidden_dim2,
                    hidden_dim2,
                    act_max=2.0,
                    act_relu=False,
                    init_a=[1.0, 0.0],
                    reduction=act_reduction,
                    init_b=[0.0, 0.0],
                    g=gs1,
                    expansion=False) if y3 > 0 else nn.Sequential(), )
        else:
            self.layers = nn.Sequential(
                # Grouped 1x1 expansion conv.
                GroupConv(inp, hidden_dim2, gs1),
                DYShiftMax(
                    hidden_dim2,
                    hidden_dim2,
                    act_max=2.0,
                    act_relu=True if y1 == 2 else False,
                    init_a=init_a,
                    reduction=act_reduction,
                    init_b=init_b,
                    g=gs1,
                    expansion=False) if y1 > 0 else nn.ReLU6(),
                ChannelShuffle(gs1[1]) if shuffle else nn.Sequential(),
                # Depthwise conv (spatially separated unless depthsep=False).
                DepthSpatialSepConv(hidden_dim2, (1, 1), kernel_size, stride)
                if depthsep else
                DepthConv(hidden_dim2, hidden_dim2, kernel_size, stride),
                nn.Sequential(),
                DYShiftMax(
                    hidden_dim2,
                    hidden_dim2,
                    act_max=2.0,
                    act_relu=True if y2 == 2 else False,
                    init_a=init_a,
                    reduction=act_reduction,
                    init_b=init_b,
                    g=gs1,
                    expansion=True) if y2 > 0 else nn.ReLU6(),
                # Shuffle width depends on which activations are enabled.
                ChannelShuffle(hidden_dim2 // 4)
                if shuffle and y1 != 0 and y2 != 0 else nn.Sequential()
                if y1 == 0 and y2 == 0 else ChannelShuffle(hidden_dim2 // 2),
                # Grouped 1x1 projection back to `oup` channels.
                GroupConv(hidden_dim2, oup, (g1, g2)),
                DYShiftMax(
                    oup,
                    oup,
                    act_max=2.0,
                    act_relu=False,
                    init_a=[1.0, 0.0],
                    reduction=act_reduction // 2
                    if oup < hidden_dim2 else act_reduction,
                    init_b=[0.0, 0.0],
                    g=(g1, g2),
                    expansion=False) if y3 > 0 else nn.Sequential(),
                ChannelShuffle(g2) if shuffle else nn.Sequential(),
                ChannelShuffle(oup // 2)
                if shuffle and y3 != 0 else nn.Sequential(), )

    def forward(self, x):
        identity = x
        out = self.layers(x)
        if self.identity:
            # Residual add when input/output shapes match.
            out = out + identity
        return out
class MicroNet(nn.Layer):
    """MicroNet backbone for the text-recognition module.

    Args:
        mode (str): one of {'M0', 'M1', 'M2', 'M3'}, selecting one of the
            four model sizes (roughly 4M, 6M, 12M and 21M MAdds).
            Default: 'M3'.
        **kwargs: accepted for config compatibility; unused here.
    """

    def __init__(self, mode='M3', **kwargs):
        super(MicroNet, self).__init__()

        self.cfgs = get_micronet_config(mode)

        activation_cfg = {}
        # Per-variant stem width, stem groups, final width, and the
        # DYShiftMax coefficient baselines (init_a / init_b).
        if mode == 'M0':
            input_channel = 4
            stem_groups = 2, 2
            out_ch = 384
            activation_cfg['init_a'] = 1.0, 1.0
            activation_cfg['init_b'] = 0.0, 0.0
        elif mode == 'M1':
            input_channel = 6
            stem_groups = 3, 2
            out_ch = 576
            activation_cfg['init_a'] = 1.0, 1.0
            activation_cfg['init_b'] = 0.0, 0.0
        elif mode == 'M2':
            input_channel = 8
            stem_groups = 4, 2
            out_ch = 768
            activation_cfg['init_a'] = 1.0, 1.0
            activation_cfg['init_b'] = 0.0, 0.0
        elif mode == 'M3':
            input_channel = 12
            stem_groups = 4, 3
            out_ch = 432
            activation_cfg['init_a'] = 1.0, 0.5
            activation_cfg['init_b'] = 0.0, 0.5
        else:
            raise NotImplementedError("mode[" + mode +
                                      "_model] is not implemented!")

        # 3-channel image input; the stem downsamples by 2.
        layers = [StemLayer(3, input_channel, stride=2, groups=stem_groups)]

        for idx, val in enumerate(self.cfgs):
            # One config row per stage; see the M*_cfgs column comment.
            s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r = val

            t1 = (c1, c2)
            gs1 = (g1, g2)
            gs2 = (c3, g3, g4)
            activation_cfg['dy'] = [y1, y2, y3]
            activation_cfg['ratio'] = r

            output_channel = c
            # First block of the stage carries the stage stride `s`...
            layers.append(
                DYMicroBlock(
                    input_channel,
                    output_channel,
                    kernel_size=ks,
                    stride=s,
                    ch_exp=t1,
                    ch_per_group=gs1,
                    groups_1x1=gs2,
                    depthsep=True,
                    shuffle=True,
                    activation_cfg=activation_cfg, ))
            input_channel = output_channel
            # ...and the remaining n-1 blocks use stride 1.
            for i in range(1, n):
                layers.append(
                    DYMicroBlock(
                        input_channel,
                        output_channel,
                        kernel_size=ks,
                        stride=1,
                        ch_exp=t1,
                        ch_per_group=gs1,
                        groups_1x1=gs2,
                        depthsep=True,
                        shuffle=True,
                        activation_cfg=activation_cfg, ))
                input_channel = output_channel
        self.features = nn.Sequential(*layers)

        # Extra 2x2 max pool to further reduce the spatial resolution
        # before the recognition neck.
        self.pool = nn.MaxPool2D(kernel_size=2, stride=2, padding=0)

        # Reported channel count for downstream modules; make_divisible's
        # default divisor comes from det_mobilenet_v3.
        self.out_channels = make_divisible(out_ch)

    def forward(self, x):
        x = self.features(x)
        x = self.pool(x)
        return x
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录