add db++

142b5e9d · wangjingyeye · 961dca72 · 142b5e9d · 142b5e9d · 142b5e9d
5 changed files
--- a/ppocr/data/imaug/operators.py
+++ b/ppocr/data/imaug/operators.py
@@ -238,9 +238,12 @@ class DetResizeForTest(object):
    def __init__(self, **kwargs):
        super(DetResizeForTest, self).__init__()
        self.resize_type = 0
+        self.keep_ratio = False
        if 'image_shape' in kwargs:
            self.image_shape = kwargs['image_shape']
            self.resize_type = 1
+            if 'keep_ratio' in kwargs:  ######
+                self.keep_ratio = kwargs['keep_ratio']  #######
        elif 'limit_side_len' in kwargs:
            self.limit_side_len = kwargs['limit_side_len']
            self.limit_type = kwargs.get('limit_type', 'min')
@@ -270,6 +273,10 @@ class DetResizeForTest(object):
    def resize_image_type1(self, img):
        resize_h, resize_w = self.image_shape
        ori_h, ori_w = img.shape[:2]  # (h, w, c)
+        if self.keep_ratio:  ########
+            resize_w = ori_w * resize_h / ori_h
+            N = math.ceil(resize_w / 32)
+            resize_w = N * 32
        ratio_h = float(resize_h) / ori_h
        ratio_w = float(resize_w) / ori_w
        img = cv2.resize(img, (int(resize_w), int(resize_h)))

--- a/ppocr/modeling/backbones/__init__.py
+++ b/ppocr/modeling/backbones/__init__.py
@@ -18,9 +18,10 @@ __all__ = ["build_backbone"]
 def build_backbone(config, model_type):
    if model_type == "det" or model_type == "table":
        from .det_mobilenet_v3 import MobileNetV3
-        from .det_resnet_vd import ResNet
+        from .det_resnet import ResNet
+        from .det_resnet_vd import ResNet_vd
        from .det_resnet_vd_sast import ResNet_SAST
-        support_dict = ["MobileNetV3", "ResNet", "ResNet_SAST"]
+        support_dict = ["MobileNetV3", "ResNet", "ResNet_vd", "ResNet_SAST"]
    elif model_type == "rec" or model_type == "cls":
        from .rec_mobilenet_v3 import MobileNetV3
        from .rec_resnet_vd import ResNet

--- a/ppocr/modeling/backbones/det_resnet_vd.py
+++ b/ppocr/modeling/backbones/det_resnet_vd.py
@@ -25,7 +25,7 @@ from paddle.vision.ops import DeformConv2D
 from paddle.regularizer import L2Decay
 from paddle.nn.initializer import Normal, Constant, XavierUniform

-__all__ = ["ResNet"]
+__all__ = ["ResNet_vd", "ConvBNLayer", "DeformableConvV2"]


 class DeformableConvV2(nn.Layer):
@@ -104,6 +104,7 @@ class ConvBNLayer(nn.Layer):
                 kernel_size,
                 stride=1,
                 groups=1,
+                 dcn_groups=1,
                 is_vd_mode=False,
                 act=None,
                 is_dcn=False):
@@ -128,7 +129,7 @@ class ConvBNLayer(nn.Layer):
                kernel_size=kernel_size,
                stride=stride,
                padding=(kernel_size - 1) // 2,
-                groups=2,  #groups,
+                groups=dcn_groups,  #groups,
                bias_attr=False)
        self._batch_norm = nn.BatchNorm(out_channels, act=act)

@@ -162,7 +163,8 @@ class BottleneckBlock(nn.Layer):
            kernel_size=3,
            stride=stride,
            act='relu',
-            is_dcn=is_dcn)
+            is_dcn=is_dcn,
+            dcn_groups=2)
        self.conv2 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels * 4,
@@ -238,14 +240,14 @@ class BasicBlock(nn.Layer):
        return y


-class ResNet(nn.Layer):
+class ResNet_vd(nn.Layer):
    def __init__(self,
                 in_channels=3,
                 layers=50,
                 dcn_stage=None,
                 out_indices=None,
                 **kwargs):
-        super(ResNet, self).__init__()
+        super(ResNet_vd, self).__init__()

        self.layers = layers
        supported_layers = [18, 34, 50, 101, 152, 200]

--- a/ppocr/modeling/necks/db_fpn.py
+++ b/ppocr/modeling/necks/db_fpn.py
@@ -105,9 +105,10 @@ class DSConv(nn.Layer):


 class DBFPN(nn.Layer):
-    def __init__(self, in_channels, out_channels, **kwargs):
+    def __init__(self, in_channels, out_channels, use_asf=None, **kwargs):
        super(DBFPN, self).__init__()
        self.out_channels = out_channels
+        self.use_asf = use_asf
        weight_attr = paddle.nn.initializer.KaimingUniform()

        self.in2_conv = nn.Conv2D(
@@ -163,6 +164,9 @@ class DBFPN(nn.Layer):
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)

+        if self.use_asf:
+            self.asf = ASFBlock(self.out_channels, self.out_channels // 4)
+
    def forward(self, x):
        c2, c3, c4, c5 = x

@@ -187,6 +191,10 @@ class DBFPN(nn.Layer):
        p3 = F.upsample(p3, scale_factor=2, mode="nearest", align_mode=1)

        fuse = paddle.concat([p5, p4, p3, p2], axis=1)
+
+        if self.use_asf:
+            fuse = self.asf(fuse, [p5, p4, p3, p2])
+
        return fuse


@@ -356,3 +364,53 @@ class LKPAN(nn.Layer):

        fuse = paddle.concat([p5, p4, p3, p2], axis=1)
        return fuse
+
+
+class ASFBlock(nn.Layer):
+    """
+    Attention block that re-weights a list of feature maps and concatenates
+    them. Introduced with the DB++ change; "ASF" presumably stands for
+    Adaptive Scale Fusion -- TODO confirm against the DB++ paper.
+    Args:
+        in_channels(int): channels of the fused map passed to forward
+        inter_channels(int): channels produced by the 3x3 reduction conv
+        out_features_num(int): number of feature maps to weight. Default: 4
+    """
+
+    def __init__(self, in_channels, inter_channels, out_features_num=4):
+        super(ASFBlock, self).__init__()
+        self.in_channels = in_channels
+        self.inter_channels = inter_channels
+        self.out_features_num = out_features_num
+        kaiming = paddle.nn.initializer.KaimingUniform()
+
+        # 3x3 conv mapping the fused input to inter_channels; no
+        # weight_attr/bias_attr is passed, so the layer defaults apply.
+        self.conv = nn.Conv2D(in_channels, inter_channels, 3, padding=1)
+
+        # Spatial attention working on a single-channel map (Nx1xHxW).
+        spatial_layers = [
+            nn.Conv2D(
+                1,
+                1,
+                3,
+                padding=1,
+                bias_attr=False,
+                weight_attr=ParamAttr(initializer=kaiming)),
+            nn.ReLU(),
+            nn.Conv2D(
+                1,
+                1,
+                1,
+                bias_attr=False,
+                weight_attr=ParamAttr(initializer=kaiming)),
+            nn.Sigmoid(),
+        ]
+        self.attention_block_1 = nn.Sequential(*spatial_layers)
+
+        # 1x1 conv + sigmoid yielding one score channel per feature map.
+        score_layers = [
+            nn.Conv2D(
+                inter_channels,
+                out_features_num,
+                1,
+                bias_attr=False,
+                weight_attr=ParamAttr(initializer=kaiming)),
+            nn.Sigmoid(),
+        ]
+        self.attention_block_2 = nn.Sequential(*score_layers)
+
+    def forward(self, fuse_features, features_list):
+        """
+        Args:
+            fuse_features(Tensor): fused map fed to the reduction conv
+                (assumed NCHW with in_channels channels -- TODO confirm)
+            features_list(list): out_features_num feature maps, one per
+                score channel
+        Returns:
+            Tensor: the weighted feature maps concatenated along axis 1
+        """
+        reduced = self.conv(fuse_features)
+        # Mean over channels -> Nx1xHxW; its attention map is added back
+        # onto the reduced features before scoring.
+        channel_mean = paddle.mean(reduced, axis=1, keepdim=True)
+        scores = self.attention_block_1(channel_mean) + reduced
+        scores = self.attention_block_2(scores)
+        assert len(features_list) == self.out_features_num
+
+        # Score channel i scales the i-th feature map.
+        weighted = [
+            scores[:, idx:idx + 1] * features_list[idx]
+            for idx in range(self.out_features_num)
+        ]
+        return paddle.concat(weighted, axis=1)
--- a/ppocr/optimizer/learning_rate.py
+++ b/ppocr/optimizer/learning_rate.py
@@ -308,3 +308,38 @@ class Const(object):
                end_lr=self.learning_rate,
                last_epoch=self.last_epoch)
        return learning_rate
+
+
+class DecayLearningRate(object):
+    """
+    Polynomial learning rate decay built on lr.PolynomialDecay
+    new_lr = (lr - end_lr) * (1 - step/decay_steps)**factor + end_lr
+    with decay_steps = step_each_epoch * epochs.
+    Args:
+        learning_rate(float): initial learning rate
+        step_each_epoch(int): steps each epoch
+        epochs(int): total training epochs
+        factor(float): Power of the polynomial, should be greater than 0.0
+            to get learning rate decay. Default: 0.9
+        end_lr(float): The minimum final learning rate. Default: 0.
+        **kwargs: extra config entries; accepted and ignored
+    """
+
+    def __init__(self,
+                 learning_rate,
+                 step_each_epoch,
+                 epochs,
+                 factor=0.9,
+                 end_lr=0,
+                 **kwargs):
+        super(DecayLearningRate, self).__init__()
+        self.learning_rate = learning_rate
+        self.epochs = epochs + 1
+        self.factor = factor
+        # Use the caller's end_lr: it was previously hard-coded to 0, which
+        # silently discarded the documented argument.
+        self.end_lr = end_lr
+        # The decay spans the whole run, counted in optimizer steps.
+        self.decay_steps = step_each_epoch * epochs
+
+    def __call__(self):
+        """Build and return the lr.PolynomialDecay scheduler."""
+        learning_rate = lr.PolynomialDecay(
+            learning_rate=self.learning_rate,
+            decay_steps=self.decay_steps,
+            power=self.factor,
+            end_lr=self.end_lr)
+        return learning_rate