diff --git a/ppocr/data/imaug/operators.py b/ppocr/data/imaug/operators.py index a83dc648389866b60a4ec5923553cea9fdf6fd38..ee5492de7961fbe494b6ab36f4a385312fe8b9eb 100644 --- a/ppocr/data/imaug/operators.py +++ b/ppocr/data/imaug/operators.py @@ -242,8 +242,8 @@ class DetResizeForTest(object): if 'image_shape' in kwargs: self.image_shape = kwargs['image_shape'] self.resize_type = 1 - if 'keep_ratio' in kwargs: ###### - self.keep_ratio = kwargs['keep_ratio'] ####### + if 'keep_ratio' in kwargs: + self.keep_ratio = kwargs['keep_ratio'] elif 'limit_side_len' in kwargs: self.limit_side_len = kwargs['limit_side_len'] self.limit_type = kwargs.get('limit_type', 'min') @@ -273,7 +273,7 @@ class DetResizeForTest(object): def resize_image_type1(self, img): resize_h, resize_w = self.image_shape ori_h, ori_w = img.shape[:2] # (h, w, c) - if self.keep_ratio: ######## + if self.keep_ratio: resize_w = ori_w * resize_h / ori_h N = math.ceil(resize_w / 32) resize_w = N * 32 diff --git a/ppocr/modeling/backbones/det_resnet.py b/ppocr/modeling/backbones/det_resnet.py index 4c75b663af9cc927e2007966d9d2987f4ac08d5a..3d8ce44508522a27e28408ca5915110eebb0fe78 100644 --- a/ppocr/modeling/backbones/det_resnet.py +++ b/ppocr/modeling/backbones/det_resnet.py @@ -1,4 +1,4 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/ppocr/modeling/necks/db_fpn.py b/ppocr/modeling/necks/db_fpn.py index b46d7c460d1f5ba277bec04a0c4a8fcb83fd5615..8c3f52a331db5daafab2a38c0a441edd44eb141d 100644 --- a/ppocr/modeling/necks/db_fpn.py +++ b/ppocr/modeling/necks/db_fpn.py @@ -105,7 +105,7 @@ class DSConv(nn.Layer): class DBFPN(nn.Layer): - def __init__(self, in_channels, out_channels, use_asf=None, **kwargs): + def __init__(self, in_channels, out_channels, use_asf=False, **kwargs): super(DBFPN, self).__init__() self.out_channels = out_channels self.use_asf = use_asf @@ -164,7 +164,7 @@ class DBFPN(nn.Layer): weight_attr=ParamAttr(initializer=weight_attr), bias_attr=False) - if self.use_asf: + if self.use_asf is True: self.asf = ASFBlock(self.out_channels, self.out_channels // 4) def forward(self, x): @@ -192,7 +192,7 @@ class DBFPN(nn.Layer): fuse = paddle.concat([p5, p4, p3, p2], axis=1) - if self.use_asf: + if self.use_asf is True: fuse = self.asf(fuse, [p5, p4, p3, p2]) return fuse @@ -367,7 +367,19 @@ class LKPAN(nn.Layer): class ASFBlock(nn.Layer): + """ + This code is refered from: + https://github.com/MhLiao/DB/blob/master/decoders/feature_attention.py + """ + def __init__(self, in_channels, inter_channels, out_features_num=4): + """ + Adaptive Scale Fusion (ASF) block of DBNet++ + Args: + in_channels: the number of channels in the input data + inter_channels: the number of middle channels + out_features_num: the number of fused stages + """ super(ASFBlock, self).__init__() weight_attr = paddle.nn.initializer.KaimingUniform() self.in_channels = in_channels @@ -375,39 +387,38 @@ class ASFBlock(nn.Layer): self.out_features_num = out_features_num self.conv = nn.Conv2D(in_channels, inter_channels, 3, padding=1) - self.attention_block_1 = nn.Sequential( + self.spatial_scale = nn.Sequential( #Nx1xHxW nn.Conv2D( - 1, - 1, - 3, + in_channels=1, + out_channels=1, + kernel_size=3, bias_attr=False, padding=1, weight_attr=ParamAttr(initializer=weight_attr)), nn.ReLU(), nn.Conv2D( - 1, - 1, - 1, + in_channels=1, + out_channels=1, + kernel_size=1, bias_attr=False, weight_attr=ParamAttr(initializer=weight_attr)), nn.Sigmoid()) - self.attention_block_2 = nn.Sequential( + self.channel_scale = nn.Sequential( nn.Conv2D( - inter_channels, - out_features_num, - 1, + in_channels=inter_channels, + out_channels=out_features_num, + kernel_size=1, bias_attr=False, weight_attr=ParamAttr(initializer=weight_attr)), nn.Sigmoid()) def forward(self, fuse_features, features_list): fuse_features = self.conv(fuse_features) - attention_scores = self.attention_block_1( - paddle.mean( - fuse_features, axis=1, keepdim=True)) + fuse_features - attention_scores = self.attention_block_2(attention_scores) + spatial_x = paddle.mean(fuse_features, axis=1, keepdim=True) + attention_scores = self.spatial_scale(spatial_x) + fuse_features + attention_scores = self.channel_scale(attention_scores) assert len(features_list) == self.out_features_num out_list = []