diff --git a/ppocr/data/imaug/operators.py b/ppocr/data/imaug/operators.py index 09736515e7a388e191a12826e1e9e348e2fcde86..a83dc648389866b60a4ec5923553cea9fdf6fd38 100644 --- a/ppocr/data/imaug/operators.py +++ b/ppocr/data/imaug/operators.py @@ -238,9 +238,12 @@ class DetResizeForTest(object): def __init__(self, **kwargs): super(DetResizeForTest, self).__init__() self.resize_type = 0 + self.keep_ratio = False if 'image_shape' in kwargs: self.image_shape = kwargs['image_shape'] self.resize_type = 1 + if 'keep_ratio' in kwargs: ###### + self.keep_ratio = kwargs['keep_ratio'] ####### elif 'limit_side_len' in kwargs: self.limit_side_len = kwargs['limit_side_len'] self.limit_type = kwargs.get('limit_type', 'min') @@ -270,6 +273,10 @@ class DetResizeForTest(object): def resize_image_type1(self, img): resize_h, resize_w = self.image_shape ori_h, ori_w = img.shape[:2] # (h, w, c) + if self.keep_ratio: ######## + resize_w = ori_w * resize_h / ori_h + N = math.ceil(resize_w / 32) + resize_w = N * 32 ratio_h = float(resize_h) / ori_h ratio_w = float(resize_w) / ori_w img = cv2.resize(img, (int(resize_w), int(resize_h))) diff --git a/ppocr/modeling/backbones/__init__.py b/ppocr/modeling/backbones/__init__.py index 072d6e0f84d4126d256c26aa5baf17c9dc4e63df..0d8e60e93aee57dc7f977e4dbeefcff89250200d 100755 --- a/ppocr/modeling/backbones/__init__.py +++ b/ppocr/modeling/backbones/__init__.py @@ -18,9 +18,10 @@ __all__ = ["build_backbone"] def build_backbone(config, model_type): if model_type == "det" or model_type == "table": from .det_mobilenet_v3 import MobileNetV3 - from .det_resnet_vd import ResNet + from .det_resnet import ResNet + from .det_resnet_vd import ResNet_vd from .det_resnet_vd_sast import ResNet_SAST - support_dict = ["MobileNetV3", "ResNet", "ResNet_SAST"] + support_dict = ["MobileNetV3", "ResNet", "ResNet_vd", "ResNet_SAST"] elif model_type == "rec" or model_type == "cls": from .rec_mobilenet_v3 import MobileNetV3 from .rec_resnet_vd import ResNet diff --git 
a/ppocr/modeling/backbones/det_resnet_vd.py b/ppocr/modeling/backbones/det_resnet_vd.py index 8c955a4af377374f21e7c09f0d10952f2fe1ceed..5337e14c401be51344d54e881ce98f64c2ce0e7e 100644 --- a/ppocr/modeling/backbones/det_resnet_vd.py +++ b/ppocr/modeling/backbones/det_resnet_vd.py @@ -25,7 +25,7 @@ from paddle.vision.ops import DeformConv2D from paddle.regularizer import L2Decay from paddle.nn.initializer import Normal, Constant, XavierUniform -__all__ = ["ResNet"] +__all__ = ["ResNet_vd", "ConvBNLayer", "DeformableConvV2"] class DeformableConvV2(nn.Layer): @@ -104,6 +104,7 @@ class ConvBNLayer(nn.Layer): kernel_size, stride=1, groups=1, + dcn_groups=1, is_vd_mode=False, act=None, is_dcn=False): @@ -128,7 +129,7 @@ class ConvBNLayer(nn.Layer): kernel_size=kernel_size, stride=stride, padding=(kernel_size - 1) // 2, - groups=2, #groups, + groups=dcn_groups, #groups, bias_attr=False) self._batch_norm = nn.BatchNorm(out_channels, act=act) @@ -162,7 +163,8 @@ class BottleneckBlock(nn.Layer): kernel_size=3, stride=stride, act='relu', - is_dcn=is_dcn) + is_dcn=is_dcn, + dcn_groups=2) self.conv2 = ConvBNLayer( in_channels=out_channels, out_channels=out_channels * 4, @@ -238,14 +240,14 @@ class BasicBlock(nn.Layer): return y -class ResNet(nn.Layer): +class ResNet_vd(nn.Layer): def __init__(self, in_channels=3, layers=50, dcn_stage=None, out_indices=None, **kwargs): - super(ResNet, self).__init__() + super(ResNet_vd, self).__init__() self.layers = layers supported_layers = [18, 34, 50, 101, 152, 200] diff --git a/ppocr/modeling/necks/db_fpn.py b/ppocr/modeling/necks/db_fpn.py index 93ed2dbfd1fac9bf2d163c54d23a20e16b537981..b46d7c460d1f5ba277bec04a0c4a8fcb83fd5615 100644 --- a/ppocr/modeling/necks/db_fpn.py +++ b/ppocr/modeling/necks/db_fpn.py @@ -105,9 +105,10 @@ class DSConv(nn.Layer): class DBFPN(nn.Layer): - def __init__(self, in_channels, out_channels, **kwargs): + def __init__(self, in_channels, out_channels, use_asf=None, **kwargs): super(DBFPN, self).__init__() 
self.out_channels = out_channels + self.use_asf = use_asf weight_attr = paddle.nn.initializer.KaimingUniform() self.in2_conv = nn.Conv2D( @@ -163,6 +164,9 @@ class DBFPN(nn.Layer): weight_attr=ParamAttr(initializer=weight_attr), bias_attr=False) + if self.use_asf: + self.asf = ASFBlock(self.out_channels, self.out_channels // 4) + def forward(self, x): c2, c3, c4, c5 = x @@ -187,6 +191,10 @@ class DBFPN(nn.Layer): p3 = F.upsample(p3, scale_factor=2, mode="nearest", align_mode=1) fuse = paddle.concat([p5, p4, p3, p2], axis=1) + + if self.use_asf: + fuse = self.asf(fuse, [p5, p4, p3, p2]) + return fuse @@ -356,3 +364,53 @@ class LKPAN(nn.Layer): fuse = paddle.concat([p5, p4, p3, p2], axis=1) return fuse + + +class ASFBlock(nn.Layer): + def __init__(self, in_channels, inter_channels, out_features_num=4): + super(ASFBlock, self).__init__() + weight_attr = paddle.nn.initializer.KaimingUniform() + self.in_channels = in_channels + self.inter_channels = inter_channels + self.out_features_num = out_features_num + self.conv = nn.Conv2D(in_channels, inter_channels, 3, padding=1) + + self.attention_block_1 = nn.Sequential( + #Nx1xHxW + nn.Conv2D( + 1, + 1, + 3, + bias_attr=False, + padding=1, + weight_attr=ParamAttr(initializer=weight_attr)), + nn.ReLU(), + nn.Conv2D( + 1, + 1, + 1, + bias_attr=False, + weight_attr=ParamAttr(initializer=weight_attr)), + nn.Sigmoid()) + + self.attention_block_2 = nn.Sequential( + nn.Conv2D( + inter_channels, + out_features_num, + 1, + bias_attr=False, + weight_attr=ParamAttr(initializer=weight_attr)), + nn.Sigmoid()) + + def forward(self, fuse_features, features_list): + fuse_features = self.conv(fuse_features) + attention_scores = self.attention_block_1( + paddle.mean( + fuse_features, axis=1, keepdim=True)) + fuse_features + attention_scores = self.attention_block_2(attention_scores) + assert len(features_list) == self.out_features_num + + out_list = [] + for i in range(self.out_features_num): + out_list.append(attention_scores[:, i:i + 1] * 
class DecayLearningRate(object):
    """
    Polynomial learning rate decay from ``learning_rate`` down to ``end_lr``.

    The schedule is built with ``lr.PolynomialDecay`` (``lr`` is expected to
    be available at module scope) and, per that scheduler, follows
        new_lr = (lr - end_lr) * (1 - step / decay_steps) ** power + end_lr
    where ``decay_steps = step_each_epoch * epochs``, i.e. the decay is
    expressed in iterations rather than epochs.

    Args:
        learning_rate(float): initial learning rate
        step_each_epoch(int): steps each epoch
        epochs(int): total training epochs
        factor(float): Power of the polynomial, should be greater than 0.0
            to get learning rate decay. Default: 0.9
        end_lr(float): The minimum final learning rate. Default: 0.
        **kwargs: extra configuration keys; accepted and ignored.
    """

    def __init__(self,
                 learning_rate,
                 step_each_epoch,
                 epochs,
                 factor=0.9,
                 end_lr=0,
                 **kwargs):
        super(DecayLearningRate, self).__init__()
        self.learning_rate = learning_rate
        # Kept as in the original implementation; not read by __call__.
        self.epochs = epochs + 1
        self.factor = factor
        # Honour the caller-supplied floor. It used to be hard-coded to 0,
        # which silently discarded any configured ``end_lr``.
        self.end_lr = end_lr
        # Total number of iterations over which the rate decays.
        self.decay_steps = step_each_epoch * epochs

    def __call__(self):
        """Build and return the ``lr.PolynomialDecay`` scheduler."""
        learning_rate = lr.PolynomialDecay(
            learning_rate=self.learning_rate,
            decay_steps=self.decay_steps,
            power=self.factor,
            end_lr=self.end_lr)
        return learning_rate