diff --git a/ppocr/modeling/necks/__init__.py b/ppocr/modeling/necks/__init__.py index 9a4218d1a90fd5d4051969968ba1d3b145f25a92..aa1d77ba748e4818bd2af963abdb9e49f2bea95c 100644 --- a/ppocr/modeling/necks/__init__.py +++ b/ppocr/modeling/necks/__init__.py @@ -16,7 +16,7 @@ __all__ = ['build_neck'] def build_neck(config): - from .db_fpn import DBFPN, CAFPN, FEPAN + from .db_fpn import DBFPN, CAFPN, FEPAN, FEPANLite from .east_fpn import EASTFPN from .sast_fpn import SASTFPN from .rnn import SequenceEncoder @@ -26,8 +26,8 @@ def build_neck(config): from .fce_fpn import FCEFPN from .pren_fpn import PRENFPN support_dict = [ - 'FPN', 'FCEFPN', 'FEPAN', 'DBFPN', 'CAFPN', 'EASTFPN', 'SASTFPN', - 'SequenceEncoder', 'PGFPN', 'TableFPN', 'PRENFPN' + 'FPN', 'FCEFPN', 'FEPAN', 'FEPANLite', 'DBFPN', 'CAFPN', 'EASTFPN', + 'SASTFPN', 'SequenceEncoder', 'PGFPN', 'TableFPN', 'PRENFPN' ] module_name = config.pop('name') diff --git a/ppocr/modeling/necks/db_fpn.py b/ppocr/modeling/necks/db_fpn.py index 0c7593f49313bf67e131267a69f0b5a76d418e05..47a258e2a821366ef977251d47eb05bfeb96b45e 100644 --- a/ppocr/modeling/necks/db_fpn.py +++ b/ppocr/modeling/necks/db_fpn.py @@ -30,7 +30,7 @@ sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../../..'))) from ppocr.modeling.backbones.det_mobilenet_v3 import SEModule -class ConvBNLayer(nn.Layer): +class DSConv(nn.Layer): def __init__(self, in_channels, out_channels, @@ -40,7 +40,7 @@ class ConvBNLayer(nn.Layer): groups=None, if_act=True, act="relu"): - super(ConvBNLayer, self).__init__() + super(DSConv, self).__init__() if groups == None: groups = in_channels self.if_act = if_act @@ -268,23 +268,109 @@ class FEPAN(nn.Layer): self.out_channels = out_channels weight_attr = paddle.nn.initializer.KaimingUniform() - self.ins_conv = [] - self.inp_conv = [] + self.ins_conv = nn.LayerList() + self.inp_conv = nn.LayerList() # pan head - self.pan_head_conv = [] - self.pan_lat_conv = [] + self.pan_head_conv = nn.LayerList() + self.pan_lat_conv = nn.LayerList() for i in range(len(in_channels)): self.ins_conv.append( nn.Conv2D( - in_channels=in_channels[0], + in_channels=in_channels[i], out_channels=self.out_channels, kernel_size=1, weight_attr=ParamAttr(initializer=weight_attr), bias_attr=False)) self.inp_conv.append( - ConvBNLayer( + nn.Conv2D( + in_channels=self.out_channels, + out_channels=self.out_channels // 4, + kernel_size=9, + padding=4, + weight_attr=ParamAttr(initializer=weight_attr), + bias_attr=False)) + + if i > 0: + self.pan_head_conv.append( + nn.Conv2D( + in_channels=self.out_channels // 4, + out_channels=self.out_channels // 4, + kernel_size=3, + padding=1, + stride=2, + weight_attr=ParamAttr(initializer=weight_attr), + bias_attr=False)) + self.pan_lat_conv.append( + nn.Conv2D( + in_channels=self.out_channels // 4, + out_channels=self.out_channels // 4, + kernel_size=9, + padding=4, + weight_attr=ParamAttr(initializer=weight_attr), + bias_attr=False)) + + def forward(self, x): + c2, c3, c4, c5 = x + + in5 = self.ins_conv[3](c5) + in4 = self.ins_conv[2](c4) + in3 = self.ins_conv[1](c3) + in2 = self.ins_conv[0](c2) + + out4 = in4 + F.upsample( + in5, scale_factor=2, mode="nearest", align_mode=1) # 1/16 + out3 = in3 + F.upsample( + out4, scale_factor=2, mode="nearest", align_mode=1) # 1/8 + out2 = in2 + F.upsample( + out3, scale_factor=2, mode="nearest", align_mode=1) # 1/4 + + f5 = self.inp_conv[3](in5) + f4 = self.inp_conv[2](out4) + f3 = self.inp_conv[1](out3) + f2 = self.inp_conv[0](out2) + + pan3 = f3 + self.pan_head_conv[0](f2) + pan4 = f4 + self.pan_head_conv[1](pan3) + pan5 = f5 + self.pan_head_conv[2](pan4) + + p2 = self.pan_lat_conv[0](f2) + p3 = self.pan_lat_conv[1](pan3) + p4 = self.pan_lat_conv[2](pan4) + p5 = self.pan_lat_conv[3](pan5) + + p5 = F.upsample(p5, scale_factor=8, mode="nearest", align_mode=1) + p4 = F.upsample(p4, scale_factor=4, mode="nearest", align_mode=1) + p3 = F.upsample(p3, scale_factor=2, mode="nearest", align_mode=1) + + fuse = paddle.concat([p5, p4, p3, p2], axis=1) + return fuse + + +class FEPANLite(nn.Layer): + def __init__(self, in_channels, out_channels, **kwargs): + super(FEPANLite, self).__init__() + self.out_channels = out_channels + weight_attr = paddle.nn.initializer.KaimingUniform() + + self.ins_conv = nn.LayerList() + self.inp_conv = nn.LayerList() + # pan head + self.pan_head_conv = nn.LayerList() + self.pan_lat_conv = nn.LayerList() + + for i in range(len(in_channels)): + self.ins_conv.append( + nn.Conv2D( + in_channels=in_channels[i], + out_channels=self.out_channels, + kernel_size=1, + weight_attr=ParamAttr(initializer=weight_attr), + bias_attr=False)) + + self.inp_conv.append( + DSConv( in_channels=self.out_channels, out_channels=self.out_channels // 4, kernel_size=9, @@ -300,8 +386,9 @@ class FEPAN(nn.Layer): stride=2, weight_attr=ParamAttr(initializer=weight_attr), bias_attr=False)) + self.pan_lat_conv.append( - ConvBNLayer( + DSConv( in_channels=self.out_channels // 4, out_channels=self.out_channels // 4, kernel_size=9, @@ -327,14 +414,14 @@ class FEPAN(nn.Layer): f3 = self.inp_conv[1](out3) f2 = self.inp_conv[0](out2) - pan3 = f3 + self.pan_head[0](f2) - pan4 = f4 + self.pan_head[1](pan3) - pan5 = f5 + self.pan_head[2](pan4) + pan3 = f3 + self.pan_head_conv[0](f2) + pan4 = f4 + self.pan_head_conv[1](pan3) + pan5 = f5 + self.pan_head_conv[2](pan4) - p2 = self.pan_lat[0](f2) - p3 = self.pan_lat[1](pan3) - p4 = self.pan_lat[2](pan4) - p5 = self.pan_lat[3](pan5) + p2 = self.pan_lat_conv[0](f2) + p3 = self.pan_lat_conv[1](pan3) + p4 = self.pan_lat_conv[2](pan4) + p5 = self.pan_lat_conv[3](pan5) p5 = F.upsample(p5, scale_factor=8, mode="nearest", align_mode=1) p4 = F.upsample(p4, scale_factor=4, mode="nearest", align_mode=1)