# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle import ParamAttr
import os
import sys
from ppocr.modeling.necks.intracl import IntraCLBlock

__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../../..')))

from ppocr.modeling.backbones.det_mobilenet_v3 import SEModule


class DSConv(nn.Layer):
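    """
    Depthwise separable convolution block: a depthwise convolution is
    followed by a 1x1 expansion to 4x the input channels (with an optional
    relu/hardswish activation) and a 1x1 projection to out_channels. When
    in_channels != out_channels, a 1x1 projection of the input is added as
    a residual connection.
    """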
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 padding,
                 stride=1,
                 groups=None,
                 if_act=True,
                 act="relu",
                 **kwargs):
        super(DSConv, self).__init__()
        if groups is None:
            groups = in_channels
        self.if_act = if_act
        self.act = act
        self.conv1 = nn.Conv2D(
            in_channels=in_channels,
            out_channels=in_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            bias_attr=False)

        self.bn1 = nn.BatchNorm(num_channels=in_channels, act=None)

        self.conv2 = nn.Conv2D(
            in_channels=in_channels,
            out_channels=int(in_channels * 4),
            kernel_size=1,
            stride=1,
            bias_attr=False)

        self.bn2 = nn.BatchNorm(num_channels=int(in_channels * 4), act=None)

        self.conv3 = nn.Conv2D(
            in_channels=int(in_channels * 4),
            out_channels=out_channels,
            kernel_size=1,
            stride=1,
            bias_attr=False)
        self._c = [in_channels, out_channels]
        if in_channels != out_channels:
            self.conv_end = nn.Conv2D(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=1,
                stride=1,
                bias_attr=False)

    def forward(self, inputs):

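        # depthwise conv -> BN, 1x1 expand -> BN (+ act), 1x1 project (+ residual)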
        x = self.conv1(inputs)
        x = self.bn1(x)

        x = self.conv2(x)
        x = self.bn2(x)
        if self.if_act:
            if self.act == "relu":
                x = F.relu(x)
            elif self.act == "hardswish":
                x = F.hardswish(x)
            else:
                raise ValueError(
                    "Unsupported activation '{}'; expected 'relu' or 'hardswish'.".
                    format(self.act))

        x = self.conv3(x)
        if self._c[0] != self._c[1]:
            x = x + self.conv_end(inputs)
        return x


class DBFPN(nn.Layer):
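    """
    The FPN neck of DBNet. Lateral 1x1 convolutions project the four
    backbone feature maps to out_channels, a top-down pathway adds each
    upsampled coarser level to the next finer one, and 3x3 convolutions
    reduce every level to out_channels // 4. All levels are then upsampled
    to 1/4 scale and concatenated, so the fused map has out_channels
    channels.

    Args:
        in_channels: channel counts of the four backbone stages (c2..c5)
        out_channels: channel count of the fused output map
        use_asf: whether to apply the Adaptive Scale Fusion (ASF) block of
            DBNet++ to the fused feature map
    """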
    def __init__(self, in_channels, out_channels, use_asf=False, **kwargs):
        super(DBFPN, self).__init__()
        self.out_channels = out_channels
        self.use_asf = use_asf
        weight_attr = paddle.nn.initializer.KaimingUniform()

        self.in2_conv = nn.Conv2D(
            in_channels=in_channels[0],
            out_channels=self.out_channels,
            kernel_size=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.in3_conv = nn.Conv2D(
            in_channels=in_channels[1],
            out_channels=self.out_channels,
            kernel_size=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.in4_conv = nn.Conv2D(
            in_channels=in_channels[2],
            out_channels=self.out_channels,
            kernel_size=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.in5_conv = nn.Conv2D(
            in_channels=in_channels[3],
            out_channels=self.out_channels,
            kernel_size=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.p5_conv = nn.Conv2D(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.p4_conv = nn.Conv2D(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.p3_conv = nn.Conv2D(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.p2_conv = nn.Conv2D(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)

        if self.use_asf:
            self.asf = ASFBlock(self.out_channels, self.out_channels // 4)

    def forward(self, x):
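        # c2..c5 are the backbone feature maps at strides 4, 8, 16 and 32.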
        c2, c3, c4, c5 = x

        in5 = self.in5_conv(c5)
        in4 = self.in4_conv(c4)
        in3 = self.in3_conv(c3)
        in2 = self.in2_conv(c2)

        out4 = in4 + F.upsample(
            in5, scale_factor=2, mode="nearest", align_mode=1)  # 1/16
        out3 = in3 + F.upsample(
            out4, scale_factor=2, mode="nearest", align_mode=1)  # 1/8
        out2 = in2 + F.upsample(
            out3, scale_factor=2, mode="nearest", align_mode=1)  # 1/4

        p5 = self.p5_conv(in5)
        p4 = self.p4_conv(out4)
        p3 = self.p3_conv(out3)
        p2 = self.p2_conv(out2)
        p5 = F.upsample(p5, scale_factor=8, mode="nearest", align_mode=1)
        p4 = F.upsample(p4, scale_factor=4, mode="nearest", align_mode=1)
        p3 = F.upsample(p3, scale_factor=2, mode="nearest", align_mode=1)

        fuse = paddle.concat([p5, p4, p3, p2], axis=1)

        if self.use_asf:
            fuse = self.asf(fuse, [p5, p4, p3, p2])

        return fuse


class RSELayer(nn.Layer):
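    """
    Residual squeeze-and-excitation layer: a convolution followed by an
    SEModule. With shortcut=True the SE output is added back to the
    convolution output.
    """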
    def __init__(self, in_channels, out_channels, kernel_size, shortcut=True):
        super(RSELayer, self).__init__()
        weight_attr = paddle.nn.initializer.KaimingUniform()
        self.out_channels = out_channels
        self.in_conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=self.out_channels,
            kernel_size=kernel_size,
            padding=int(kernel_size // 2),
            weight_attr=ParamAttr(initializer=weight_attr),
            bias_attr=False)
        self.se_block = SEModule(self.out_channels)
        self.shortcut = shortcut

    def forward(self, ins):
        x = self.in_conv(ins)
        if self.shortcut:
            out = x + self.se_block(x)
        else:
            out = self.se_block(x)
        return out


class RSEFPN(nn.Layer):
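    """
    A DBFPN variant that builds its lateral (1x1) and output (3x3)
    convolutions from RSELayer units. Passing intracl=True applies an
    IntraCLBlock to each of the four output maps before fusion.
    """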
    def __init__(self, in_channels, out_channels, shortcut=True, **kwargs):
        super(RSEFPN, self).__init__()
        self.out_channels = out_channels
        self.ins_conv = nn.LayerList()
        self.inp_conv = nn.LayerList()
        self.intracl = False
        if kwargs.get('intracl') is True:
            self.intracl = True
            self.incl1 = IntraCLBlock(self.out_channels // 4, reduce_factor=2)
            self.incl2 = IntraCLBlock(self.out_channels // 4, reduce_factor=2)
            self.incl3 = IntraCLBlock(self.out_channels // 4, reduce_factor=2)
            self.incl4 = IntraCLBlock(self.out_channels // 4, reduce_factor=2)

        for i in range(len(in_channels)):
            self.ins_conv.append(
                RSELayer(
                    in_channels[i],
                    out_channels,
                    kernel_size=1,
                    shortcut=shortcut))
            self.inp_conv.append(
                RSELayer(
                    out_channels,
                    out_channels // 4,
                    kernel_size=3,
                    shortcut=shortcut))

    def forward(self, x):
        c2, c3, c4, c5 = x

        in5 = self.ins_conv[3](c5)
        in4 = self.ins_conv[2](c4)
        in3 = self.ins_conv[1](c3)
        in2 = self.ins_conv[0](c2)

        out4 = in4 + F.upsample(
            in5, scale_factor=2, mode="nearest", align_mode=1)  # 1/16
        out3 = in3 + F.upsample(
            out4, scale_factor=2, mode="nearest", align_mode=1)  # 1/8
        out2 = in2 + F.upsample(
            out3, scale_factor=2, mode="nearest", align_mode=1)  # 1/4

        p5 = self.inp_conv[3](in5)
        p4 = self.inp_conv[2](out4)
        p3 = self.inp_conv[1](out3)
        p2 = self.inp_conv[0](out2)

        if self.intracl:
            p5 = self.incl4(p5)
            p4 = self.incl3(p4)
            p3 = self.incl2(p3)
            p2 = self.incl1(p2)

        p5 = F.upsample(p5, scale_factor=8, mode="nearest", align_mode=1)
        p4 = F.upsample(p4, scale_factor=4, mode="nearest", align_mode=1)
        p3 = F.upsample(p3, scale_factor=2, mode="nearest", align_mode=1)

        fuse = paddle.concat([p5, p4, p3, p2], axis=1)
        return fuse


class LKPAN(nn.Layer):
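    """
    Large-kernel PAN: lateral 1x1 convolutions and a top-down FPN pathway
    are followed by a bottom-up path-aggregation stage. The output
    convolutions use 9x9 kernels, implemented as plain Conv2D in 'large'
    mode or as depthwise-separable DSConv in 'lite' mode.
    """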
    def __init__(self, in_channels, out_channels, mode='large', **kwargs):
        super(LKPAN, self).__init__()
        self.out_channels = out_channels
        weight_attr = paddle.nn.initializer.KaimingUniform()

        self.ins_conv = nn.LayerList()
        self.inp_conv = nn.LayerList()
        # pan head
        self.pan_head_conv = nn.LayerList()
        self.pan_lat_conv = nn.LayerList()

        if mode.lower() == 'lite':
            p_layer = DSConv
        elif mode.lower() == 'large':
            p_layer = nn.Conv2D
        else:
            raise ValueError(
                "mode can only be one of ['lite', 'large'], but received {}".
                format(mode))

        for i in range(len(in_channels)):
            self.ins_conv.append(
                nn.Conv2D(
                    in_channels=in_channels[i],
                    out_channels=self.out_channels,
                    kernel_size=1,
                    weight_attr=ParamAttr(initializer=weight_attr),
                    bias_attr=False))

            self.inp_conv.append(
                p_layer(
                    in_channels=self.out_channels,
                    out_channels=self.out_channels // 4,
                    kernel_size=9,
                    padding=4,
                    weight_attr=ParamAttr(initializer=weight_attr),
                    bias_attr=False))

            if i > 0:
                self.pan_head_conv.append(
                    nn.Conv2D(
                        in_channels=self.out_channels // 4,
                        out_channels=self.out_channels // 4,
                        kernel_size=3,
                        padding=1,
                        stride=2,
                        weight_attr=ParamAttr(initializer=weight_attr),
                        bias_attr=False))
            self.pan_lat_conv.append(
                p_layer(
                    in_channels=self.out_channels // 4,
                    out_channels=self.out_channels // 4,
                    kernel_size=9,
                    padding=4,
                    weight_attr=ParamAttr(initializer=weight_attr),
                    bias_attr=False))

        self.intracl = False
        if kwargs.get('intracl') is True:
            self.intracl = True
            self.incl1 = IntraCLBlock(self.out_channels // 4, reduce_factor=2)
            self.incl2 = IntraCLBlock(self.out_channels // 4, reduce_factor=2)
            self.incl3 = IntraCLBlock(self.out_channels // 4, reduce_factor=2)
            self.incl4 = IntraCLBlock(self.out_channels // 4, reduce_factor=2)

    def forward(self, x):
        c2, c3, c4, c5 = x

        in5 = self.ins_conv[3](c5)
        in4 = self.ins_conv[2](c4)
        in3 = self.ins_conv[1](c3)
        in2 = self.ins_conv[0](c2)

        out4 = in4 + F.upsample(
            in5, scale_factor=2, mode="nearest", align_mode=1)  # 1/16
        out3 = in3 + F.upsample(
            out4, scale_factor=2, mode="nearest", align_mode=1)  # 1/8
        out2 = in2 + F.upsample(
            out3, scale_factor=2, mode="nearest", align_mode=1)  # 1/4

        f5 = self.inp_conv[3](in5)
        f4 = self.inp_conv[2](out4)
        f3 = self.inp_conv[1](out3)
        f2 = self.inp_conv[0](out2)

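        # bottom-up path aggregation: stride-2 convs merge finer levels upward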
        pan3 = f3 + self.pan_head_conv[0](f2)
        pan4 = f4 + self.pan_head_conv[1](pan3)
        pan5 = f5 + self.pan_head_conv[2](pan4)

        p2 = self.pan_lat_conv[0](f2)
        p3 = self.pan_lat_conv[1](pan3)
        p4 = self.pan_lat_conv[2](pan4)
        p5 = self.pan_lat_conv[3](pan5)

        if self.intracl:
            p5 = self.incl4(p5)
            p4 = self.incl3(p4)
            p3 = self.incl2(p3)
            p2 = self.incl1(p2)

        p5 = F.upsample(p5, scale_factor=8, mode="nearest", align_mode=1)
        p4 = F.upsample(p4, scale_factor=4, mode="nearest", align_mode=1)
        p3 = F.upsample(p3, scale_factor=2, mode="nearest", align_mode=1)

        fuse = paddle.concat([p5, p4, p3, p2], axis=1)
        return fuse


class ASFBlock(nn.Layer):
    """
    This code is adapted from:
        https://github.com/MhLiao/DB/blob/master/decoders/feature_attention.py
    """

    def __init__(self, in_channels, inter_channels, out_features_num=4):
        """
        Adaptive Scale Fusion (ASF) block of DBNet++
        Args:
            in_channels: the number of channels in the input data
            inter_channels: the number of middle channels
            out_features_num: the number of fused stages
        """
        super(ASFBlock, self).__init__()
        weight_attr = paddle.nn.initializer.KaimingUniform()
        self.in_channels = in_channels
        self.inter_channels = inter_channels
        self.out_features_num = out_features_num
        self.conv = nn.Conv2D(in_channels, inter_channels, 3, padding=1)

        self.spatial_scale = nn.Sequential(
            # N x 1 x H x W
            nn.Conv2D(
                in_channels=1,
                out_channels=1,
                kernel_size=3,
                bias_attr=False,
                padding=1,
                weight_attr=ParamAttr(initializer=weight_attr)),
            nn.ReLU(),
            nn.Conv2D(
                in_channels=1,
                out_channels=1,
                kernel_size=1,
                bias_attr=False,
                weight_attr=ParamAttr(initializer=weight_attr)),
            nn.Sigmoid())

        self.channel_scale = nn.Sequential(
            nn.Conv2D(
                in_channels=inter_channels,
                out_channels=out_features_num,
                kernel_size=1,
                bias_attr=False,
                weight_attr=ParamAttr(initializer=weight_attr)),
            nn.Sigmoid())

    def forward(self, fuse_features, features_list):
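        # spatial attention over the channel-mean map, then one sigmoid weight per stage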
        fuse_features = self.conv(fuse_features)
        spatial_x = paddle.mean(fuse_features, axis=1, keepdim=True)
        attention_scores = self.spatial_scale(spatial_x) + fuse_features
        attention_scores = self.channel_scale(attention_scores)
        assert len(features_list) == self.out_features_num

        out_list = []
        for i in range(self.out_features_num):
            out_list.append(attention_scores[:, i:i + 1] * features_list[i])
        return paddle.concat(out_list, axis=1)
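

if __name__ == "__main__":
    # Minimal smoke test. Illustrative only, not part of the original module:
    # the backbone channel list and the 640x640 input size are assumptions
    # chosen for demonstration; real values come from the detector config.
    fpn = DBFPN(in_channels=[16, 24, 56, 480], out_channels=96)
    feats = [
        paddle.rand([1, 16, 160, 160]),  # c2, stride 4
        paddle.rand([1, 24, 80, 80]),  # c3, stride 8
        paddle.rand([1, 56, 40, 40]),  # c4, stride 16
        paddle.rand([1, 480, 20, 20]),  # c5, stride 32
    ]
    print(fpn(feats).shape)  # expected: [1, 96, 160, 160]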