#Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle.fluid as fluid
from ..common_functions import conv_bn_layer, deconv_bn_layer
from collections import OrderedDict


class SASTHead(object):
    """
    SAST: 
L
licx 已提交
27
        see arxiv: https://arxiv.org/abs/1908.05498
L
licx 已提交
28 29 30 31 32 33 34 35 36 37 38 39 40
    args:
        params(dict): the super parameters for network build
    """

    def __init__(self, params):
        self.model_name = params['model_name']
        self.with_cab = params['with_cab']
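
        # A minimal, illustrative params dict (these are the only keys this
        # head reads; the values shown are assumptions, not defaults):
        #   params = {'model_name': 'large', 'with_cab': True}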

    def FPN_Up_Fusion(self, blocks):
        """
        blocks{}: contain block_2, block_3, block_4, block_5, block_6, block_7 with
                1/4, 1/8, 1/16, 1/32, 1/64, 1/128 resolution.
        """
        f = [
            blocks['block_6'], blocks['block_5'], blocks['block_4'],
            blocks['block_3'], blocks['block_2']
        ]
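        # Coarsest-first: f[0] = block_6 (1/64) ... f[4] = block_2 (1/4).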
        num_outputs = [256, 256, 192, 192, 128]
        g = [None, None, None, None, None]
        h = [None, None, None, None, None]
        for i in range(5):
            h[i] = conv_bn_layer(
                input=f[i],
                num_filters=num_outputs[i],
                filter_size=1,
                stride=1,
                act=None,
                name='fpn_up_h' + str(i))

        for i in range(4):
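            # Top-down pathway: each step adds the lateral feature h[i],
            # refines it with a 3x3 conv, then deconvs to double the
            # resolution so it matches the next (finer) level. A shape
            # sketch, assuming a 512x512 input (block_6 = 8x8) and that
            # deconv_bn_layer upsamples 2x:
            #   g[0] = deconv(h[0]):              8x8x256 -> 16x16x256
            #   g[1] = deconv(conv(g[0] + h[1])): 16x16   -> 32x32x192
            #   g[2] = deconv(conv(g[1] + h[2])): 32x32   -> 64x64x192
            #   g[3] = deconv(conv(g[2] + h[3])): 64x64   -> 128x128x128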
            if i == 0:
                g[i] = deconv_bn_layer(
                    input=h[i],
                    num_filters=num_outputs[i + 1],
                    act=None,
                    name='fpn_up_g0')
                #print("g[{}] shape: {}".format(i, g[i].shape))
            else:
                g[i] = fluid.layers.elementwise_add(x=g[i - 1], y=h[i])
                g[i] = fluid.layers.relu(g[i])
                #g[i] = conv_bn_layer(input=g[i], num_filters=num_outputs[i],
                #                    filter_size=1, stride=1, act='relu')
                g[i] = conv_bn_layer(
                    input=g[i],
                    num_filters=num_outputs[i],
                    filter_size=3,
                    stride=1,
                    act='relu',
                    name='fpn_up_g%d_1' % i)
                g[i] = deconv_bn_layer(
                    input=g[i],
                    num_filters=num_outputs[i + 1],
                    act=None,
                    name='fpn_up_g%d_2' % i)
                #print("g[{}] shape: {}".format(i, g[i].shape))

        g[4] = fluid.layers.elementwise_add(x=g[3], y=h[4])
        g[4] = fluid.layers.relu(g[4])
        g[4] = conv_bn_layer(
            input=g[4],
            num_filters=num_outputs[4],
            filter_size=3,
            stride=1,
            act='relu',
            name='fpn_up_fusion_1')
        g[4] = conv_bn_layer(
            input=g[4],
            num_filters=num_outputs[4],
            filter_size=1,
            stride=1,
            act=None,
            name='fpn_up_fusion_2')

        return g[4]

    def FPN_Down_Fusion(self, blocks):
        """
        blocks{}: contain block_2, block_3, block_4, block_5, block_6, block_7 with
                1/4, 1/8, 1/16, 1/32, 1/64, 1/128 resolution.
        """
        f = [blocks['block_0'], blocks['block_1'], blocks['block_2']]
        num_outputs = [32, 64, 128]
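        # Channel widths grow as the resolution drops; the final 128 matches
        # the output width of FPN_Up_Fusion so the two paths can be summed.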
        g = [None, None, None]
        h = [None, None, None]
        for i in range(3):
            h[i] = conv_bn_layer(
                input=f[i],
                num_filters=num_outputs[i],
                filter_size=3,
                stride=1,
                act=None,
                name='fpn_down_h' + str(i))
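        # Bottom-up pathway: mirrors FPN_Up_Fusion, but halves the resolution
        # at each step with stride-2 convs instead of doubling it with deconvs.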
        for i in range(2):
            if i == 0:
                g[i] = conv_bn_layer(
                    input=h[i],
                    num_filters=num_outputs[i + 1],
                    filter_size=3,
                    stride=2,
                    act=None,
                    name='fpn_down_g0')
            else:
                g[i] = fluid.layers.elementwise_add(x=g[i - 1], y=h[i])
                g[i] = fluid.layers.relu(g[i])
                g[i] = conv_bn_layer(
                    input=g[i],
                    num_filters=num_outputs[i],
                    filter_size=3,
                    stride=1,
                    act='relu',
                    name='fpn_down_g%d_1' % i)
                g[i] = conv_bn_layer(
                    input=g[i],
                    num_filters=num_outputs[i + 1],
                    filter_size=3,
                    stride=2,
                    act=None,
                    name='fpn_down_g%d_2' % i)
            # print("g[{}] shape: {}".format(i, g[i].shape)) 
        g[2] = fluid.layers.elementwise_add(x=g[1], y=h[2])
        g[2] = fluid.layers.relu(g[2])
        g[2] = conv_bn_layer(
            input=g[2],
            num_filters=num_outputs[2],
            filter_size=3,
            stride=1,
            act='relu',
            name='fpn_down_fusion_1')
        g[2] = conv_bn_layer(
            input=g[2],
            num_filters=num_outputs[2],
            filter_size=1,
            stride=1,
            act=None,
            name='fpn_down_fusion_2')
        return g[2]

    def SAST_Header1(self, f_common):
        """Detector header."""
        # f_score: 1-channel text region score map (sigmoid applied below)
        f_score = conv_bn_layer(
            input=f_common,
            num_filters=64,
            filter_size=1,
            stride=1,
            act='relu',
            name='f_score1')
        f_score = conv_bn_layer(
            input=f_score,
            num_filters=64,
            filter_size=3,
            stride=1,
            act='relu',
            name='f_score2')
        f_score = conv_bn_layer(
            input=f_score,
            num_filters=128,
            filter_size=1,
            stride=1,
            act='relu',
            name='f_score3')
        f_score = conv_bn_layer(
            input=f_score,
            num_filters=1,
            filter_size=3,
            stride=1,
            name='f_score4')
        f_score = fluid.layers.sigmoid(f_score)
        # print("f_score shape: {}".format(f_score.shape))

        # f_border: 4-channel text border offset map
        f_border = conv_bn_layer(
            input=f_common,
            num_filters=64,
            filter_size=1,
            stride=1,
            act='relu',
            name='f_border1')
        f_border = conv_bn_layer(
            input=f_border,
            num_filters=64,
            filter_size=3,
            stride=1,
            act='relu',
            name='f_border2')
        f_border = conv_bn_layer(
            input=f_border,
            num_filters=128,
            filter_size=1,
            stride=1,
            act='relu',
            name='f_border3')
        f_border = conv_bn_layer(
            input=f_border,
            num_filters=4,
            filter_size=3,
            stride=1,
            name='f_border4')
        # print("f_border shape: {}".format(f_border.shape))
        return f_score, f_border

    def SAST_Header2(self, f_common):
        """Detector header."""
        # f_tvo: 8-channel text vertex offset map (4 corners x 2 coordinates)
        f_tvo = conv_bn_layer(
            input=f_common,
            num_filters=64,
            filter_size=1,
            stride=1,
            act='relu',
            name='f_tvo1')
        f_tvo = conv_bn_layer(
            input=f_tvo,
            num_filters=64,
            filter_size=3,
            stride=1,
            act='relu',
            name='f_tvo2')
        f_tvo = conv_bn_layer(
            input=f_tvo,
            num_filters=128,
            filter_size=1,
            stride=1,
            act='relu',
            name='f_tvo3')
        f_tvo = conv_bn_layer(
            input=f_tvo, num_filters=8, filter_size=3, stride=1, name='f_tvo4')
        # print("f_tvo shape: {}".format(f_tvo.shape))

        # f_tco: 2-channel text center offset map
        f_tco = conv_bn_layer(
            input=f_common,
            num_filters=64,
            filter_size=1,
            stride=1,
            act='relu',
            name='f_tco1')
        f_tco = conv_bn_layer(
            input=f_tco,
            num_filters=64,
            filter_size=3,
            stride=1,
            act='relu',
            name='f_tco2')
        f_tco = conv_bn_layer(
            input=f_tco,
            num_filters=128,
            filter_size=1,
            stride=1,
            act='relu',
            name='f_tco3')
        f_tco = conv_bn_layer(
            input=f_tco, num_filters=2, filter_size=3, stride=1, name='f_tco4')
        # print("f_tco shape: {}".format(f_tco.shape))
        return f_tvo, f_tco

    def cross_attention(self, f_common):
        """
        """
        f_shape = fluid.layers.shape(f_common)
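        # One shared set of theta/phi/g projections (128 channels each) feeds
        # two scaled dot-product attention passes: a horizontal pass folds
        # (N, H) into the batch axis so each image row of length W is an
        # independent attention sequence, and a vertical pass swaps H and W
        # first so each column attends over its H positions. Each pass ends
        # with a 1x1 conv-BN plus a residual shortcut.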
        f_theta = conv_bn_layer(
            input=f_common,
            num_filters=128,
            filter_size=1,
            stride=1,
            act='relu',
            name='f_theta')
        f_phi = conv_bn_layer(
            input=f_common,
            num_filters=128,
            filter_size=1,
            stride=1,
            act='relu',
            name='f_phi')
        f_g = conv_bn_layer(
            input=f_common,
            num_filters=128,
            filter_size=1,
            stride=1,
            act='relu',
            name='f_g')
        ### horizontal pass
        fh_theta = f_theta
        fh_phi = f_phi
        fh_g = f_g
        # flatten: NCHW -> NHWC, then fold batch and height together
        fh_theta = fluid.layers.transpose(fh_theta, [0, 2, 3, 1])
        fh_theta = fluid.layers.reshape(
            fh_theta, [f_shape[0] * f_shape[2], f_shape[3], 128])
        fh_phi = fluid.layers.transpose(fh_phi, [0, 2, 3, 1])
        fh_phi = fluid.layers.reshape(
            fh_phi, [f_shape[0] * f_shape[2], f_shape[3], 128])
        fh_g = fluid.layers.transpose(fh_g, [0, 2, 3, 1])
        fh_g = fluid.layers.reshape(fh_g,
                                    [f_shape[0] * f_shape[2], f_shape[3], 128])
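        # theta/phi/g are now (N*H, W, 128): one attention sequence per row.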
        # correlation: pairwise dot products between the positions of a row
        fh_attn = fluid.layers.matmul(fh_theta,
                                      fluid.layers.transpose(fh_phi, [0, 2, 1]))
        # scale by sqrt(d), d = 128, as in scaled dot-product attention
        fh_attn = fh_attn / (128**0.5)
        fh_attn = fluid.layers.softmax(fh_attn)
        #weighted sum
        fh_weight = fluid.layers.matmul(fh_attn, fh_g)
        fh_weight = fluid.layers.reshape(
            fh_weight, [f_shape[0], f_shape[2], f_shape[3], 128])
        # print("fh_weight: {}".format(fh_weight.shape))
        fh_weight = fluid.layers.transpose(fh_weight, [0, 3, 1, 2])
        fh_weight = conv_bn_layer(
            input=fh_weight,
            num_filters=128,
            filter_size=1,
            stride=1,
            name='fh_weight')
        # shortcut: 1x1 conv-BN on the input for the residual add
        fh_sc = conv_bn_layer(
            input=f_common,
            num_filters=128,
            filter_size=1,
            stride=1,
            name='fh_sc')
        f_h = fluid.layers.relu(fh_weight + fh_sc)
        ######
        # vertical pass: swap H and W, then run the same attention
        fv_theta = fluid.layers.transpose(f_theta, [0, 1, 3, 2])
        fv_phi = fluid.layers.transpose(f_phi, [0, 1, 3, 2])
        fv_g = fluid.layers.transpose(f_g, [0, 1, 3, 2])
        # flatten: NCHW -> NHWC, then fold batch and width together
        fv_theta = fluid.layers.transpose(fv_theta, [0, 2, 3, 1])
        fv_theta = fluid.layers.reshape(
            fv_theta, [f_shape[0] * f_shape[3], f_shape[2], 128])
        fv_phi = fluid.layers.transpose(fv_phi, [0, 2, 3, 1])
        fv_phi = fluid.layers.reshape(
            fv_phi, [f_shape[0] * f_shape[3], f_shape[2], 128])
        fv_g = fluid.layers.transpose(fv_g, [0, 2, 3, 1])
        fv_g = fluid.layers.reshape(fv_g,
                                    [f_shape[0] * f_shape[3], f_shape[2], 128])
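        # theta/phi/g are now (N*W, H, 128): one attention sequence per column.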
        # correlation: pairwise dot products between the positions of a column
        fv_attn = fluid.layers.matmul(fv_theta,
                                      fluid.layers.transpose(fv_phi, [0, 2, 1]))
        # scale by sqrt(d), d = 128, as in scaled dot-product attention
        fv_attn = fv_attn / (128**0.5)
        fv_attn = fluid.layers.softmax(fv_attn)
        #weighted sum
        fv_weight = fluid.layers.matmul(fv_attn, fv_g)
        fv_weight = fluid.layers.reshape(
            fv_weight, [f_shape[0], f_shape[3], f_shape[2], 128])
        # print("fv_weight: {}".format(fv_weight.shape))
        fv_weight = fluid.layers.transpose(fv_weight, [0, 3, 2, 1])
        fv_weight = conv_bn_layer(
            input=fv_weight,
            num_filters=128,
            filter_size=1,
            stride=1,
            name='fv_weight')
        # shortcut: 1x1 conv-BN on the input for the residual add
        fv_sc = conv_bn_layer(
            input=f_common,
            num_filters=128,
            filter_size=1,
            stride=1,
            name='fv_sc')
        f_v = fluid.layers.relu(fv_weight + fv_sc)
        ######
        f_attn = fluid.layers.concat([f_h, f_v], axis=1)
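        # Concatenating f_h and f_v gives 256 channels; the 1x1 conv below
        # fuses them back to 128.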
        f_attn = conv_bn_layer(
            input=f_attn,
            num_filters=128,
            filter_size=1,
            stride=1,
            act='relu',
            name='f_attn')
        return f_attn
    def __call__(self, blocks, with_cab=False):
        """
        Fuse different levels of feature map from backbone and predict results
        Args:
            blocks(list): feature maps from backbone
            with_cab(bool): whether use cross_attention
        Return: predicts
        """

        # for k, v in blocks.items():
        #     print(k, v.shape)

        # down fpn (bottom-up fusion of the shallow features)
        f_down = self.FPN_Down_Fusion(blocks)
        # print("f_down shape: {}".format(f_down.shape))
        # up fpn (top-down fusion of the deep features)
        f_up = self.FPN_Up_Fusion(blocks)
        # print("f_up shape: {}".format(f_up.shape))
        # fuse the two paths at 1/4 resolution
        f_common = fluid.layers.elementwise_add(x=f_down, y=f_up)
        f_common = fluid.layers.relu(f_common)
        # print("f_common: {}".format(f_common.shape))
        if self.with_cab:
            # print('enhance f_common with CAB.')
            f_common = self.cross_attention(f_common)

        f_score, f_border = self.SAST_Header1(f_common)
        f_tvo, f_tco = self.SAST_Header2(f_common)

        predicts = OrderedDict()
        predicts['f_score'] = f_score
        predicts['f_border'] = f_border
        predicts['f_tvo'] = f_tvo
        predicts['f_tco'] = f_tco
        return predicts
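

# A hedged smoke-test sketch (not part of the original file). It assumes the
# legacy static-graph fluid API, that this module is imported from its package
# (the relative import above prevents running it standalone; the import path
# below is hypothetical), and backbone features for a 256x256 input whose
# strides match the docstrings (block_0 at 1/1 down to block_6 at 1/64).
# The per-block channel counts are illustrative only.
#
#   import paddle.fluid as fluid
#   from ppocr.modeling.heads.det_sast_head import SASTHead
#
#   head = SASTHead({'model_name': 'large', 'with_cab': True})
#   blocks = {}
#   for i, ch in enumerate([16, 32, 64, 128, 256, 512, 512]):
#       size = 256 // (2**i)  # block_i at 1/(2**i) of the input resolution
#       blocks['block_%d' % i] = fluid.data(
#           name='block_%d' % i, shape=[1, ch, size, size], dtype='float32')
#   predicts = head(blocks)
#   for k, v in predicts.items():
#       print(k, v.shape)  # all four maps come out at 1/4 resolution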