bfp.py

# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import sys

from collections import OrderedDict

from paddle import fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Xavier
from paddle.fluid.regularizer import L2Decay

from ppdet.core.workspace import register

from .nonlocal_helper import add_space_nonlocal
from .fpn import FPN

__all__ = ['BFP']


@register
class BFP(object):
    """
    Libra R-CNN, see https://arxiv.org/abs/1904.02701
    Args:
        base_neck (dict): basic neck before balanced feature pyramid (bfp)
        refine_level (int): index of integration and refine level of bfp
        refine_type (str): refine type, None, conv or nonlocal
        nonlocal_reduction (float): channel reduction level if refine_type is nonlocal
        with_bias (bool): whether the nonlocal module contains bias
        with_scale (bool): whether to scale feature in nonlocal module or not
    """
    __inject__ = ['base_neck']

    def __init__(self,
                 base_neck=FPN().__dict__,
                 refine_level=2,
                 refine_type="nonlocal",
                 nonlocal_reduction=1,
                 with_bias=True,
                 with_scale=False):
        if isinstance(base_neck, dict):
            self.base_neck = FPN(**base_neck)
        self.refine_level = refine_level
        self.refine_type = refine_type
        self.nonlocal_reduction = nonlocal_reduction
        self.with_bias = with_bias
        self.with_scale = with_scale

    def get_output(self, body_dict):
        # top-down order
        res_dict, spatial_scale = self.base_neck.get_output(body_dict)
        res_dict = self.get_output_bfp(res_dict)
        return res_dict, spatial_scale

    def get_output_bfp(self, body_dict):
        body_name_list = list(body_dict.keys())
        num_backbone_stages = len(body_name_list)

        self.num_levels = len(body_dict)

        # step 1: gather multi-level features by resize and average
        feats = []
        refine_level_name = body_name_list[self.refine_level]

        for i in range(self.num_levels):
            curr_fpn_name = body_name_list[i]
            pool_stride = 2**(i - self.refine_level)
            pool_size = [
                body_dict[refine_level_name].shape[2],
                body_dict[refine_level_name].shape[3]
            ]
            if i > self.refine_level:
                gathered = fluid.layers.pool2d(
                    input=body_dict[curr_fpn_name],
                    pool_type='max',
                    pool_size=pool_stride,
                    pool_stride=pool_stride,
                    ceil_mode=True, )
            else:
                gathered = self._resize_input_tensor(
                    body_dict[curr_fpn_name], body_dict[refine_level_name],
                    1.0 / pool_stride)
            feats.append(gathered)

        bsf = sum(feats) / len(feats)

        # step 2: refine gathered features
        if self.refine_type == "conv":
            bsf = fluid.layers.conv2d(
                bsf,
                bsf.shape[1],
                filter_size=3,
                padding=1,
                param_attr=ParamAttr(name="bsf_w"),
                bias_attr=ParamAttr(name="bsf_b"),
                name="bsf")
        elif self.refine_type == "nonlocal":
            dim_in = bsf.shape[1]
            nonlocal_name = "nonlocal_bsf"
            bsf = add_space_nonlocal(
                bsf,
                bsf.shape[1],
                bsf.shape[1],
                nonlocal_name,
                int(bsf.shape[1] / self.nonlocal_reduction),
                with_bias=self.with_bias,
                with_scale=self.with_scale)

        # step 3: scatter refined features to multi-levels by a residual path
        fpn_dict = {}
        fpn_name_list = []
        for i in range(self.num_levels):
            curr_fpn_name = body_name_list[i]
            pool_stride = 2**(self.refine_level - i)
            if i >= self.refine_level:
                residual = self._resize_input_tensor(
                    bsf, body_dict[curr_fpn_name], 1.0 / pool_stride)
            else:
                residual = fluid.layers.pool2d(
                    input=bsf,
                    pool_type='max',
                    pool_size=pool_stride,
                    pool_stride=pool_stride,
                    ceil_mode=True, )

            fpn_dict[curr_fpn_name] = residual + body_dict[curr_fpn_name]
            fpn_name_list.append(curr_fpn_name)

        res_dict = OrderedDict([(k, fpn_dict[k]) for k in fpn_name_list])
        return res_dict

    def _resize_input_tensor(self, body_input, ref_output, scale):
        shape = fluid.layers.shape(ref_output)
        shape_hw = fluid.layers.slice(shape, axes=[0], starts=[2], ends=[4])
        out_shape_ = shape_hw
        out_shape = fluid.layers.cast(out_shape_, dtype='int32')
        out_shape.stop_gradient = True
        body_output = fluid.layers.resize_nearest(
            body_input, scale=scale, out_shape=out_shape)
        return body_output