# bifpn.py

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division

from paddle import fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.regularizer import L2Decay
from paddle.fluid.initializer import Constant, Xavier

from ppdet.core.workspace import register

__all__ = ['BiFPN']


class FusionConv(object):
    """Separable convolution block applied to a combined feature map.

    The block runs, in order: swish activation, a 3x3 depthwise convolution
    (one group per channel, no bias), a 1x1 pointwise convolution with bias,
    and batch normalization. No activation follows the batch normalization.

    Args:
        num_chan (int): number of output channels of both convolutions; also
            used as the group count of the depthwise convolution.
    """

    def __init__(self, num_chan):
        super(FusionConv, self).__init__()
        self.num_chan = num_chan

    def __call__(self, inputs, name=''):
        """Append the fusion block to the graph on top of ``inputs``.

        Args:
            inputs: feature map to process.
            name (str): prefix of the explicitly named convolution and
                batch-norm weights/biases created by this call.

        Returns:
            The output of the batch normalization layer.
        """
        channels = self.num_chan
        activated = fluid.layers.swish(inputs)

        # Depthwise stage: 3x3 kernel, groups == channels, no bias term.
        depthwise_weight = ParamAttr(initializer=Xavier(), name=name + '_dw_w')
        depthwise = fluid.layers.conv2d(
            activated,
            channels,
            filter_size=3,
            padding='SAME',
            groups=channels,
            param_attr=depthwise_weight,
            bias_attr=False,
            use_cudnn=False)

        # Pointwise stage: 1x1 kernel; the bias is excluded from weight decay.
        pointwise_weight = ParamAttr(initializer=Xavier(), name=name + '_pw_w')
        pointwise_bias = ParamAttr(regularizer=L2Decay(0.), name=name + '_pw_b')
        pointwise = fluid.layers.conv2d(
            depthwise,
            channels,
            filter_size=1,
            param_attr=pointwise_weight,
            bias_attr=pointwise_bias)

        # Batch norm: scale starts at 1; scale and shift carry no weight decay.
        bn_scale = ParamAttr(
            initializer=Constant(1.0),
            regularizer=L2Decay(0.),
            name=name + '_bn_w')
        bn_shift = ParamAttr(regularizer=L2Decay(0.), name=name + '_bn_b')
        return fluid.layers.batch_norm(
            pointwise,
            momentum=0.997,
            epsilon=1e-04,
            param_attr=bn_scale,
            bias_attr=bn_shift)


class BiFPNCell(object):
    """One BiFPN pass: a top-down path followed by a bottom-up path.

    Every fused node is a weighted sum of its inputs followed by a
    ``FusionConv``. The weights are learnable, passed through ReLU and
    normalized so that each node's weights sum to (almost) one.

    Args:
        num_chan (int): channel count handed to ``FusionConv``.
        levels (int): number of pyramid levels processed by the cell.
        inputs_layer_num (int): on the first pass, levels with an index below
            this value take their third bottom-up input from ``p4_2_p5_2``
            instead of ``inputs``.
    """

    def __init__(self, num_chan, levels=5, inputs_layer_num=3):
        """
        Node ids start from the input features and increase monotonically
        whenever a new node is added.

        [Node NO.] Here is an example for level P3 - P7:
          {3: [0,    8],
           4: [1, 7, 9],
           5: [2, 6, 10],
           6: [3, 5, 11],
           7: [4,    12]}

        [Related Edge]
          {'feat_level': 6, 'inputs_offsets': [3, 4]},      # for P6'
          {'feat_level': 5, 'inputs_offsets': [2, 5]},      # for P5'
          {'feat_level': 4, 'inputs_offsets': [1, 6]},      # for P4'
          {'feat_level': 3, 'inputs_offsets': [0, 7]},      # for P3"
          {'feat_level': 4, 'inputs_offsets': [1, 7, 8]},   # for P4"
          {'feat_level': 5, 'inputs_offsets': [2, 6, 9]},   # for P5"
          {'feat_level': 6, 'inputs_offsets': [3, 5, 10]},  # for P6"
          {'feat_level': 7, 'inputs_offsets': [4, 11]},     # for P7"

        P7 (4) --------------> P7" (12)
          |----------|           ↑
                     ↓           |
        P6 (3) --> P6' (5) --> P6" (11)
          |----------|----------↑↑
                     ↓           |
        P5 (2) --> P5' (6) --> P5" (10)
          |----------|----------↑↑
                     ↓           |
        P4 (1) --> P4' (7) --> P4" (9)
          |----------|----------↑↑
                     |----------↓|
        P3 (0) --------------> P3" (8)
        """

        super(BiFPNCell, self).__init__()
        self.levels = levels
        self.num_chan = num_chan
        self.inputs_layer_num = inputs_layer_num
        # Learnable weights of [P4", P5", P6"]
        self.trigates = fluid.layers.create_parameter(
            shape=[levels - 2, 3],
            dtype='float32',
            default_initializer=fluid.initializer.Constant(1.))
        # Learnable weights of [P6', P5', P4', P3", P7"]
        self.bigates = fluid.layers.create_parameter(
            shape=[levels, 2],
            dtype='float32',
            default_initializer=fluid.initializer.Constant(1.))
        # Added to the weight sums so the normalization never divides by zero.
        self.eps = 1e-4

    @staticmethod
    def _gate(gates, row, col):
        """Slice the single fusion weight ``gates[row, col]`` out of ``gates``."""
        return fluid.layers.slice(
            gates, axes=[0, 1], starts=[row, col], ends=[row + 1, col + 1])

    def __call__(self, inputs, cell_name='', is_first_time=False, p4_2_p5_2=None):
        """Build one top-down + bottom-up fusion pass.

        Args:
            inputs (list): ``levels`` feature maps; a higher index is the
                coarser level (index ``p + 1`` is upsampled by 2 to be fused
                into index ``p``).
            cell_name (str): prefix for the parameter names of the fusion
                convolutions.
            is_first_time (bool): whether this is the first pass. On the first
                pass the levels below ``inputs_layer_num`` use
                ``p4_2_p5_2[p - 1]`` as their third bottom-up input.
            p4_2_p5_2 (list): extra feature maps for the first pass; must be
                omitted or empty on later passes.

        Returns:
            list: ``levels`` fused feature maps, in the same order as
            ``inputs``.

        Raises:
            AssertionError: if ``inputs`` does not hold ``levels`` items, if
                too few extra feature maps are given on the first pass, or if
                extra feature maps are given on a later pass.
        """
        extras = [] if p4_2_p5_2 is None else p4_2_p5_2
        assert len(inputs) == self.levels, \
            'expected {} input levels, got {}'.format(self.levels, len(inputs))
        if is_first_time:
            # Only the intermediate levels 1 .. levels - 2 that are also below
            # inputs_layer_num read extras[p - 1]; require exactly that many.
            needed = max(0, min(self.inputs_layer_num - 1, self.levels - 2))
            assert len(extras) >= needed, \
                'the first pass needs {} extra feature maps, got {}'.format(
                    needed, len(extras))
        else:
            assert len(extras) == 0, \
                'p4_2_p5_2 is only used when is_first_time is True'

        # upsample operator
        def upsample(feat):
            return fluid.layers.resize_nearest(feat, scale=2.)

        # downsample operator
        def downsample(feat):
            return fluid.layers.pool2d(
                feat,
                pool_type='max',
                pool_size=3,
                pool_stride=2,
                pool_padding='SAME')

        # 3x3 fuse conv after OP combine
        fuse_conv = FusionConv(self.num_chan)

        # Normalize weights: clamp to non-negative, then divide each row by
        # its sum (plus eps).
        trigates = fluid.layers.relu(self.trigates)
        bigates = fluid.layers.relu(self.bigates)
        trigates /= fluid.layers.reduce_sum(trigates, dim=1, keep_dim=True) + self.eps
        bigates /= fluid.layers.reduce_sum(bigates, dim=1, keep_dim=True) + self.eps

        # Work on a copy; for five levels the order is [P3, P4, P5, P6, P7].
        feature_maps = list(inputs)

        # top down path: from the second-coarsest level down to the finest one
        for step in range(self.levels - 1):
            p = self.levels - step - 2
            w1 = self._gate(bigates, step, 0)
            w2 = self._gate(bigates, step, 1)
            above_layer = upsample(feature_maps[p + 1])
            feature_maps[p] = fuse_conv(
                w1 * above_layer + w2 * inputs[p],
                name='{}_tb_{}'.format(cell_name, step))

        # bottom up path: from the second-finest level up to the coarsest one
        last = self.levels - 1
        for p in range(1, self.levels):
            name = '{}_bt_{}'.format(cell_name, p)
            below = downsample(feature_maps[p - 1])
            if p == last:
                # The coarsest level has no top-down node, so it fuses only
                # two inputs and uses the last row of the two-input weights.
                w1 = self._gate(bigates, p, 0)
                w2 = self._gate(bigates, p, 1)
                feature_maps[p] = fuse_conv(
                    w1 * below + w2 * inputs[p], name=name)
                continue

            w1 = self._gate(trigates, p - 1, 0)
            w2 = self._gate(trigates, p - 1, 1)
            w3 = self._gate(trigates, p - 1, 2)
            if is_first_time and p < self.inputs_layer_num:
                skip = extras[p - 1]
            else:
                skip = inputs[p]
            feature_maps[p] = fuse_conv(
                w1 * feature_maps[p] + w2 * below + w3 * skip, name=name)
        return feature_maps


@register
class BiFPN(object):
    """
    Bidirectional Feature Pyramid Network, see https://arxiv.org/abs/1911.09070

    Args:
        num_chan (int): number of feature channels
        repeat (int): number of repeats of the BiFPN module
        levels (int): number of FPN levels, default: 5
    """

    def __init__(self, num_chan, repeat=3, levels=5):
        super(BiFPN, self).__init__()
        self.num_chan = num_chan
        self.repeat = repeat
        self.levels = levels

    def _conv_bn(self, feat, conv_name, bn_name):
        """Project ``feat`` to ``num_chan`` channels with a 1x1 conv + batch norm."""
        feat = fluid.layers.conv2d(
            feat,
            self.num_chan,
            filter_size=1,
            padding='SAME',
            param_attr=ParamAttr(initializer=Xavier()),
            bias_attr=ParamAttr(regularizer=L2Decay(0.)),
            name=conv_name)
        return fluid.layers.batch_norm(
            feat,
            momentum=0.997,
            epsilon=1e-04,
            param_attr=ParamAttr(initializer=Constant(1.0), regularizer=L2Decay(0.)),
            bias_attr=ParamAttr(regularizer=L2Decay(0.)),
            name=bn_name)

    def __call__(self, inputs):
        """Build the BiFPN on top of the given feature maps.

        Args:
            inputs (list): input feature maps, finest level first. Levels
                beyond ``len(inputs)`` (up to ``levels``) are generated by
                downsampling the last available level.

        Returns:
            list: the feature maps returned by the last BiFPN cell pass.
        """
        feats = []
        # Squeeze the channel with 1x1 conv; maps that already have num_chan
        # channels are used as they are.
        for idx, feat in enumerate(inputs):
            if feat.shape[1] != self.num_chan:
                feat = self._conv_bn(
                    feat,
                    'resample_conv_{}'.format(idx),
                    'resample_bn_{}'.format(idx))
            feats.append(feat)

        # Build additional input features that are not from backbone.
        # When the previous level already has num_chan channels (e.g. P7
        # built from P6) only the pooling is applied, without conv & bn.
        # https://github.com/google/automl/blob/master/efficientdet/keras/efficientdet_keras.py#L820
        for idx in range(len(inputs), self.levels):
            # Always start from the most recently built level.
            feat = feats[-1]
            if feat.shape[1] != self.num_chan:
                feat = self._conv_bn(
                    feat,
                    'resample_conv_{}'.format(idx),
                    'resample_bn_{}'.format(idx))
            feat = fluid.layers.pool2d(
                feat,
                pool_type='max',
                pool_size=3,
                pool_stride=2,
                pool_padding='SAME',
                name='resample_downsample_{}'.format(idx))
            feats.append(feat)

        # Every input except the first gets a second, independent 1x1 conv &
        # bn projection (p4_2, p5_2, ...); the first cell pass consumes them.
        p4_2_p5_2 = []
        for idx in range(1, len(inputs)):
            p4_2_p5_2.append(
                self._conv_bn(
                    inputs[idx],
                    'resample2_conv_{}'.format(idx),
                    'resample2_bn_{}'.format(idx)))

        # BiFPN, repeated. A single cell object is reused for every repeat;
        # only the first pass receives the extra projections.
        cell = BiFPNCell(self.num_chan, self.levels, len(inputs))
        for r in range(self.repeat):
            cell_name = 'bifpn_{}'.format(r)
            if r == 0:
                feats = cell(
                    feats,
                    cell_name=cell_name,
                    is_first_time=True,
                    p4_2_p5_2=p4_2_p5_2)
            else:
                feats = cell(feats, cell_name=cell_name)

        return feats