# solo_mask_head.py

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
from paddle import fluid

from ppdet.core.workspace import register
from ppdet.modeling.ops import ConvNorm, DeformConvNorm

# Names exported when this module is star-imported.
__all__ = ['SOLOv2MaskHead']


@register
class SOLOv2MaskHead(object):
    """
    Mask feature head of SOLOv2.

    Every input level is passed through a small conv + GroupNorm + ReLU
    tower (with 2x bilinear upsampling after each conv for levels above 0),
    the per-level results are summed, and a final 1x1 conv produces the
    head output.

    Args:
        out_channels (int): The channel number produced by each tower conv.
        start_level (int): The position where the input starts.
        end_level (int): The position where the input ends. Together with
            ``start_level`` it determines how many inputs are consumed
            (``end_level - start_level + 1``).
        num_classes (int): Number of classes in SOLOv2MaskHead output; used
            as the filter count of the final 1x1 prediction conv.
        use_dcn_in_tower (bool): Whether to use dcn in tower or not.
    """
    __shared__ = ['num_classes']

    def __init__(self,
                 out_channels=128,
                 start_level=0,
                 end_level=3,
                 num_classes=81,
                 use_dcn_in_tower=False):
        super(SOLOv2MaskHead, self).__init__()
        assert start_level >= 0 and end_level >= start_level
        self.out_channels = out_channels
        self.start_level = start_level
        self.end_level = end_level
        self.num_classes = num_classes
        self.use_dcn_in_tower = use_dcn_in_tower
        # Index 0: regular conv, index 1: deformable conv.
        self.conv_type = [ConvNorm, DeformConvNorm]

    def _pick_conv(self):
        """Return the conv builder selected by ``use_dcn_in_tower``."""
        which = 1 if self.use_dcn_in_tower else 0
        return self.conv_type[which]

    def _gn_relu_conv(self, feat, num_filters, filter_size, layer_name):
        """
        Apply one stride-1 conv followed by GroupNorm (32 groups) and ReLU.

        Args:
            feat (Variable): input feature map.
            num_filters (int): number of output filters.
            filter_size (int): kernel size of the conv.
            layer_name (str): name of the conv; the norm is named
                ``layer_name + '.gn'``.
        Returns:
            Variable: output of the conv builder.
        """
        build_conv = self._pick_conv()
        return build_conv(
            input=feat,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=1,
            norm_type='gn',
            norm_groups=32,
            freeze_norm=False,
            act='relu',
            initializer=fluid.initializer.NormalInitializer(scale=0.01),
            norm_name=layer_name + '.gn',
            name=layer_name)

    def _convs_levels(self, conv_feat, level, name=None):
        """
        Build the conv tower for a single input level.

        Level 0 gets exactly one 3x3 conv and no upsampling. Any higher
        level gets ``level`` rounds of (3x3 conv, 2x bilinear upsample).

        Args:
            conv_feat (Variable): feature map of this level.
            level (int): index of the level, also the number of
                conv + upsample rounds for levels above 0.
            name (str): prefix for the conv / norm names; although it
                defaults to None, it is concatenated with strings, so
                callers have to pass it.
        Returns:
            Variable: the processed feature map.
        """
        if level == 0:
            return self._gn_relu_conv(conv_feat, self.out_channels, 3,
                                      name + '.conv' + str(level))

        feat = conv_feat
        for step in range(level):
            feat = self._gn_relu_conv(feat, self.out_channels, 3,
                                      name + '.conv' + str(step))
            feat = fluid.layers.resize_bilinear(
                feat,
                scale=2,
                name='upsample' + str(level) + str(step),
                align_corners=False,
                align_mode=0)
        return feat

    def _conv_pred(self, conv_feat):
        """
        Apply the final 1x1 conv + GroupNorm + ReLU with ``num_classes``
        output filters.

        Args:
            conv_feat (Variable): summed feature map of all levels.
        Returns:
            Variable: prediction feature map.
        """
        return self._gn_relu_conv(conv_feat, self.num_classes, 1,
                                  'mask_feat_head.conv_pred.0')

    def _append_coords(self, feat, batch_size):
        """
        Concatenate two coordinate channels (x, then y) to ``feat``.

        The coordinates are linearly spaced in [-1, 1] along the last axis
        (x) and the second-to-last axis (y) of ``feat`` and tiled
        ``batch_size`` times along the first axis.

        Args:
            feat (Variable): feature map to extend.
            batch_size (int): number of repetitions along the first axis.
        Returns:
            Variable: ``feat`` with the coordinate channels appended on axis 1.
        """
        # The op creation order below is kept stable on purpose, since
        # auto-generated names in a static graph depend on it.
        xs = paddle.linspace(
            -1, 1, fluid.layers.shape(feat)[-1], dtype='float32')
        ys = paddle.linspace(
            -1, 1, fluid.layers.shape(feat)[-2], dtype='float32')
        grid_y, grid_x = paddle.tensor.meshgrid([ys, xs])
        grid_x = fluid.layers.unsqueeze(grid_x, [0, 1])
        grid_y = fluid.layers.unsqueeze(grid_y, [0, 1])
        tile = [batch_size, 1, 1, 1]
        grid_y = fluid.layers.expand(grid_y, expand_times=tile)
        grid_x = fluid.layers.expand(grid_x, expand_times=tile)
        coords = fluid.layers.concat([grid_x, grid_y], axis=1)
        return fluid.layers.concat([feat, coords], axis=1)

    def get_output(self, inputs, batch_size=1):
        """
        Get SOLOv2MaskHead output.

        Args:
            inputs(list[Variable]): feature map from each necks with shape of [N, C, H, W]
            batch_size (int): batch size
        Returns:
            ins_pred(Variable): Output of SOLOv2MaskHead head
        """
        num_levels = self.end_level - self.start_level + 1
        fused = self._convs_levels(
            inputs[0], 0, name='mask_feat_head.convs_all_levels.0')
        for idx in range(1, num_levels):
            level_feat = inputs[idx]
            # Coordinate channels are attached only to the input at index 3.
            if idx == 3:
                level_feat = self._append_coords(level_feat, batch_size)
            branch = self._convs_levels(
                level_feat,
                idx,
                name='mask_feat_head.convs_all_levels.{}'.format(idx))
            fused = fluid.layers.elementwise_add(fused, branch)

        return self._conv_pred(fused)