mask_head.py 4.5 KB
Newer Older
F
FDInSky 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
import paddle.fluid as fluid
from paddle.fluid.dygraph import Layer

from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Normal, MSRA
from paddle.fluid.regularizer import L2Decay
from paddle.fluid.dygraph.nn import Conv2D, Pool2D
from ppdet.core.workspace import register
from ..ops import RoIExtractor
from ..backbone.resnet import Blocks


@register
class MaskFeat(Layer):
    """Build the upsampled per-RoI feature map consumed by the mask head.

    The layer owns one transposed convolution (2x2 filter, stride 2, ReLU
    activation) mapping ``feat_in`` channels to ``feat_out`` channels.

    Args:
        feat_in: Input channel count of the transposed convolution.
        feat_out: Output channel count of the transposed convolution.
        mask_roi_extractor: Either an already-built extractor object or a
            dict of keyword arguments from which a ``RoIExtractor`` is
            constructed. It is stored on the instance; ``forward`` itself
            uses the extractor supplied through ``inputs``.
        stage: When non-zero, ``'_<stage>'`` is appended to the names of the
            weight and bias parameters of the transposed convolution.
    """

    __inject__ = ['mask_roi_extractor']

    # NOTE(review): the default below is evaluated once, when the class is
    # defined. It is left untouched because the config machinery behind
    # ``register`` / ``__inject__`` may rely on it - confirm before changing.
    def __init__(self,
                 feat_in=2048,
                 feat_out=256,
                 mask_roi_extractor=RoIExtractor().__dict__,
                 stage=0):
        super(MaskFeat, self).__init__()
        self.feat_in = feat_in
        self.feat_out = feat_out
        self.mask_roi_extractor = mask_roi_extractor
        if isinstance(mask_roi_extractor, dict):
            # A plain dict is treated as constructor kwargs.
            self.mask_roi_extractor = RoIExtractor(**mask_roi_extractor)
        # Stage 0 keeps the bare parameter names; other stages get a suffix.
        if stage == 0:
            postfix = ''
        else:
            postfix = '_' + str(stage)
        self.upsample = fluid.dygraph.Conv2DTranspose(
            num_channels=self.feat_in,
            num_filters=self.feat_out,
            filter_size=2,
            stride=2,
            act='relu',
            param_attr=ParamAttr(
                name='conv5_mask_w' + postfix, initializer=MSRA(uniform=False)),
            bias_attr=ParamAttr(
                name='conv5_mask_b' + postfix,
                learning_rate=2.,
                regularizer=L2Decay(0.)))

    def forward(self, inputs):
        """Select or compute RoI features and upsample them.

        Args:
            inputs: Dict read as follows.
                ``'mode'``: ``'train'`` or ``'infer'``.
                Train mode reads ``'res5'`` and ``'rois_has_mask_int32'``
                (the latter is used as the gather index into ``'res5'``).
                Infer mode reads ``'predicted_bbox'``, ``'im_info'``,
                ``'predicted_bbox_nums'``, ``'res4'``,
                ``'shared_roi_extractor'`` (must be callable) and
                ``'shared_res5_block'`` (applied only when callable).

        Returns:
            Dict ``{'mask_feat': <output of the transposed convolution>}``.

        Raises:
            ValueError: If ``inputs['mode']`` is neither ``'train'`` nor
                ``'infer'``.
            TypeError: If, in infer mode, ``inputs['shared_roi_extractor']``
                is not callable.
        """
        mode = inputs['mode']
        if mode == 'train':
            x = inputs['res5']
            rois_feat = fluid.layers.gather(x, inputs['rois_has_mask_int32'])
        elif mode == 'infer':
            # Columns 2.. of the predicted boxes multiplied by column 2 of
            # im_info (presumably the image scale - TODO confirm).
            rois = inputs['predicted_bbox'][:, 2:] * inputs['im_info'][:, 2]
            rois_num = inputs['predicted_bbox_nums']
            # TODO: optim here
            roi_extractor = inputs['shared_roi_extractor']
            if not callable(roi_extractor):
                # Without the extractor there are no RoI features at all;
                # fail here instead of with an UnboundLocalError further down.
                raise TypeError(
                    "inputs['shared_roi_extractor'] must be callable in "
                    "'infer' mode")
            rois_feat = roi_extractor(inputs['res4'], rois, rois_num)
            res5_block = inputs['shared_res5_block']
            if callable(res5_block):
                rois_feat = res5_block(rois_feat)
        else:
            raise ValueError(
                "unsupported mode {!r}; expected 'train' or 'infer'".format(
                    mode))
        # upsample
        y = self.upsample(rois_feat)
        outs = {'mask_feat': y}
        return outs


@register
class MaskHead(Layer):
    """Mask prediction head: mask features -> per-class mask logits.

    Runs the injected ``mask_feat`` layer and then a 1x1 convolution that
    maps ``feat_in`` channels to ``num_classes`` channels.

    Args:
        feat_in: Input channel count of the 1x1 logits convolution.
        resolution: Side length used by ``loss`` when flattening the logits
            to ``num_classes * resolution * resolution`` columns.
        num_classes: Output channel count of the logits convolution.
        mask_feat: Either an already-built feature layer or a dict of
            keyword arguments from which a ``MaskFeat`` is constructed.
        stage: When non-zero, ``'_<stage>'`` is appended to the names of the
            weight and bias parameters of the logits convolution.
    """

    __shared__ = ['num_classes']
    __inject__ = ['mask_feat']

    # NOTE(review): the default below is evaluated once, when the class is
    # defined. It is left untouched because the config machinery behind
    # ``register`` / ``__inject__`` may rely on it - confirm before changing.
    def __init__(self,
                 feat_in=256,
                 resolution=14,
                 num_classes=81,
                 mask_feat=MaskFeat().__dict__,
                 stage=0):
        super(MaskHead, self).__init__()
        self.feat_in = feat_in
        self.resolution = resolution
        self.num_classes = num_classes
        self.mask_feat = mask_feat
        if isinstance(mask_feat, dict):
            # A plain dict is treated as constructor kwargs.
            self.mask_feat = MaskFeat(**mask_feat)
        # Stage 0 keeps the bare parameter names; other stages get a suffix.
        if stage == 0:
            postfix = ''
        else:
            postfix = '_' + str(stage)
        self.mask_fcn_logits = fluid.dygraph.Conv2D(
            num_channels=self.feat_in,
            num_filters=self.num_classes,
            filter_size=1,
            param_attr=ParamAttr(
                name='mask_fcn_logits_w' + postfix,
                initializer=MSRA(uniform=False)),
            bias_attr=ParamAttr(
                name='mask_fcn_logits_b' + postfix,
                learning_rate=2.,
                regularizer=L2Decay(0.0)))

    def forward(self, inputs):
        """Compute mask features and mask logits.

        Args:
            inputs: Dict forwarded unchanged to ``self.mask_feat``. This
                method additionally reads ``'mode'`` and, in ``'infer'``
                mode, ``'predicted_bbox'``.

        Returns:
            The dict returned by ``self.mask_feat`` with an added
            ``'mask_logits'`` entry.
        """
        # feat
        outs = self.mask_feat(inputs)
        x = outs['mask_feat']
        # logits
        mask_logits = self.mask_fcn_logits(x)
        if inputs['mode'] == 'infer':
            pred_bbox = inputs['predicted_bbox']
            # Total element count of pred_bbox. Computed with a plain loop so
            # that the method does not depend on `reduce` / `np`, neither of
            # which is imported by this module.
            num_elements = 1
            for dim in pred_bbox.shape:
                num_elements *= dim
            # A pred_bbox holding exactly one element is passed through as
            # the logits (presumably a "no detection" placeholder - TODO
            # confirm against the bbox post-processing code).
            if num_elements == 1:
                mask_logits = pred_bbox

        outs['mask_logits'] = mask_logits

        return outs

    def loss(self, inputs):
        """Return the summed sigmoid cross-entropy mask loss.

        Args:
            inputs: Dict providing ``'mask_logits'`` (model output) and
                ``'mask_int32'`` (target).

        Returns:
            The result of ``reduce_sum`` over the element-wise loss.
        """
        # input needs (model_out, target)
        # Flatten the logits to one row of
        # num_classes * resolution * resolution values.
        reshape_dim = self.num_classes * self.resolution * self.resolution
        mask_logits = fluid.layers.reshape(inputs['mask_logits'],
                                           (-1, reshape_dim))
        # The target is cast to float32 before being used as the label.
        mask_label = fluid.layers.cast(x=inputs['mask_int32'], dtype='float32')

        # Labels equal to -1 are ignored; normalize=True is forwarded to the
        # loss op.
        loss_mask = fluid.layers.sigmoid_cross_entropy_with_logits(
            x=mask_logits, label=mask_label, ignore_index=-1, normalize=True)
        loss_mask = fluid.layers.reduce_sum(loss_mask, name='loss_mask')

        return loss_mask