mask_head.py 4.4 KB
Newer Older
F
FDInSky 已提交
1 2 3 4 5 6 7 8
import paddle.fluid as fluid
from paddle.fluid.dygraph import Layer

from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Normal, MSRA
from paddle.fluid.regularizer import L2Decay
from paddle.fluid.dygraph.nn import Conv2D, Pool2D
from ppdet.core.workspace import register
9
# TODO: delete this import and use inject instead
F
FDInSky 已提交
10 11 12 13 14
from ..backbone.resnet import Blocks


@register
class MaskFeat(Layer):
    """Produce mask features by upsampling per-RoI features.

    The RoI features are selected (training) or re-extracted (inference)
    and then passed through a stride-2 transposed convolution with ReLU.

    Args:
        feat_in (int): input channels of the transposed convolution.
        feat_out (int): output channels of the transposed convolution.
        mask_stages (int): number of stages to build a layer for; stage
            ``i > 0`` appends ``'_<i>'`` to the parameter names.
    """

    def __init__(self, feat_in=2048, feat_out=256, mask_stages=1):
        super(MaskFeat, self).__init__()
        self.feat_in = feat_in
        self.feat_out = feat_out
        self.mask_stages = mask_stages

        # NOTE: every iteration rebinds ``self.upsample``, so only the layer
        # built for the last stage stays reachable through that attribute.
        for i in range(self.mask_stages):
            if i == 0:
                postfix = ''
            else:
                postfix = '_' + str(i)
            self.upsample = fluid.dygraph.Conv2DTranspose(
                num_channels=self.feat_in,
                num_filters=self.feat_out,
                filter_size=2,
                stride=2,
                act='relu',
                param_attr=ParamAttr(
                    name='conv5_mask_w' + postfix,
                    initializer=MSRA(uniform=False)),
                bias_attr=ParamAttr(
                    name='conv5_mask_b' + postfix,
                    learning_rate=2.,
                    regularizer=L2Decay(0.)))

    def forward(self, inputs):
        """Build the mask feature map for the current stage.

        Args:
            inputs (dict): must contain ``'stage'``, ``'mode'`` and the entry
                ``'bbox_head_<stage>'``. In ``'train'`` mode it also needs
                ``'rois_has_mask_int32'``; in ``'infer'`` mode it needs
                ``'predicted_bbox'``, ``'im_info'``, ``'predicted_bbox_nums'``
                and ``'res4'``.

        Returns:
            dict: ``{'mask_feat': <upsampled RoI features>}``.

        Raises:
            ValueError: if ``inputs['mode']`` is neither ``'train'`` nor
                ``'infer'``, or if the shared RoI extractor needed at
                inference time is not callable.
        """
        bbox_head_out = inputs['bbox_head_' + str(inputs['stage'])]
        mode = inputs['mode']
        if mode == 'train':
            # Reuse the bbox head's res5 output, keeping only the rows
            # indexed by 'rois_has_mask_int32'.
            x = bbox_head_out['res5']
            rois_feat = fluid.layers.gather(x, inputs['rois_has_mask_int32'])
        elif mode == 'infer':
            # presumably columns 2+ of predicted_bbox are box coordinates and
            # im_info[:, 2] is the image scale - TODO confirm
            rois = inputs['predicted_bbox'][:, 2:] * inputs['im_info'][:, 2]
            rois_num = inputs['predicted_bbox_nums']
            # TODO: optimize here
            shared_roi_ext = bbox_head_out['shared_roi_extractor']
            if not callable(shared_roi_ext):
                # Without the extractor there is nothing to feed the
                # upsample layer; fail with a clear message instead of an
                # UnboundLocalError further down.
                raise ValueError(
                    "MaskFeat needs a callable 'shared_roi_extractor' from "
                    "the bbox head in 'infer' mode")
            rois_feat = shared_roi_ext(inputs['res4'], rois, rois_num)

            # The shared res5 block is optional: apply it only when provided.
            shared_res5 = bbox_head_out['shared_res5_block']
            if callable(shared_res5):
                rois_feat = shared_res5(rois_feat)
        else:
            raise ValueError(
                "MaskFeat supports mode 'train' or 'infer', got "
                "{!r}".format(mode))

        # upsample
        y = self.upsample(rois_feat)
        outs = {'mask_feat': y}
        return outs


@register
class MaskHead(Layer):
    """Mask head: per-class mask logits on top of a mask feature module.

    Args:
        mask_feat: injected module; it is called with the ``inputs`` dict
            and must return a dict containing ``'mask_feat'``.
        num_classes (int): number of output channels of the logits conv.
        feat_in (int): input channels of the logits conv.
        resolution (int): side length used by ``loss`` when flattening the
            logits to ``num_classes * resolution * resolution`` columns.
        mask_stages (int): number of stages to build a layer for; stage
            ``i > 0`` appends ``'_<i>'`` to the parameter names.
    """
    __shared__ = ['num_classes']
    __inject__ = ['mask_feat']

    def __init__(self,
                 mask_feat,
                 num_classes=81,
                 feat_in=256,
                 resolution=14,
                 mask_stages=1):
        super(MaskHead, self).__init__()
        self.mask_feat = mask_feat
        self.feat_in = feat_in
        self.resolution = resolution
        self.num_classes = num_classes
        self.mask_stages = mask_stages

        # NOTE: every iteration rebinds ``self.mask_fcn_logits``, so only the
        # layer built for the last stage stays reachable through it.
        for i in range(self.mask_stages):
            if i == 0:
                postfix = ''
            else:
                postfix = '_' + str(i)
            self.mask_fcn_logits = fluid.dygraph.Conv2D(
                num_channels=self.feat_in,
                num_filters=self.num_classes,
                filter_size=1,
                param_attr=ParamAttr(
                    name='mask_fcn_logits_w' + postfix,
                    initializer=MSRA(uniform=False)),
                bias_attr=ParamAttr(
                    name='mask_fcn_logits_b' + postfix,
                    learning_rate=2.,
                    regularizer=L2Decay(0.0)))

    def forward(self, inputs):
        """Compute mask logits from the mask features.

        Args:
            inputs (dict): forwarded unchanged to ``self.mask_feat``; must
                contain ``'mode'`` and, in ``'infer'`` mode,
                ``'predicted_bbox'``.

        Returns:
            dict: the dict returned by ``self.mask_feat`` with an extra
            ``'mask_logits'`` entry.
        """
        # feat
        outs = self.mask_feat(inputs)
        x = outs['mask_feat']
        # logits
        mask_logits = self.mask_fcn_logits(x)
        if inputs['mode'] == 'infer':
            pred_bbox = inputs['predicted_bbox']
            # Total element count of pred_bbox, computed with a plain loop so
            # the method does not depend on `reduce` / `np`, which this
            # module never imports. An empty shape counts as one element.
            numel = 1
            for dim in pred_bbox.shape:
                numel *= dim
            # When pred_bbox holds exactly one element, it is passed through
            # in place of the logits (presumably a "no detection"
            # placeholder - TODO confirm against the bbox post-processing).
            if numel == 1:
                mask_logits = pred_bbox

        outs['mask_logits'] = mask_logits

        return outs

    def loss(self, inputs):
        """Compute the mask loss.

        Args:
            inputs (dict): must contain ``'mask_logits'`` and
                ``'mask_int32'``.

        Returns:
            The sum of the sigmoid cross-entropy between the flattened
            logits and the float-cast labels; labels equal to ``-1`` are
            ignored and the loss is normalized (``normalize=True``).
        """
        # One row per RoI: num_classes * resolution * resolution logits.
        reshape_dim = self.num_classes * self.resolution * self.resolution
        mask_logits = fluid.layers.reshape(inputs['mask_logits'],
                                           (-1, reshape_dim))
        mask_label = fluid.layers.cast(x=inputs['mask_int32'], dtype='float32')

        loss_mask = fluid.layers.sigmoid_cross_entropy_with_logits(
            x=mask_logits, label=mask_label, ignore_index=-1, normalize=True)
        loss_mask = fluid.layers.reduce_sum(loss_mask, name='loss_mask')

        return loss_mask