# bbox_head.py (4.6 KB)
# Committed by FDInSky
import paddle.fluid as fluid
from paddle.fluid.dygraph import Layer

from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Normal, MSRA
from paddle.fluid.regularizer import L2Decay
from paddle.fluid.dygraph.nn import Conv2D, Pool2D
from ppdet.core.workspace import register
from ..backbone.resnet import Blocks
from ..ops import RoIExtractor


@register
class BBoxFeat(Layer):
    """Extract per-RoI features and run them through the ``res5`` blocks.

    The layer applies ``roi_extractor`` to the ``res4`` feature map, feeds the
    result to a ``Blocks`` stage named ``"res5"``, then global-average-pools
    and squeezes the spatial axes to produce the feature consumed by the
    bbox head.

    Args:
        feat_in: Passed as ``ch_in`` to the ``res5`` ``Blocks``.
        feat_out: Passed as ``ch_out`` to the ``res5`` ``Blocks``.
        roi_extractor: Either a ready-to-call extractor object, or a dict of
            keyword arguments from which a ``RoIExtractor`` is built.
        stage: Accepted for interface compatibility; not used by this layer.
    """
    __inject__ = ['roi_extractor']

    def __init__(self,
                 feat_in=1024,
                 feat_out=512,
                 roi_extractor=RoIExtractor().__dict__,
                 stage=0):
        super(BBoxFeat, self).__init__()
        self.roi_extractor = roi_extractor
        # A dict is treated as constructor kwargs rather than an instance.
        if isinstance(roi_extractor, dict):
            self.roi_extractor = RoIExtractor(**roi_extractor)
        # NOTE: `stage` used to derive a name postfix here that was never
        # consumed; the dead local has been dropped.
        self.res5 = Blocks(
            "res5", ch_in=feat_in, ch_out=feat_out, count=3, stride=2)
        self.res5_pool = fluid.dygraph.Pool2D(
            pool_type='avg', global_pooling=True)

    def forward(self, inputs):
        """Compute RoI features for the bbox head.

        Args:
            inputs: Mapping that must contain ``'mode'`` and ``'res4'``.
                In ``'train'`` mode ``'rois'`` and ``'rois_nums'`` are read;
                in ``'infer'`` mode ``'rpn_rois'`` and ``'rpn_rois_nums'``.

        Returns:
            dict with keys ``'rois_feat'`` (extractor output), ``'res5'``
            (output of the res5 blocks), ``'bbox_feat'`` (pooled and squeezed
            feature), ``'shared_res5_block'`` and ``'shared_roi_extractor'``
            (the sub-modules themselves, exposed for reuse by other heads).

        Raises:
            ValueError: If ``inputs['mode']`` is neither ``'train'`` nor
                ``'infer'``.
        """
        if inputs['mode'] == 'train':
            rois = inputs['rois']
            rois_num = inputs['rois_nums']
        elif inputs['mode'] == 'infer':
            rois = inputs['rpn_rois']
            rois_num = inputs['rpn_rois_nums']
        else:
            # Raising a bare string is itself a TypeError in Python 3; use a
            # real exception so the message reaches the caller.
            raise ValueError("BBoxFeat only support train or infer mode!")

        rois_feat = self.roi_extractor(inputs['res4'], rois, rois_num)
        # TODO: add others
        y_res5 = self.res5(rois_feat)
        y = self.res5_pool(y_res5)
        # Drop axes 2 and 3, which the global pooling is expected to have
        # reduced to size 1.
        y = fluid.layers.squeeze(y, axes=[2, 3])
        outs = {
            'rois_feat': rois_feat,
            'res5': y_res5,
            "bbox_feat": y,
            'shared_res5_block': self.res5,
            'shared_roi_extractor': self.roi_extractor
        }
        return outs


@register
class BBoxHead(Layer):
    """Bounding-box head: classification scores and box deltas per RoI.

    Runs ``bbox_feat`` on the inputs and applies two ``Linear`` layers to its
    ``'bbox_feat'`` output: one with ``num_classes`` outputs (scores) and one
    with ``4 * num_classes`` outputs (deltas).

    Args:
        in_feat: Input dimension of both ``Linear`` layers.
        num_classes: Number of classes; sizes both output layers.
        bbox_feat: Either a ready-to-call feature layer, or a dict of keyword
            arguments from which a ``BBoxFeat`` is built.
        stage: When non-zero, ``'_<stage>'`` is appended to every parameter
            name created by this head.
    """
    __shared__ = ['num_classes']
    __inject__ = ['bbox_feat']

    # NOTE(review): the default below is evaluated once at import time and is
    # the attribute dict of a BBoxFeat instance -- confirm that it is only
    # ever consumed by the config/injection machinery and not passed back
    # into ``BBoxFeat(**...)`` as-is.
    def __init__(self,
                 in_feat=2048,
                 num_classes=81,
                 bbox_feat=BBoxFeat().__dict__,
                 stage=0):
        super(BBoxHead, self).__init__()
        self.num_classes = num_classes
        self.bbox_feat = bbox_feat
        # A dict is treated as constructor kwargs rather than an instance.
        if isinstance(bbox_feat, dict):
            self.bbox_feat = BBoxFeat(**bbox_feat)

        postfix = '' if stage == 0 else '_' + str(stage)

        def make_fc(multiplier, w_name, b_name, std):
            # Both output layers share everything except their width, their
            # parameter names and the std of the weight initializer.
            weight_attr = ParamAttr(
                name=w_name + postfix,
                initializer=Normal(
                    loc=0.0, scale=std))
            bias_attr = ParamAttr(
                name=b_name + postfix,
                learning_rate=2.,
                regularizer=L2Decay(0.))
            return fluid.dygraph.Linear(
                input_dim=in_feat,
                output_dim=multiplier * self.num_classes,
                act=None,
                param_attr=weight_attr,
                bias_attr=bias_attr)

        self.bbox_score = make_fc(1, 'cls_score_w', 'cls_score_b', 0.001)
        self.bbox_delta = make_fc(4, 'bbox_pred_w', 'bbox_pred_b', 0.01)

    def forward(self, inputs):
        """Run the feature layer and both prediction layers.

        Args:
            inputs: Mapping forwarded to ``bbox_feat``; ``inputs['mode']`` is
                also read here.

        Returns:
            The dict returned by ``bbox_feat`` extended with ``'bbox_score'``
            and ``'bbox_delta'``, plus ``'bbox_prob'`` (softmax of the
            scores) when ``inputs['mode'] == 'infer'``.
        """
        outs = self.bbox_feat(inputs)
        feat = outs['bbox_feat']
        scores = self.bbox_score(feat)
        deltas = self.bbox_delta(feat)
        outs['bbox_score'] = scores
        outs['bbox_delta'] = deltas
        if inputs['mode'] == 'infer':
            outs['bbox_prob'] = fluid.layers.softmax(scores, use_cudnn=False)
        return outs

    def loss(self, inputs):
        """Compute the classification and regression losses.

        Args:
            inputs: Mapping providing ``'labels_int32'``, ``'bbox_score'``,
                ``'bbox_delta'``, ``'bbox_targets'``,
                ``'bbox_inside_weights'`` and ``'bbox_outside_weights'``.

        Returns:
            Tuple ``(loss_bbox_cls, loss_bbox_reg)``: the mean softmax
            cross-entropy over the scores and the mean smooth-L1 loss over
            the deltas.
        """
        # Classification branch: labels are cast to int64 and excluded from
        # gradient computation.
        target_labels = fluid.layers.cast(
            x=inputs['labels_int32'], dtype='int64')
        target_labels.stop_gradient = True
        logits = fluid.layers.reshape(inputs['bbox_score'],
                                      (-1, self.num_classes))
        cross_entropy = fluid.layers.softmax_with_cross_entropy(
            logits=logits, label=target_labels)
        loss_bbox_cls = fluid.layers.reduce_mean(
            cross_entropy, name='loss_bbox_cls')

        # Regression branch.
        smooth_l1 = fluid.layers.smooth_l1(
            x=inputs['bbox_delta'],
            y=inputs['bbox_targets'],
            inside_weight=inputs['bbox_inside_weights'],
            outside_weight=inputs['bbox_outside_weights'],
            sigma=1.0)
        loss_bbox_reg = fluid.layers.reduce_mean(
            smooth_l1, name='loss_bbox_loc')

        return loss_bbox_cls, loss_bbox_reg