# bbox_head.py
import paddle.fluid as fluid
from paddle.fluid.dygraph import Layer
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Normal, Xavier
from paddle.fluid.regularizer import L2Decay
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
from ppdet.core.workspace import register


@register
class TwoFCHead(Layer):
    """Per-stage pair of fully connected layers (fc6 -> fc7) with ReLU.

    For every stage in ``range(num_stages)`` two ``Linear`` sublayers are
    registered under the names ``fc6_<stage>`` and ``fc7_<stage>``. The
    first maps ``in_dim * resolution * resolution`` inputs to ``mlp_dim``
    outputs, the second maps ``mlp_dim`` to ``mlp_dim``.

    Args:
        in_dim: Factor of the fc6 input size (multiplied by
            ``resolution ** 2``).
        mlp_dim: Output size of both fc6 and fc7.
        resolution: Factor of the fc6 input size, applied twice.
        num_stages: Number of (fc6, fc7) pairs to create.
    """

    # NOTE(review): presumably consumed by the ppdet workspace/config
    # machinery behind ``@register`` -- confirm there.
    __shared__ = ['num_stages']

    def __init__(self, in_dim=256, mlp_dim=1024, resolution=7, num_stages=1):
        super(TwoFCHead, self).__init__()
        self.in_dim = in_dim
        self.mlp_dim = mlp_dim
        self.num_stages = num_stages

        # Input width of fc6; the same value is passed to Xavier as fan_out.
        flat_dim = in_dim * resolution * resolution

        # Plain Python lists are enough for indexing by stage because each
        # layer is registered through add_sublayer below.
        self.fc6_list = []
        self.fc7_list = []
        for idx in range(num_stages):
            first_fc = Linear(
                flat_dim,
                mlp_dim,
                act='relu',
                param_attr=ParamAttr(initializer=Xavier(fan_out=flat_dim)),
                bias_attr=ParamAttr(
                    learning_rate=2., regularizer=L2Decay(0.)))
            first_fc = self.add_sublayer('fc6_{}'.format(idx), first_fc)

            second_fc = Linear(
                mlp_dim,
                mlp_dim,
                act='relu',
                param_attr=ParamAttr(initializer=Xavier()),
                bias_attr=ParamAttr(
                    learning_rate=2., regularizer=L2Decay(0.)))
            second_fc = self.add_sublayer('fc7_{}'.format(idx), second_fc)

            self.fc6_list.append(first_fc)
            self.fc7_list.append(second_fc)

    def forward(self, rois_feat, stage=0):
        """Flatten ``rois_feat`` and run it through the fc6/fc7 pair of ``stage``.

        Args:
            rois_feat: Input passed to ``fluid.layers.flatten``.
            stage: Index into the per-stage layer lists.

        Returns:
            Output of the stage's fc7 layer.
        """
        flat = fluid.layers.flatten(rois_feat)
        hidden = self.fc6_list[stage](flat)
        return self.fc7_list[stage](hidden)


@register
class BBoxFeat(Layer):
    """Chain an RoI extractor with a head feature module.

    Args:
        roi_extractor: Callable invoked as
            ``roi_extractor(body_feats, rois, spatial_scale)``.
        head_feat: Callable invoked as ``head_feat(rois_feat, stage)`` on
            the extractor's output.
    """

    # NOTE(review): presumably tells the ppdet workspace which constructor
    # arguments to inject -- confirm against ppdet.core.workspace.
    __inject__ = ['roi_extractor', 'head_feat']

    def __init__(self, roi_extractor, head_feat):
        super(BBoxFeat, self).__init__()
        self.roi_extractor = roi_extractor
        self.head_feat = head_feat

    def forward(self, body_feats, rois, spatial_scale, stage=0):
        """Extract RoI features and pass them to the head feature module.

        Args:
            body_feats: Forwarded unchanged to ``roi_extractor``.
            rois: Forwarded unchanged to ``roi_extractor``.
            spatial_scale: Forwarded unchanged to ``roi_extractor``.
            stage: Forwarded to ``head_feat`` as its second argument.

        Returns:
            Whatever ``head_feat`` returns for the extracted RoI features.
        """
        extracted = self.roi_extractor(body_feats, rois, spatial_scale)
        return self.head_feat(extracted, stage)


@register
class BBoxHead(Layer):
    """Per-stage classification and box-regression heads plus their losses.

    For every stage in ``range(num_stages)`` two ``Linear`` sublayers are
    registered: ``bbox_score_<stage>`` with ``num_classes`` outputs and
    ``bbox_delta_<stage>`` with ``4 * delta_dim`` outputs, where
    ``delta_dim`` is ``2`` when ``cls_agnostic`` is true and
    ``num_classes`` otherwise.

    Args:
        bbox_feat: Callable invoked as
            ``bbox_feat(body_feats, rois, spatial_scale, stage)``; its output
            feeds the score/delta layers.
        in_feat: Input size of the score and delta ``Linear`` layers.
        num_classes: Output size of the score layer.
        cls_agnostic: Selects ``delta_dim`` as described above.
        num_stages: Number of (score, delta) layer pairs to create.
        with_pool: When true, the feature is globally average-pooled before
            being fed to the score/delta layers.
    """

    # NOTE(review): presumably consumed by the ppdet workspace/config
    # machinery behind ``@register`` -- confirm there.
    __shared__ = ['num_classes', 'num_stages']
    __inject__ = ['bbox_feat']

    def __init__(self,
                 bbox_feat,
                 in_feat=1024,
                 num_classes=81,
                 cls_agnostic=False,
                 num_stages=1,
                 with_pool=False):
        super(BBoxHead, self).__init__()
        self.num_classes = num_classes
        self.delta_dim = 2 if cls_agnostic else num_classes
        self.bbox_feat = bbox_feat
        self.num_stages = num_stages
        # Plain Python lists are enough for indexing by stage because each
        # layer is registered through add_sublayer below.
        self.bbox_score_list = []
        self.bbox_delta_list = []
        self.with_pool = with_pool
        for stage in range(num_stages):
            score_name = 'bbox_score_{}'.format(stage)
            delta_name = 'bbox_delta_{}'.format(stage)
            bbox_score = self.add_sublayer(
                score_name,
                fluid.dygraph.Linear(
                    input_dim=in_feat,
                    output_dim=1 * self.num_classes,
                    act=None,
                    param_attr=ParamAttr(initializer=Normal(
                        loc=0.0, scale=0.01)),
                    bias_attr=ParamAttr(
                        learning_rate=2., regularizer=L2Decay(0.))))

            bbox_delta = self.add_sublayer(
                delta_name,
                fluid.dygraph.Linear(
                    input_dim=in_feat,
                    output_dim=4 * self.delta_dim,
                    act=None,
                    param_attr=ParamAttr(initializer=Normal(
                        loc=0.0, scale=0.001)),
                    bias_attr=ParamAttr(
                        learning_rate=2., regularizer=L2Decay(0.))))
            self.bbox_score_list.append(bbox_score)
            self.bbox_delta_list.append(bbox_delta)

    def forward(self, body_feats, rois, spatial_scale, stage=0):
        """Compute box features and the stage's score/delta predictions.

        Args:
            body_feats: Forwarded unchanged to ``self.bbox_feat``.
            rois: Forwarded unchanged to ``self.bbox_feat``.
            spatial_scale: Forwarded unchanged to ``self.bbox_feat``.
            stage: Index selecting the score/delta layers; also forwarded to
                ``self.bbox_feat``.

        Returns:
            A tuple ``(bbox_feat, bbox_head_out)`` where ``bbox_feat`` is the
            feature (after pooling when ``with_pool`` is set) and
            ``bbox_head_out`` is a one-element list holding the
            ``(scores, deltas)`` pair.
        """
        bbox_feat = self.bbox_feat(body_feats, rois, spatial_scale, stage)
        head_input = bbox_feat
        if self.with_pool:
            bbox_feat = fluid.layers.pool2d(
                bbox_feat, pool_type='avg', global_pooling=True)
            # The pooled tensor keeps its spatial axes (size 1 each), so its
            # trailing dimension no longer equals ``in_feat``. Collapse it to
            # two dimensions before the Linear layers; the pooled tensor
            # itself is still what gets returned.
            head_input = fluid.layers.flatten(bbox_feat)
        scores = self.bbox_score_list[stage](head_input)
        deltas = self.bbox_delta_list[stage](head_input)
        bbox_head_out = [(scores, deltas)]
        return bbox_feat, bbox_head_out

    def _get_head_loss(self, score, delta, target):
        """Compute classification and regression losses for one head output.

        Args:
            score: Logits passed to ``softmax_with_cross_entropy``.
            delta: Regression prediction passed to ``smooth_l1`` as ``x``.
            target: Mapping providing ``'labels_int32'``, ``'bbox_targets'``,
                ``'bbox_inside_weights'`` and ``'bbox_outside_weights'``.

        Returns:
            ``(loss_bbox_cls, loss_bbox_reg)``, each reduced with
            ``reduce_mean``.
        """
        # bbox cls
        labels_int64 = fluid.layers.cast(
            x=target['labels_int32'], dtype='int64')
        # Labels are targets, not something to back-propagate through.
        labels_int64.stop_gradient = True
        loss_bbox_cls = fluid.layers.softmax_with_cross_entropy(
            logits=score, label=labels_int64)
        loss_bbox_cls = fluid.layers.reduce_mean(loss_bbox_cls)
        # bbox reg
        loss_bbox_reg = fluid.layers.smooth_l1(
            x=delta,
            y=target['bbox_targets'],
            inside_weight=target['bbox_inside_weights'],
            outside_weight=target['bbox_outside_weights'],
            sigma=1.0)
        loss_bbox_reg = fluid.layers.reduce_mean(loss_bbox_reg)
        return loss_bbox_cls, loss_bbox_reg

    def loss(self, bbox_head_out, targets):
        """Build a dict of per-entry classification and regression losses.

        ``bbox_head_out`` and ``targets`` are paired positionally with
        ``zip``, so iteration stops at the shorter of the two.

        Args:
            bbox_head_out: Iterable of ``(score, delta)`` pairs.
            targets: Iterable of target mappings, one per pair (see
                ``_get_head_loss`` for the keys read).

        Returns:
            Dict with keys ``'loss_bbox_cls_<i>'`` and ``'loss_bbox_reg_<i>'``
            for each paired index ``i``.
        """
        loss_bbox = {}
        for lvl, (bboxhead, target) in enumerate(zip(bbox_head_out, targets)):
            score, delta = bboxhead
            loss_bbox_cls, loss_bbox_reg = self._get_head_loss(score, delta,
                                                               target)
            loss_bbox['loss_bbox_cls_{}'.format(lvl)] = loss_bbox_cls
            loss_bbox['loss_bbox_reg_{}'.format(lvl)] = loss_bbox_reg
        return loss_bbox