bbox.py 9.3 KB
Newer Older
F
FDInSky 已提交
1 2 3 4 5 6
import numpy as np
import paddle.fluid as fluid
from ppdet.core.workspace import register


@register
class BBoxPostProcess(object):
    """Turn bbox-head outputs into final boxes via ``decode_clip_nms``.

    The heavy lifting is delegated to the injected ``decode_clip_nms``
    callable; this class only gathers the per-stage head outputs from the
    ``inputs`` dict and packages the result.

    Args:
        decode_clip_nms: callable invoked as
            ``decode_clip_nms(rpn_rois, bbox_prob, bbox_delta, im_info)``;
            element 0 and 1 of its result are used.
        num_classes (int): used as the expand factor for class-agnostic
            deltas.
        num_stages (int): number of ``bbox_head_<i>`` entries to read from
            ``inputs``; when not positive, ``inputs['bbox_prob']`` and
            ``inputs['bbox_delta']`` are read directly instead.
        decode, clip, nms: stored on the instance but not used by
            ``__call__`` (see the TODO there).
    """
    # NOTE(review): __shared__/__inject__ are presumably consumed by the
    # ppdet workspace behind @register -- confirm against ppdet.core.workspace.
    __shared__ = ['num_classes', 'num_stages']
    __inject__ = ['decode_clip_nms']

    def __init__(self,
                 decode_clip_nms,
                 num_classes=81,
                 num_stages=1,
                 decode=None,
                 clip=None,
                 nms=None):
        super(BBoxPostProcess, self).__init__()
        self.num_classes = num_classes
        self.num_stages = num_stages
        self.decode = decode
        self.clip = clip
        self.nms = nms
        self.decode_clip_nms = decode_clip_nms

    def __call__(self, inputs):
        """Run post-processing on one batch of head outputs.

        Args:
            inputs (dict): must provide ``'rpn_rois'`` and ``'im_info'``,
                plus either ``'bbox_head_<i>'`` dicts (each with
                ``'bbox_prob'`` / ``'bbox_delta'``; ``'bbox_head_0'`` also
                with ``'cls_agnostic_bbox_reg'``) when ``num_stages > 0``,
                or top-level ``'bbox_prob'`` / ``'bbox_delta'`` otherwise.

        Returns:
            dict: ``'predicted_bbox_nums'`` and ``'predicted_bbox'``, taken
            from elements 0 and 1 of the ``decode_clip_nms`` result.
        """
        # TODO: split into 3 steps
        # TODO: modify related ops for deploying
        # decode
        # clip
        # nms
        if self.num_stages > 0:
            bbox_prob_list = [
                inputs['bbox_head_' + str(i)]['bbox_prob']
                for i in range(self.num_stages)
            ]
            # Average the class probabilities over all stages.
            bbox_prob = fluid.layers.sum(bbox_prob_list) / float(
                len(bbox_prob_list))
            # Deltas come from the last stage only; index it explicitly
            # instead of relying on a loop variable leaking out of the loop.
            last_stage = self.num_stages - 1
            bbox_delta = inputs['bbox_head_' + str(last_stage)]['bbox_delta']
            if inputs['bbox_head_0']['cls_agnostic_bbox_reg'] == 2:
                # `axes` must be a list/tuple for fluid.layers.slice (a bare
                # int is not accepted); keep index 1 along axis 1 and repeat
                # it num_classes times.
                bbox_delta = fluid.layers.slice(
                    bbox_delta, axes=[1], starts=[1], ends=[2])
                bbox_delta = fluid.layers.expand(bbox_delta,
                                                 [1, self.num_classes, 1])
        else:
            bbox_prob = inputs['bbox_prob']
            bbox_delta = inputs['bbox_delta']

        outs = self.decode_clip_nms(inputs['rpn_rois'], bbox_prob, bbox_delta,
                                    inputs['im_info'])
        outs = {"predicted_bbox_nums": outs[0], "predicted_bbox": outs[1]}
        return outs


55 56 57
@register
class BBoxPostProcessYOLO(object):
    """Decode every YOLO head output and run NMS over the merged result.

    Args:
        yolo_box: callable invoked once per head output as
            ``yolo_box(out, im_size, mask_anchors_i, i, name)``; it must
            return a ``(boxes, scores)`` pair.
        nms: callable invoked as ``nms(bboxes=..., scores=...)``.
        num_classes (int): stored on the instance; not read by ``__call__``.
        decode, clip: stored on the instance; not read by ``__call__``.
    """
    # NOTE(review): __shared__/__inject__ are presumably consumed by the
    # ppdet workspace behind @register -- confirm against ppdet.core.workspace.
    __shared__ = ['num_classes']
    __inject__ = ['yolo_box', 'nms']

    def __init__(self, yolo_box, nms, num_classes=80, decode=None, clip=None):
        super(BBoxPostProcessYOLO, self).__init__()
        self.yolo_box = yolo_box
        self.nms = nms
        self.num_classes = num_classes
        self.decode = decode
        self.clip = clip

    def __call__(self, inputs):
        """Build the final detections from ``inputs['yolo_outs']``.

        Args:
            inputs (dict): reads ``'yolo_outs'`` (iterable of head outputs),
                ``'im_size'`` and ``'mask_anchors'`` (indexed per head).

        Returns:
            dict: ``'predicted_bbox'`` is the NMS result and
            ``'predicted_bbox_nums'`` is an int32 numpy array
            ``[0, predicted_bbox.shape[0]]``.
        """
        # TODO: split yolo_box into 2 steps
        # decode
        # clip
        per_head_boxes = []
        per_head_scores = []
        for idx, head_out in enumerate(inputs['yolo_outs']):
            op_name = "yolo_box_" + str(idx)
            head_boxes, head_scores = self.yolo_box(
                head_out, inputs['im_size'], inputs['mask_anchors'][idx], idx,
                op_name)
            per_head_boxes.append(head_boxes)
            # Swap the last two axes of the scores before merging on axis 2.
            per_head_scores.append(
                fluid.layers.transpose(
                    head_scores, perm=[0, 2, 1]))

        merged_boxes = fluid.layers.concat(per_head_boxes, axis=1)
        merged_scores = fluid.layers.concat(per_head_scores, axis=2)
        nmsed_bbox = self.nms(bboxes=merged_boxes, scores=merged_scores)

        # TODO: parse the lod of nmsed_bbox
        # default batch size is 1
        kept = int(nmsed_bbox.shape[0])
        return {
            "predicted_bbox_nums": np.array(
                [0, kept], dtype=np.int32),
            "predicted_bbox": nmsed_bbox,
        }


F
FDInSky 已提交
91
@register
class AnchorRPN(object):
    """Anchor generation and anchor-target assignment for an RPN head.

    Args:
        anchor_generator: callable applied to ``inputs['rpn_feat']``;
            elements 0 and 1 of its result are exposed as ``'anchor'`` and
            ``'anchor_var'``.
        anchor_target_generator: callable that yields the five values
            ``(score_pred, roi_pred, score_tgt, roi_tgt, roi_weight)``.
    """
    # NOTE(review): __inject__ is presumably consumed by the ppdet workspace
    # behind @register -- confirm against ppdet.core.workspace.
    __inject__ = ['anchor_generator', 'anchor_target_generator']

    def __init__(self, anchor_generator, anchor_target_generator):
        super(AnchorRPN, self).__init__()
        self.anchor_generator = anchor_generator
        self.anchor_target_generator = anchor_target_generator

    def __call__(self, inputs):
        """Shorthand for :meth:`generate_anchors`."""
        return self.generate_anchors(inputs)

    def generate_anchors(self, inputs):
        """Generate anchors from ``inputs['rpn_feat']``.

        Returns:
            dict: ``'anchor'``, ``'anchor_var'`` and ``'anchor_module'``
            (this instance, so later steps can call back into it).
        """
        # TODO: update here to use int to specify featmap size
        generated = self.anchor_generator(inputs['rpn_feat'])
        return {
            'anchor': generated[0],
            'anchor_var': generated[1],
            'anchor_module': self,
        }

    def generate_anchors_target(self, inputs):
        """Compute RPN predictions/targets for the loss.

        Args:
            inputs (dict): reads ``'rpn_rois_score'``, ``'rpn_rois_delta'``,
                ``'anchor'``, ``'gt_bbox'``, ``'is_crowd'``, ``'im_info'``
                and ``'open_debug'``.

        Returns:
            dict: ``'rpn_score_pred'``, ``'rpn_score_target'``,
            ``'rpn_rois_pred'``, ``'rpn_rois_target'``, ``'rpn_rois_weight'``.
        """
        # Move axis 1 to the end for both score and delta maps ...
        channel_last = [0, 2, 3, 1]
        scores = fluid.layers.transpose(
            inputs['rpn_rois_score'], perm=channel_last)
        deltas = fluid.layers.transpose(
            inputs['rpn_rois_delta'], perm=channel_last)
        # ... then flatten everything but the leading axis into rows of
        # 1 (score) and 4 (delta) values.
        scores = fluid.layers.reshape(x=scores, shape=(0, -1, 1))
        deltas = fluid.layers.reshape(x=deltas, shape=(0, -1, 4))

        flat_anchor = fluid.layers.reshape(inputs['anchor'], shape=(-1, 4))

        targets = self.anchor_target_generator(
            bbox_pred=deltas,
            cls_logits=scores,
            anchor_box=flat_anchor,
            gt_boxes=inputs['gt_bbox'],
            is_crowd=inputs['is_crowd'],
            im_info=inputs['im_info'],
            open_debug=inputs['open_debug'])
        # Exactly five values are expected, as in a 5-way tuple unpacking.
        score_pred, roi_pred, score_tgt, roi_tgt, roi_weight = targets

        return {
            'rpn_score_pred': score_pred,
            'rpn_score_target': score_tgt,
            'rpn_rois_pred': roi_pred,
            'rpn_rois_target': roi_tgt,
            'rpn_rois_weight': roi_weight,
        }

139 140 141 142 143 144 145

@register
class AnchorYOLO(object):
    """Thin wrapper wiring together the three injected YOLO anchor callables.

    Args:
        anchor_generator: callable applied to ``inputs['yolo_outs']``; its
            result must support item assignment (a key is added to it).
        anchor_target_generator: callable invoked with no arguments.
        anchor_post_process: callable applied to the whole ``inputs``.
    """
    # NOTE(review): __inject__ is presumably consumed by the ppdet workspace
    # behind @register -- confirm against ppdet.core.workspace.
    __inject__ = [
        'anchor_generator', 'anchor_target_generator', 'anchor_post_process'
    ]

    def __init__(self, anchor_generator, anchor_target_generator,
                 anchor_post_process):
        super(AnchorYOLO, self).__init__()
        self.anchor_generator = anchor_generator
        self.anchor_target_generator = anchor_target_generator
        self.anchor_post_process = anchor_post_process

    def __call__(self, inputs):
        """Shorthand for :meth:`generate_anchors`."""
        return self.generate_anchors(inputs)

    def generate_anchors(self, inputs):
        """Run the anchor generator and tag its result with this module.

        Returns:
            The generator's result with ``'anchor_module'`` set to ``self``.
        """
        anchors = self.anchor_generator(inputs['yolo_outs'])
        anchors['anchor_module'] = self
        return anchors

    def generate_anchors_target(self, inputs):
        """Return the target generator's result; ``inputs`` is not used."""
        return self.anchor_target_generator()

    def post_process(self, inputs):
        """Forward ``inputs`` to the injected post-process callable."""
        result = self.anchor_post_process(inputs)
        return result
F
FDInSky 已提交
168 169 170 171 172 173 174 175


@register
class Proposal(object):
    """Proposal generation, target sampling, refinement and post-processing.

    Args:
        proposal_generator: callable producing at least three values that
            are exposed as ``rpn_rois`` / ``rpn_rois_probs`` /
            ``rpn_rois_nums``.
        proposal_target_generator: callable producing at least six values
            (see :meth:`generate_proposal_target`); its ``bbox_reg_weights``
            attribute is indexed by stage in :meth:`refine_bbox`.
        bbox_post_process: callable applied to ``inputs`` in
            :meth:`post_process`.
    """
    # NOTE(review): __inject__ is presumably consumed by the ppdet workspace
    # behind @register -- confirm against ppdet.core.workspace.
    __inject__ = [
        'proposal_generator', 'proposal_target_generator', 'bbox_post_process'
    ]

    def __init__(self, proposal_generator, proposal_target_generator,
                 bbox_post_process):
        super(Proposal, self).__init__()
        self.proposal_generator = proposal_generator
        self.proposal_target_generator = proposal_target_generator
        self.bbox_post_process = bbox_post_process

    def __call__(self, inputs):
        """Generate proposals (stage 0) and, when training, their targets.

        Note that at stage 0 the proposal outputs are merged into ``inputs``
        in place; only the training targets end up in the returned dict.

        Returns:
            dict: the proposal-target outputs when ``inputs['mode']`` is
            ``'train'``, otherwise an empty dict.
        """
        result = {}
        if inputs['stage'] == 0:
            inputs.update(self.generate_proposal(inputs))
        if inputs['mode'] == 'train':
            result.update(self.generate_proposal_target(inputs))
        return result

    def generate_proposal(self, inputs):
        """Run the proposal generator on sigmoid-activated RPN scores.

        Args:
            inputs (dict): reads ``'rpn_rois_score'``, ``'rpn_rois_delta'``,
                ``'anchor'``, ``'anchor_var'``, ``'im_info'`` and ``'mode'``.

        Returns:
            dict: ``'rpn_rois'``, ``'rpn_rois_probs'``, ``'rpn_rois_nums'``.
        """
        probs = fluid.layers.sigmoid(
            inputs['rpn_rois_score'], name='rpn_rois_prob')
        generated = self.proposal_generator(
            scores=probs,
            bbox_deltas=inputs['rpn_rois_delta'],
            anchors=inputs['anchor'],
            variances=inputs['anchor_var'],
            im_info=inputs['im_info'],
            mode=inputs['mode'])
        return {
            'rpn_rois': generated[0],
            'rpn_rois_probs': generated[1],
            'rpn_rois_nums': generated[2],
        }

    def generate_proposal_target(self, inputs):
        """Sample training targets for the current stage.

        At stage 0 the RoIs come from ``inputs['rpn_rois']`` /
        ``inputs['rpn_rois_nums']``; at a later stage they come from the
        previous stage's ``'proposal_<stage-1>'`` entry
        (``'refined_bbox'`` / ``'rois_nums'``).

        Returns:
            dict: ``'rois'``, ``'labels_int32'``, ``'bbox_targets'``,
            ``'bbox_inside_weights'``, ``'bbox_outside_weights'``,
            ``'rois_nums'``.
        """
        stage = inputs['stage']
        if stage == 0:
            src_rois = inputs['rpn_rois']
            src_rois_num = inputs['rpn_rois_nums']
        elif stage > 0:
            previous = inputs['proposal_' + str(stage - 1)]
            src_rois = previous['refined_bbox']
            src_rois_num = previous['rois_nums']

        sampled = self.proposal_target_generator(
            rpn_rois=src_rois,
            rpn_rois_nums=src_rois_num,
            gt_classes=inputs['gt_class'],
            is_crowd=inputs['is_crowd'],
            gt_boxes=inputs['gt_bbox'],
            im_info=inputs['im_info'],
            stage=inputs['stage'],
            open_debug=inputs['open_debug'])
        return {
            'rois': sampled[0],
            'labels_int32': sampled[1],
            'bbox_targets': sampled[2],
            'bbox_inside_weights': sampled[3],
            'bbox_outside_weights': sampled[4],
            'rois_nums': sampled[5],
        }

    def refine_bbox(self, inputs):
        """Decode the current stage's deltas against its RoIs.

        RoIs are ``inputs['proposal_<stage>']['rois']`` in train mode and
        ``inputs['rpn_rois']`` otherwise.

        Returns:
            dict: ``'refined_bbox'``, reshaped to rows of 4 values.
        """
        if inputs['mode'] == 'train':
            rois = inputs['proposal_' + str(inputs['stage'])]['rois']
        else:
            rois = inputs['rpn_rois']
        head_out = inputs['bbox_head_' + str(inputs['stage'])]

        # Regroup deltas as (-1, cls_agnostic_bbox_reg, 4) and keep only
        # index 1 along axis 1.
        delta_shape = (-1, inputs['bbox_head_0']['cls_agnostic_bbox_reg'], 4)
        grouped_delta = fluid.layers.reshape(head_out['bbox_delta'],
                                             delta_shape)
        picked_delta = fluid.layers.slice(
            grouped_delta, axes=[1], starts=[1], ends=[2])

        stage_weights = self.proposal_target_generator.bbox_reg_weights[
            inputs['stage']]
        decoded = fluid.layers.box_coder(
            prior_box=rois,
            prior_box_var=stage_weights,
            target_box=picked_delta,
            code_type='decode_center_size',
            box_normalized=False,
            axis=1)
        decoded = fluid.layers.reshape(decoded, shape=[-1, 4])
        return {'refined_bbox': decoded}

    def post_process(self, inputs):
        """Forward ``inputs`` to the injected ``bbox_post_process``."""
        return self.bbox_post_process(inputs)