diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/architectures/cascade_mask_rcnn.py b/PaddleCV/PaddleDetection/ppdet/modeling/architectures/cascade_mask_rcnn.py index b5fb9017b84a62ab1da7f05518d622903cbce53d..ede3ffdfa4944f636c1204d3010278dede28990c 100644 --- a/PaddleCV/PaddleDetection/ppdet/modeling/architectures/cascade_mask_rcnn.py +++ b/PaddleCV/PaddleDetection/ppdet/modeling/architectures/cascade_mask_rcnn.py @@ -53,6 +53,7 @@ class CascadeMaskRCNN(object): bbox_assigner='CascadeBBoxAssigner', mask_assigner='MaskAssigner', mask_head='MaskHead', + rpn_only=False, fpn='FPN'): super(CascadeMaskRCNN, self).__init__() assert fpn is not None, "cascade RCNN requires FPN" @@ -64,6 +65,7 @@ class CascadeMaskRCNN(object): self.bbox_head = bbox_head self.mask_assigner = mask_assigner self.mask_head = mask_head + self.rpn_only = rpn_only # Cascade local cfg self.cls_agnostic_bbox_reg = 2 (brw0, brw1, brw2) = self.bbox_assigner.bbox_reg_weights @@ -191,8 +193,9 @@ class CascadeMaskRCNN(object): roi_feat = self.roi_extractor(body_feats, rois, spatial_scale) bbox_pred = self.bbox_head.get_prediction( - im_info, roi_feat_list, rcnn_pred_list, proposal_list, - self.cascade_bbox_reg_weights, self.cls_agnostic_bbox_reg) + im_info, feed_vars['im_shape'], roi_feat_list, rcnn_pred_list, + proposal_list, self.cascade_bbox_reg_weights, + self.cls_agnostic_bbox_reg) bbox_pred = bbox_pred['bbox'] @@ -204,7 +207,11 @@ class CascadeMaskRCNN(object): cond = fluid.layers.less_than(x=bbox_size, y=size) mask_pred = fluid.layers.create_global_var( - shape=[1], value=0.0, dtype='float32', persistable=False) + shape=[1], + value=0.0, + dtype='float32', + persistable=False, + name='mask_pred') with fluid.layers.control_flow.Switch() as switch: with switch.case(cond): diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/architectures/cascade_rcnn.py b/PaddleCV/PaddleDetection/ppdet/modeling/architectures/cascade_rcnn.py index 42b940b3d487a5a25847c7ee2fd02e8aec8b1ac9..133281e4f13fa16bebd5031fd4241c1bb94f5124 100644 --- a/PaddleCV/PaddleDetection/ppdet/modeling/architectures/cascade_rcnn.py +++ b/PaddleCV/PaddleDetection/ppdet/modeling/architectures/cascade_rcnn.py @@ -72,7 +72,19 @@ class CascadeRCNN(object): def build(self, feed_vars, mode='train'): im = feed_vars['image'] + assert mode in ['train', 'test'], \ + "only 'train' and 'test' mode is supported" + if mode == 'train': + required_fields = [ + 'gt_label', 'gt_box', 'gt_mask', 'is_crowd', 'im_info' + ] + else: + required_fields = ['im_shape', 'im_info'] + for var in required_fields: + assert var in feed_vars, \ + "{} has no {} field".format(feed_vars, var) im_info = feed_vars['im_info'] + if mode == 'train': gt_box = feed_vars['gt_box'] is_crowd = feed_vars['is_crowd'] @@ -92,7 +104,8 @@ class CascadeRCNN(object): rpn_loss = self.rpn_head.get_loss(im_info, gt_box, is_crowd) else: if self.rpn_only: - im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3]) + im_scale = fluid.layers.slice( + im_info, [1], starts=[2], ends=[3]) im_scale = fluid.layers.sequence_expand(im_scale, rois) rois = rois / im_scale return {'proposal': rois} @@ -143,8 +156,9 @@ class CascadeRCNN(object): return loss else: pred = self.bbox_head.get_prediction( - im_info, roi_feat_list, rcnn_pred_list, proposal_list, - self.cascade_bbox_reg_weights, self.cls_agnostic_bbox_reg) + im_info, feed_vars['im_shape'], roi_feat_list, rcnn_pred_list, + proposal_list, self.cascade_bbox_reg_weights, + self.cls_agnostic_bbox_reg) return pred def _decode_box(self, proposals, bbox_pred, curr_stage): diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/bbox_head.py b/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/bbox_head.py index 7bdb64b989ceea5da0668f7a49f45cdd24a9a830..ce2fb63d756b378cba026a41ce329cc310cbcffc 100644 --- a/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/bbox_head.py +++ b/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/bbox_head.py @@ -274,11 +274,14 @@ class BBoxHead(object): Get prediction bounding box in test stage. Args: + roi_feat (Variable): RoI feature from RoIExtractor. rois (Variable): Output of generate_proposals in rpn head. im_info (Variable): A 2-D LoDTensor with shape [B, 3]. B is the number of input images, each element consists of im_height, im_width, im_scale. - cls_score (Variable), bbox_pred(Variable): Output of get_output. + im_shape (Variable): Actual shape of original image with shape + [B, 3]. B is the number of images, each element consists of + original_height, original_width, 1 Returns: pred_result(Variable): Prediction result with shape [N, 6]. Each diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/cascade_head.py b/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/cascade_head.py index 58936b3d4a07ab018d7b7268f92970d4e436ed35..7c4a3b8c7387c35c0605651eda4aa0e51c06dbf5 100644 --- a/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/cascade_head.py +++ b/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/cascade_head.py @@ -139,6 +139,7 @@ class CascadeBBoxHead(object): def get_prediction(self, im_info, + im_shape, roi_feat_list, rcnn_pred_list, proposal_list, @@ -151,6 +152,9 @@ class CascadeBBoxHead(object): im_info (Variable): A 2-D LoDTensor with shape [B, 3]. B is the number of input images, each element consists of im_height, im_width, im_scale. + im_shape (Variable): Actual shape of original image with shape + [B, 3]. B is the number of images, each element consists of + original_height, original_width, 1 rois_feat_list (List): RoI feature from RoIExtractor. rcnn_pred_list (Variable): Cascade rcnn's head's output including bbox_pred and cls_score @@ -197,7 +201,8 @@ class CascadeBBoxHead(object): # only use fg box delta to decode box bbox_pred_new = fluid.layers.slice( bbox_pred_new, axes=[1], starts=[1], ends=[2]) - bbox_pred_new = fluid.layers.expand(bbox_pred_new, [1, self.num_classes, 1]) + bbox_pred_new = fluid.layers.expand(bbox_pred_new, + [1, self.num_classes, 1]) decoded_box = fluid.layers.box_coder( prior_box=proposals_boxes, prior_box_var=bbox_reg_w, @@ -206,8 +211,7 @@ class CascadeBBoxHead(object): box_normalized=False, axis=1) - # TODO: notice detectron use img.shape - box_out = fluid.layers.box_clip(input=decoded_box, im_info=im_info) + box_out = fluid.layers.box_clip(input=decoded_box, im_info=im_shape) pred_result = self.nms(bboxes=box_out, scores=boxes_cls_prob_mean) return {"bbox": pred_result}