diff --git a/configs/vitdet/cascade_rcnn_vit_base_hrfpn_cae_1x_coco.yml b/configs/vitdet/cascade_rcnn_vit_base_hrfpn_cae_1x_coco.yml index 23f766d75aad37f238cfebc233941d36b2d9a295..081e371e2099aa189a1a4cc784a5f999a69aa59c 100644 --- a/configs/vitdet/cascade_rcnn_vit_base_hrfpn_cae_1x_coco.yml +++ b/configs/vitdet/cascade_rcnn_vit_base_hrfpn_cae_1x_coco.yml @@ -98,6 +98,8 @@ CascadeHead: reg_class_agnostic: False stage_loss_weights: [1, 0.5, 0.25] loss_normalize_pos: True + add_gt_as_proposals: [True, True, True] + BBoxAssigner: batch_size_per_im: 512 diff --git a/ppdet/modeling/heads/cascade_head.py b/ppdet/modeling/heads/cascade_head.py index 9efc6492b6afe450f08e794dbe3f9c22d621cea7..bb0beadbd38f2c2c34a730cfb1705058f3f538bd 100644 --- a/ppdet/modeling/heads/cascade_head.py +++ b/ppdet/modeling/heads/cascade_head.py @@ -163,7 +163,8 @@ class CascadeHead(BBoxHead): bbox_loss=None, reg_class_agnostic=True, stage_loss_weights=None, - loss_normalize_pos=False): + loss_normalize_pos=False, + add_gt_as_proposals=[True, False, False]): nn.Layer.__init__(self, ) self.head = head @@ -179,6 +180,8 @@ class CascadeHead(BBoxHead): self.stage_loss_weights = [ 1. 
/ num_cascade_stages for _ in range(num_cascade_stages) ] if stage_loss_weights is None else stage_loss_weights + self.add_gt_as_proposals = add_gt_as_proposals + assert len( self.stage_loss_weights ) == num_cascade_stages, f'stage_loss_weights({len(self.stage_loss_weights)}) do not equal to num_cascade_stages({num_cascade_stages})' @@ -221,7 +224,11 @@ class CascadeHead(BBoxHead): """ targets = [] if self.training: - rois, rois_num, targets = self.bbox_assigner(rois, rois_num, inputs) + rois, rois_num, targets = self.bbox_assigner( + rois, + rois_num, + inputs, + add_gt_as_proposals=self.add_gt_as_proposals[0]) targets_list = [targets] self.assigned_rois = (rois, rois_num) self.assigned_targets = targets @@ -234,7 +241,12 @@ class CascadeHead(BBoxHead): inputs['im_shape']) if self.training: rois, rois_num, targets = self.bbox_assigner( - rois, rois_num, inputs, i, is_cascade=True) + rois, + rois_num, + inputs, + i, + is_cascade=True, + add_gt_as_proposals=self.add_gt_as_proposals[i]) targets_list.append(targets) rois_feat = self.roi_extractor(body_feats, rois, rois_num) @@ -304,8 +316,10 @@ class CascadeHead(BBoxHead): # NOTE(dev): num_prob will be tagged as LoDTensorArray because it # depends on batch_size under @to_static. However the argument # num_or_sections in paddle.split does not support LoDTensorArray, - # so we use [-1] to replace it and whitout lossing correctness. - num_prop = [-1] if len(num_prop) == 1 else num_prop + # so we use [-1] to replace it if num_prop is not a list. This + # modification ensures the correctness of both dynamic and static graphs. 
+ if not isinstance(num_prop, list): + num_prop = [-1] return pred_bbox.split(num_prop) def get_prediction(self, head_out_list): diff --git a/ppdet/modeling/proposal_generator/target.py b/ppdet/modeling/proposal_generator/target.py index fd04f052219a00c919b945d8838436de018af873..9fbc3d48634766410c7057354484617528c5d596 100644 --- a/ppdet/modeling/proposal_generator/target.py +++ b/ppdet/modeling/proposal_generator/target.py @@ -186,7 +186,8 @@ def generate_proposal_target(rpn_rois, use_random=True, is_cascade=False, cascade_iou=0.5, - assign_on_cpu=False): + assign_on_cpu=False, + add_gt_as_proposals=True): rois_with_gt = [] tgt_labels = [] @@ -204,7 +205,7 @@ def generate_proposal_target(rpn_rois, gt_class = paddle.squeeze(gt_classes[i], axis=-1) # Concat RoIs and gt boxes except cascade rcnn or none gt - if not is_cascade and gt_bbox.shape[0] > 0: + if add_gt_as_proposals and gt_bbox.shape[0] > 0: bbox = paddle.concat([rpn_roi, gt_bbox]) else: bbox = rpn_roi diff --git a/ppdet/modeling/proposal_generator/target_layer.py b/ppdet/modeling/proposal_generator/target_layer.py index edcf973590763a27dafb4b0c7d2fa52ff7d2fa0a..c010c819de1b059a396019685f431ac822be8868 100644 --- a/ppdet/modeling/proposal_generator/target_layer.py +++ b/ppdet/modeling/proposal_generator/target_layer.py @@ -156,7 +156,8 @@ class BBoxAssigner(object): rpn_rois_num, inputs, stage=0, - is_cascade=False): + is_cascade=False, + add_gt_as_proposals=True): gt_classes = inputs['gt_class'] gt_boxes = inputs['gt_bbox'] is_crowd = inputs.get('is_crowd', None) @@ -166,7 +167,7 @@ class BBoxAssigner(object): rpn_rois, gt_classes, gt_boxes, self.batch_size_per_im, self.fg_fraction, self.fg_thresh, self.bg_thresh, self.num_classes, self.ignore_thresh, is_crowd, self.use_random, is_cascade, - self.cascade_iou[stage], self.assign_on_cpu) + self.cascade_iou[stage], self.assign_on_cpu, add_gt_as_proposals) rois = outs[0] rois_num = outs[-1] # tgt_labels, tgt_bboxes, tgt_gt_inds