From d1e1f174829fca3f98aad7a22718ac84788a7565 Mon Sep 17 00:00:00 2001 From: wangguanzhong Date: Tue, 20 Oct 2020 19:01:00 +0800 Subject: [PATCH] fix generate_proposal_labels in cascade-rcnn series model, test=develop (#27892) * fix generate_proposal_labels in cascade-rcnn series model, test=develop * fix example code & unittest, test=develop * update code from review comments, test=develop --- paddle/fluid/operators/detection/bbox_util.h | 15 ++ .../detection/generate_proposal_labels_op.cc | 176 +++++++++++++----- python/paddle/fluid/layers/detection.py | 43 +++-- python/paddle/fluid/tests/test_detection.py | 94 +++++----- .../test_generate_proposal_labels_op.py | 122 +++++++----- 5 files changed, 303 insertions(+), 147 deletions(-) diff --git a/paddle/fluid/operators/detection/bbox_util.h b/paddle/fluid/operators/detection/bbox_util.h index afc39c1db9f..6c9fea1fd44 100644 --- a/paddle/fluid/operators/detection/bbox_util.h +++ b/paddle/fluid/operators/detection/bbox_util.h @@ -149,5 +149,20 @@ void ClipTiledBoxes(const platform::DeviceContext& ctx, } } +// Calculate max IoU between each box and ground-truth and +// each row represents one box +template +void MaxIoU(const framework::Tensor& iou, framework::Tensor* max_iou) { + const T* iou_data = iou.data(); + int row = iou.dims()[0]; + int col = iou.dims()[1]; + T* max_iou_data = max_iou->data(); + for (int i = 0; i < row; ++i) { + const T* v = iou_data + i * col; + T max_v = *std::max_element(v, v + col); + max_iou_data[i] = max_v; + } +} + } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/detection/generate_proposal_labels_op.cc b/paddle/fluid/operators/detection/generate_proposal_labels_op.cc index 884aa1f6f4e..0b8fcbb7427 100644 --- a/paddle/fluid/operators/detection/generate_proposal_labels_op.cc +++ b/paddle/fluid/operators/detection/generate_proposal_labels_op.cc @@ -33,6 +33,28 @@ void AppendRois(LoDTensor* out, int64_t offset, Tensor* to_add) { memcpy(out_data + offset, to_add_data, to_add->numel() * sizeof(T)); } +// Filter the ground-truth in RoIs and the RoIs with non-positive area. +// The ground-truth has max overlap with itself so the max_overlap is 1 +// and the corresponding RoI will be removed. +template +void FilterRoIs(const platform::DeviceContext& ctx, const Tensor& rpn_rois, + const Tensor& max_overlap, Tensor* keep) { + const T* rpn_rois_dt = rpn_rois.data(); + const T* max_overlap_dt = max_overlap.data(); + int rois_num = max_overlap.numel(); + keep->Resize({rois_num}); + int* keep_data = keep->mutable_data(ctx.GetPlace()); + int keep_len = 0; + for (int i = 0; i < rois_num; ++i) { + if ((rpn_rois_dt[i * 4 + 2] - rpn_rois_dt[i * 4 + 0] + 1) > 0 && + (rpn_rois_dt[i * 4 + 3] - rpn_rois_dt[i * 4 + 1] + 1) > 0 && + max_overlap_dt[i] < 1.) { + keep_data[keep_len++] = i; + } + } + keep->Resize({keep_len}); +} + class GenerateProposalLabelsOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; @@ -98,12 +120,21 @@ class GenerateProposalLabelsOp : public framework::OperatorWithKernel { im_info_dims.size(), im_info_dims)); int class_nums = ctx->Attrs().Get("class_nums"); + bool is_cascade_rcnn = ctx->Attrs().Get("is_cascade_rcnn"); + if (is_cascade_rcnn) { + PADDLE_ENFORCE_EQ( + ctx->HasInput("MaxOverlap"), true, + platform::errors::NotFound( + "Input(MaxOverlap) of GenerateProposalLabelsOp " + "should not be null when is_cascade_rcnn is True.")); + } ctx->SetOutputDim("Rois", {-1, 4}); ctx->SetOutputDim("LabelsInt32", {-1, 1}); ctx->SetOutputDim("BboxTargets", {-1, 4 * class_nums}); ctx->SetOutputDim("BboxInsideWeights", {-1, 4 * class_nums}); ctx->SetOutputDim("BboxOutsideWeights", {-1, 4 * class_nums}); + ctx->SetOutputDim("MaxOverlapWithGT", {-1}); } protected: @@ -142,7 +173,6 @@ std::vector> SampleFgBgGt( int64_t row = iou->dims()[0]; int64_t col = iou->dims()[1]; float epsilon = 0.00001; - const T* rpn_rois_dt = rpn_rois.data(); // Follow the Faster RCNN's implementation for (int64_t i = 0; i < row; ++i) { const T* v = proposal_to_gt_overlaps + i * col; @@ -151,11 +181,6 @@ std::vector> SampleFgBgGt( if ((i < gt_num) && (crowd_data[i])) { max_overlap = -1.0; } - if (is_cascade_rcnn && - ((rpn_rois_dt[i * 4 + 2] - rpn_rois_dt[i * 4 + 0] + 1) <= 0 || - (rpn_rois_dt[i * 4 + 3] - rpn_rois_dt[i * 4 + 1] + 1) <= 0)) { - continue; - } if (max_overlap >= fg_thresh) { // fg mapped gt label index for (int64_t j = 0; j < col; ++j) { @@ -232,12 +257,13 @@ std::vector> SampleFgBgGt( template void GatherBoxesLabels(const platform::CPUDeviceContext& context, - const Tensor& boxes, const Tensor& gt_boxes, - const Tensor& gt_classes, + const Tensor& boxes, const Tensor& max_overlap, + const Tensor& gt_boxes, const Tensor& gt_classes, const std::vector& fg_inds, const std::vector& bg_inds, const std::vector& gt_inds, Tensor* sampled_boxes, - Tensor* sampled_labels, Tensor* sampled_gts) { + Tensor* sampled_labels, Tensor* sampled_gts, + Tensor* sampled_max_overlap) { int fg_num = fg_inds.size(); int bg_num = bg_inds.size(); Tensor fg_inds_t, bg_inds_t, gt_box_inds_t, gt_label_inds_t; @@ -264,6 +290,13 @@ void GatherBoxesLabels(const platform::CPUDeviceContext& context, bg_labels.mutable_data({bg_num}, context.GetPlace()); math::set_constant(context, &bg_labels, 0); Concat(context, fg_labels, bg_labels, sampled_labels); + + Tensor fg_max_overlap, bg_max_overlap; + fg_max_overlap.mutable_data({fg_num}, context.GetPlace()); + CPUGather(context, max_overlap, fg_inds_t, &fg_max_overlap); + bg_max_overlap.mutable_data({bg_num}, context.GetPlace()); + CPUGather(context, max_overlap, bg_inds_t, &bg_max_overlap); + Concat(context, fg_max_overlap, bg_max_overlap, sampled_max_overlap); } template @@ -274,43 +307,58 @@ std::vector SampleRoisForOneImage( const float fg_thresh, const float bg_thresh_hi, const float bg_thresh_lo, const std::vector& bbox_reg_weights, const int class_nums, std::minstd_rand engine, bool use_random, bool is_cascade_rcnn, - bool is_cls_agnostic) { + bool is_cls_agnostic, const Tensor& max_overlap) { // 1.1 map to original image auto im_scale = im_info.data()[2]; - Tensor rpn_rois; rpn_rois.mutable_data(rpn_rois_in.dims(), context.GetPlace()); const T* rpn_rois_in_dt = rpn_rois_in.data(); T* rpn_rois_dt = rpn_rois.data(); - int gt_num = gt_boxes.dims()[0] * 4; + for (int i = 0; i < rpn_rois.numel(); ++i) { - if (i < gt_num && is_cascade_rcnn) { - rpn_rois_dt[i] = rpn_rois_in_dt[i]; + rpn_rois_dt[i] = rpn_rois_in_dt[i] / im_scale; + } + + int proposals_num = 1; + + if (is_cascade_rcnn) { + Tensor keep; + FilterRoIs(context, rpn_rois, max_overlap, &keep); + Tensor roi_filter; + // Tensor box_filter; + if (keep.numel() == 0) { + math::SetConstant set_zero; + roi_filter.mutable_data({proposals_num, kBoxDim}, context.GetPlace()); + set_zero(context, &roi_filter, static_cast(0)); } else { - rpn_rois_dt[i] = rpn_rois_in_dt[i] / im_scale; + proposals_num = keep.numel(); + roi_filter.mutable_data({proposals_num, kBoxDim}, context.GetPlace()); + CPUGather(context, rpn_rois, keep, &roi_filter); } + T* roi_filter_dt = roi_filter.data(); + memcpy(rpn_rois_dt, roi_filter_dt, roi_filter.numel() * sizeof(T)); + rpn_rois.Resize(roi_filter.dims()); + } else { + proposals_num = rpn_rois.dims()[0]; } - // 1.2 compute overlaps - int proposals_num = rpn_rois.dims()[0]; - if (!is_cascade_rcnn) { - proposals_num += gt_boxes.dims()[0]; - } + proposals_num += gt_boxes.dims()[0]; + Tensor proposal_to_gt_overlaps; proposal_to_gt_overlaps.mutable_data({proposals_num, gt_boxes.dims()[0]}, context.GetPlace()); Tensor boxes; boxes.mutable_data({proposals_num, kBoxDim}, context.GetPlace()); - if (!is_cascade_rcnn) { - Concat(context, gt_boxes, rpn_rois, &boxes); - } else { - T* boxes_dt = boxes.data(); - for (int i = 0; i < boxes.numel(); ++i) { - boxes_dt[i] = rpn_rois_dt[i]; - } - } + Concat(context, gt_boxes, rpn_rois, &boxes); BboxOverlaps(boxes, gt_boxes, &proposal_to_gt_overlaps); + + Tensor proposal_with_max_overlap; + proposal_with_max_overlap.mutable_data({proposals_num}, + context.GetPlace()); + + MaxIoU(proposal_to_gt_overlaps, &proposal_with_max_overlap); + // Generate proposal index std::vector> fg_bg_gt = SampleFgBgGt(context, &proposal_to_gt_overlaps, is_crowd, @@ -321,7 +369,7 @@ std::vector SampleRoisForOneImage( std::vector mapped_gt_inds = fg_bg_gt[2]; // mapped_gt_labels // Gather boxes and labels - Tensor sampled_boxes, sampled_labels, sampled_gts; + Tensor sampled_boxes, sampled_labels, sampled_gts, sampled_max_overlap; int fg_num = fg_inds.size(); int bg_num = bg_inds.size(); int boxes_num = fg_num + bg_num; @@ -329,9 +377,11 @@ std::vector SampleRoisForOneImage( sampled_boxes.mutable_data(bbox_dim, context.GetPlace()); sampled_labels.mutable_data({boxes_num}, context.GetPlace()); sampled_gts.mutable_data({fg_num, kBoxDim}, context.GetPlace()); - GatherBoxesLabels(context, boxes, gt_boxes, gt_classes, fg_inds, bg_inds, - mapped_gt_inds, &sampled_boxes, &sampled_labels, - &sampled_gts); + sampled_max_overlap.mutable_data({boxes_num}, context.GetPlace()); + GatherBoxesLabels(context, boxes, proposal_with_max_overlap, gt_boxes, + gt_classes, fg_inds, bg_inds, mapped_gt_inds, + &sampled_boxes, &sampled_labels, &sampled_gts, + &sampled_max_overlap); // Compute targets Tensor bbox_targets_single; @@ -390,6 +440,7 @@ std::vector SampleRoisForOneImage( res.emplace_back(bbox_targets); res.emplace_back(bbox_inside_weights); res.emplace_back(bbox_outside_weights); + res.emplace_back(sampled_max_overlap); return res; } @@ -409,6 +460,7 @@ class GenerateProposalLabelsKernel : public framework::OpKernel { auto* bbox_inside_weights = context.Output("BboxInsideWeights"); auto* bbox_outside_weights = context.Output("BboxOutsideWeights"); + auto* max_overlap_with_gt = context.Output("MaxOverlapWithGT"); int batch_size_per_im = context.Attr("batch_size_per_im"); float fg_fraction = context.Attr("fg_fraction"); @@ -446,16 +498,21 @@ class GenerateProposalLabelsKernel : public framework::OpKernel { "received level of LoD is [%d], LoD is [%s].", gt_boxes->lod().size(), gt_boxes->lod())); int64_t n = static_cast(rpn_rois->lod().back().size() - 1); - - rois->mutable_data({n * batch_size_per_im, kBoxDim}, context.GetPlace()); - labels_int32->mutable_data({n * batch_size_per_im, 1}, - context.GetPlace()); - bbox_targets->mutable_data({n * batch_size_per_im, kBoxDim * class_nums}, + int64_t rois_num = rpn_rois->dims()[0]; + int64_t gts_num = gt_boxes->dims()[0]; + int64_t init_num = + is_cascade_rcnn ? rois_num + gts_num : n * batch_size_per_im; + + rois->mutable_data({init_num, kBoxDim}, context.GetPlace()); + labels_int32->mutable_data({init_num, 1}, context.GetPlace()); + bbox_targets->mutable_data({init_num, kBoxDim * class_nums}, context.GetPlace()); - bbox_inside_weights->mutable_data( - {n * batch_size_per_im, kBoxDim * class_nums}, context.GetPlace()); - bbox_outside_weights->mutable_data( - {n * batch_size_per_im, kBoxDim * class_nums}, context.GetPlace()); + bbox_inside_weights->mutable_data({init_num, kBoxDim * class_nums}, + context.GetPlace()); + bbox_outside_weights->mutable_data({init_num, kBoxDim * class_nums}, + context.GetPlace()); + max_overlap_with_gt->Resize({init_num}); + max_overlap_with_gt->mutable_data(context.GetPlace()); std::random_device rnd; std::minstd_rand engine; @@ -486,25 +543,36 @@ class GenerateProposalLabelsKernel : public framework::OpKernel { Tensor gt_boxes_slice = gt_boxes->Slice(gt_boxes_lod[i], gt_boxes_lod[i + 1]); Tensor im_info_slice = im_info->Slice(i, i + 1); + Tensor max_overlap_slice; + if (is_cascade_rcnn) { + auto* max_overlap = context.Input("MaxOverlap"); + max_overlap_slice = + max_overlap->Slice(rpn_rois_lod[i], rpn_rois_lod[i + 1]); + } else { + max_overlap_slice.mutable_data({rpn_rois_slice.dims()[0]}, + context.GetPlace()); + } std::vector tensor_output = SampleRoisForOneImage( dev_ctx, rpn_rois_slice, gt_classes_slice, is_crowd_slice, gt_boxes_slice, im_info_slice, batch_size_per_im, fg_fraction, fg_thresh, bg_thresh_hi, bg_thresh_lo, bbox_reg_weights, class_nums, - engine, use_random, is_cascade_rcnn, is_cls_agnostic); + engine, use_random, is_cascade_rcnn, is_cls_agnostic, + max_overlap_slice); Tensor sampled_rois = tensor_output[0]; Tensor sampled_labels_int32 = tensor_output[1]; Tensor sampled_bbox_targets = tensor_output[2]; Tensor sampled_bbox_inside_weights = tensor_output[3]; Tensor sampled_bbox_outside_weights = tensor_output[4]; + Tensor sampled_max_overlap = tensor_output[5]; AppendRois(rois, kBoxDim * num_rois, &sampled_rois); AppendRois(labels_int32, num_rois, &sampled_labels_int32); - AppendRois(bbox_targets, kBoxDim * num_rois * class_nums, - &sampled_bbox_targets); - AppendRois(bbox_inside_weights, kBoxDim * num_rois * class_nums, - &sampled_bbox_inside_weights); - AppendRois(bbox_outside_weights, kBoxDim * num_rois * class_nums, + int64_t offset = kBoxDim * num_rois * class_nums; + AppendRois(bbox_targets, offset, &sampled_bbox_targets); + AppendRois(bbox_inside_weights, offset, &sampled_bbox_inside_weights); + AppendRois(bbox_outside_weights, offset, &sampled_bbox_outside_weights); + AppendRois(max_overlap_with_gt, num_rois, &sampled_max_overlap); num_rois += sampled_rois.dims()[0]; lod0.emplace_back(num_rois); @@ -521,6 +589,8 @@ class GenerateProposalLabelsKernel : public framework::OpKernel { bbox_targets->Resize({num_rois, kBoxDim * class_nums}); bbox_inside_weights->Resize({num_rois, kBoxDim * class_nums}); bbox_outside_weights->Resize({num_rois, kBoxDim * class_nums}); + max_overlap_with_gt->Resize({num_rois}); + max_overlap_with_gt->set_lod(lod); } }; @@ -550,6 +620,12 @@ class GenerateProposalLabelsOpMaker : public framework::OpProtoAndCheckerMaker { "(Tensor), This input is a 2D Tensor with shape [B, 3]. " "B is the number of input images, " "each element consists of im_height, im_width, im_scale."); + AddInput("MaxOverlap", + "(LoDTensor), This input is a 1D LoDTensor with shape [N]." + "N is the number of Input(RpnRois), " + "each element is the maximum overlap between " + "the proposal RoI and ground-truth.") + .AsDispensable(); AddOutput( "Rois", @@ -573,6 +649,12 @@ class GenerateProposalLabelsOpMaker : public framework::OpProtoAndCheckerMaker { "(LoDTensor), This output is a 2D LoDTensor with shape [P, 4 * " "class_nums], " "each element indicates whether a box should contribute to loss."); + AddOutput("MaxOverlapWithGT", + "(LoDTensor), This output is a 1D LoDTensor with shape [P], " + "each element indicates the maxoverlap " + "between output RoIs and ground-truth. " + "The output RoIs may include ground-truth " + "and the output maxoverlap may contain 1."); AddAttr("batch_size_per_im", "Batch size of rois per images."); AddAttr("fg_fraction", diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index 951817db015..f7e79f79f8b 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -2601,7 +2601,9 @@ def generate_proposal_labels(rpn_rois, class_nums=None, use_random=True, is_cls_agnostic=False, - is_cascade_rcnn=False): + is_cascade_rcnn=False, + max_overlap=None, + return_max_overlap=False): """ **Generate Proposal Labels of Faster-RCNN** @@ -2638,25 +2640,29 @@ def generate_proposal_labels(rpn_rois, use_random(bool): Use random sampling to choose foreground and background boxes. is_cls_agnostic(bool): bbox regression use class agnostic simply which only represent fg and bg boxes. is_cascade_rcnn(bool): it will filter some bbox crossing the image's boundary when setting True. + max_overlap(Variable): Maximum overlap between each proposal box and ground-truth. + return_max_overlap(bool): Whether return the maximum overlap between each sampled RoI and ground-truth. Returns: tuple: - A tuple with format``(rois, labels_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights)``. + A tuple with format``(rois, labels_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights, max_overlap)``. - **rois**: 2-D LoDTensor with shape ``[batch_size_per_im * batch_size, 4]``. The data type is the same as ``rpn_rois``. - **labels_int32**: 2-D LoDTensor with shape ``[batch_size_per_im * batch_size, 1]``. The data type must be int32. - **bbox_targets**: 2-D LoDTensor with shape ``[batch_size_per_im * batch_size, 4 * class_num]``. The regression targets of all RoIs. The data type is the same as ``rpn_rois``. - **bbox_inside_weights**: 2-D LoDTensor with shape ``[batch_size_per_im * batch_size, 4 * class_num]``. The weights of foreground boxes' regression loss. The data type is the same as ``rpn_rois``. - **bbox_outside_weights**: 2-D LoDTensor with shape ``[batch_size_per_im * batch_size, 4 * class_num]``. The weights of regression loss. The data type is the same as ``rpn_rois``. - + - **max_overlap**: 1-D LoDTensor with shape ``[P]``. P is the number of output ``rois``. The maximum overlap between each sampled RoI and ground-truth. Examples: .. code-block:: python + import paddle import paddle.fluid as fluid + paddle.enable_static() rpn_rois = fluid.data(name='rpn_rois', shape=[None, 4], dtype='float32') - gt_classes = fluid.data(name='gt_classes', shape=[None, 1], dtype='float32') - is_crowd = fluid.data(name='is_crowd', shape=[None, 1], dtype='float32') + gt_classes = fluid.data(name='gt_classes', shape=[None, 1], dtype='int32') + is_crowd = fluid.data(name='is_crowd', shape=[None, 1], dtype='int32') gt_boxes = fluid.data(name='gt_boxes', shape=[None, 4], dtype='float32') im_info = fluid.data(name='im_info', shape=[None, 3], dtype='float32') rois, labels, bbox, inside_weights, outside_weights = fluid.layers.generate_proposal_labels( @@ -2673,6 +2679,8 @@ def generate_proposal_labels(rpn_rois, 'generate_proposal_labels') check_variable_and_dtype(is_crowd, 'is_crowd', ['int32'], 'generate_proposal_labels') + if is_cascade_rcnn: + assert max_overlap is not None, "Input max_overlap of generate_proposal_labels should not be None if is_cascade_rcnn is True" rois = helper.create_variable_for_type_inference(dtype=rpn_rois.dtype) labels_int32 = helper.create_variable_for_type_inference( @@ -2683,22 +2691,28 @@ def generate_proposal_labels(rpn_rois, dtype=rpn_rois.dtype) bbox_outside_weights = helper.create_variable_for_type_inference( dtype=rpn_rois.dtype) + max_overlap_with_gt = helper.create_variable_for_type_inference( + dtype=rpn_rois.dtype) + inputs = { + 'RpnRois': rpn_rois, + 'GtClasses': gt_classes, + 'IsCrowd': is_crowd, + 'GtBoxes': gt_boxes, + 'ImInfo': im_info, + } + if max_overlap is not None: + inputs['MaxOverlap'] = max_overlap helper.append_op( type="generate_proposal_labels", - inputs={ - 'RpnRois': rpn_rois, - 'GtClasses': gt_classes, - 'IsCrowd': is_crowd, - 'GtBoxes': gt_boxes, - 'ImInfo': im_info - }, + inputs=inputs, outputs={ 'Rois': rois, 'LabelsInt32': labels_int32, 'BboxTargets': bbox_targets, 'BboxInsideWeights': bbox_inside_weights, - 'BboxOutsideWeights': bbox_outside_weights + 'BboxOutsideWeights': bbox_outside_weights, + 'MaxOverlapWithGT': max_overlap_with_gt }, attrs={ 'batch_size_per_im': batch_size_per_im, @@ -2718,7 +2732,10 @@ def generate_proposal_labels(rpn_rois, bbox_targets.stop_gradient = True bbox_inside_weights.stop_gradient = True bbox_outside_weights.stop_gradient = True + max_overlap_with_gt.stop_gradient = True + if return_max_overlap: + return rois, labels_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights, max_overlap_with_gt return rois, labels_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights diff --git a/python/paddle/fluid/tests/test_detection.py b/python/paddle/fluid/tests/test_detection.py index 05b9067ec40..9348b0b50a1 100644 --- a/python/paddle/fluid/tests/test_detection.py +++ b/python/paddle/fluid/tests/test_detection.py @@ -289,40 +289,39 @@ class TestAnchorGenerator(unittest.TestCase): class TestGenerateProposalLabels(unittest.TestCase): + def check_out(self, outs): + rois = outs[0] + labels_int32 = outs[1] + bbox_targets = outs[2] + bbox_inside_weights = outs[3] + bbox_outside_weights = outs[4] + assert rois.shape[1] == 4 + assert rois.shape[0] == labels_int32.shape[0] + assert rois.shape[0] == bbox_targets.shape[0] + assert rois.shape[0] == bbox_inside_weights.shape[0] + assert rois.shape[0] == bbox_outside_weights.shape[0] + assert bbox_targets.shape[1] == 4 * self.class_nums + assert bbox_inside_weights.shape[1] == 4 * self.class_nums + assert bbox_outside_weights.shape[1] == 4 * self.class_nums + if len(outs) == 6: + max_overlap_with_gt = outs[5] + assert max_overlap_with_gt.shape[0] == rois.shape[0] + def test_generate_proposal_labels(self): program = Program() with program_guard(program): - rpn_rois = layers.data( - name='rpn_rois', - shape=[4, 4], - dtype='float32', - lod_level=1, - append_batch_size=False) - gt_classes = layers.data( - name='gt_classes', - shape=[6], - dtype='int32', - lod_level=1, - append_batch_size=False) - is_crowd = layers.data( - name='is_crowd', - shape=[6], - dtype='int32', - lod_level=1, - append_batch_size=False) - gt_boxes = layers.data( - name='gt_boxes', - shape=[6, 4], - dtype='float32', - lod_level=1, - append_batch_size=False) - im_info = layers.data( - name='im_info', - shape=[1, 3], - dtype='float32', - lod_level=1, - append_batch_size=False) - class_nums = 5 + rpn_rois = fluid.data( + name='rpn_rois', shape=[4, 4], dtype='float32', lod_level=1) + gt_classes = fluid.data( + name='gt_classes', shape=[6], dtype='int32', lod_level=1) + is_crowd = fluid.data( + name='is_crowd', shape=[6], dtype='int32', lod_level=1) + gt_boxes = fluid.data( + name='gt_boxes', shape=[6, 4], dtype='float32', lod_level=1) + im_info = fluid.data(name='im_info', shape=[1, 3], dtype='float32') + max_overlap = fluid.data( + name='max_overlap', shape=[4], dtype='float32', lod_level=1) + self.class_nums = 5 outs = fluid.layers.generate_proposal_labels( rpn_rois=rpn_rois, gt_classes=gt_classes, @@ -335,20 +334,27 @@ class TestGenerateProposalLabels(unittest.TestCase): bg_thresh_hi=0.5, bg_thresh_lo=0.0, bbox_reg_weights=[0.1, 0.1, 0.2, 0.2], - class_nums=class_nums) + class_nums=self.class_nums) + outs_1 = fluid.layers.generate_proposal_labels( + rpn_rois=rpn_rois, + gt_classes=gt_classes, + is_crowd=is_crowd, + gt_boxes=gt_boxes, + im_info=im_info, + batch_size_per_im=2, + fg_fraction=0.5, + fg_thresh=0.5, + bg_thresh_hi=0.5, + bg_thresh_lo=0.0, + bbox_reg_weights=[0.1, 0.1, 0.2, 0.2], + class_nums=self.class_nums, + is_cascade_rcnn=True, + max_overlap=max_overlap, + return_max_overlap=True) + + self.check_out(outs) + self.check_out(outs_1) rois = outs[0] - labels_int32 = outs[1] - bbox_targets = outs[2] - bbox_inside_weights = outs[3] - bbox_outside_weights = outs[4] - assert rois.shape[1] == 4 - assert rois.shape[0] == labels_int32.shape[0] - assert rois.shape[0] == bbox_targets.shape[0] - assert rois.shape[0] == bbox_inside_weights.shape[0] - assert rois.shape[0] == bbox_outside_weights.shape[0] - assert bbox_targets.shape[1] == 4 * class_nums - assert bbox_inside_weights.shape[1] == 4 * class_nums - assert bbox_outside_weights.shape[1] == 4 * class_nums class TestGenerateMaskLabels(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_generate_proposal_labels_op.py b/python/paddle/fluid/tests/unittests/test_generate_proposal_labels_op.py index 5054256ca72..2e9a5229e2e 100644 --- a/python/paddle/fluid/tests/unittests/test_generate_proposal_labels_op.py +++ b/python/paddle/fluid/tests/unittests/test_generate_proposal_labels_op.py @@ -22,66 +22,91 @@ import paddle.fluid as fluid from op_test import OpTest -def generate_proposal_labels_in_python( - rpn_rois, gt_classes, is_crowd, gt_boxes, im_info, batch_size_per_im, - fg_fraction, fg_thresh, bg_thresh_hi, bg_thresh_lo, bbox_reg_weights, - class_nums, use_random, is_cls_agnostic, is_cascade_rcnn): +def generate_proposal_labels_in_python(rpn_rois, + gt_classes, + is_crowd, + gt_boxes, + im_info, + batch_size_per_im, + fg_fraction, + fg_thresh, + bg_thresh_hi, + bg_thresh_lo, + bbox_reg_weights, + class_nums, + use_random, + is_cls_agnostic, + is_cascade_rcnn, + max_overlaps=None): rois = [] labels_int32 = [] bbox_targets = [] bbox_inside_weights = [] bbox_outside_weights = [] + max_overlap_with_gt = [] lod = [] assert len(rpn_rois) == len( im_info), 'batch size of rpn_rois and ground_truth is not matched' for im_i in range(len(im_info)): + max_overlap = max_overlaps[im_i] if is_cascade_rcnn else None frcn_blobs = _sample_rois( rpn_rois[im_i], gt_classes[im_i], is_crowd[im_i], gt_boxes[im_i], im_info[im_i], batch_size_per_im, fg_fraction, fg_thresh, bg_thresh_hi, bg_thresh_lo, bbox_reg_weights, class_nums, - use_random, is_cls_agnostic, is_cascade_rcnn) + use_random, is_cls_agnostic, is_cascade_rcnn, max_overlap) lod.append(frcn_blobs['rois'].shape[0]) rois.append(frcn_blobs['rois']) labels_int32.append(frcn_blobs['labels_int32']) bbox_targets.append(frcn_blobs['bbox_targets']) bbox_inside_weights.append(frcn_blobs['bbox_inside_weights']) bbox_outside_weights.append(frcn_blobs['bbox_outside_weights']) + max_overlap_with_gt.append(frcn_blobs['max_overlap']) - return rois, labels_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights, lod + return rois, labels_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights, max_overlap_with_gt, lod + + +def filter_roi(rois, max_overlap): + ws = rois[:, 2] - rois[:, 0] + 1 + hs = rois[:, 3] - rois[:, 1] + 1 + keep = np.where((ws > 0) & (hs > 0) & (max_overlap < 1.0))[0] + if len(keep) > 0: + return rois[keep, :] + return np.zeros((1, 4)).astype('float32') def _sample_rois(rpn_rois, gt_classes, is_crowd, gt_boxes, im_info, batch_size_per_im, fg_fraction, fg_thresh, bg_thresh_hi, bg_thresh_lo, bbox_reg_weights, class_nums, use_random, - is_cls_agnostic, is_cascade_rcnn): + is_cls_agnostic, is_cascade_rcnn, max_overlap): rois_per_image = int(batch_size_per_im) fg_rois_per_im = int(np.round(fg_fraction * rois_per_image)) # Roidb im_scale = im_info[2] inv_im_scale = 1. / im_scale - if is_cascade_rcnn: - rpn_rois = rpn_rois[len(gt_boxes):, :] rpn_rois = rpn_rois * inv_im_scale + + if is_cascade_rcnn: + rpn_rois = filter_roi(rpn_rois, max_overlap) + boxes = np.vstack([gt_boxes, rpn_rois]) gt_overlaps = np.zeros((boxes.shape[0], class_nums)) box_to_gt_ind_map = np.zeros((boxes.shape[0]), dtype=np.int32) - if len(gt_boxes) > 0: - proposal_to_gt_overlaps = _bbox_overlaps(boxes, gt_boxes) - - overlaps_argmax = proposal_to_gt_overlaps.argmax(axis=1) - overlaps_max = proposal_to_gt_overlaps.max(axis=1) - # Boxes which with non-zero overlap with gt boxes - overlapped_boxes_ind = np.where(overlaps_max > 0)[0] - overlapped_boxes_gt_classes = gt_classes[overlaps_argmax[ - overlapped_boxes_ind]] - gt_overlaps[overlapped_boxes_ind, - overlapped_boxes_gt_classes] = overlaps_max[ - overlapped_boxes_ind] - box_to_gt_ind_map[overlapped_boxes_ind] = overlaps_argmax[ - overlapped_boxes_ind] + proposal_to_gt_overlaps = _bbox_overlaps(boxes, gt_boxes) + + overlaps_argmax = proposal_to_gt_overlaps.argmax(axis=1) + overlaps_max = proposal_to_gt_overlaps.max(axis=1) + # Boxes which with non-zero overlap with gt boxes + overlapped_boxes_ind = np.where(overlaps_max > 0)[0] + overlapped_boxes_gt_classes = gt_classes[overlaps_argmax[ + overlapped_boxes_ind]] + gt_overlaps[overlapped_boxes_ind, + overlapped_boxes_gt_classes] = overlaps_max[ + overlapped_boxes_ind] + box_to_gt_ind_map[overlapped_boxes_ind] = overlaps_argmax[ + overlapped_boxes_ind] crowd_ind = np.where(is_crowd)[0] gt_overlaps[crowd_ind] = -1.0 @@ -90,11 +115,6 @@ def _sample_rois(rpn_rois, gt_classes, is_crowd, gt_boxes, im_info, if is_cascade_rcnn: # Cascade RCNN Decode Filter - ws = boxes[:, 2] - boxes[:, 0] + 1 - hs = boxes[:, 3] - boxes[:, 1] + 1 - keep = np.where((ws > 0) & (hs > 0))[0] - boxes = boxes[keep] - max_overlaps = max_overlaps[keep] fg_inds = np.where(max_overlaps >= fg_thresh)[0] bg_inds = np.where((max_overlaps < bg_thresh_hi) & (max_overlaps >= bg_thresh_lo))[0] @@ -125,6 +145,7 @@ def _sample_rois(rpn_rois, gt_classes, is_crowd, gt_boxes, im_info, sampled_labels = max_classes[keep_inds] sampled_labels[fg_rois_per_this_image:] = 0 sampled_boxes = boxes[keep_inds] + sampled_max_overlap = max_overlaps[keep_inds] sampled_gts = gt_boxes[box_to_gt_ind_map[keep_inds]] sampled_gts[fg_rois_per_this_image:, :] = gt_boxes[0] bbox_label_targets = _compute_targets(sampled_boxes, sampled_gts, @@ -142,7 +163,8 @@ def _sample_rois(rpn_rois, gt_classes, is_crowd, gt_boxes, im_info, labels_int32=sampled_labels, bbox_targets=bbox_targets, bbox_inside_weights=bbox_inside_weights, - bbox_outside_weights=bbox_outside_weights) + bbox_outside_weights=bbox_outside_weights, + max_overlap=sampled_max_overlap) return frcn_blobs @@ -226,9 +248,9 @@ class TestGenerateProposalLabelsOp(OpTest): def set_data(self): #self.use_random = False self.init_use_random() - self.init_test_cascade() self.init_test_params() self.init_test_input() + self.init_test_cascade() self.init_test_output() self.inputs = { @@ -236,8 +258,12 @@ class TestGenerateProposalLabelsOp(OpTest): 'GtClasses': (self.gt_classes[0], self.gts_lod), 'IsCrowd': (self.is_crowd[0], self.gts_lod), 'GtBoxes': (self.gt_boxes[0], self.gts_lod), - 'ImInfo': self.im_info + 'ImInfo': self.im_info, } + if self.max_overlaps is not None: + self.inputs['MaxOverlap'] = (self.max_overlaps[0], + self.rpn_rois_lod) + self.attrs = { 'batch_size_per_im': self.batch_size_per_im, 'fg_fraction': self.fg_fraction, @@ -256,6 +282,7 @@ class TestGenerateProposalLabelsOp(OpTest): 'BboxTargets': (self.bbox_targets, [self.lod]), 'BboxInsideWeights': (self.bbox_inside_weights, [self.lod]), 'BboxOutsideWeights': (self.bbox_outside_weights, [self.lod]), + 'MaxOverlapWithGT': (self.max_overlap_with_gt, [self.lod]), } def test_check_output(self): @@ -267,12 +294,13 @@ class TestGenerateProposalLabelsOp(OpTest): def init_test_cascade(self, ): self.is_cascade_rcnn = False + self.max_overlaps = None def init_use_random(self): self.use_random = False def init_test_params(self): - self.batch_size_per_im = 512 + self.batch_size_per_im = 100 self.fg_fraction = 0.25 self.fg_thresh = 0.5 self.bg_thresh_hi = 0.5 @@ -284,7 +312,7 @@ class TestGenerateProposalLabelsOp(OpTest): def init_test_input(self): np.random.seed(0) gt_nums = 6 # Keep same with batch_size_per_im for unittest - proposal_nums = 2000 if not self.is_cascade_rcnn else 512 #self.batch_size_per_im - gt_nums + proposal_nums = 200 images_shape = [[64, 64]] self.im_info = np.ones((len(images_shape), 3)).astype(np.float32) for i in range(len(images_shape)): @@ -301,24 +329,16 @@ class TestGenerateProposalLabelsOp(OpTest): self.gt_boxes = [gt['boxes'] for gt in ground_truth] self.is_crowd = [gt['is_crowd'] for gt in ground_truth] - if self.is_cascade_rcnn: - rpn_rois_new = [] - for im_i in range(len(self.im_info)): - gt_boxes = self.gt_boxes[im_i] - rpn_rois = np.vstack( - [gt_boxes, self.rpn_rois[im_i][len(gt_boxes):, :]]) - rpn_rois_new.append(rpn_rois) - self.rpn_rois = rpn_rois_new - def init_test_output(self): self.rois, self.labels_int32, self.bbox_targets, \ self.bbox_inside_weights, self.bbox_outside_weights, \ + self.max_overlap_with_gt, \ self.lod = generate_proposal_labels_in_python( self.rpn_rois, self.gt_classes, self.is_crowd, self.gt_boxes, self.im_info, self.batch_size_per_im, self.fg_fraction, self.fg_thresh, self.bg_thresh_hi, self.bg_thresh_lo, self.bbox_reg_weights, self.class_nums, self.use_random, - self.is_cls_agnostic, self.is_cascade_rcnn + self.is_cls_agnostic, self.is_cascade_rcnn, self.max_overlaps ) self.rois = np.vstack(self.rois) self.labels_int32 = np.hstack(self.labels_int32) @@ -326,11 +346,18 @@ class TestGenerateProposalLabelsOp(OpTest): self.bbox_targets = np.vstack(self.bbox_targets) self.bbox_inside_weights = np.vstack(self.bbox_inside_weights) self.bbox_outside_weights = np.vstack(self.bbox_outside_weights) + self.max_overlap_with_gt = np.vstack(self.max_overlap_with_gt) class TestCascade(TestGenerateProposalLabelsOp): def init_test_cascade(self): self.is_cascade_rcnn = True + roi_num = len(self.rpn_rois[0]) + self.max_overlaps = [] + max_overlap = np.random.rand(roi_num).astype('float32') + # Make GT samples with overlap = 1 + max_overlap[max_overlap > 0.9] = 1. + self.max_overlaps.append(max_overlap) class TestUseRandom(TestGenerateProposalLabelsOp): @@ -389,6 +416,15 @@ class TestOnlyGT(TestCascade): self.rpn_rois_lod = self.gts_lod +class TestOnlyGT2(TestCascade): + def init_test_cascade(self): + self.is_cascade_rcnn = True + roi_num = len(self.rpn_rois[0]) + self.max_overlaps = [] + max_overlap = np.ones(roi_num).astype('float32') + self.max_overlaps.append(max_overlap) + + def _generate_proposals(images_shape, proposal_nums): rpn_rois = [] rpn_rois_lod = [] -- GitLab