From 9e2e893f5975b282fa7a7d0bc599971ab342bef8 Mon Sep 17 00:00:00 2001 From: Xingyuan Bu Date: Fri, 14 Sep 2018 14:53:37 +0800 Subject: [PATCH] Enhence generate_proposal_labels_op and fix some bug. (#13239) * Enhence generate_proposal_labels_op * Fix bug in generate_proposals_op * Refine rpn_target_assign_op. * by Bu Xingyuan, Wang Guanzhong and Dang Qingqing --- paddle/fluid/API.spec | 4 +- paddle/fluid/operators/detection/bbox_util.h | 33 +- .../detection/generate_proposal_labels_op.cc | 139 ++-- .../detection/generate_proposals_op.cc | 46 +- .../detection/rpn_target_assign_op.cc | 602 ++++++++++++------ python/paddle/fluid/layers/detection.py | 85 ++- python/paddle/fluid/tests/test_detection.py | 143 +++-- ...py => test_generate_proposal_labels_op.py} | 93 +-- ...osals.py => test_generate_proposals_op.py} | 41 +- .../unittests/test_rpn_target_assign_op.py | 214 ++++--- 10 files changed, 874 insertions(+), 526 deletions(-) rename python/paddle/fluid/tests/unittests/{test_generate_proposal_labels.py => test_generate_proposal_labels_op.py} (77%) rename python/paddle/fluid/tests/unittests/{test_generate_proposals.py => test_generate_proposals_op.py} (88%) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 1971774527..e362d34864 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -305,9 +305,9 @@ paddle.fluid.layers.target_assign ArgSpec(args=['input', 'matched_indices', 'neg paddle.fluid.layers.detection_output ArgSpec(args=['loc', 'scores', 'prior_box', 'prior_box_var', 'background_label', 'nms_threshold', 'nms_top_k', 'keep_top_k', 'score_threshold', 'nms_eta'], varargs=None, keywords=None, defaults=(0, 0.3, 400, 200, 0.01, 1.0)) paddle.fluid.layers.ssd_loss ArgSpec(args=['location', 'confidence', 'gt_box', 'gt_label', 'prior_box', 'prior_box_var', 'background_label', 'overlap_threshold', 'neg_pos_ratio', 'neg_overlap', 'loc_loss_weight', 'conf_loss_weight', 'match_type', 'mining_type', 'normalize', 'sample_size'], varargs=None, keywords=None, defaults=(None, 0, 0.5, 3.0, 0.5, 1.0, 1.0, 'per_prediction', 'max_negative', True, None)) paddle.fluid.layers.detection_map ArgSpec(args=['detect_res', 'label', 'class_num', 'background_label', 'overlap_threshold', 'evaluate_difficult', 'has_state', 'input_states', 'out_states', 'ap_version'], varargs=None, keywords=None, defaults=(0, 0.3, True, None, None, None, 'integral')) -paddle.fluid.layers.rpn_target_assign ArgSpec(args=['loc', 'scores', 'anchor_box', 'anchor_var', 'gt_box', 'rpn_batch_size_per_im', 'fg_fraction', 'rpn_positive_overlap', 'rpn_negative_overlap'], varargs=None, keywords=None, defaults=(256, 0.25, 0.7, 0.3)) +paddle.fluid.layers.rpn_target_assign ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'is_crowd', 'im_info', 'rpn_batch_size_per_im', 'rpn_straddle_thresh', 'rpn_fg_fraction', 'rpn_positive_overlap', 'rpn_negative_overlap', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.0, 0.5, 0.7, 0.3, True)) paddle.fluid.layers.anchor_generator ArgSpec(args=['input', 'anchor_sizes', 'aspect_ratios', 'variance', 'stride', 'offset', 'name'], varargs=None, keywords=None, defaults=(None, None, [0.1, 0.1, 0.2, 0.2], None, 0.5, None)) -paddle.fluid.layers.generate_proposal_labels ArgSpec(args=['rpn_rois', 'gt_classes', 'gt_boxes', 'im_scales', 'batch_size_per_im', 'fg_fraction', 'fg_thresh', 'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights', 'class_nums'], varargs=None, keywords=None, defaults=(256, 0.25, 0.25, 0.5, 0.0, [0.1, 0.1, 0.2, 0.2], None)) +paddle.fluid.layers.generate_proposal_labels ArgSpec(args=['rpn_rois', 'gt_classes', 'is_crowd', 'gt_boxes', 'im_info', 'batch_size_per_im', 'fg_fraction', 'fg_thresh', 'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights', 'class_nums', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.25, 0.25, 0.5, 0.0, [0.1, 0.1, 0.2, 0.2], None, True)) paddle.fluid.layers.generate_proposals ArgSpec(args=['scores', 'bbox_deltas', 'im_info', 'anchors', 'variances', 'pre_nms_top_n', 'post_nms_top_n', 'nms_thresh', 'min_size', 'eta', 'name'], varargs=None, keywords=None, defaults=(6000, 1000, 0.5, 0.1, 1.0, None)) paddle.fluid.layers.iou_similarity ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) paddle.fluid.layers.box_coder ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) diff --git a/paddle/fluid/operators/detection/bbox_util.h b/paddle/fluid/operators/detection/bbox_util.h index 0dee178162..6abeca1da4 100644 --- a/paddle/fluid/operators/detection/bbox_util.h +++ b/paddle/fluid/operators/detection/bbox_util.h @@ -9,6 +9,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #pragma once +#include #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/tensor.h" @@ -21,7 +22,7 @@ namespace operators { */ template inline void BoxToDelta(const int box_num, const framework::Tensor& ex_boxes, - const framework::Tensor& gt_boxes, const T* weights, + const framework::Tensor& gt_boxes, const float* weights, const bool normalized, framework::Tensor* box_delta) { auto ex_boxes_et = framework::EigenTensor::From(ex_boxes); auto gt_boxes_et = framework::EigenTensor::From(gt_boxes); @@ -62,5 +63,35 @@ void Gather(const T* in, const int in_stride, const int* index, const int num, } } +template +void BboxOverlaps(const framework::Tensor& r_boxes, + const framework::Tensor& c_boxes, + framework::Tensor* overlaps) { + auto r_boxes_et = framework::EigenTensor::From(r_boxes); + auto c_boxes_et = framework::EigenTensor::From(c_boxes); + auto overlaps_et = framework::EigenTensor::From(*overlaps); + int r_num = r_boxes.dims()[0]; + int c_num = c_boxes.dims()[0]; + auto zero = static_cast(0.0); + T r_box_area, c_box_area, x_min, y_min, x_max, y_max, inter_w, inter_h, + inter_area; + for (int i = 0; i < r_num; ++i) { + r_box_area = (r_boxes_et(i, 2) - r_boxes_et(i, 0) + 1) * + (r_boxes_et(i, 3) - r_boxes_et(i, 1) + 1); + for (int j = 0; j < c_num; ++j) { + c_box_area = (c_boxes_et(j, 2) - c_boxes_et(j, 0) + 1) * + (c_boxes_et(j, 3) - c_boxes_et(j, 1) + 1); + x_min = std::max(r_boxes_et(i, 0), c_boxes_et(j, 0)); + y_min = std::max(r_boxes_et(i, 1), c_boxes_et(j, 1)); + x_max = std::min(r_boxes_et(i, 2), c_boxes_et(j, 2)); + y_max = std::min(r_boxes_et(i, 3), c_boxes_et(j, 3)); + inter_w = std::max(x_max - x_min + 1, zero); + inter_h = std::max(y_max - y_min + 1, zero); + inter_area = inter_w * inter_h; + overlaps_et(i, j) = inter_area / (r_box_area + c_box_area - inter_area); + } + } +} + } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/detection/generate_proposal_labels_op.cc b/paddle/fluid/operators/detection/generate_proposal_labels_op.cc index be06dc1974..d7a53f1bef 100644 --- a/paddle/fluid/operators/detection/generate_proposal_labels_op.cc +++ b/paddle/fluid/operators/detection/generate_proposal_labels_op.cc @@ -42,10 +42,11 @@ class GenerateProposalLabelsOp : public framework::OperatorWithKernel { "Input(RpnRois) shouldn't be null."); PADDLE_ENFORCE(ctx->HasInput("GtClasses"), "Input(GtClasses) shouldn't be null."); + PADDLE_ENFORCE(ctx->HasInput("IsCrowd"), + "Input(IsCrowd) shouldn't be null."); PADDLE_ENFORCE(ctx->HasInput("GtBoxes"), "Input(GtBoxes) shouldn't be null."); - PADDLE_ENFORCE(ctx->HasInput("ImScales"), - "Input(ImScales) shouldn't be null."); + PADDLE_ENFORCE(ctx->HasInput("ImInfo"), "Input(ImInfo) shouldn't be null."); PADDLE_ENFORCE(ctx->HasOutput("Rois"), "Output(Rois) of RpnTargetAssignOp should not be null"); @@ -64,22 +65,21 @@ class GenerateProposalLabelsOp : public framework::OperatorWithKernel { auto rpn_rois_dims = ctx->GetInputDim("RpnRois"); auto gt_classes_dims = ctx->GetInputDim("GtClasses"); + auto is_crowd_dims = ctx->GetInputDim("IsCrowd"); auto gt_boxes_dims = ctx->GetInputDim("GtBoxes"); - auto im_scales_dims = ctx->GetInputDim("ImScales"); + auto im_info_dims = ctx->GetInputDim("ImInfo"); PADDLE_ENFORCE_EQ(rpn_rois_dims.size(), 2, "The rank of Input(RpnRois) must be 2."); - PADDLE_ENFORCE_EQ(gt_classes_dims.size(), 1, - "The rank of Input(GtClasses) must be 1."); PADDLE_ENFORCE_EQ(gt_boxes_dims.size(), 2, "The rank of Input(GtBoxes) must be 2."); - PADDLE_ENFORCE_EQ(im_scales_dims.size(), 1, - "The rank of Input(ImScales) must be 1."); + PADDLE_ENFORCE_EQ(im_info_dims.size(), 2, + "The rank of Input(ImInfo) must be 2."); int class_nums = ctx->Attrs().Get("class_nums"); ctx->SetOutputDim("Rois", {-1, 4}); - ctx->SetOutputDim("LabelsInt32", {-1}); + ctx->SetOutputDim("LabelsInt32", {-1, 1}); ctx->SetOutputDim("BboxTargets", {-1, 4 * class_nums}); ctx->SetOutputDim("BboxInsideWeights", {-1, 4 * class_nums}); ctx->SetOutputDim("BboxOutsideWeights", {-1, 4 * class_nums}); @@ -105,45 +105,18 @@ void Concat(const platform::CPUDeviceContext& context, concat_functor(context, inputs, axis, out_tensor); } -template -void BboxOverlaps(const Tensor& r_boxes, const Tensor& c_boxes, - Tensor* overlaps) { - auto r_boxes_et = framework::EigenTensor::From(r_boxes); - auto c_boxes_et = framework::EigenTensor::From(c_boxes); - auto overlaps_et = framework::EigenTensor::From(*overlaps); - int r_num = r_boxes.dims()[0]; - int c_num = c_boxes.dims()[0]; - auto zero = static_cast(0.0); - T r_box_area, c_box_area, x_min, y_min, x_max, y_max, inter_w, inter_h, - inter_area; - for (int i = 0; i < r_num; ++i) { - r_box_area = (r_boxes_et(i, 2) - r_boxes_et(i, 0) + 1) * - (r_boxes_et(i, 3) - r_boxes_et(i, 1) + 1); - for (int j = 0; j < c_num; ++j) { - c_box_area = (c_boxes_et(j, 2) - c_boxes_et(j, 0) + 1) * - (c_boxes_et(j, 3) - c_boxes_et(j, 1) + 1); - x_min = std::max(r_boxes_et(i, 0), c_boxes_et(j, 0)); - y_min = std::max(r_boxes_et(i, 1), c_boxes_et(j, 1)); - x_max = std::min(r_boxes_et(i, 2), c_boxes_et(j, 2)); - y_max = std::min(r_boxes_et(i, 3), c_boxes_et(j, 3)); - inter_w = std::max(x_max - x_min + 1, zero); - inter_h = std::max(y_max - y_min + 1, zero); - inter_area = inter_w * inter_h; - overlaps_et(i, j) = inter_area / (r_box_area + c_box_area - inter_area); - } - } -} - template std::vector> SampleFgBgGt( const platform::CPUDeviceContext& context, Tensor* iou, - const int batch_size_per_im, const float fg_fraction, const float fg_thresh, - const float bg_thresh_hi, const float bg_thresh_lo, - std::minstd_rand engine) { + const Tensor& is_crowd, const int batch_size_per_im, + const float fg_fraction, const float fg_thresh, const float bg_thresh_hi, + const float bg_thresh_lo, std::minstd_rand engine, const bool use_random) { std::vector fg_inds; std::vector bg_inds; std::vector gt_inds; - T* proposal_to_gt_overlaps = iou->mutable_data(context.GetPlace()); + int64_t gt_num = is_crowd.numel(); + const int* crowd_data = is_crowd.data(); + T* proposal_to_gt_overlaps = iou->data(); int64_t row = iou->dims()[0]; int64_t col = iou->dims()[1]; float epsilon = 0.00001; @@ -152,6 +125,9 @@ std::vector> SampleFgBgGt( for (int64_t i = 0; i < row; ++i) { const T* v = proposal_to_gt_overlaps + i * col; T max_overlap = *std::max_element(v, v + col); + if ((i < gt_num) && (crowd_data[i])) { + max_overlap = -1.0; + } if (max_overlap > fg_thresh) { for (int64_t j = 0; j < col; ++j) { T val = proposal_to_gt_overlaps[i * col + j]; @@ -170,17 +146,19 @@ std::vector> SampleFgBgGt( } // Reservoir Sampling + std::uniform_real_distribution uniform(0, 1); int fg_rois_per_im = std::floor(batch_size_per_im * fg_fraction); int fg_rois_this_image = fg_inds.size(); int fg_rois_per_this_image = std::min(fg_rois_per_im, fg_rois_this_image); - std::uniform_real_distribution uniform(0, 1); - const int64_t fg_size = static_cast(fg_inds.size()); - if (fg_size > fg_rois_per_this_image) { - for (int64_t i = fg_rois_per_this_image; i < fg_size; ++i) { - int rng_ind = std::floor(uniform(engine) * i); - if (rng_ind < fg_rois_per_this_image) { - std::iter_swap(fg_inds.begin() + rng_ind, fg_inds.begin() + i); - std::iter_swap(gt_inds.begin() + rng_ind, gt_inds.begin() + i); + if (use_random) { + const int64_t fg_size = static_cast(fg_inds.size()); + if (fg_size > fg_rois_per_this_image) { + for (int64_t i = fg_rois_per_this_image; i < fg_size; ++i) { + int rng_ind = std::floor(uniform(engine) * i); + if (rng_ind < fg_rois_per_this_image) { + std::iter_swap(fg_inds.begin() + rng_ind, fg_inds.begin() + i); + std::iter_swap(gt_inds.begin() + rng_ind, gt_inds.begin() + i); + } } } } @@ -192,12 +170,14 @@ std::vector> SampleFgBgGt( int bg_rois_per_image = batch_size_per_im - fg_rois_per_this_image; int bg_rois_this_image = bg_inds.size(); int bg_rois_per_this_image = std::min(bg_rois_per_image, bg_rois_this_image); - const int64_t bg_size = static_cast(bg_inds.size()); - if (bg_size > bg_rois_per_this_image) { - for (int64_t i = bg_rois_per_this_image; i < bg_size; ++i) { - int rng_ind = std::floor(uniform(engine) * i); - if (rng_ind < fg_rois_per_this_image) - std::iter_swap(bg_inds.begin() + rng_ind, bg_inds.begin() + i); + if (use_random) { + const int64_t bg_size = static_cast(bg_inds.size()); + if (bg_size > bg_rois_per_this_image) { + for (int64_t i = bg_rois_per_this_image; i < bg_size; ++i) { + int rng_ind = std::floor(uniform(engine) * i); + if (rng_ind < fg_rois_per_this_image) + std::iter_swap(bg_inds.begin() + rng_ind, bg_inds.begin() + i); + } } } std::vector new_bg_inds(bg_inds.begin(), @@ -248,14 +228,14 @@ void GatherBoxesLabels(const platform::CPUDeviceContext& context, template std::vector SampleRoisForOneImage( const platform::CPUDeviceContext& context, Tensor* rpn_rois, - Tensor* gt_classes, Tensor* gt_boxes, Tensor* im_scale, + Tensor* gt_classes, Tensor* is_crowd, Tensor* gt_boxes, Tensor* im_info, const int batch_size_per_im, const float fg_fraction, const float fg_thresh, const float bg_thresh_hi, const float bg_thresh_lo, const std::vector& bbox_reg_weights, const int class_nums, - std::minstd_rand engine) { + std::minstd_rand engine, bool use_random) { auto rpn_rois_et = framework::EigenTensor::From(*rpn_rois); - auto im_scale_data = im_scale->data()[0]; - rpn_rois_et = rpn_rois_et / im_scale_data; + auto im_scale = im_info->data()[2]; + rpn_rois_et = rpn_rois_et / im_scale; Tensor boxes; int proposals_num = gt_boxes->dims()[0] + rpn_rois->dims()[0]; @@ -270,8 +250,8 @@ std::vector SampleRoisForOneImage( // Generate proposal index std::vector> fg_bg_gt = SampleFgBgGt( - context, &proposal_to_gt_overlaps, batch_size_per_im, fg_fraction, - fg_thresh, bg_thresh_hi, bg_thresh_lo, engine); + context, &proposal_to_gt_overlaps, *is_crowd, batch_size_per_im, + fg_fraction, fg_thresh, bg_thresh_hi, bg_thresh_lo, engine, use_random); std::vector fg_inds = fg_bg_gt[0]; std::vector bg_inds = fg_bg_gt[1]; std::vector gt_inds = fg_bg_gt[2]; @@ -291,15 +271,15 @@ std::vector SampleRoisForOneImage( // Compute targets Tensor bbox_targets_single; bbox_targets_single.mutable_data(bbox_dim, context.GetPlace()); - BoxToDelta(fg_num, sampled_boxes, sampled_gts, nullptr, false, - &bbox_targets_single); + BoxToDelta(fg_num, sampled_boxes, sampled_gts, bbox_reg_weights.data(), + false, &bbox_targets_single); // Scale rois Tensor sampled_rois; sampled_rois.mutable_data(sampled_boxes.dims(), context.GetPlace()); auto sampled_rois_et = framework::EigenTensor::From(sampled_rois); auto sampled_boxes_et = framework::EigenTensor::From(sampled_boxes); - sampled_rois_et = sampled_boxes_et * im_scale_data; + sampled_rois_et = sampled_boxes_et * im_scale; // Expand box targets Tensor bbox_targets, bbox_inside_weights, bbox_outside_weights; @@ -351,8 +331,9 @@ class GenerateProposalLabelsKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { auto* rpn_rois = context.Input("RpnRois"); auto* gt_classes = context.Input("GtClasses"); + auto* is_crowd = context.Input("IsCrowd"); auto* gt_boxes = context.Input("GtBoxes"); - auto* im_scales = context.Input("ImScales"); + auto* im_info = context.Input("ImInfo"); auto* rois = context.Output("Rois"); auto* labels_int32 = context.Output("LabelsInt32"); @@ -369,18 +350,21 @@ class GenerateProposalLabelsKernel : public framework::OpKernel { std::vector bbox_reg_weights = context.Attr>("bbox_reg_weights"); int class_nums = context.Attr("class_nums"); + bool use_random = context.Attr("use_random"); PADDLE_ENFORCE_EQ(rpn_rois->lod().size(), 1UL, "GenerateProposalLabelsOp rpn_rois needs 1 level of LoD"); PADDLE_ENFORCE_EQ( gt_classes->lod().size(), 1UL, "GenerateProposalLabelsOp gt_classes needs 1 level of LoD"); + PADDLE_ENFORCE_EQ(is_crowd->lod().size(), 1UL, + "GenerateProposalLabelsOp is_crowd needs 1 level of LoD"); PADDLE_ENFORCE_EQ(gt_boxes->lod().size(), 1UL, "GenerateProposalLabelsOp gt_boxes needs 1 level of LoD"); int64_t n = static_cast(rpn_rois->lod().back().size() - 1); rois->mutable_data({n * batch_size_per_im, kBoxDim}, context.GetPlace()); - labels_int32->mutable_data({n * batch_size_per_im}, + labels_int32->mutable_data({n * batch_size_per_im, 1}, context.GetPlace()); bbox_targets->mutable_data({n * batch_size_per_im, kBoxDim * class_nums}, context.GetPlace()); @@ -391,8 +375,7 @@ class GenerateProposalLabelsKernel : public framework::OpKernel { std::random_device rnd; std::minstd_rand engine; - int seed = - context.Attr("fix_seed") ? context.Attr("seed") : rnd(); + int seed = rnd(); engine.seed(seed); framework::LoD lod; @@ -403,19 +386,23 @@ class GenerateProposalLabelsKernel : public framework::OpKernel { auto rpn_rois_lod = rpn_rois->lod().back(); auto gt_classes_lod = gt_classes->lod().back(); + auto is_crowd_lod = is_crowd->lod().back(); auto gt_boxes_lod = gt_boxes->lod().back(); for (int i = 0; i < n; ++i) { Tensor rpn_rois_slice = rpn_rois->Slice(rpn_rois_lod[i], rpn_rois_lod[i + 1]); Tensor gt_classes_slice = gt_classes->Slice(gt_classes_lod[i], gt_classes_lod[i + 1]); + Tensor is_crowd_slice = + is_crowd->Slice(is_crowd_lod[i], is_crowd_lod[i + 1]); Tensor gt_boxes_slice = gt_boxes->Slice(gt_boxes_lod[i], gt_boxes_lod[i + 1]); - Tensor im_scales_slice = im_scales->Slice(i, i + 1); + Tensor im_info_slice = im_info->Slice(i, i + 1); std::vector tensor_output = SampleRoisForOneImage( - dev_ctx, &rpn_rois_slice, >_classes_slice, >_boxes_slice, - &im_scales_slice, batch_size_per_im, fg_fraction, fg_thresh, - bg_thresh_hi, bg_thresh_lo, bbox_reg_weights, class_nums, engine); + dev_ctx, &rpn_rois_slice, >_classes_slice, &is_crowd_slice, + >_boxes_slice, &im_info_slice, batch_size_per_im, fg_fraction, + fg_thresh, bg_thresh_hi, bg_thresh_lo, bbox_reg_weights, class_nums, + engine, use_random); Tensor sampled_rois = tensor_output[0]; Tensor sampled_labels_int32 = tensor_output[1]; Tensor sampled_bbox_targets = tensor_output[2]; @@ -442,7 +429,7 @@ class GenerateProposalLabelsKernel : public framework::OpKernel { bbox_inside_weights->set_lod(lod); bbox_outside_weights->set_lod(lod); rois->Resize({num_rois, kBoxDim}); - labels_int32->Resize({num_rois}); + labels_int32->Resize({num_rois, 1}); bbox_targets->Resize({num_rois, kBoxDim * class_nums}); bbox_inside_weights->Resize({num_rois, kBoxDim * class_nums}); bbox_outside_weights->Resize({num_rois, kBoxDim * class_nums}); @@ -455,8 +442,9 @@ class GenerateProposalLabelsOpMaker : public framework::OpProtoAndCheckerMaker { // TODO(buxingyuan): Add Document AddInput("RpnRois", "RpnRois."); AddInput("GtClasses", "GtClasses."); + AddInput("IsCrowd", "IsCrowd."); AddInput("GtBoxes", "GtBoxes."); - AddInput("ImScales", "ImScales."); + AddInput("ImInfo", "ImInfo."); AddOutput("Rois", "Rois."); AddOutput("LabelsInt32", "LabelsInt32."); @@ -471,8 +459,7 @@ class GenerateProposalLabelsOpMaker : public framework::OpProtoAndCheckerMaker { AddAttr("bg_thresh_lo", "bg_thresh_lo"); AddAttr>("bbox_reg_weights", "bbox_reg_weights"); AddAttr("class_nums", "class_nums"); - AddAttr("fix_seed", "fix_seed").SetDefault(false); - AddAttr("seed", "seed").SetDefault(0); + AddAttr("use_random", "use_random").SetDefault(true); AddComment(R"DOC( Generate Proposals Labels Operator. diff --git a/paddle/fluid/operators/detection/generate_proposals_op.cc b/paddle/fluid/operators/detection/generate_proposals_op.cc index ebe6830ecc..c33aa25536 100644 --- a/paddle/fluid/operators/detection/generate_proposals_op.cc +++ b/paddle/fluid/operators/detection/generate_proposals_op.cc @@ -89,12 +89,11 @@ void BoxCoder(const platform::DeviceContext &ctx, Tensor *all_anchors, } for (int64_t i = 0; i < row; ++i) { - T anchor_width = anchor_data[i * len + 2] - anchor_data[i * len]; - T anchor_height = anchor_data[i * len + 3] - anchor_data[i * len + 1]; + T anchor_width = anchor_data[i * len + 2] - anchor_data[i * len] + 1.0; + T anchor_height = anchor_data[i * len + 3] - anchor_data[i * len + 1] + 1.0; - T anchor_center_x = (anchor_data[i * len + 2] + anchor_data[i * len]) / 2; - T anchor_center_y = - (anchor_data[i * len + 3] + anchor_data[i * len + 1]) / 2; + T anchor_center_x = anchor_data[i * len] + 0.5 * anchor_width; + T anchor_center_y = anchor_data[i * len + 1] + 0.5 * anchor_height; T bbox_center_x = 0, bbox_center_y = 0; T bbox_width = 0, bbox_height = 0; @@ -106,25 +105,31 @@ void BoxCoder(const platform::DeviceContext &ctx, Tensor *all_anchors, bbox_center_y = variances_data[i * len + 1] * bbox_deltas_data[i * len + 1] * anchor_height + anchor_center_y; - bbox_width = std::exp(variances_data[i * len + 2] * - bbox_deltas_data[i * len + 2]) * + bbox_width = std::exp(std::min(variances_data[i * len + 2] * + bbox_deltas_data[i * len + 2], + std::log(1000.0 / 16.0))) * anchor_width; - bbox_height = std::exp(variances_data[i * len + 3] * - bbox_deltas_data[i * len + 3]) * + bbox_height = std::exp(std::min(variances_data[i * len + 3] * + bbox_deltas_data[i * len + 3], + std::log(1000.0 / 16.0))) * anchor_height; } else { bbox_center_x = bbox_deltas_data[i * len] * anchor_width + anchor_center_x; bbox_center_y = bbox_deltas_data[i * len + 1] * anchor_height + anchor_center_y; - bbox_width = std::exp(bbox_deltas_data[i * len + 2]) * anchor_width; - bbox_height = std::exp(bbox_deltas_data[i * len + 3]) * anchor_height; + bbox_width = std::exp(std::min(bbox_deltas_data[i * len + 2], + std::log(1000.0 / 16.0))) * + anchor_width; + bbox_height = std::exp(std::min(bbox_deltas_data[i * len + 3], + std::log(1000.0 / 16.0))) * + anchor_height; } proposals_data[i * len] = bbox_center_x - bbox_width / 2; proposals_data[i * len + 1] = bbox_center_y - bbox_height / 2; - proposals_data[i * len + 2] = bbox_center_x + bbox_width / 2; - proposals_data[i * len + 3] = bbox_center_y + bbox_height / 2; + proposals_data[i * len + 2] = bbox_center_x + bbox_width / 2 - 1; + proposals_data[i * len + 3] = bbox_center_y + bbox_height / 2 - 1; } // return proposals; } @@ -156,18 +161,23 @@ void FilterBoxes(const platform::DeviceContext &ctx, Tensor *boxes, float min_size, const Tensor &im_info, Tensor *keep) { const T *im_info_data = im_info.data(); T *boxes_data = boxes->mutable_data(ctx.GetPlace()); - min_size *= im_info_data[2]; + T im_scale = im_info_data[2]; keep->Resize({boxes->dims()[0], 1}); + min_size = std::max(min_size, 1.0f); int *keep_data = keep->mutable_data(ctx.GetPlace()); int keep_len = 0; for (int i = 0; i < boxes->dims()[0]; ++i) { T ws = boxes_data[4 * i + 2] - boxes_data[4 * i] + 1; T hs = boxes_data[4 * i + 3] - boxes_data[4 * i + 1] + 1; + T ws_origin_scale = + (boxes_data[4 * i + 2] - boxes_data[4 * i]) / im_scale + 1; + T hs_origin_scale = + (boxes_data[4 * i + 3] - boxes_data[4 * i + 1]) / im_scale + 1; T x_ctr = boxes_data[4 * i] + ws / 2; T y_ctr = boxes_data[4 * i + 1] + hs / 2; - if (ws >= min_size && hs >= min_size && x_ctr <= im_info_data[1] && - y_ctr <= im_info_data[0]) { + if (ws_origin_scale >= min_size && hs_origin_scale >= min_size && + x_ctr <= im_info_data[1] && y_ctr <= im_info_data[0]) { keep_data[keep_len++] = i; } } @@ -218,8 +228,8 @@ T JaccardOverlap(const T *box1, const T *box2, const bool normalized) { const T inter_ymin = std::max(box1[1], box2[1]); const T inter_xmax = std::min(box1[2], box2[2]); const T inter_ymax = std::min(box1[3], box2[3]); - const T inter_w = inter_xmax - inter_xmin; - const T inter_h = inter_ymax - inter_ymin; + const T inter_w = std::max(0.0f, inter_xmax - inter_xmin + 1); + const T inter_h = std::max(0.0f, inter_ymax - inter_ymin + 1); const T inter_area = inter_w * inter_h; const T bbox1_area = BBoxArea(box1, normalized); const T bbox2_area = BBoxArea(box2, normalized); diff --git a/paddle/fluid/operators/detection/rpn_target_assign_op.cc b/paddle/fluid/operators/detection/rpn_target_assign_op.cc index 88757f25cd..dda423efd3 100644 --- a/paddle/fluid/operators/detection/rpn_target_assign_op.cc +++ b/paddle/fluid/operators/detection/rpn_target_assign_op.cc @@ -31,8 +31,14 @@ class RpnTargetAssignOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("DistMat"), - "Input(DistMat) of RpnTargetAssignOp should not be null"); + PADDLE_ENFORCE(ctx->HasInput("Anchor"), + "Input(Anchor) of RpnTargetAssignOp should not be null"); + PADDLE_ENFORCE(ctx->HasInput("GtBoxes"), + "Input(GtBoxes) of RpnTargetAssignOp should not be null"); + PADDLE_ENFORCE(ctx->HasInput("IsCrowd"), + "Input(Anchor) of RpnTargetAssignOp should not be null"); + PADDLE_ENFORCE(ctx->HasInput("ImInfo"), + "Input(ImInfo) of RpnTargetAssignOp should not be null"); PADDLE_ENFORCE( ctx->HasOutput("LocationIndex"), @@ -43,10 +49,20 @@ class RpnTargetAssignOp : public framework::OperatorWithKernel { PADDLE_ENFORCE( ctx->HasOutput("TargetLabel"), "Output(TargetLabel) of RpnTargetAssignOp should not be null"); - - auto in_dims = ctx->GetInputDim("DistMat"); - PADDLE_ENFORCE_EQ(in_dims.size(), 2, - "The rank of Input(DistMat) must be 2."); + PADDLE_ENFORCE( + ctx->HasOutput("TargetBBox"), + "Output(TargetBBox) of RpnTargetAssignOp should not be null"); + + auto anchor_dims = ctx->GetInputDim("Anchor"); + auto gt_boxes_dims = ctx->GetInputDim("GtBoxes"); + auto is_crowd_dims = ctx->GetInputDim("IsCrowd"); + auto im_info_dims = ctx->GetInputDim("ImInfo"); + PADDLE_ENFORCE_EQ(anchor_dims.size(), 2, + "The rank of Input(Anchor) must be 2."); + PADDLE_ENFORCE_EQ(gt_boxes_dims.size(), 2, + "The rank of Input(GtBoxes) must be 2."); + PADDLE_ENFORCE_EQ(im_info_dims.size(), 2, + "The rank of Input(ImInfo) must be 2."); ctx->SetOutputDim("LocationIndex", {-1}); ctx->SetOutputDim("ScoreIndex", {-1}); @@ -59,198 +75,383 @@ class RpnTargetAssignOp : public framework::OperatorWithKernel { const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType( - ctx.Input("DistMat")->type()), + ctx.Input("Anchor")->type()), platform::CPUPlace()); } }; template -class RpnTargetAssignKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto* anchor_t = context.Input("Anchor"); // (H*W*A) * 4 - auto* gt_bbox_t = context.Input("GtBox"); - auto* dist_t = context.Input("DistMat"); +void AppendRpns(LoDTensor* out, int64_t offset, Tensor* to_add) { + auto* out_data = out->data(); + auto* to_add_data = to_add->data(); + memcpy(out_data + offset, to_add_data, to_add->numel() * sizeof(T)); +} + +template +std::vector FilterStraddleAnchor( + const platform::CPUDeviceContext& context, const Tensor* anchor, + const float rpn_straddle_thresh, T im_height, T im_width) { + std::vector inds_inside; + int anchor_num = anchor->dims()[0]; + auto* anchor_data = anchor->data(); + if (rpn_straddle_thresh >= 0) { + int index; + for (int i = 0; i < anchor_num; ++i) { + index = i * 4; + if ((anchor_data[index + 0] >= -rpn_straddle_thresh) && + (anchor_data[index + 1] >= -rpn_straddle_thresh) && + (anchor_data[index + 2] < im_width + rpn_straddle_thresh) && + (anchor_data[index + 3] < im_height + rpn_straddle_thresh)) { + inds_inside.emplace_back(i); + } + } + } else { + for (int i = 0; i < anchor_num; ++i) { + inds_inside.emplace_back(i); + } + } + int inside_num = inds_inside.size(); + Tensor inds_inside_t; + int* inds_inside_data = + inds_inside_t.mutable_data({inside_num}, context.GetPlace()); + std::copy(inds_inside.begin(), inds_inside.end(), inds_inside_data); + Tensor inside_anchor_t; + T* inside_anchor_data = + inside_anchor_t.mutable_data({inside_num, 4}, context.GetPlace()); + Gather(anchor->data(), 4, inds_inside_data, inside_num, + inside_anchor_data); + std::vector res; + res.emplace_back(inds_inside_t); + res.emplace_back(inside_anchor_t); + return res; +} + +template +Tensor FilterCrowdGt(const platform::CPUDeviceContext& context, + Tensor* gt_boxes, Tensor* is_crowd) { + int gt_num = gt_boxes->dims()[0]; + std::vector not_crowd_inds; + auto* is_crowd_data = is_crowd->data(); + for (int i = 0; i < gt_num; ++i) { + if (is_crowd_data[i] == 0) { + not_crowd_inds.emplace_back(i); + } + } + int ncrowd_num = not_crowd_inds.size(); + Tensor ncrowd_gt_boxes; + T* ncrowd_gt_boxes_data = + ncrowd_gt_boxes.mutable_data({ncrowd_num, 4}, context.GetPlace()); + Gather(gt_boxes->data(), 4, not_crowd_inds.data(), ncrowd_num, + ncrowd_gt_boxes_data); + return ncrowd_gt_boxes; +} + +void ReservoirSampling(const int num, std::vector* inds, + std::minstd_rand engine, bool use_random) { + std::uniform_real_distribution uniform(0, 1); + size_t len = inds->size(); + if (len > static_cast(num)) { + if (use_random) { + for (size_t i = num; i < len; ++i) { + int rng_ind = std::floor(uniform(engine) * i); + if (rng_ind < num) + std::iter_swap(inds->begin() + rng_ind, inds->begin() + i); + } + } + inds->resize(num); + } +} + +template +void ScoreAssign(const T* anchor_by_gt_overlap_data, + const Tensor& anchor_to_gt_max, const Tensor& gt_to_anchor_max, + const int rpn_batch_size_per_im, const float rpn_fg_fraction, + const float rpn_positive_overlap, + const float rpn_negative_overlap, std::vector* fg_inds, + std::vector* bg_inds, std::vector* tgt_lbl, + std::minstd_rand engine, bool use_random) { + float epsilon = 0.00001; + int anchor_num = anchor_to_gt_max.dims()[0]; + int gt_num = gt_to_anchor_max.dims()[0]; + std::vector target_label(anchor_num, -1); + std::vector fg_inds_fake; + std::vector bg_inds_fake; + const T* anchor_to_gt_max_data = anchor_to_gt_max.data(); + const T* gt_to_anchor_max_data = gt_to_anchor_max.data(); + // TODO(buxingyuan): Match with Detectron now + // but it seems here is a bug in two directions assignment + // in which the later one may overwrites the former one. + for (int64_t i = 0; i < anchor_num; ++i) { + bool is_anchors_with_max_overlap = false; + for (int64_t j = 0; j < gt_num; ++j) { + T value = anchor_by_gt_overlap_data[i * gt_num + j]; + T diff = std::abs(value - gt_to_anchor_max_data[j]); + if (diff < epsilon) { + is_anchors_with_max_overlap = true; + break; + } + } + bool is_anchor_great_than_thresh = + (anchor_to_gt_max_data[i] >= rpn_positive_overlap); + if (is_anchors_with_max_overlap || is_anchor_great_than_thresh) { + fg_inds_fake.push_back(i); + } + } - auto* loc_index_t = context.Output("LocationIndex"); - auto* score_index_t = context.Output("ScoreIndex"); - auto* tgt_bbox_t = context.Output("TargetBBox"); - auto* tgt_lbl_t = context.Output("TargetLabel"); + // Reservoir Sampling + int fg_num = static_cast(rpn_fg_fraction * rpn_batch_size_per_im); + ReservoirSampling(fg_num, &fg_inds_fake, engine, use_random); + fg_num = static_cast(fg_inds_fake.size()); + for (int64_t i = 0; i < fg_num; ++i) { + target_label[fg_inds_fake[i]] = 1; + } - auto lod = dist_t->lod().back(); - int64_t batch_num = static_cast(lod.size() - 1); - int64_t anchor_num = dist_t->dims()[1]; - PADDLE_ENFORCE_EQ(anchor_num, anchor_t->dims()[0]); + int bg_num = rpn_batch_size_per_im - fg_num; + for (int64_t i = 0; i < anchor_num; ++i) { + if (anchor_to_gt_max_data[i] < rpn_negative_overlap) { + bg_inds_fake.push_back(i); + } + } + ReservoirSampling(bg_num, &bg_inds_fake, engine, use_random); + bg_num = static_cast(bg_inds_fake.size()); + for (int64_t i = 0; i < bg_num; ++i) { + target_label[bg_inds_fake[i]] = 0; + } - int rpn_batch_size = context.Attr("rpn_batch_size_per_im"); - float pos_threshold = context.Attr("rpn_positive_overlap"); - float neg_threshold = context.Attr("rpn_negative_overlap"); - float fg_fraction = context.Attr("fg_fraction"); + for (int64_t i = 0; i < anchor_num; ++i) { + if (target_label[i] == 1) fg_inds->emplace_back(i); + if (target_label[i] == 0) bg_inds->emplace_back(i); + } + fg_num = fg_inds->size(); + bg_num = bg_inds->size(); + + tgt_lbl->resize(fg_num + bg_num, 0); + std::vector fg_lbl(fg_num, 1); + std::vector bg_lbl(bg_num, 0); + std::copy(fg_lbl.begin(), fg_lbl.end(), tgt_lbl->data()); + std::copy(bg_lbl.begin(), bg_lbl.end(), tgt_lbl->data() + fg_num); +} + +template +std::vector SampleRpnFgBgGt(const platform::CPUDeviceContext& ctx, + const Tensor& anchor_by_gt_overlap, + const int rpn_batch_size_per_im, + const float rpn_positive_overlap, + const float rpn_negative_overlap, + const float rpn_fg_fraction, + std::minstd_rand engine, bool use_random) { + auto* anchor_by_gt_overlap_data = anchor_by_gt_overlap.data(); + int anchor_num = anchor_by_gt_overlap.dims()[0]; + int gt_num = anchor_by_gt_overlap.dims()[1]; + + std::vector fg_inds; + std::vector bg_inds; + std::vector gt_inds; + std::vector tgt_lbl; + + // Calculate the max IoU between anchors and gt boxes + // Map from anchor to gt box that has highest overlap + auto place = ctx.GetPlace(); + Tensor anchor_to_gt_max, anchor_to_gt_argmax, gt_to_anchor_max; + anchor_to_gt_max.mutable_data({anchor_num}, place); + int* argmax = anchor_to_gt_argmax.mutable_data({anchor_num}, place); + gt_to_anchor_max.mutable_data({gt_num}, place); + + auto anchor_by_gt_overlap_et = + framework::EigenMatrix::From(anchor_by_gt_overlap); + auto anchor_to_gt_max_et = + framework::EigenVector::Flatten(anchor_to_gt_max); + auto gt_to_anchor_max_et = + framework::EigenVector::Flatten(gt_to_anchor_max); + auto anchor_to_gt_argmax_et = + framework::EigenVector::Flatten(anchor_to_gt_argmax); + anchor_to_gt_max_et = + anchor_by_gt_overlap_et.maximum(Eigen::DSizes(1)); + anchor_to_gt_argmax_et = + anchor_by_gt_overlap_et.argmax(1).template cast(); + gt_to_anchor_max_et = + anchor_by_gt_overlap_et.maximum(Eigen::DSizes(0)); + + // Follow the Faster RCNN's implementation + ScoreAssign(anchor_by_gt_overlap_data, anchor_to_gt_max, gt_to_anchor_max, + rpn_batch_size_per_im, rpn_fg_fraction, rpn_positive_overlap, + rpn_negative_overlap, &fg_inds, &bg_inds, &tgt_lbl, engine, + use_random); + + int fg_num = fg_inds.size(); + int bg_num = bg_inds.size(); + gt_inds.reserve(fg_num); + for (int i = 0; i < fg_num; ++i) { + gt_inds.emplace_back(argmax[fg_inds[i]]); + } - int fg_num_per_batch = static_cast(rpn_batch_size * fg_fraction); + Tensor loc_index_t, score_index_t, tgt_lbl_t, gt_inds_t; + int* loc_index_data = loc_index_t.mutable_data({fg_num}, place); + int* score_index_data = + score_index_t.mutable_data({fg_num + bg_num}, place); + int* tgt_lbl_data = tgt_lbl_t.mutable_data({fg_num + bg_num}, place); + int* gt_inds_data = gt_inds_t.mutable_data({fg_num}, place); + std::copy(fg_inds.begin(), fg_inds.end(), loc_index_data); + std::copy(fg_inds.begin(), fg_inds.end(), score_index_data); + std::copy(bg_inds.begin(), bg_inds.end(), score_index_data + fg_num); + std::copy(tgt_lbl.begin(), tgt_lbl.end(), tgt_lbl_data); + std::copy(gt_inds.begin(), gt_inds.end(), gt_inds_data); + std::vector loc_score_tgtlbl_gt; + loc_score_tgtlbl_gt.emplace_back(loc_index_t); + loc_score_tgtlbl_gt.emplace_back(score_index_t); + loc_score_tgtlbl_gt.emplace_back(tgt_lbl_t); + loc_score_tgtlbl_gt.emplace_back(gt_inds_t); + + return loc_score_tgtlbl_gt; +} - int64_t max_num = batch_num * anchor_num; +template +class RpnTargetAssignKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* anchor = context.Input("Anchor"); // (H*W*A) * 4 + auto* gt_boxes = context.Input("GtBoxes"); + auto* is_crowd = context.Input("IsCrowd"); + auto* im_info = context.Input("ImInfo"); + + auto* loc_index = context.Output("LocationIndex"); + auto* score_index = context.Output("ScoreIndex"); + auto* tgt_bbox = context.Output("TargetBBox"); + auto* tgt_lbl = context.Output("TargetLabel"); + + PADDLE_ENFORCE_EQ(gt_boxes->lod().size(), 1UL, + "RpnTargetAssignOp gt_boxes needs 1 level of LoD"); + PADDLE_ENFORCE_EQ(is_crowd->lod().size(), 1UL, + "RpnTargetAssignOp is_crowd needs 1 level of LoD"); + int64_t anchor_num = static_cast(anchor->dims()[0]); + int64_t batch_num = static_cast(gt_boxes->lod().back().size() - 1); + + int rpn_batch_size_per_im = context.Attr("rpn_batch_size_per_im"); + float rpn_straddle_thresh = context.Attr("rpn_straddle_thresh"); + float rpn_positive_overlap = context.Attr("rpn_positive_overlap"); + float rpn_negative_overlap = context.Attr("rpn_negative_overlap"); + float rpn_fg_fraction = context.Attr("rpn_fg_fraction"); + bool use_random = context.Attr("use_random"); + + int64_t max_num = batch_num * rpn_batch_size_per_im; auto place = context.GetPlace(); - tgt_bbox_t->mutable_data({max_num, 4}, place); - auto* loc_index = loc_index_t->mutable_data({max_num}, place); - auto* score_index = score_index_t->mutable_data({max_num}, place); + loc_index->mutable_data({max_num}, place); + score_index->mutable_data({max_num}, place); + tgt_bbox->mutable_data({max_num, 4}, place); + tgt_lbl->mutable_data({max_num, 1}, place); - Tensor tmp_tgt_lbl; - auto* tmp_lbl_data = tmp_tgt_lbl.mutable_data({max_num}, place); auto& dev_ctx = context.device_context(); - math::SetConstant iset; - iset(dev_ctx, &tmp_tgt_lbl, static_cast(-1)); std::random_device rnd; std::minstd_rand engine; - int seed = - context.Attr("fix_seed") ? context.Attr("seed") : rnd(); + int seed = rnd(); engine.seed(seed); - int fg_num = 0; - int bg_num = 0; + framework::LoD lod_loc, loc_score; + std::vector lod0_loc(1, 0); + std::vector lod0_score(1, 0); + + int total_loc_num = 0; + int total_score_num = 0; + auto gt_boxes_lod = gt_boxes->lod().back(); + auto is_crowd_lod = is_crowd->lod().back(); for (int i = 0; i < batch_num; ++i) { - Tensor dist = dist_t->Slice(lod[i], lod[i + 1]); - Tensor gt_bbox = gt_bbox_t->Slice(lod[i], lod[i + 1]); - auto fg_bg_gt = SampleFgBgGt(dev_ctx, dist, pos_threshold, neg_threshold, - rpn_batch_size, fg_num_per_batch, engine, - tmp_lbl_data + i * anchor_num); - - int cur_fg_num = fg_bg_gt[0].size(); - int cur_bg_num = fg_bg_gt[1].size(); - std::transform(fg_bg_gt[0].begin(), fg_bg_gt[0].end(), loc_index, - [i, anchor_num](int d) { return d + i * anchor_num; }); - memcpy(score_index, loc_index, cur_fg_num * sizeof(int)); - std::transform(fg_bg_gt[1].begin(), fg_bg_gt[1].end(), - score_index + cur_fg_num, - [i, anchor_num](int d) { return d + i * anchor_num; }); + Tensor gt_boxes_slice = + gt_boxes->Slice(gt_boxes_lod[i], gt_boxes_lod[i + 1]); + Tensor is_crowd_slice = + is_crowd->Slice(is_crowd_lod[i], is_crowd_lod[i + 1]); + Tensor im_info_slice = im_info->Slice(i, i + 1); + auto* im_info_data = im_info_slice.data(); + auto im_height = im_info_data[0]; + auto im_width = im_info_data[1]; + auto im_scale = im_info_data[2]; + + // Filter straddle anchor + std::vector filter_output = FilterStraddleAnchor( + dev_ctx, anchor, rpn_straddle_thresh, im_height, im_width); + Tensor inds_inside = filter_output[0]; + Tensor inside_anchor = filter_output[1]; + + // Filter crowd gt + Tensor ncrowd_gt_boxes = + FilterCrowdGt(dev_ctx, >_boxes_slice, &is_crowd_slice); + auto ncrowd_gt_boxes_et = + framework::EigenTensor::From(ncrowd_gt_boxes); + ncrowd_gt_boxes_et = ncrowd_gt_boxes_et * im_scale; + + Tensor anchor_by_gt_overlap; + anchor_by_gt_overlap.mutable_data( + {inside_anchor.dims()[0], ncrowd_gt_boxes.dims()[0]}, place); + BboxOverlaps(inside_anchor, ncrowd_gt_boxes, &anchor_by_gt_overlap); + + auto loc_score_tgtlbl_gt = SampleRpnFgBgGt( + dev_ctx, anchor_by_gt_overlap, rpn_batch_size_per_im, + rpn_positive_overlap, rpn_negative_overlap, rpn_fg_fraction, engine, + use_random); + + Tensor sampled_loc_index = loc_score_tgtlbl_gt[0]; + Tensor sampled_score_index = loc_score_tgtlbl_gt[1]; + Tensor sampled_tgtlbl = loc_score_tgtlbl_gt[2]; + Tensor sampled_gt_index = loc_score_tgtlbl_gt[3]; + + int loc_num = sampled_loc_index.dims()[0]; + int score_num = sampled_score_index.dims()[0]; + // unmap to all anchor + Tensor sampled_loc_index_unmap, sampled_score_index_unmap; + sampled_loc_index_unmap.mutable_data({loc_num}, place); + sampled_score_index_unmap.mutable_data({score_num}, place); + Gather(inds_inside.data(), 1, sampled_loc_index.data(), + loc_num, sampled_loc_index_unmap.data()); + Gather(inds_inside.data(), 1, sampled_score_index.data(), + score_num, sampled_score_index_unmap.data()); // get target bbox deltas - if (cur_fg_num) { - Tensor fg_gt; - T* gt_data = fg_gt.mutable_data({cur_fg_num, 4}, place); - Tensor tgt_bbox = tgt_bbox_t->Slice(fg_num, fg_num + cur_fg_num); - T* tgt_data = tgt_bbox.data(); - Gather(anchor_t->data(), 4, - reinterpret_cast(&fg_bg_gt[0][0]), cur_fg_num, - tgt_data); - Gather(gt_bbox.data(), 4, reinterpret_cast(&fg_bg_gt[2][0]), - cur_fg_num, gt_data); - BoxToDelta(cur_fg_num, tgt_bbox, fg_gt, nullptr, false, &tgt_bbox); - } - - loc_index += cur_fg_num; - score_index += cur_fg_num + cur_bg_num; - fg_num += cur_fg_num; - bg_num += cur_bg_num; - } - - int lbl_num = fg_num + bg_num; - PADDLE_ENFORCE_LE(fg_num, max_num); - PADDLE_ENFORCE_LE(lbl_num, max_num); - - tgt_bbox_t->Resize({fg_num, 4}); - loc_index_t->Resize({fg_num}); - score_index_t->Resize({lbl_num}); - auto* lbl_data = tgt_lbl_t->mutable_data({lbl_num, 1}, place); - Gather(tmp_lbl_data, 1, score_index_t->data(), lbl_num, - lbl_data); - } - - private: - void ScoreAssign(const T* dist_data, const Tensor& anchor_to_gt_max, - const int row, const int col, const float pos_threshold, - const float neg_threshold, int64_t* target_label, - std::vector* fg_inds, std::vector* bg_inds) const { - float epsilon = 0.0001; - for (int64_t i = 0; i < row; ++i) { - const T* v = dist_data + i * col; - T max = *std::max_element(v, v + col); - for (int64_t j = 0; j < col; ++j) { - if (std::abs(max - v[j]) < epsilon) { - target_label[j] = 1; - } - } - } - - // Pick the fg/bg - const T* anchor_to_gt_max_data = anchor_to_gt_max.data(); - for (int64_t j = 0; j < col; ++j) { - if (anchor_to_gt_max_data[j] >= pos_threshold) { - target_label[j] = 1; - } else if (anchor_to_gt_max_data[j] < neg_threshold) { - target_label[j] = 0; - } - if (target_label[j] == 1) { - fg_inds->push_back(j); - } else if (target_label[j] == 0) { - bg_inds->push_back(j); - } + Tensor sampled_anchor, sampled_gt, sampled_tgt_bbox; + auto* sampled_anchor_data = + sampled_anchor.mutable_data({loc_num, 4}, place); + auto* sampled_gt_data = sampled_gt.mutable_data({loc_num, 4}, place); + Gather(anchor->data(), 4, sampled_loc_index_unmap.data(), + loc_num, sampled_anchor_data); + Gather(ncrowd_gt_boxes.data(), 4, sampled_gt_index.data(), + loc_num, sampled_gt_data); + sampled_tgt_bbox.mutable_data({loc_num, 4}, place); + BoxToDelta(loc_num, sampled_anchor, sampled_gt, nullptr, false, + &sampled_tgt_bbox); + + // Add anchor offset + int anchor_offset = i * anchor_num; + auto sampled_loc_index_unmap_et = + framework::EigenTensor::From(sampled_loc_index_unmap); + sampled_loc_index_unmap_et = sampled_loc_index_unmap_et + anchor_offset; + auto sampled_score_index_unmap_et = + framework::EigenTensor::From(sampled_score_index_unmap); + sampled_score_index_unmap_et = + sampled_score_index_unmap_et + anchor_offset; + AppendRpns(loc_index, total_loc_num, &sampled_loc_index_unmap); + AppendRpns(score_index, total_score_num, &sampled_score_index_unmap); + AppendRpns(tgt_bbox, total_loc_num * 4, &sampled_tgt_bbox); + AppendRpns(tgt_lbl, total_score_num, &sampled_tgtlbl); + total_loc_num += loc_num; + + total_score_num += score_num; + lod0_loc.emplace_back(total_loc_num); + lod0_score.emplace_back(total_score_num); } - } - - void ReservoirSampling(const int num, std::minstd_rand engine, - std::vector* inds) const { - std::uniform_real_distribution uniform(0, 1); - size_t len = inds->size(); - if (len > static_cast(num)) { - for (size_t i = num; i < len; ++i) { - int rng_ind = std::floor(uniform(engine) * i); - if (rng_ind < num) - std::iter_swap(inds->begin() + rng_ind, inds->begin() + i); - } - inds->resize(num); - } - } - // std::vector> RpnTargetAssign( - std::vector> SampleFgBgGt( - const platform::CPUDeviceContext& ctx, const Tensor& dist, - const float pos_threshold, const float neg_threshold, - const int rpn_batch_size, const int fg_num, std::minstd_rand engine, - int64_t* target_label) const { - auto* dist_data = dist.data(); - int row = dist.dims()[0]; - int col = dist.dims()[1]; - - std::vector fg_inds; - std::vector bg_inds; - std::vector gt_inds; - - // Calculate the max IoU between anchors and gt boxes - // Map from anchor to gt box that has highest overlap - auto place = ctx.GetPlace(); - Tensor anchor_to_gt_max, anchor_to_gt_argmax; - anchor_to_gt_max.mutable_data({col}, place); - int* argmax = anchor_to_gt_argmax.mutable_data({col}, place); - - auto x = framework::EigenMatrix::From(dist); - auto x_col_max = framework::EigenVector::Flatten(anchor_to_gt_max); - auto x_col_argmax = - framework::EigenVector::Flatten(anchor_to_gt_argmax); - x_col_max = x.maximum(Eigen::DSizes(0)); - x_col_argmax = x.argmax(0).template cast(); - - // Follow the Faster RCNN's implementation - ScoreAssign(dist_data, anchor_to_gt_max, row, col, pos_threshold, - neg_threshold, target_label, &fg_inds, &bg_inds); - // Reservoir Sampling - ReservoirSampling(fg_num, engine, &fg_inds); - int fg_num2 = static_cast(fg_inds.size()); - int bg_num = rpn_batch_size - fg_num2; - ReservoirSampling(bg_num, engine, &bg_inds); - - gt_inds.reserve(fg_num2); - for (int i = 0; i < fg_num2; ++i) { - gt_inds.emplace_back(argmax[fg_inds[i]]); - } - std::vector> fg_bg_gt; - fg_bg_gt.emplace_back(fg_inds); - fg_bg_gt.emplace_back(bg_inds); - fg_bg_gt.emplace_back(gt_inds); - - return fg_bg_gt; + PADDLE_ENFORCE_LE(total_loc_num, max_num); + PADDLE_ENFORCE_LE(total_score_num, max_num); + + lod_loc.emplace_back(lod0_loc); + loc_score.emplace_back(lod0_score); + loc_index->set_lod(lod_loc); + score_index->set_lod(loc_score); + tgt_bbox->set_lod(lod_loc); + tgt_lbl->set_lod(loc_score); + loc_index->Resize({total_loc_num}); + score_index->Resize({total_score_num}); + tgt_bbox->Resize({total_loc_num, 4}); + tgt_lbl->Resize({total_score_num, 1}); } }; @@ -259,18 +460,22 @@ class RpnTargetAssignOpMaker : public framework::OpProtoAndCheckerMaker { void Make() override { AddInput("Anchor", "(Tensor) input anchor is a 2-D Tensor with shape [H*W*A, 4]."); - AddInput("GtBox", "(LoDTensor) input groud-truth bbox with shape [K, 4]."); - AddInput( - "DistMat", - "(LoDTensor or Tensor) this input is a 2-D LoDTensor with shape " - "[K, M]. It is pair-wise distance matrix between the entities " - "represented by each row and each column. For example, assumed one " - "entity is A with shape [K], another entity is B with shape [M]. The " - "DistMat[i][j] is the distance between A[i] and B[j]. The bigger " - "the distance is, the better macthing the pairs are. Please note, " - "This tensor can contain LoD information to represent a batch of " - "inputs. One instance of this batch can contain different numbers of " - "entities."); + AddInput("GtBoxes", + "(LoDTensor) input groud-truth bbox with shape [K, 4]."); + AddInput("IsCrowd", + "(LoDTensor) input which indicates groud-truth is crowd."); + AddInput("ImInfo", + "(LoDTensor) input image information with shape [N, 3]. " + "N is the batch size, each image information includes height, " + "width and scale."); + AddAttr("rpn_batch_size_per_im", + "Total number of RPN examples per image.") + .SetDefault(256); + AddAttr( + "rpn_straddle_thresh", + "Remove RPN anchors that go outside the image by straddle_thresh " + "pixels, " + "Set to -1 or a large value, e.g. 100000, to disable pruning anchors."); AddAttr( "rpn_positive_overlap", "Minimum overlap required between an anchor and ground-truth " @@ -282,20 +487,15 @@ class RpnTargetAssignOpMaker : public framework::OpProtoAndCheckerMaker { "box for the (anchor, gt box) pair to be a negative examples.") .SetDefault(0.3); AddAttr( - "fg_fraction", + "rpn_fg_fraction", "Target fraction of RoI minibatch that " "is labeled foreground (i.e. class > 0), 0-th class is background.") .SetDefault(0.25); - AddAttr("rpn_batch_size_per_im", - "Total number of RPN examples per image.") - .SetDefault(256); - AddAttr("fix_seed", - "A flag indicating whether to use a fixed seed to generate " - "random mask. NOTE: DO NOT set this flag to true in " - "training. Setting this flag to true is only useful in " - "unittest.") - .SetDefault(false); - AddAttr("seed", "RpnTargetAssign random seed.").SetDefault(0); + AddAttr("use_random", + "A flag indicating whether to use a ReservoirSampling. " + "NOTE: DO NOT set this flag to false in training. " + "Setting this flag to false is only useful in unittest.") + .SetDefault(true); AddOutput( "LocationIndex", "(Tensor), The indexes of foreground anchors in all RPN anchors, the " @@ -308,16 +508,16 @@ class RpnTargetAssignOpMaker : public framework::OpProtoAndCheckerMaker { "ScoreIndex is [F + B], F and B are sampled foreground and backgroud " " number."); AddOutput("TargetBBox", - "(Tensor), The target bbox deltas with shape " + "(Tensor), The target bbox deltas with shape " "[F, 4], F is the sampled foreground number."); AddOutput( "TargetLabel", - "(Tensor), The target labels of each anchor with shape " + "(Tensor), The target labels of each anchor with shape " "[F + B, 1], F and B are sampled foreground and backgroud number."); AddComment(R"DOC( -This operator can be, for given the IoU between the ground truth bboxes and the +This operator can be, for a given set of ground truth bboxes and the anchors, to assign classification and regression targets to each prediction. -The Score index and LocationIndex will be generated according to the DistMat. +The ScoreIndex and LocationIndex will be generated according to the anchor-groundtruth IOU. The rest anchors would not contibute to the RPN training loss ScoreIndex is composed of foreground anchor indexes(positive labels) and diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index 1bc1dbbeca..1c73c837e2 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -55,15 +55,19 @@ for _OP in set(__auto__): globals()[_OP] = generate_layer_fn(_OP) -def rpn_target_assign(loc, - scores, +def rpn_target_assign(bbox_pred, + cls_logits, anchor_box, anchor_var, - gt_box, + gt_boxes, + is_crowd, + im_info, rpn_batch_size_per_im=256, - fg_fraction=0.25, + rpn_straddle_thresh=0.0, + rpn_fg_fraction=0.5, rpn_positive_overlap=0.7, - rpn_negative_overlap=0.3): + rpn_negative_overlap=0.3, + use_random=True): """ ** Target Assign Layer for region proposal network (RPN) in Faster-RCNN detection. ** @@ -83,14 +87,13 @@ def rpn_target_assign(loc, the positive anchors. Args: - loc(Variable): A 3-D Tensor with shape [N, M, 4] represents the + bbox_pred(Variable): A 3-D Tensor with shape [N, M, 4] represents the predicted locations of M bounding bboxes. N is the batch size, and each bounding box has four coordinate values and the layout is [xmin, ymin, xmax, ymax]. - scores(Variable): A 3-D Tensor with shape [N, M, C] represents the - predicted confidence predictions. N is the batch size, C is the - class number, M is number of bounding boxes. For each category - there are total M scores which corresponding M bounding boxes. + cls_logits(Variable): A 3-D Tensor with shape [N, M, 1] represents the + predicted confidence predictions. N is the batch size, 1 is the + frontground and background sigmoid, M is number of bounding boxes. anchor_box(Variable): A 2-D Tensor with shape [M, 4] holds M boxes, each box is represented as [xmin, ymin, xmax, ymax], [xmin, ymin] is the left top coordinate of the anchor box, @@ -99,11 +102,16 @@ def rpn_target_assign(loc, coordinate of the anchor box. anchor_var(Variable): A 2-D Tensor with shape [M,4] holds expanded variances of anchors. - gt_box (Variable): The ground-truth boudding boxes (bboxes) are a 2D + gt_boxes (Variable): The ground-truth boudding boxes (bboxes) are a 2D LoDTensor with shape [Ng, 4], Ng is the total number of ground-truth bboxes of mini-batch input. + is_crowd (Variable): A 1-D LoDTensor which indicates groud-truth is crowd. + im_info (Variable): A 2-D LoDTensor with shape [N, 3]. N is the batch size, + 3 is the height, width and scale. rpn_batch_size_per_im(int): Total number of RPN examples per image. - fg_fraction(float): Target fraction of RoI minibatch that is labeled + rpn_straddle_thresh(float): Remove RPN anchors that go outside the image + by straddle_thresh pixels. + rpn_fg_fraction(float): Target fraction of RoI minibatch that is labeled foreground (i.e. class > 0), 0-th class is background. rpn_positive_overlap(float): Minimum overlap required between an anchor and ground-truth box for the (anchor, gt box) pair to be a positive @@ -129,45 +137,48 @@ def rpn_target_assign(loc, Examples: .. code-block:: python - loc = layers.data(name='location', shape=[2, 80], + bbox_pred = layers.data(name='bbox_pred', shape=[100, 4], append_batch_size=False, dtype='float32') - scores = layers.data(name='scores', shape=[2, 40], + cls_logits = layers.data(name='cls_logits', shape=[100, 1], append_batch_size=False, dtype='float32') anchor_box = layers.data(name='anchor_box', shape=[20, 4], append_batch_size=False, dtype='float32') - gt_box = layers.data(name='gt_box', shape=[10, 4], + gt_boxes = layers.data(name='gt_boxes', shape=[10, 4], append_batch_size=False, dtype='float32') loc_pred, score_pred, loc_target, score_target = - fluid.layers.detection_output(loc=location, - scores=scores, + fluid.layers.rpn_target_assign(bbox_pred=bbox_pred, + cls_logits=cls_logits, anchor_box=anchor_box, - gt_box=gt_box) + gt_boxes=gt_boxes) """ helper = LayerHelper('rpn_target_assign', **locals()) - # Compute overlaps between the prior boxes and the gt boxes overlaps - iou = iou_similarity(x=gt_box, y=anchor_box) # Assign target label to anchors loc_index = helper.create_tmp_variable(dtype='int32') score_index = helper.create_tmp_variable(dtype='int32') - target_label = helper.create_tmp_variable(dtype='int64') + target_label = helper.create_tmp_variable(dtype='int32') target_bbox = helper.create_tmp_variable(dtype=anchor_box.dtype) helper.append_op( type="rpn_target_assign", - inputs={'Anchor': anchor_box, - 'GtBox': gt_box, - 'DistMat': iou}, + inputs={ + 'Anchor': anchor_box, + 'GtBoxes': gt_boxes, + 'IsCrowd': is_crowd, + 'ImInfo': im_info + }, outputs={ 'LocationIndex': loc_index, 'ScoreIndex': score_index, 'TargetLabel': target_label, - 'TargetBBox': target_bbox, + 'TargetBBox': target_bbox }, attrs={ 'rpn_batch_size_per_im': rpn_batch_size_per_im, + 'rpn_straddle_thresh': rpn_straddle_thresh, 'rpn_positive_overlap': rpn_positive_overlap, 'rpn_negative_overlap': rpn_negative_overlap, - 'fg_fraction': fg_fraction + 'rpn_fg_fraction': rpn_fg_fraction, + 'use_random': use_random }) loc_index.stop_gradient = True @@ -175,12 +186,12 @@ def rpn_target_assign(loc, target_label.stop_gradient = True target_bbox.stop_gradient = True - scores = nn.reshape(x=scores, shape=(-1, 1)) - loc = nn.reshape(x=loc, shape=(-1, 4)) - predicted_scores = nn.gather(scores, score_index) - predicted_location = nn.gather(loc, loc_index) + cls_logits = nn.reshape(x=cls_logits, shape=(-1, 1)) + bbox_pred = nn.reshape(x=bbox_pred, shape=(-1, 4)) + predicted_cls_logits = nn.gather(cls_logits, score_index) + predicted_bbox_pred = nn.gather(bbox_pred, loc_index) - return predicted_scores, predicted_location, target_label, target_bbox + return predicted_cls_logits, predicted_bbox_pred, target_label, target_bbox def detection_output(loc, @@ -1258,15 +1269,17 @@ def anchor_generator(input, def generate_proposal_labels(rpn_rois, gt_classes, + is_crowd, gt_boxes, - im_scales, + im_info, batch_size_per_im=256, fg_fraction=0.25, fg_thresh=0.25, bg_thresh_hi=0.5, bg_thresh_lo=0.0, bbox_reg_weights=[0.1, 0.1, 0.2, 0.2], - class_nums=None): + class_nums=None, + use_random=True): """ ** Generate proposal labels Faster-RCNN ** TODO(buxingyuan): Add Document @@ -1285,8 +1298,9 @@ def generate_proposal_labels(rpn_rois, inputs={ 'RpnRois': rpn_rois, 'GtClasses': gt_classes, + 'IsCrowd': is_crowd, 'GtBoxes': gt_boxes, - 'ImScales': im_scales + 'ImInfo': im_info }, outputs={ 'Rois': rois, @@ -1302,7 +1316,8 @@ def generate_proposal_labels(rpn_rois, 'bg_thresh_hi': bg_thresh_hi, 'bg_thresh_lo': bg_thresh_lo, 'bbox_reg_weights': bbox_reg_weights, - 'class_nums': class_nums + 'class_nums': class_nums, + 'use_random': use_random }) rois.stop_gradient = True diff --git a/python/paddle/fluid/tests/test_detection.py b/python/paddle/fluid/tests/test_detection.py index e2564763d1..56129641ce 100644 --- a/python/paddle/fluid/tests/test_detection.py +++ b/python/paddle/fluid/tests/test_detection.py @@ -148,51 +148,60 @@ class TestAnchorGenerator(unittest.TestCase): class TestGenerateProposalLabels(unittest.TestCase): def test_generate_proposal_labels(self): - rpn_rois = layers.data( - name='rpn_rois', - shape=[4, 4], - dtype='float32', - lod_level=1, - append_batch_size=False) - gt_classes = layers.data( - name='gt_classes', - shape=[6], - dtype='int32', - lod_level=1, - append_batch_size=False) - gt_boxes = layers.data( - name='gt_boxes', - shape=[6, 4], - dtype='float32', - lod_level=1, - append_batch_size=False) - im_scales = layers.data( - name='im_scales', - shape=[1], - dtype='float32', - lod_level=1, - append_batch_size=False) - class_nums = 5 - rois, labels_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights = fluid.layers.generate_proposal_labels( - rpn_rois=rpn_rois, - gt_classes=gt_classes, - gt_boxes=gt_boxes, - im_scales=im_scales, - batch_size_per_im=2, - fg_fraction=0.5, - fg_thresh=0.5, - bg_thresh_hi=0.5, - bg_thresh_lo=0.0, - bbox_reg_weights=[0.1, 0.1, 0.2, 0.2], - class_nums=class_nums) - assert rois.shape[1] == 4 - assert rois.shape[0] == labels_int32.shape[0] - assert rois.shape[0] == bbox_targets.shape[0] - assert rois.shape[0] == bbox_inside_weights.shape[0] - assert rois.shape[0] == bbox_outside_weights.shape[0] - assert bbox_targets.shape[1] == 4 * class_nums - assert bbox_inside_weights.shape[1] == 4 * class_nums - assert bbox_outside_weights.shape[1] == 4 * class_nums + program = Program() + with program_guard(program): + rpn_rois = layers.data( + name='rpn_rois', + shape=[4, 4], + dtype='float32', + lod_level=1, + append_batch_size=False) + gt_classes = layers.data( + name='gt_classes', + shape=[6], + dtype='int32', + lod_level=1, + append_batch_size=False) + is_crowd = layers.data( + name='is_crowd', + shape=[6], + dtype='int32', + lod_level=1, + append_batch_size=False) + gt_boxes = layers.data( + name='gt_boxes', + shape=[6, 4], + dtype='float32', + lod_level=1, + append_batch_size=False) + im_info = layers.data( + name='im_info', + shape=[1, 3], + dtype='float32', + lod_level=1, + append_batch_size=False) + class_nums = 5 + rois, labels_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights = fluid.layers.generate_proposal_labels( + rpn_rois=rpn_rois, + gt_classes=gt_classes, + is_crowd=is_crowd, + gt_boxes=gt_boxes, + im_info=im_info, + batch_size_per_im=2, + fg_fraction=0.5, + fg_thresh=0.5, + bg_thresh_hi=0.5, + bg_thresh_lo=0.0, + bbox_reg_weights=[0.1, 0.1, 0.2, 0.2], + class_nums=class_nums) + assert rois.shape[1] == 4 + assert rois.shape[0] == labels_int32.shape[0] + assert rois.shape[0] == bbox_targets.shape[0] + assert rois.shape[0] == bbox_inside_weights.shape[0] + assert rois.shape[0] == bbox_outside_weights.shape[0] + assert bbox_targets.shape[1] == 4 * class_nums + assert bbox_inside_weights.shape[1] == 4 * class_nums + assert bbox_outside_weights.shape[1] == 4 * class_nums class TestMultiBoxHead(unittest.TestCase): @@ -254,18 +263,18 @@ class TestRpnTargetAssign(unittest.TestCase): def test_rpn_target_assign(self): program = Program() with program_guard(program): - loc_shape = [10, 50, 4] - score_shape = [10, 50, 2] + bbox_pred_shape = [10, 50, 4] + cls_logits_shape = [10, 50, 2] anchor_shape = [50, 4] - loc = layers.data( - name='loc', - shape=loc_shape, + bbox_pred = layers.data( + name='bbox_pred', + shape=bbox_pred_shape, append_batch_size=False, dtype='float32') - scores = layers.data( - name='scores', - shape=score_shape, + cls_logits = layers.data( + name='cls_logits', + shape=cls_logits_shape, append_batch_size=False, dtype='float32') anchor_box = layers.data( @@ -278,17 +287,31 @@ class TestRpnTargetAssign(unittest.TestCase): shape=anchor_shape, append_batch_size=False, dtype='float32') - gt_box = layers.data( - name='gt_box', shape=[4], lod_level=1, dtype='float32') - + gt_boxes = layers.data( + name='gt_boxes', shape=[4], lod_level=1, dtype='float32') + is_crowd = layers.data( + name='is_crowd', + shape=[10], + dtype='int32', + lod_level=1, + append_batch_size=False) + im_info = layers.data( + name='im_info', + shape=[1, 3], + dtype='float32', + lod_level=1, + append_batch_size=False) pred_scores, pred_loc, tgt_lbl, tgt_bbox = layers.rpn_target_assign( - loc=loc, - scores=scores, + bbox_pred=bbox_pred, + cls_logits=cls_logits, anchor_box=anchor_box, anchor_var=anchor_var, - gt_box=gt_box, + gt_boxes=gt_boxes, + is_crowd=is_crowd, + im_info=im_info, rpn_batch_size_per_im=256, - fg_fraction=0.25, + rpn_straddle_thresh=0.0, + rpn_fg_fraction=0.5, rpn_positive_overlap=0.7, rpn_negative_overlap=0.3) diff --git a/python/paddle/fluid/tests/unittests/test_generate_proposal_labels.py b/python/paddle/fluid/tests/unittests/test_generate_proposal_labels_op.py similarity index 77% rename from python/paddle/fluid/tests/unittests/test_generate_proposal_labels.py rename to python/paddle/fluid/tests/unittests/test_generate_proposal_labels_op.py index 6dc101b6da..2d5cd3b24b 100644 --- a/python/paddle/fluid/tests/unittests/test_generate_proposal_labels.py +++ b/python/paddle/fluid/tests/unittests/test_generate_proposal_labels_op.py @@ -20,10 +20,10 @@ import paddle.fluid as fluid from op_test import OpTest -def generate_proposal_labels_in_python( - rpn_rois, gt_classes, gt_boxes, im_scales, batch_size_per_im, - fg_fraction, fg_thresh, bg_thresh_hi, bg_thresh_lo, bbox_reg_weights, - class_nums): +def generate_proposal_labels_in_python(rpn_rois, gt_classes, is_crowd, gt_boxes, + im_info, batch_size_per_im, fg_fraction, + fg_thresh, bg_thresh_hi, bg_thresh_lo, + bbox_reg_weights, class_nums): rois = [] labels_int32 = [] bbox_targets = [] @@ -31,13 +31,13 @@ def generate_proposal_labels_in_python( bbox_outside_weights = [] lod = [] assert len(rpn_rois) == len( - im_scales), 'batch size of rpn_rois and ground_truth is not matched' + im_info), 'batch size of rpn_rois and ground_truth is not matched' - for im_i in range(len(im_scales)): + for im_i in range(len(im_info)): frcn_blobs = _sample_rois( - rpn_rois[im_i], gt_classes[im_i], gt_boxes[im_i], im_scales[im_i], - batch_size_per_im, fg_fraction, fg_thresh, bg_thresh_hi, - bg_thresh_lo, bbox_reg_weights, class_nums) + rpn_rois[im_i], gt_classes[im_i], is_crowd[im_i], gt_boxes[im_i], + im_info[im_i], batch_size_per_im, fg_fraction, fg_thresh, + bg_thresh_hi, bg_thresh_lo, bbox_reg_weights, class_nums) lod.append(frcn_blobs['rois'].shape[0]) @@ -50,13 +50,14 @@ def generate_proposal_labels_in_python( return rois, labels_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights, lod -def _sample_rois(rpn_rois, gt_classes, gt_boxes, im_scale, batch_size_per_im, - fg_fraction, fg_thresh, bg_thresh_hi, bg_thresh_lo, - bbox_reg_weights, class_nums): +def _sample_rois(rpn_rois, gt_classes, is_crowd, gt_boxes, im_info, + batch_size_per_im, fg_fraction, fg_thresh, bg_thresh_hi, + bg_thresh_lo, bbox_reg_weights, class_nums): rois_per_image = int(batch_size_per_im) fg_rois_per_im = int(np.round(fg_fraction * rois_per_image)) # Roidb + im_scale = im_info[2] inv_im_scale = 1. / im_scale rpn_rois = rpn_rois * inv_im_scale @@ -78,6 +79,9 @@ def _sample_rois(rpn_rois, gt_classes, gt_boxes, im_scale, batch_size_per_im, box_to_gt_ind_map[overlapped_boxes_ind] = overlaps_argmax[ overlapped_boxes_ind] + crowd_ind = np.where(is_crowd)[0] + gt_overlaps[crowd_ind] = -1 + max_overlaps = gt_overlaps.max(axis=1) max_classes = gt_overlaps.argmax(axis=1) @@ -85,9 +89,10 @@ def _sample_rois(rpn_rois, gt_classes, gt_boxes, im_scale, batch_size_per_im, fg_inds = np.where(max_overlaps >= fg_thresh)[0] fg_rois_per_this_image = np.minimum(fg_rois_per_im, fg_inds.shape[0]) # Sample foreground if there are too many - if fg_inds.shape[0] > fg_rois_per_this_image: - fg_inds = np.random.choice( - fg_inds, size=fg_rois_per_this_image, replace=False) + # if fg_inds.shape[0] > fg_rois_per_this_image: + # fg_inds = np.random.choice( + # fg_inds, size=fg_rois_per_this_image, replace=False) + fg_inds = fg_inds[:fg_rois_per_this_image] # Background bg_inds = np.where((max_overlaps < bg_thresh_hi) & (max_overlaps >= @@ -96,9 +101,10 @@ def _sample_rois(rpn_rois, gt_classes, gt_boxes, im_scale, batch_size_per_im, bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_inds.shape[0]) # Sample background if there are too many - if bg_inds.shape[0] > bg_rois_per_this_image: - bg_inds = np.random.choice( - bg_inds, size=bg_rois_per_this_image, replace=False) + # if bg_inds.shape[0] > bg_rois_per_this_image: + # bg_inds = np.random.choice( + # bg_inds, size=bg_rois_per_this_image, replace=False) + bg_inds = bg_inds[:bg_rois_per_this_image] keep_inds = np.append(fg_inds, bg_inds) sampled_labels = max_classes[keep_inds] @@ -208,8 +214,9 @@ class TestGenerateProposalLabelsOp(OpTest): self.inputs = { 'RpnRois': (self.rpn_rois[0], self.rpn_rois_lod), 'GtClasses': (self.gt_classes[0], self.gts_lod), + 'IsCrowd': (self.is_crowd[0], self.gts_lod), 'GtBoxes': (self.gt_boxes[0], self.gts_lod), - 'ImScales': self.im_scales[0] + 'ImInfo': self.im_info } self.attrs = { 'batch_size_per_im': self.batch_size_per_im, @@ -218,14 +225,15 @@ class TestGenerateProposalLabelsOp(OpTest): 'bg_thresh_hi': self.bg_thresh_hi, 'bg_thresh_lo': self.bg_thresh_lo, 'bbox_reg_weights': self.bbox_reg_weights, - 'class_nums': self.class_nums + 'class_nums': self.class_nums, + 'use_random': False } self.outputs = { - 'Rois': (self.rois[0], [self.lod]), - 'LabelsInt32': (self.labels_int32[0], [self.lod]), - 'BboxTargets': (self.bbox_targets[0], [self.lod]), - 'BboxInsideWeights': (self.bbox_inside_weights[0], [self.lod]), - 'BboxOutsideWeights': (self.bbox_outside_weights[0], [self.lod]), + 'Rois': (self.rois, [self.lod]), + 'LabelsInt32': (self.labels_int32, [self.lod]), + 'BboxTargets': (self.bbox_targets, [self.lod]), + 'BboxInsideWeights': (self.bbox_inside_weights, [self.lod]), + 'BboxOutsideWeights': (self.bbox_outside_weights, [self.lod]), } def test_check_output(self): @@ -236,8 +244,8 @@ class TestGenerateProposalLabelsOp(OpTest): self.set_data() def init_test_params(self): - self.batch_size_per_im = 10 - self.fg_fraction = 1.0 + self.batch_size_per_im = 512 + self.fg_fraction = 0.25 self.fg_thresh = 0.5 self.bg_thresh_hi = 0.5 self.bg_thresh_lo = 0.0 @@ -246,14 +254,14 @@ class TestGenerateProposalLabelsOp(OpTest): def init_test_input(self): np.random.seed(0) - image_nums = 1 gt_nums = 6 # Keep same with batch_size_per_im for unittest - proposal_nums = self.batch_size_per_im - gt_nums - images_shape = [] - self.im_scales = [] - for i in range(image_nums): - images_shape.append(np.random.randint(200, size=2)) - self.im_scales.append(np.ones((1)).astype(np.float32)) + proposal_nums = 2000 #self.batch_size_per_im - gt_nums + images_shape = [[64, 64]] + self.im_info = np.ones((len(images_shape), 3)).astype(np.float32) + for i in range(len(images_shape)): + self.im_info[i, 0] = images_shape[i][0] + self.im_info[i, 1] = images_shape[i][1] + self.im_info[i, 2] = 0.8 #scale self.rpn_rois, self.rpn_rois_lod = _generate_proposals(images_shape, proposal_nums) @@ -261,16 +269,23 @@ class TestGenerateProposalLabelsOp(OpTest): images_shape, self.class_nums, gt_nums) self.gt_classes = [gt['gt_classes'] for gt in ground_truth] self.gt_boxes = [gt['boxes'] for gt in ground_truth] + self.is_crowd = [gt['is_crowd'] for gt in ground_truth] def init_test_output(self): self.rois, self.labels_int32, self.bbox_targets, \ self.bbox_inside_weights, self.bbox_outside_weights, \ self.lod = generate_proposal_labels_in_python( - self.rpn_rois, self.gt_classes, self.gt_boxes, self.im_scales, + self.rpn_rois, self.gt_classes, self.is_crowd, self.gt_boxes, self.im_info, self.batch_size_per_im, self.fg_fraction, self.fg_thresh, self.bg_thresh_hi, self.bg_thresh_lo, self.bbox_reg_weights, self.class_nums ) + self.rois = np.vstack(self.rois) + self.labels_int32 = np.hstack(self.labels_int32) + self.labels_int32 = self.labels_int32[:, np.newaxis] + self.bbox_targets = np.vstack(self.bbox_targets) + self.bbox_inside_weights = np.vstack(self.bbox_inside_weights) + self.bbox_outside_weights = np.vstack(self.bbox_outside_weights) def _generate_proposals(images_shape, proposal_nums): @@ -280,7 +295,7 @@ def _generate_proposals(images_shape, proposal_nums): for i, image_shape in enumerate(images_shape): proposals = _generate_boxes(image_shape, proposal_nums) rpn_rois.append(proposals) - num_proposals += len(proposals) + num_proposals = len(proposals) rpn_rois_lod.append(num_proposals) return rpn_rois, [rpn_rois_lod] @@ -294,7 +309,11 @@ def _generate_groundtruth(images_shape, class_nums, gt_nums): gt_classes = np.random.randint( low=1, high=class_nums, size=gt_nums).astype(np.int32) gt_boxes = _generate_boxes(image_shape, gt_nums) - ground_truth.append(dict(gt_classes=gt_classes, boxes=gt_boxes)) + is_crowd = np.zeros((gt_nums), dtype=np.int32) + is_crowd[0] = 1 + ground_truth.append( + dict( + gt_classes=gt_classes, boxes=gt_boxes, is_crowd=is_crowd)) num_gts += len(gt_classes) gts_lod.append(num_gts) return ground_truth, [gts_lod] diff --git a/python/paddle/fluid/tests/unittests/test_generate_proposals.py b/python/paddle/fluid/tests/unittests/test_generate_proposals_op.py similarity index 88% rename from python/paddle/fluid/tests/unittests/test_generate_proposals.py rename to python/paddle/fluid/tests/unittests/test_generate_proposals_op.py index 3fbd2ce95a..86e27fe29e 100644 --- a/python/paddle/fluid/tests/unittests/test_generate_proposals.py +++ b/python/paddle/fluid/tests/unittests/test_generate_proposals_op.py @@ -114,10 +114,10 @@ def box_coder(all_anchors, bbox_deltas, variances): #anchor_loc: width, height, center_x, center_y anchor_loc = np.zeros_like(bbox_deltas, dtype=np.float32) - anchor_loc[:, 0] = all_anchors[:, 2] - all_anchors[:, 0] - anchor_loc[:, 1] = all_anchors[:, 3] - all_anchors[:, 1] - anchor_loc[:, 2] = (all_anchors[:, 2] + all_anchors[:, 0]) / 2 - anchor_loc[:, 3] = (all_anchors[:, 3] + all_anchors[:, 1]) / 2 + anchor_loc[:, 0] = all_anchors[:, 2] - all_anchors[:, 0] + 1 + anchor_loc[:, 1] = all_anchors[:, 3] - all_anchors[:, 1] + 1 + anchor_loc[:, 2] = all_anchors[:, 0] + 0.5 * anchor_loc[:, 0] + anchor_loc[:, 3] = all_anchors[:, 1] + 0.5 * anchor_loc[:, 1] #predicted bbox: bbox_center_x, bbox_center_y, bbox_width, bbox_height pred_bbox = np.zeros_like(bbox_deltas, dtype=np.float32) @@ -127,23 +127,29 @@ def box_coder(all_anchors, bbox_deltas, variances): i, 0] + anchor_loc[i, 2] pred_bbox[i, 1] = variances[i, 1] * bbox_deltas[i, 1] * anchor_loc[ i, 1] + anchor_loc[i, 3] - pred_bbox[i, 2] = math.exp(variances[i, 2] * - bbox_deltas[i, 2]) * anchor_loc[i, 0] - pred_bbox[i, 3] = math.exp(variances[i, 3] * - bbox_deltas[i, 3]) * anchor_loc[i, 1] + pred_bbox[i, 2] = math.exp( + min(variances[i, 2] * bbox_deltas[i, 2], math.log( + 1000 / 16.0))) * anchor_loc[i, 0] + pred_bbox[i, 3] = math.exp( + min(variances[i, 3] * bbox_deltas[i, 3], math.log( + 1000 / 16.0))) * anchor_loc[i, 1] else: for i in range(bbox_deltas.shape[0]): pred_bbox[i, 0] = bbox_deltas[i, 0] * anchor_loc[i, 0] + anchor_loc[ i, 2] pred_bbox[i, 1] = bbox_deltas[i, 1] * anchor_loc[i, 1] + anchor_loc[ i, 3] - pred_bbox[i, 2] = math.exp(bbox_deltas[i, 2]) * anchor_loc[i, 0] - pred_bbox[i, 3] = math.exp(bbox_deltas[i, 3]) * anchor_loc[i, 1] + pred_bbox[i, 2] = math.exp( + min(bbox_deltas[i, 2], math.log(1000 / 16.0))) * anchor_loc[i, + 0] + pred_bbox[i, 3] = math.exp( + min(bbox_deltas[i, 3], math.log(1000 / 16.0))) * anchor_loc[i, + 1] proposals[:, 0] = pred_bbox[:, 0] - pred_bbox[:, 2] / 2 proposals[:, 1] = pred_bbox[:, 1] - pred_bbox[:, 3] / 2 - proposals[:, 2] = pred_bbox[:, 0] + pred_bbox[:, 2] / 2 - proposals[:, 3] = pred_bbox[:, 1] + pred_bbox[:, 3] / 2 + proposals[:, 2] = pred_bbox[:, 0] + pred_bbox[:, 2] / 2 - 1 + proposals[:, 3] = pred_bbox[:, 1] + pred_bbox[:, 3] / 2 - 1 return proposals @@ -170,13 +176,16 @@ def filter_boxes(boxes, min_size, im_info): """Only keep boxes with both sides >= min_size and center within the image. """ # Scale min_size to match image scale - min_size *= im_info[2] + im_scale = im_info[2] + min_size = max(min_size, 1.0) ws = boxes[:, 2] - boxes[:, 0] + 1 hs = boxes[:, 3] - boxes[:, 1] + 1 + ws_orig_scale = (boxes[:, 2] - boxes[:, 0]) / im_scale + 1 + hs_orig_scale = (boxes[:, 3] - boxes[:, 1]) / im_scale + 1 x_ctr = boxes[:, 0] + ws / 2. y_ctr = boxes[:, 1] + hs / 2. - keep = np.where((ws >= min_size) & (hs >= min_size) & (x_ctr < im_info[1]) & - (y_ctr < im_info[0]))[0] + keep = np.where((ws_orig_scale >= min_size) & (hs_orig_scale >= min_size) & + (x_ctr < im_info[1]) & (y_ctr < im_info[0]))[0] return keep @@ -204,7 +213,7 @@ def iou(box_a, box_b): xb = min(xmax_a, xmax_b) yb = min(ymax_a, ymax_b) - inter_area = max(xb - xa, 0.0) * max(yb - ya, 0.0) + inter_area = max(xb - xa + 1, 0.0) * max(yb - ya + 1, 0.0) iou_ratio = inter_area / (area_a + area_b - inter_area) diff --git a/python/paddle/fluid/tests/unittests/test_rpn_target_assign_op.py b/python/paddle/fluid/tests/unittests/test_rpn_target_assign_op.py index bd548009b3..f63dbcd3d7 100644 --- a/python/paddle/fluid/tests/unittests/test_rpn_target_assign_op.py +++ b/python/paddle/fluid/tests/unittests/test_rpn_target_assign_op.py @@ -19,48 +19,58 @@ import numpy as np import paddle.fluid.core as core from op_test import OpTest from test_anchor_generator_op import anchor_generator_in_python -from test_generate_proposal_labels import _generate_groundtruth -from test_generate_proposal_labels import _bbox_overlaps, _box_to_delta - - -def rpn_target_assign(gt_anchor_iou, rpn_batch_size_per_im, - rpn_positive_overlap, rpn_negative_overlap, fg_fraction): - iou = np.transpose(gt_anchor_iou) - anchor_to_gt_max = iou.max(axis=1) - anchor_to_gt_argmax = iou.argmax(axis=1) - - gt_to_anchor_argmax = iou.argmax(axis=0) - gt_to_anchor_max = iou[gt_to_anchor_argmax, np.arange(iou.shape[1])] - anchors_with_max_overlap = np.where(iou == gt_to_anchor_max)[0] - - tgt_lbl = np.ones((iou.shape[0], ), dtype=np.int32) * -1 - tgt_lbl[anchors_with_max_overlap] = 1 - tgt_lbl[anchor_to_gt_max >= rpn_positive_overlap] = 1 - - num_fg = int(fg_fraction * rpn_batch_size_per_im) - fg_inds = np.where(tgt_lbl == 1)[0] - if len(fg_inds) > num_fg: +from test_generate_proposal_labels_op import _generate_groundtruth +from test_generate_proposal_labels_op import _bbox_overlaps, _box_to_delta + + +def rpn_target_assign(anchor_by_gt_overlap, + rpn_batch_size_per_im, + rpn_positive_overlap, + rpn_negative_overlap, + rpn_fg_fraction, + use_random=True): + anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1) + anchor_to_gt_max = anchor_by_gt_overlap[np.arange( + anchor_by_gt_overlap.shape[0]), anchor_to_gt_argmax] + + gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(axis=0) + gt_to_anchor_max = anchor_by_gt_overlap[gt_to_anchor_argmax, np.arange( + anchor_by_gt_overlap.shape[1])] + anchors_with_max_overlap = np.where( + anchor_by_gt_overlap == gt_to_anchor_max)[0] + + labels = np.ones((anchor_by_gt_overlap.shape[0], ), dtype=np.int32) * -1 + labels[anchors_with_max_overlap] = 1 + labels[anchor_to_gt_max >= rpn_positive_overlap] = 1 + + num_fg = int(rpn_fg_fraction * rpn_batch_size_per_im) + fg_inds = np.where(labels == 1)[0] + if len(fg_inds) > num_fg and use_random: disable_inds = np.random.choice( fg_inds, size=(len(fg_inds) - num_fg), replace=False) - tgt_lbl[disable_inds] = -1 - fg_inds = np.where(tgt_lbl == 1)[0] + else: + disable_inds = fg_inds[num_fg:] + labels[disable_inds] = -1 + fg_inds = np.where(labels == 1)[0] - num_bg = rpn_batch_size_per_im - np.sum(tgt_lbl == 1) + num_bg = rpn_batch_size_per_im - np.sum(labels == 1) bg_inds = np.where(anchor_to_gt_max < rpn_negative_overlap)[0] - tgt_lbl[bg_inds] = 0 - if len(bg_inds) > num_bg: + if len(bg_inds) > num_bg and use_random: enable_inds = bg_inds[np.random.randint(len(bg_inds), size=num_bg)] - tgt_lbl[enable_inds] = 0 - bg_inds = np.where(tgt_lbl == 0)[0] - tgt_lbl[bg_inds] = 0 + else: + enable_inds = bg_inds[:num_bg] + labels[enable_inds] = 0 + fg_inds = np.where(labels == 1)[0] + bg_inds = np.where(labels == 0)[0] loc_index = fg_inds score_index = np.hstack((fg_inds, bg_inds)) - tgt_lbl = np.expand_dims(tgt_lbl, axis=1) + labels = labels[score_index] + assert not np.any(labels == -1), "Wrong labels with -1" gt_inds = anchor_to_gt_argmax[fg_inds] - return loc_index, score_index, tgt_lbl, gt_inds + return loc_index, score_index, labels, gt_inds def get_anchor(n, c, h, w): @@ -75,85 +85,129 @@ def get_anchor(n, c, h, w): return anchors -def rpn_blob(anchor, gt_boxes, iou, lod, rpn_batch_size_per_im, - rpn_positive_overlap, rpn_negative_overlap, fg_fraction): - - loc_indexes = [] - score_indexes = [] - tmp_tgt_labels = [] - tgt_bboxes = [] - anchor_num = anchor.shape[0] - +def rpn_target_assign_in_python(all_anchors, + gt_boxes, + is_crowd, + im_info, + lod, + rpn_straddle_thresh, + rpn_batch_size_per_im, + rpn_positive_overlap, + rpn_negative_overlap, + rpn_fg_fraction, + use_random=True): + anchor_num = all_anchors.shape[0] batch_size = len(lod) - 1 for i in range(batch_size): + im_height = im_info[i][0] + im_width = im_info[i][1] + im_scale = im_info[i][2] + if rpn_straddle_thresh >= 0: + # Only keep anchors inside the image by a margin of straddle_thresh + inds_inside = np.where( + (all_anchors[:, 0] >= -rpn_straddle_thresh) & + (all_anchors[:, 1] >= -rpn_straddle_thresh) & ( + all_anchors[:, 2] < im_width + rpn_straddle_thresh) & ( + all_anchors[:, 3] < im_height + rpn_straddle_thresh))[0] + # keep only inside anchors + inside_anchors = all_anchors[inds_inside, :] + else: + inds_inside = np.arange(all_anchors.shape[0]) + inside_anchors = all_anchors + b, e = lod[i], lod[i + 1] - iou_slice = iou[b:e, :] - bboxes_slice = gt_boxes[b:e, :] + gt_boxes_slice = gt_boxes[b:e, :] * im_scale + is_crowd_slice = is_crowd[b:e] - loc_idx, score_idx, tgt_lbl, gt_inds = rpn_target_assign( - iou_slice, rpn_batch_size_per_im, rpn_positive_overlap, - rpn_negative_overlap, fg_fraction) + not_crowd_inds = np.where(is_crowd_slice == 0)[0] + gt_boxes_slice = gt_boxes_slice[not_crowd_inds] + iou = _bbox_overlaps(inside_anchors, gt_boxes_slice) - fg_bboxes = bboxes_slice[gt_inds] - fg_anchors = anchor[loc_idx] - box_deltas = _box_to_delta(fg_anchors, fg_bboxes, [1., 1., 1., 1.]) + loc_inds, score_inds, labels, gt_inds = rpn_target_assign( + iou, rpn_batch_size_per_im, rpn_positive_overlap, + rpn_negative_overlap, rpn_fg_fraction, use_random) + # unmap to all anchor + loc_inds = inds_inside[loc_inds] + score_inds = inds_inside[score_inds] + + sampled_gt = gt_boxes_slice[gt_inds] + sampled_anchor = all_anchors[loc_inds] + box_deltas = _box_to_delta(sampled_anchor, sampled_gt, [1., 1., 1., 1.]) if i == 0: - loc_indexes = loc_idx - score_indexes = score_idx - tmp_tgt_labels = tgt_lbl + loc_indexes = loc_inds + score_indexes = score_inds + tgt_labels = labels tgt_bboxes = box_deltas else: loc_indexes = np.concatenate( - [loc_indexes, loc_idx + i * anchor_num]) + [loc_indexes, loc_inds + i * anchor_num]) score_indexes = np.concatenate( - [score_indexes, score_idx + i * anchor_num]) - tmp_tgt_labels = np.concatenate([tmp_tgt_labels, tgt_lbl]) + [score_indexes, score_inds + i * anchor_num]) + tgt_labels = np.concatenate([tgt_labels, labels]) tgt_bboxes = np.vstack([tgt_bboxes, box_deltas]) - tgt_labels = tmp_tgt_labels[score_indexes] return loc_indexes, score_indexes, tgt_bboxes, tgt_labels class TestRpnTargetAssignOp(OpTest): def setUp(self): n, c, h, w = 2, 4, 14, 14 - anchor = get_anchor(n, c, h, w) + all_anchors = get_anchor(n, c, h, w) gt_num = 10 - anchor = anchor.reshape(-1, 4) - anchor_num = anchor.shape[0] - - im_shapes = [[64, 64], [64, 64]] - gt_box, lod = _generate_groundtruth(im_shapes, 3, 4) - bbox = np.vstack([v['boxes'] for v in gt_box]) - - iou = _bbox_overlaps(bbox, anchor) - - anchor = anchor.astype('float32') - bbox = bbox.astype('float32') - iou = iou.astype('float32') - - loc_index, score_index, tgt_bbox, tgt_lbl = rpn_blob( - anchor, bbox, iou, [0, 4, 8], 25600, 0.95, 0.03, 0.25) + all_anchors = all_anchors.reshape(-1, 4) + anchor_num = all_anchors.shape[0] + + images_shape = [[64, 64], [64, 64]] + #images_shape = [[64, 64]] + groundtruth, lod = _generate_groundtruth(images_shape, 3, 4) + lod = [0, 4, 8] + #lod = [0, 4] + + im_info = np.ones((len(images_shape), 3)).astype(np.float32) + for i in range(len(images_shape)): + im_info[i, 0] = images_shape[i][0] + im_info[i, 1] = images_shape[i][1] + im_info[i, 2] = 0.8 #scale + gt_boxes = np.vstack([v['boxes'] for v in groundtruth]) + is_crowd = np.hstack([v['is_crowd'] for v in groundtruth]) + + all_anchors = all_anchors.astype('float32') + gt_boxes = gt_boxes.astype('float32') + + rpn_straddle_thresh = 0.0 + rpn_batch_size_per_im = 256 + rpn_positive_overlap = 0.7 + rpn_negative_overlap = 0.3 + rpn_fg_fraction = 0.5 + use_random = False + + loc_index, score_index, tgt_bbox, labels = rpn_target_assign_in_python( + all_anchors, gt_boxes, is_crowd, im_info, lod, rpn_straddle_thresh, + rpn_batch_size_per_im, rpn_positive_overlap, rpn_negative_overlap, + rpn_fg_fraction, use_random) + labels = labels[:, np.newaxis] self.op_type = "rpn_target_assign" self.inputs = { - 'Anchor': anchor, - 'GtBox': (bbox, [[4, 4]]), - 'DistMat': (iou, [[4, 4]]), + 'Anchor': all_anchors, + 'GtBoxes': (gt_boxes, [[4, 4]]), + 'IsCrowd': (is_crowd, [[4, 4]]), + 'ImInfo': (im_info, [[1, 1]]) } self.attrs = { - 'rpn_batch_size_per_im': 25600, - 'rpn_positive_overlap': 0.95, - 'rpn_negative_overlap': 0.03, - 'fg_fraction': 0.25, - 'fix_seed': True + 'rpn_batch_size_per_im': rpn_batch_size_per_im, + 'rpn_straddle_thresh': rpn_straddle_thresh, + 'rpn_positive_overlap': rpn_positive_overlap, + 'rpn_negative_overlap': rpn_negative_overlap, + 'rpn_fg_fraction': rpn_fg_fraction, + 'use_random': use_random } self.outputs = { 'LocationIndex': loc_index.astype('int32'), 'ScoreIndex': score_index.astype('int32'), 'TargetBBox': tgt_bbox.astype('float32'), - 'TargetLabel': tgt_lbl.astype('int64'), + 'TargetLabel': labels.astype('int32') } def test_check_output(self): -- GitLab