diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index f723b1ec2174849c23754254dd4a886f50e46090..46726ab4945b051880494321d0e9336f73f3bd2e 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -303,6 +303,7 @@ paddle.fluid.layers.ssd_loss ArgSpec(args=['location', 'confidence', 'gt_box', ' paddle.fluid.layers.detection_map ArgSpec(args=['detect_res', 'label', 'class_num', 'background_label', 'overlap_threshold', 'evaluate_difficult', 'has_state', 'input_states', 'out_states', 'ap_version'], varargs=None, keywords=None, defaults=(0, 0.3, True, None, None, None, 'integral')) paddle.fluid.layers.rpn_target_assign ArgSpec(args=['loc', 'scores', 'anchor_box', 'gt_box', 'rpn_batch_size_per_im', 'fg_fraction', 'rpn_positive_overlap', 'rpn_negative_overlap'], varargs=None, keywords=None, defaults=(256, 0.25, 0.7, 0.3)) paddle.fluid.layers.anchor_generator ArgSpec(args=['input', 'anchor_sizes', 'aspect_ratios', 'variance', 'stride', 'offset', 'name'], varargs=None, keywords=None, defaults=(None, None, [0.1, 0.1, 0.2, 0.2], None, 0.5, None)) +paddle.fluid.layers.generate_proposal_labels ArgSpec(args=['rpn_rois', 'gt_classes', 'gt_boxes', 'im_scales', 'batch_size_per_im', 'fg_fraction', 'fg_thresh', 'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights', 'class_nums'], varargs=None, keywords=None, defaults=(256, 0.25, 0.25, 0.5, 0.0, [0.1, 0.1, 0.2, 0.2], None)) paddle.fluid.layers.generate_proposals ArgSpec(args=['scores', 'bbox_deltas', 'im_info', 'anchors', 'variances', 'pre_nms_top_n', 'post_nms_top_n', 'nms_thresh', 'min_size', 'eta', 'name'], varargs=None, keywords=None, defaults=(6000, 1000, 0.5, 0.1, 1.0, None)) paddle.fluid.layers.iou_similarity ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) paddle.fluid.layers.box_coder ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) diff --git a/paddle/fluid/operators/detection/CMakeLists.txt b/paddle/fluid/operators/detection/CMakeLists.txt index 
1301c8ae2b145298b18f68f86dadd3c5cbe4271a..f4983c65432991a45f226d97f0fb05b08a30ca89 100644 --- a/paddle/fluid/operators/detection/CMakeLists.txt +++ b/paddle/fluid/operators/detection/CMakeLists.txt @@ -29,6 +29,7 @@ target_assign_op.cu) detection_library(polygon_box_transform_op SRCS polygon_box_transform_op.cc polygon_box_transform_op.cu) detection_library(rpn_target_assign_op SRCS rpn_target_assign_op.cc) +detection_library(generate_proposal_labels_op SRCS generate_proposal_labels_op.cc) detection_library(generate_proposals_op SRCS generate_proposals_op.cc) #Export local libraries to parent set(DETECTION_LIBRARY ${LOCAL_DETECTION_LIBS} PARENT_SCOPE) diff --git a/paddle/fluid/operators/detection/generate_proposal_labels_op.cc b/paddle/fluid/operators/detection/generate_proposal_labels_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..0571c46f6be99c9a06b7dd2abb310eeda506ecd5 --- /dev/null +++ b/paddle/fluid/operators/detection/generate_proposal_labels_op.cc @@ -0,0 +1,515 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include +#include +#include +#include +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/gather.h" +#include "paddle/fluid/operators/math/concat.h" +#include "paddle/fluid/operators/math/math_function.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +using LoDTensor = framework::LoDTensor; +const int kBoxDim = 4; + +template +void AppendRois(LoDTensor* out, int64_t offset, Tensor* to_add) { + auto* out_data = out->data(); + auto* to_add_data = to_add->data(); + memcpy(out_data + offset, to_add_data, to_add->numel() * sizeof(T)); +} + +class GenerateProposalLabelsOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("RpnRois"), + "Input(RpnRois) shouldn't be null."); + PADDLE_ENFORCE(ctx->HasInput("GtClasses"), + "Input(GtClasses) shouldn't be null."); + PADDLE_ENFORCE(ctx->HasInput("GtBoxes"), + "Input(GtBoxes) shouldn't be null."); + PADDLE_ENFORCE(ctx->HasInput("ImScales"), + "Input(ImScales) shouldn't be null."); + + PADDLE_ENFORCE(ctx->HasOutput("Rois"), + "Output(Rois) of RpnTargetAssignOp should not be null"); + PADDLE_ENFORCE( + ctx->HasOutput("LabelsInt32"), + "Output(LabelsInt32) of RpnTargetAssignOp should not be null"); + PADDLE_ENFORCE( + ctx->HasOutput("BboxTargets"), + "Output(BboxTargets) of RpnTargetAssignOp should not be null"); + PADDLE_ENFORCE( + ctx->HasOutput("BboxInsideWeights"), + "Output(BboxInsideWeights) of RpnTargetAssignOp should not be null"); + PADDLE_ENFORCE( + ctx->HasOutput("BboxOutsideWeights"), + "Output(BboxOutsideWeights) of RpnTargetAssignOp should not be null"); + + auto rpn_rois_dims = ctx->GetInputDim("RpnRois"); + auto gt_classes_dims = ctx->GetInputDim("GtClasses"); + auto gt_boxes_dims = ctx->GetInputDim("GtBoxes"); + auto im_scales_dims = ctx->GetInputDim("ImScales"); + + 
PADDLE_ENFORCE_EQ(rpn_rois_dims.size(), 2, + "The rank of Input(RpnRois) must be 2."); + PADDLE_ENFORCE_EQ(gt_classes_dims.size(), 1, + "The rank of Input(GtClasses) must be 1."); + PADDLE_ENFORCE_EQ(gt_boxes_dims.size(), 2, + "The rank of Input(GtBoxes) must be 2."); + PADDLE_ENFORCE_EQ(im_scales_dims.size(), 1, + "The rank of Input(ImScales) must be 1."); + + int class_nums = ctx->Attrs().Get("class_nums"); + + ctx->SetOutputDim("Rois", {-1, 4}); + ctx->SetOutputDim("LabelsInt32", {-1}); + ctx->SetOutputDim("BboxTargets", {-1, 4 * class_nums}); + ctx->SetOutputDim("BboxInsideWeights", {-1, 4 * class_nums}); + ctx->SetOutputDim("BboxOutsideWeights", {-1, 4 * class_nums}); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + auto data_type = framework::GetDataTypeOfVar(ctx.InputVar("RpnRois")); + return framework::OpKernelType(data_type, platform::CPUPlace()); + } +}; + +template +void Concat(const platform::CPUDeviceContext& context, + const Tensor& in_tensor_a, const Tensor& in_tensor_b, + Tensor* out_tensor) { + int axis = 0; + std::vector inputs; + inputs.emplace_back(in_tensor_a); + inputs.emplace_back(in_tensor_b); + math::ConcatFunctor concat_functor; + concat_functor(context, inputs, axis, out_tensor); +} + +template +void BboxOverlaps(const Tensor& r_boxes, const Tensor& c_boxes, + Tensor* overlaps) { + auto r_boxes_et = framework::EigenTensor::From(r_boxes); + auto c_boxes_et = framework::EigenTensor::From(c_boxes); + auto overlaps_et = framework::EigenTensor::From(*overlaps); + int r_num = r_boxes.dims()[0]; + int c_num = c_boxes.dims()[0]; + auto zero = static_cast(0.0); + T r_box_area, c_box_area, x_min, y_min, x_max, y_max, inter_w, inter_h, + inter_area; + for (int i = 0; i < r_num; ++i) { + r_box_area = (r_boxes_et(i, 2) - r_boxes_et(i, 0) + 1) * + (r_boxes_et(i, 3) - r_boxes_et(i, 1) + 1); + for (int j = 0; j < c_num; ++j) { + c_box_area = (c_boxes_et(j, 2) - c_boxes_et(j, 
0) + 1) * + (c_boxes_et(j, 3) - c_boxes_et(j, 1) + 1); + x_min = std::max(r_boxes_et(i, 0), c_boxes_et(j, 0)); + y_min = std::max(r_boxes_et(i, 1), c_boxes_et(j, 1)); + x_max = std::min(r_boxes_et(i, 2), c_boxes_et(j, 2)); + y_max = std::min(r_boxes_et(i, 3), c_boxes_et(j, 3)); + inter_w = std::max(x_max - x_min + 1, zero); + inter_h = std::max(y_max - y_min + 1, zero); + inter_area = inter_w * inter_h; + overlaps_et(i, j) = inter_area / (r_box_area + c_box_area - inter_area); + } + } +} + +template +void BoxToDelta(int box_num, const Tensor& ex_boxes, const Tensor& gt_boxes, + const std::vector& weights, Tensor* box_delta) { + auto ex_boxes_et = framework::EigenTensor::From(ex_boxes); + auto gt_boxes_et = framework::EigenTensor::From(gt_boxes); + auto box_delta_et = framework::EigenTensor::From(*box_delta); + T ex_w, ex_h, ex_ctr_x, ex_ctr_y, gt_w, gt_h, gt_ctr_x, gt_ctr_y; + for (int64_t i = 0; i < box_num; ++i) { + ex_w = ex_boxes_et(i, 2) - ex_boxes_et(i, 0) + 1; + ex_h = ex_boxes_et(i, 3) - ex_boxes_et(i, 1) + 1; + ex_ctr_x = ex_boxes_et(i, 0) + 0.5 * ex_w; + ex_ctr_y = ex_boxes_et(i, 1) + 0.5 * ex_h; + + gt_w = gt_boxes_et(i, 2) - gt_boxes_et(i, 0) + 1; + gt_h = gt_boxes_et(i, 3) - gt_boxes_et(i, 1) + 1; + gt_ctr_x = gt_boxes_et(i, 0) + 0.5 * gt_w; + gt_ctr_y = gt_boxes_et(i, 1) + 0.5 * gt_h; + + box_delta_et(i, 0) = (gt_ctr_x - ex_ctr_x) / ex_w / weights[0]; + box_delta_et(i, 1) = (gt_ctr_y - ex_ctr_y) / ex_h / weights[1]; + box_delta_et(i, 2) = log(gt_w / ex_w) / ex_w / weights[2]; + box_delta_et(i, 3) = log(gt_h / ex_h) / ex_h / weights[3]; + } +} + +template +std::vector> SampleFgBgGt( + const platform::CPUDeviceContext& context, Tensor* iou, + const int batch_size_per_im, const float fg_fraction, const float fg_thresh, + const float bg_thresh_hi, const float bg_thresh_lo, + std::minstd_rand engine) { + std::vector fg_inds; + std::vector bg_inds; + std::vector gt_inds; + T* proposal_to_gt_overlaps = iou->mutable_data(context.GetPlace()); + int64_t row = 
iou->dims()[0]; + int64_t col = iou->dims()[1]; + float epsilon = 0.00001; + + // Follow the Faster RCNN's implementation + for (int64_t i = 0; i < row; ++i) { + const T* v = proposal_to_gt_overlaps + i * col; + T max_overlap = *std::max_element(v, v + col); + if (max_overlap > fg_thresh) { + for (int64_t j = 0; j < col; ++j) { + T val = proposal_to_gt_overlaps[i * col + j]; + auto diff = std::abs(max_overlap - val); + if (diff < epsilon) { + fg_inds.emplace_back(i); + gt_inds.emplace_back(j); + break; + } + } + } else { + if ((max_overlap >= bg_thresh_lo) && (max_overlap < bg_thresh_hi)) { + bg_inds.emplace_back(i); + } + } + } + + // Reservoir Sampling + int fg_rois_per_im = std::floor(batch_size_per_im * fg_fraction); + int fg_rois_this_image = fg_inds.size(); + int fg_rois_per_this_image = std::min(fg_rois_per_im, fg_rois_this_image); + std::uniform_real_distribution uniform(0, 1); + const int64_t fg_size = static_cast(fg_inds.size()); + if (fg_size > fg_rois_per_this_image) { + for (int64_t i = fg_rois_per_this_image; i < fg_size; ++i) { + int rng_ind = std::floor(uniform(engine) * i); + if (rng_ind < fg_rois_per_this_image) { + std::iter_swap(fg_inds.begin() + rng_ind, fg_inds.begin() + i); + std::iter_swap(gt_inds.begin() + rng_ind, gt_inds.begin() + i); + } + } + } + std::vector new_fg_inds(fg_inds.begin(), + fg_inds.begin() + fg_rois_per_this_image); + std::vector new_gt_inds(gt_inds.begin(), + gt_inds.begin() + fg_rois_per_this_image); + + int bg_rois_per_image = batch_size_per_im - fg_rois_per_this_image; + int bg_rois_this_image = bg_inds.size(); + int bg_rois_per_this_image = std::min(bg_rois_per_image, bg_rois_this_image); + const int64_t bg_size = static_cast(bg_inds.size()); + if (bg_size > bg_rois_per_this_image) { + for (int64_t i = bg_rois_per_this_image; i < bg_size; ++i) { + int rng_ind = std::floor(uniform(engine) * i); + if (rng_ind < fg_rois_per_this_image) + std::iter_swap(bg_inds.begin() + rng_ind, bg_inds.begin() + i); + } + } + 
std::vector new_bg_inds(bg_inds.begin(), + bg_inds.begin() + bg_rois_per_this_image); + std::vector> res; + res.emplace_back(new_fg_inds); + res.emplace_back(new_bg_inds); + res.emplace_back(new_gt_inds); + return res; +} + +template +void GatherBoxesLabels(const platform::CPUDeviceContext& context, + const Tensor& boxes, const Tensor& gt_boxes, + const Tensor& gt_classes, + const std::vector& fg_inds, + const std::vector& bg_inds, + const std::vector& gt_inds, Tensor* sampled_boxes, + Tensor* sampled_labels, Tensor* sampled_gts) { + int fg_num = fg_inds.size(); + int bg_num = bg_inds.size(); + int gt_num = fg_num + bg_num; + Tensor fg_inds_t, bg_inds_t, gt_box_inds_t, gt_label_inds_t; + int* fg_inds_data = fg_inds_t.mutable_data({fg_num}, context.GetPlace()); + int* bg_inds_data = bg_inds_t.mutable_data({bg_num}, context.GetPlace()); + int* gt_box_inds_data = + gt_box_inds_t.mutable_data({gt_num}, context.GetPlace()); + int* gt_label_inds_data = + gt_label_inds_t.mutable_data({fg_num}, context.GetPlace()); + std::copy(fg_inds.begin(), fg_inds.end(), fg_inds_data); + std::copy(bg_inds.begin(), bg_inds.end(), bg_inds_data); + std::copy(gt_inds.begin(), gt_inds.end(), gt_box_inds_data); + std::copy(gt_inds.begin(), gt_inds.end(), gt_label_inds_data); + + Tensor fg_boxes, bg_boxes, fg_labels, bg_labels; + fg_boxes.mutable_data({fg_num, kBoxDim}, context.GetPlace()); + CPUGather(context, boxes, fg_inds_t, &fg_boxes); + bg_boxes.mutable_data({bg_num, kBoxDim}, context.GetPlace()); + CPUGather(context, boxes, bg_inds_t, &bg_boxes); + Concat(context, fg_boxes, bg_boxes, sampled_boxes); + CPUGather(context, gt_boxes, gt_box_inds_t, sampled_gts); + fg_labels.mutable_data({fg_num}, context.GetPlace()); + CPUGather(context, gt_classes, gt_label_inds_t, &fg_labels); + bg_labels.mutable_data({bg_num}, context.GetPlace()); + math::set_constant(context, &bg_labels, 0); + Concat(context, fg_labels, bg_labels, sampled_labels); +} + +template +std::vector SampleRoisForOneImage( + 
const platform::CPUDeviceContext& context, Tensor* rpn_rois, + Tensor* gt_classes, Tensor* gt_boxes, Tensor* im_scale, + const int batch_size_per_im, const float fg_fraction, const float fg_thresh, + const float bg_thresh_hi, const float bg_thresh_lo, + const std::vector& bbox_reg_weights, const int class_nums, + std::minstd_rand engine) { + auto rpn_rois_et = framework::EigenTensor::From(*rpn_rois); + auto im_scale_data = im_scale->data()[0]; + rpn_rois_et = rpn_rois_et / im_scale_data; + + Tensor boxes; + int proposals_num = gt_boxes->dims()[0] + rpn_rois->dims()[0]; + boxes.mutable_data({proposals_num, kBoxDim}, context.GetPlace()); + Concat(context, *gt_boxes, *rpn_rois, &boxes); + + // Overlaps + Tensor proposal_to_gt_overlaps; + proposal_to_gt_overlaps.mutable_data({proposals_num, gt_boxes->dims()[0]}, + context.GetPlace()); + BboxOverlaps(boxes, *gt_boxes, &proposal_to_gt_overlaps); + + // Generate proposal index + std::vector> fg_bg_gt = SampleFgBgGt( + context, &proposal_to_gt_overlaps, batch_size_per_im, fg_fraction, + fg_thresh, bg_thresh_hi, bg_thresh_lo, engine); + std::vector fg_inds = fg_bg_gt[0]; + std::vector bg_inds = fg_bg_gt[1]; + std::vector gt_inds = fg_bg_gt[2]; + + // Gather boxes and labels + Tensor sampled_boxes, sampled_labels, sampled_gts; + int boxes_num = fg_inds.size() + bg_inds.size(); + framework::DDim bbox_dim({boxes_num, kBoxDim}); + sampled_boxes.mutable_data(bbox_dim, context.GetPlace()); + sampled_labels.mutable_data({boxes_num}, context.GetPlace()); + sampled_gts.mutable_data(bbox_dim, context.GetPlace()); + GatherBoxesLabels(context, boxes, *gt_boxes, *gt_classes, fg_inds, bg_inds, + gt_inds, &sampled_boxes, &sampled_labels, &sampled_gts); + + // Compute targets + Tensor bbox_targets_single; + bbox_targets_single.mutable_data(bbox_dim, context.GetPlace()); + BoxToDelta(boxes_num, sampled_boxes, sampled_gts, bbox_reg_weights, + &bbox_targets_single); + + // Scale rois + Tensor sampled_rois; + 
sampled_rois.mutable_data(sampled_boxes.dims(), context.GetPlace()); + auto sampled_rois_et = framework::EigenTensor::From(sampled_rois); + auto sampled_boxes_et = framework::EigenTensor::From(sampled_boxes); + sampled_rois_et = sampled_boxes_et * im_scale_data; + + // Expand box targets + Tensor bbox_targets, bbox_inside_weights, bbox_outside_weights; + framework::DDim bbox_expand_dim({boxes_num, kBoxDim * class_nums}); + bbox_targets.mutable_data(bbox_expand_dim, context.GetPlace()); + bbox_inside_weights.mutable_data(bbox_expand_dim, context.GetPlace()); + bbox_outside_weights.mutable_data(bbox_expand_dim, context.GetPlace()); + math::set_constant(context, &bbox_targets, 0.0); + math::set_constant(context, &bbox_inside_weights, 0.0); + math::set_constant(context, &bbox_outside_weights, 0.0); + + auto* bbox_targets_single_data = bbox_targets_single.data(); + auto* sampled_labels_data = sampled_labels.data(); + auto* bbox_targets_data = bbox_targets.data(); + auto* bbox_inside_weights_data = bbox_inside_weights.data(); + auto* bbox_outside_weights_data = bbox_outside_weights.data(); + int width = kBoxDim * class_nums; + for (int64_t i = 0; i < boxes_num; ++i) { + int label = sampled_labels_data[i]; + if (label > 0) { + int dst_idx = i * width + kBoxDim * label; + int src_idx = kBoxDim * i; + bbox_targets_data[dst_idx] = bbox_targets_single_data[src_idx]; + bbox_targets_data[dst_idx + 1] = bbox_targets_single_data[src_idx + 1]; + bbox_targets_data[dst_idx + 2] = bbox_targets_single_data[src_idx + 2]; + bbox_targets_data[dst_idx + 3] = bbox_targets_single_data[src_idx + 3]; + bbox_inside_weights_data[dst_idx] = 1; + bbox_inside_weights_data[dst_idx + 1] = 1; + bbox_inside_weights_data[dst_idx + 2] = 1; + bbox_inside_weights_data[dst_idx + 3] = 1; + bbox_outside_weights_data[dst_idx] = 1; + bbox_outside_weights_data[dst_idx + 1] = 1; + bbox_outside_weights_data[dst_idx + 2] = 1; + bbox_outside_weights_data[dst_idx + 3] = 1; + } + } + std::vector res; + 
res.emplace_back(sampled_rois); + res.emplace_back(sampled_labels); + res.emplace_back(bbox_targets); + res.emplace_back(bbox_inside_weights); + res.emplace_back(bbox_outside_weights); + return res; +} + +template +class GenerateProposalLabelsKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* rpn_rois = context.Input("RpnRois"); + auto* gt_classes = context.Input("GtClasses"); + auto* gt_boxes = context.Input("GtBoxes"); + auto* im_scales = context.Input("ImScales"); + + auto* rois = context.Output("Rois"); + auto* labels_int32 = context.Output("LabelsInt32"); + auto* bbox_targets = context.Output("BboxTargets"); + auto* bbox_inside_weights = context.Output("BboxInsideWeights"); + auto* bbox_outside_weights = + context.Output("BboxOutsideWeights"); + + int batch_size_per_im = context.Attr("batch_size_per_im"); + float fg_fraction = context.Attr("fg_fraction"); + float fg_thresh = context.Attr("fg_thresh"); + float bg_thresh_hi = context.Attr("bg_thresh_hi"); + float bg_thresh_lo = context.Attr("bg_thresh_lo"); + std::vector bbox_reg_weights = + context.Attr>("bbox_reg_weights"); + int class_nums = context.Attr("class_nums"); + + PADDLE_ENFORCE_EQ(rpn_rois->lod().size(), 1UL, + "GenerateProposalLabelsOp rpn_rois needs 1 level of LoD"); + PADDLE_ENFORCE_EQ( + gt_classes->lod().size(), 1UL, + "GenerateProposalLabelsOp gt_classes needs 1 level of LoD"); + PADDLE_ENFORCE_EQ(gt_boxes->lod().size(), 1UL, + "GenerateProposalLabelsOp gt_boxes needs 1 level of LoD"); + int64_t n = static_cast(rpn_rois->lod().back().size() - 1); + + rois->mutable_data({n * batch_size_per_im, kBoxDim}, context.GetPlace()); + labels_int32->mutable_data({n * batch_size_per_im}, + context.GetPlace()); + bbox_targets->mutable_data({n * batch_size_per_im, kBoxDim * class_nums}, + context.GetPlace()); + bbox_inside_weights->mutable_data( + {n * batch_size_per_im, kBoxDim * class_nums}, context.GetPlace()); + 
bbox_outside_weights->mutable_data( + {n * batch_size_per_im, kBoxDim * class_nums}, context.GetPlace()); + + std::random_device rnd; + std::minstd_rand engine; + int seed = + context.Attr("fix_seed") ? context.Attr("seed") : rnd(); + engine.seed(seed); + + framework::LoD lod; + std::vector lod0(1, 0); + + int64_t num_rois = 0; + auto& dev_ctx = context.device_context(); + + auto rpn_rois_lod = rpn_rois->lod().back(); + auto gt_classes_lod = gt_classes->lod().back(); + auto gt_boxes_lod = gt_boxes->lod().back(); + for (size_t i = 0; i < n; ++i) { + Tensor rpn_rois_slice = + rpn_rois->Slice(rpn_rois_lod[i], rpn_rois_lod[i + 1]); + Tensor gt_classes_slice = + gt_classes->Slice(gt_classes_lod[i], gt_classes_lod[i + 1]); + Tensor gt_boxes_slice = + gt_boxes->Slice(gt_boxes_lod[i], gt_boxes_lod[i + 1]); + Tensor im_scales_slice = im_scales->Slice(i, i + 1); + std::vector tensor_output = SampleRoisForOneImage( + dev_ctx, &rpn_rois_slice, >_classes_slice, >_boxes_slice, + &im_scales_slice, batch_size_per_im, fg_fraction, fg_thresh, + bg_thresh_hi, bg_thresh_lo, bbox_reg_weights, class_nums, engine); + Tensor sampled_rois = tensor_output[0]; + Tensor sampled_labels_int32 = tensor_output[1]; + Tensor sampled_bbox_targets = tensor_output[2]; + Tensor sampled_bbox_inside_weights = tensor_output[3]; + Tensor sampled_bbox_outside_weights = tensor_output[4]; + + AppendRois(rois, kBoxDim * num_rois, &sampled_rois); + AppendRois(labels_int32, num_rois, &sampled_labels_int32); + AppendRois(bbox_targets, kBoxDim * num_rois * class_nums, + &sampled_bbox_targets); + AppendRois(bbox_inside_weights, kBoxDim * num_rois * class_nums, + &sampled_bbox_inside_weights); + AppendRois(bbox_outside_weights, kBoxDim * num_rois * class_nums, + &sampled_bbox_outside_weights); + + num_rois += sampled_rois.dims()[0]; + lod0.emplace_back(num_rois); + } + + lod.emplace_back(lod0); + rois->set_lod(lod); + labels_int32->set_lod(lod); + bbox_targets->set_lod(lod); + bbox_inside_weights->set_lod(lod); + 
bbox_outside_weights->set_lod(lod); + rois->Resize({num_rois, kBoxDim}); + labels_int32->Resize({num_rois}); + bbox_targets->Resize({num_rois, kBoxDim * class_nums}); + bbox_inside_weights->Resize({num_rois, kBoxDim * class_nums}); + bbox_outside_weights->Resize({num_rois, kBoxDim * class_nums}); + } +}; + +class GenerateProposalLabelsOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + // TODO(buxingyuan): Add Document + AddInput("RpnRois", "RpnRois."); + AddInput("GtClasses", "GtClasses."); + AddInput("GtBoxes", "GtBoxes."); + AddInput("ImScales", "ImScales."); + + AddOutput("Rois", "Rois."); + AddOutput("LabelsInt32", "LabelsInt32."); + AddOutput("BboxTargets", "BboxTargets."); + AddOutput("BboxInsideWeights", "BboxInsideWeights."); + AddOutput("BboxOutsideWeights", "BboxOutsideWeights."); + + AddAttr("batch_size_per_im", "batch_size_per_im"); + AddAttr("fg_fraction", "fg_fraction"); + AddAttr("fg_thresh", "fg_thresh"); + AddAttr("bg_thresh_hi", "bg_thresh_hi"); + AddAttr("bg_thresh_lo", "bg_thresh_lo"); + AddAttr>("bbox_reg_weights", "bbox_reg_weights"); + AddAttr("class_nums", "class_nums"); + AddAttr("fix_seed", "fix_seed").SetDefault(false); + AddAttr("seed", "seed").SetDefault(0); + + AddComment(R"DOC( +Generate Proposals Labels Operator. 
+)DOC"); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR(generate_proposal_labels, ops::GenerateProposalLabelsOp, + ops::GenerateProposalLabelsOpMaker, + paddle::framework::EmptyGradOpMaker); +REGISTER_OP_CPU_KERNEL(generate_proposal_labels, + ops::GenerateProposalLabelsKernel, + ops::GenerateProposalLabelsKernel); diff --git a/paddle/fluid/operators/detection/rpn_target_assign_op.cc b/paddle/fluid/operators/detection/rpn_target_assign_op.cc index 9a1643d5b35c067ba9064286bab32019fb34fbe8..177ff7cf187bc9daf69889e99ca57ae18766de90 100644 --- a/paddle/fluid/operators/detection/rpn_target_assign_op.cc +++ b/paddle/fluid/operators/detection/rpn_target_assign_op.cc @@ -86,7 +86,7 @@ class RpnTargetAssignKernel : public framework::OpKernel { std::minstd_rand engine, std::vector* inds) const { std::uniform_real_distribution uniform(0, 1); - const int64_t size = static_cast(inds->size()); + const int64_t size = static_cast(inds->size() - offset); if (size > num) { for (int64_t i = num; i < size; ++i) { int rng_ind = std::floor(uniform(engine) * i); @@ -126,7 +126,7 @@ class RpnTargetAssignKernel : public framework::OpKernel { neg_threshold, target_label_data, fg_inds, bg_inds); // Reservoir Sampling ReservoirSampling(fg_num, fg_offset, engine, fg_inds); - int bg_num = rpn_batch_size - fg_inds->size(); + int bg_num = rpn_batch_size - (fg_inds->size() - fg_offset); ReservoirSampling(bg_num, bg_offset, engine, bg_inds); } diff --git a/paddle/fluid/operators/gather_op.cc b/paddle/fluid/operators/gather_op.cc index aa3e05b83b23569a4dd9c83294916e289f993abc..089b541a0a61adb5efda6b2e027c913d5808dff0 100644 --- a/paddle/fluid/operators/gather_op.cc +++ b/paddle/fluid/operators/gather_op.cc @@ -101,5 +101,8 @@ namespace ops = paddle::operators; REGISTER_OPERATOR(gather, ops::GatherOp, ops::GatherOpMaker, paddle::framework::DefaultGradOpDescMaker); REGISTER_OPERATOR(gather_grad, ops::GatherGradOp); 
-REGISTER_OP_CPU_KERNEL(gather, ops::GatherOpKernel); -REGISTER_OP_CPU_KERNEL(gather_grad, ops::GatherGradientOpKernel); +REGISTER_OP_CPU_KERNEL(gather, ops::GatherOpKernel, + ops::GatherOpKernel, ops::GatherOpKernel); +REGISTER_OP_CPU_KERNEL(gather_grad, ops::GatherGradientOpKernel, + ops::GatherGradientOpKernel, + ops::GatherGradientOpKernel); diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index a5bc1fa8f801066a8281a828eb87dbccb4bb0eff..8bb161495badb3272f8c103f5aced6351abab1f0 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -39,6 +39,7 @@ __all__ = [ 'detection_map', 'rpn_target_assign', 'anchor_generator', + 'generate_proposal_labels', 'generate_proposals', ] @@ -1256,6 +1257,64 @@ def anchor_generator(input, return anchor, var +def generate_proposal_labels(rpn_rois, + gt_classes, + gt_boxes, + im_scales, + batch_size_per_im=256, + fg_fraction=0.25, + fg_thresh=0.25, + bg_thresh_hi=0.5, + bg_thresh_lo=0.0, + bbox_reg_weights=[0.1, 0.1, 0.2, 0.2], + class_nums=None): + """ + ** Generate proposal labels Faster-RCNN ** + TODO(buxingyuan): Add Document + """ + + helper = LayerHelper('generate_proposal_labels', **locals()) + + rois = helper.create_tmp_variable(dtype=rpn_rois.dtype) + labels_int32 = helper.create_tmp_variable(dtype=gt_classes.dtype) + bbox_targets = helper.create_tmp_variable(dtype=rpn_rois.dtype) + bbox_inside_weights = helper.create_tmp_variable(dtype=rpn_rois.dtype) + bbox_outside_weights = helper.create_tmp_variable(dtype=rpn_rois.dtype) + + helper.append_op( + type="generate_proposal_labels", + inputs={ + 'RpnRois': rpn_rois, + 'GtClasses': gt_classes, + 'GtBoxes': gt_boxes, + 'ImScales': im_scales + }, + outputs={ + 'Rois': rois, + 'LabelsInt32': labels_int32, + 'BboxTargets': bbox_targets, + 'BboxInsideWeights': bbox_inside_weights, + 'BboxOutsideWeights': bbox_outside_weights + }, + attrs={ + 'batch_size_per_im': batch_size_per_im, + 
'fg_fraction': fg_fraction, + 'fg_thresh': fg_thresh, + 'bg_thresh_hi': bg_thresh_hi, + 'bg_thresh_lo': bg_thresh_lo, + 'bbox_reg_weights': bbox_reg_weights, + 'class_nums': class_nums + }) + + rois.stop_gradient = True + labels_int32.stop_gradient = True + bbox_targets.stop_gradient = True + bbox_inside_weights.stop_gradient = True + bbox_outside_weights.stop_gradient = True + + return rois, labels_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights + + def generate_proposals(scores, bbox_deltas, im_info, diff --git a/python/paddle/fluid/tests/test_detection.py b/python/paddle/fluid/tests/test_detection.py index b71b440d3c9155e63220f5f2d7e849e8332bfb16..9cbd8b68666f0ea82186c517c10515ebe1b9d2a5 100644 --- a/python/paddle/fluid/tests/test_detection.py +++ b/python/paddle/fluid/tests/test_detection.py @@ -146,6 +146,55 @@ class TestAnchorGenerator(unittest.TestCase): assert anchor.shape[3] == 4 +class TestGenerateProposalLabels(unittest.TestCase): + def test_generate_proposal_labels(self): + rpn_rois = layers.data( + name='rpn_rois', + shape=[4, 4], + dtype='float32', + lod_level=1, + append_batch_size=False) + gt_classes = layers.data( + name='gt_classes', + shape=[6], + dtype='int32', + lod_level=1, + append_batch_size=False) + gt_boxes = layers.data( + name='gt_boxes', + shape=[6, 4], + dtype='float32', + lod_level=1, + append_batch_size=False) + im_scales = layers.data( + name='im_scales', + shape=[1], + dtype='float32', + lod_level=1, + append_batch_size=False) + class_nums = 5 + rois, labels_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights = fluid.layers.generate_proposal_labels( + rpn_rois=rpn_rois, + gt_classes=gt_classes, + gt_boxes=gt_boxes, + im_scales=im_scales, + batch_size_per_im=2, + fg_fraction=0.5, + fg_thresh=0.5, + bg_thresh_hi=0.5, + bg_thresh_lo=0.0, + bbox_reg_weights=[0.1, 0.1, 0.2, 0.2], + class_nums=class_nums) + assert rois.shape[1] == 4 + assert rois.shape[0] == labels_int32.shape[0] + assert rois.shape[0] == 
bbox_targets.shape[0] + assert rois.shape[0] == bbox_inside_weights.shape[0] + assert rois.shape[0] == bbox_outside_weights.shape[0] + assert bbox_targets.shape[1] == 4 * class_nums + assert bbox_inside_weights.shape[1] == 4 * class_nums + assert bbox_outside_weights.shape[1] == 4 * class_nums + + class TestMultiBoxHead(unittest.TestCase): def test_multi_box_head(self): data_shape = [3, 224, 224] diff --git a/python/paddle/fluid/tests/unittests/test_generate_proposal_labels.py b/python/paddle/fluid/tests/unittests/test_generate_proposal_labels.py new file mode 100644 index 0000000000000000000000000000000000000000..ce766fffbce98a6a2cee4c508d6db85ee0163401 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_generate_proposal_labels.py @@ -0,0 +1,317 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import unittest
import numpy as np
import sys
import math
import paddle.fluid as fluid
from op_test import OpTest


def generate_proposal_labels_in_python(
        rpn_rois, gt_classes, gt_boxes, im_scales, batch_size_per_im,
        fg_fraction, fg_thresh, bg_thresh_hi, bg_thresh_lo, bbox_reg_weights,
        class_nums):
    """NumPy reference for the ``generate_proposal_labels`` operator.

    Runs ``_sample_rois`` once per image and collects the per-image results.

    Args:
        rpn_rois: per-image sequence of proposal boxes.
        gt_classes: per-image sequence of ground-truth class ids.
        gt_boxes: per-image sequence of ground-truth boxes.
        im_scales: per-image scale factors; its length defines the batch size.
        batch_size_per_im: maximum number of RoIs kept per image.
        fg_fraction: target fraction of foreground RoIs among the kept ones.
        fg_thresh: minimum overlap for a box to count as foreground.
        bg_thresh_hi: exclusive upper overlap bound for background boxes.
        bg_thresh_lo: inclusive lower overlap bound for background boxes.
        bbox_reg_weights: four weights applied to the regression deltas.
        class_nums: number of classes (width of the per-class overlap table).

    Returns:
        Tuple ``(rois, labels_int32, bbox_targets, bbox_inside_weights,
        bbox_outside_weights, lod)`` where the first five are lists with one
        array per image and ``lod`` holds the number of RoIs kept per image.
    """
    rois = []
    labels_int32 = []
    bbox_targets = []
    bbox_inside_weights = []
    bbox_outside_weights = []
    lod = []
    assert len(rpn_rois) == len(
        im_scales), 'batch size of rpn_rois and ground_truth is not matched'

    for im_i, im_scale in enumerate(im_scales):
        frcn_blobs = _sample_rois(
            rpn_rois[im_i], gt_classes[im_i], gt_boxes[im_i], im_scale,
            batch_size_per_im, fg_fraction, fg_thresh, bg_thresh_hi,
            bg_thresh_lo, bbox_reg_weights, class_nums)

        lod.append(frcn_blobs['rois'].shape[0])

        rois.append(frcn_blobs['rois'])
        labels_int32.append(frcn_blobs['labels_int32'])
        bbox_targets.append(frcn_blobs['bbox_targets'])
        bbox_inside_weights.append(frcn_blobs['bbox_inside_weights'])
        bbox_outside_weights.append(frcn_blobs['bbox_outside_weights'])

    return (rois, labels_int32, bbox_targets, bbox_inside_weights,
            bbox_outside_weights, lod)


def _sample_rois(rpn_rois, gt_classes, gt_boxes, im_scale, batch_size_per_im,
                 fg_fraction, fg_thresh, bg_thresh_hi, bg_thresh_lo,
                 bbox_reg_weights, class_nums):
    """Select foreground/background RoIs for one image and build its targets.

    Ground-truth boxes are prepended to the (un-scaled) proposals, every box
    is scored by its best overlap with a ground-truth box, and at most
    ``batch_size_per_im`` boxes are kept. Random sub-sampling through
    ``np.random.choice`` only happens when there are more candidates than
    the foreground/background budget allows.

    Returns:
        dict with keys ``rois``, ``labels_int32``, ``bbox_targets``,
        ``bbox_inside_weights`` and ``bbox_outside_weights``.
    """
    rois_per_image = int(batch_size_per_im)
    fg_rois_per_im = int(np.round(fg_fraction * rois_per_image))

    # Roidb: bring the proposals back to the original image scale.
    inv_im_scale = 1. / im_scale
    rpn_rois = rpn_rois * inv_im_scale

    has_gt = len(gt_boxes) > 0
    boxes = np.vstack([gt_boxes, rpn_rois])
    gt_overlaps = np.zeros((boxes.shape[0], class_nums))
    box_to_gt_ind_map = np.zeros((boxes.shape[0]), dtype=np.int32)
    if has_gt:
        proposal_to_gt_overlaps = _bbox_overlaps(boxes, gt_boxes)

        overlaps_argmax = proposal_to_gt_overlaps.argmax(axis=1)
        overlaps_max = proposal_to_gt_overlaps.max(axis=1)
        # Boxes with non-zero overlap with at least one gt box.
        overlapped_boxes_ind = np.where(overlaps_max > 0)[0]
        overlapped_boxes_gt_classes = gt_classes[overlaps_argmax[
            overlapped_boxes_ind]]
        gt_overlaps[overlapped_boxes_ind,
                    overlapped_boxes_gt_classes] = overlaps_max[
                        overlapped_boxes_ind]
        box_to_gt_ind_map[overlapped_boxes_ind] = overlaps_argmax[
            overlapped_boxes_ind]

    max_overlaps = gt_overlaps.max(axis=1)
    max_classes = gt_overlaps.argmax(axis=1)

    # Foreground
    fg_inds = np.where(max_overlaps >= fg_thresh)[0]
    fg_rois_per_this_image = np.minimum(fg_rois_per_im, fg_inds.shape[0])
    # Sample foreground if there are too many
    if fg_inds.shape[0] > fg_rois_per_this_image:
        fg_inds = np.random.choice(
            fg_inds, size=fg_rois_per_this_image, replace=False)

    # Background
    bg_inds = np.where((max_overlaps < bg_thresh_hi) & (max_overlaps >=
                                                        bg_thresh_lo))[0]
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = np.minimum(bg_rois_per_this_image,
                                        bg_inds.shape[0])
    # Sample background if there are too many
    if bg_inds.shape[0] > bg_rois_per_this_image:
        bg_inds = np.random.choice(
            bg_inds, size=bg_rois_per_this_image, replace=False)

    keep_inds = np.append(fg_inds, bg_inds)
    sampled_labels = max_classes[keep_inds]
    # Everything after the foreground slice is background (label 0).
    sampled_labels[fg_rois_per_this_image:] = 0
    sampled_boxes = boxes[keep_inds]
    if has_gt:
        sampled_gts = gt_boxes[box_to_gt_ind_map[keep_inds]]
        # Background rows get an arbitrary gt box; their targets are dropped
        # later because _expand_bbox_targets only copies rows with label > 0.
        sampled_gts[fg_rois_per_this_image:, :] = gt_boxes[0]
    else:
        # No ground truth for this image: indexing gt_boxes would raise
        # IndexError. All labels are 0 here, so the regression targets are
        # never copied to the output; pairing each box with itself simply
        # keeps the delta computation well defined.
        sampled_gts = sampled_boxes.copy()

    bbox_label_targets = _compute_targets(sampled_boxes, sampled_gts,
                                          sampled_labels, bbox_reg_weights)
    bbox_targets, bbox_inside_weights = _expand_bbox_targets(
        bbox_label_targets, class_nums)
    bbox_outside_weights = np.array(
        bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype)

    # Scale rois back to the network input scale.
    sampled_rois = sampled_boxes * im_scale

    # Faster RCNN blobs
    frcn_blobs = dict(
        rois=sampled_rois,
        labels_int32=sampled_labels,
        bbox_targets=bbox_targets,
        bbox_inside_weights=bbox_inside_weights,
        bbox_outside_weights=bbox_outside_weights)
    return frcn_blobs


def _bbox_overlaps(roi_boxes, gt_boxes):
    """Return the IoU matrix between two box sets.

    Boxes are read as ``[x1, y1, x2, y2]`` columns and widths/heights use the
    inclusive ``+ 1`` pixel convention.

    Returns:
        float64 array of shape ``(len(roi_boxes), len(gt_boxes))``.
    """
    w1 = np.maximum(roi_boxes[:, 2] - roi_boxes[:, 0] + 1, 0)
    h1 = np.maximum(roi_boxes[:, 3] - roi_boxes[:, 1] + 1, 0)
    w2 = np.maximum(gt_boxes[:, 2] - gt_boxes[:, 0] + 1, 0)
    h2 = np.maximum(gt_boxes[:, 3] - gt_boxes[:, 1] + 1, 0)
    area1 = w1 * h1
    area2 = w2 * h2

    # Broadcast rois along axis 0 and gt boxes along axis 1 so each pairwise
    # intersection is computed by NumPy instead of a nested Python loop.
    inter_x1 = np.maximum(roi_boxes[:, np.newaxis, 0],
                          gt_boxes[np.newaxis, :, 0])
    inter_y1 = np.maximum(roi_boxes[:, np.newaxis, 1],
                          gt_boxes[np.newaxis, :, 1])
    inter_x2 = np.minimum(roi_boxes[:, np.newaxis, 2],
                          gt_boxes[np.newaxis, :, 2])
    inter_y2 = np.minimum(roi_boxes[:, np.newaxis, 3],
                          gt_boxes[np.newaxis, :, 3])
    inter_w = np.maximum(inter_x2 - inter_x1 + 1, 0)
    inter_h = np.maximum(inter_y2 - inter_y1 + 1, 0)
    inter_area = inter_w * inter_h
    union_area = area1[:, np.newaxis] + area2[np.newaxis, :] - inter_area

    # Assign into a float64 buffer so the result dtype does not depend on the
    # dtype of the input boxes.
    overlaps = np.zeros((roi_boxes.shape[0], gt_boxes.shape[0]))
    overlaps[...] = inter_area / union_area
    return overlaps


def _compute_targets(roi_boxes, gt_boxes, labels, bbox_reg_weights):
    """Build an ``(N, 5)`` float32 array ``[label, dx, dy, dw, dh]`` per RoI."""
    assert roi_boxes.shape[0] == gt_boxes.shape[0]
    assert roi_boxes.shape[1] == 4
    assert gt_boxes.shape[1] == 4

    bbox_reg_weights = np.asarray(bbox_reg_weights)
    targets = _box_to_delta(
        ex_boxes=roi_boxes, gt_boxes=gt_boxes, weights=bbox_reg_weights)

    return np.hstack([labels[:, np.newaxis], targets]).astype(
        np.float32, copy=False)


def _box_to_delta(ex_boxes, gt_boxes, weights):
    """Encode ``gt_boxes`` relative to ``ex_boxes`` as regression deltas.

    Returns:
        array of shape ``(N, 4)`` with columns ``dx, dy, dw, dh``.
    """
    ex_w = ex_boxes[:, 2] - ex_boxes[:, 0] + 1
    ex_h = ex_boxes[:, 3] - ex_boxes[:, 1] + 1
    ex_ctr_x = ex_boxes[:, 0] + 0.5 * ex_w
    ex_ctr_y = ex_boxes[:, 1] + 0.5 * ex_h

    gt_w = gt_boxes[:, 2] - gt_boxes[:, 0] + 1
    gt_h = gt_boxes[:, 3] - gt_boxes[:, 1] + 1
    gt_ctr_x = gt_boxes[:, 0] + 0.5 * gt_w
    gt_ctr_y = gt_boxes[:, 1] + 0.5 * gt_h

    dx = (gt_ctr_x - ex_ctr_x) / ex_w / weights[0]
    dy = (gt_ctr_y - ex_ctr_y) / ex_h / weights[1]
    # NOTE(review): dw/dh are additionally divided by ex_w/ex_h, which differs
    # from the usual Faster R-CNN encoding log(gt_w / ex_w) / weight.
    # Presumably this mirrors the operator kernel under test; confirm against
    # the C++ implementation before changing either side.
    dw = (np.log(gt_w / ex_w)) / ex_w / weights[2]
    dh = (np.log(gt_h / ex_h)) / ex_h / weights[3]

    targets = np.vstack([dx, dy, dw, dh]).transpose()
    return targets


def _expand_bbox_targets(bbox_targets_input, class_nums):
    """Scatter per-RoI deltas into the 4-column slot of their class.

    Args:
        bbox_targets_input: ``(N, 5)`` array ``[label, dx, dy, dw, dh]``.
        class_nums: number of classes; the output has ``4 * class_nums``
            columns.

    Returns:
        ``(bbox_targets, bbox_inside_weights)``, both ``(N, 4 * class_nums)``.
        Rows whose label is not positive stay all-zero in both arrays.
    """
    class_labels = bbox_targets_input[:, 0]
    fg_inds = np.where(class_labels > 0)[0]

    bbox_targets = np.zeros((class_labels.shape[0], 4 * class_nums))
    bbox_inside_weights = np.zeros(bbox_targets.shape)
    for ind in fg_inds:
        class_label = int(class_labels[ind])
        start_ind = class_label * 4
        end_ind = class_label * 4 + 4
        bbox_targets[ind, start_ind:end_ind] = bbox_targets_input[ind, 1:]
        bbox_inside_weights[ind, start_ind:end_ind] = (1.0, 1.0, 1.0, 1.0)

    return bbox_targets, bbox_inside_weights


class TestGenerateProposalLabelsOp(OpTest):
    """Compare the ``generate_proposal_labels`` op with the NumPy reference."""

    def set_data(self):
        """Populate ``inputs``, ``attrs`` and ``outputs`` for a single image."""
        self.init_test_params()
        self.init_test_input()
        self.init_test_output()
        self.inputs = {
            'RpnRois': (self.rpn_rois[0], self.rpn_rois_lod),
            'GtClasses': (self.gt_classes[0], self.gts_lod),
            'GtBoxes': (self.gt_boxes[0], self.gts_lod),
            'ImScales': self.im_scales[0]
        }
        self.attrs = {
            'batch_size_per_im': self.batch_size_per_im,
            'fg_fraction': self.fg_fraction,
            'fg_thresh': self.fg_thresh,
            'bg_thresh_hi': self.bg_thresh_hi,
            'bg_thresh_lo': self.bg_thresh_lo,
            'bbox_reg_weights': self.bbox_reg_weights,
            'class_nums': self.class_nums
        }
        self.outputs = {
            'Rois': (self.rois[0], [self.lod]),
            'LabelsInt32': (self.labels_int32[0], [self.lod]),
            'BboxTargets': (self.bbox_targets[0], [self.lod]),
            'BboxInsideWeights': (self.bbox_inside_weights[0], [self.lod]),
            'BboxOutsideWeights': (self.bbox_outside_weights[0], [self.lod]),
        }

    def test_check_output(self):
        self.check_output()

    def setUp(self):
        self.op_type = 'generate_proposal_labels'
        self.set_data()

    def init_test_params(self):
        """Set the operator attributes used by the test."""
        self.batch_size_per_im = 10
        self.fg_fraction = 1.0
        self.fg_thresh = 0.5
        self.bg_thresh_hi = 0.5
        self.bg_thresh_lo = 0.0
        self.bbox_reg_weights = [0.1, 0.1, 0.2, 0.2]
        self.class_nums = 81

    def init_test_input(self):
        """Generate seeded random proposals and ground truth for one image."""
        np.random.seed(0)
        image_nums = 1
        # gt_nums + proposal_nums == batch_size_per_im, so with the thresholds
        # from init_test_params every candidate box is kept and _sample_rois
        # never has to draw a random subset.
        gt_nums = 6
        proposal_nums = self.batch_size_per_im - gt_nums
        images_shape = []
        self.im_scales = []
        for i in range(image_nums):
            images_shape.append(np.random.randint(200, size=2))
            self.im_scales.append(np.ones((1)).astype(np.float32))

        self.rpn_rois, self.rpn_rois_lod = _generate_proposals(images_shape,
                                                               proposal_nums)
        ground_truth, self.gts_lod = _generate_groundtruth(
            images_shape, self.class_nums, gt_nums)
        self.gt_classes = [gt['gt_classes'] for gt in ground_truth]
        self.gt_boxes = [gt['boxes'] for gt in ground_truth]

    def init_test_output(self):
        """Compute the expected outputs with the NumPy reference."""
        self.rois, self.labels_int32, self.bbox_targets, \
            self.bbox_inside_weights, self.bbox_outside_weights, \
            self.lod = generate_proposal_labels_in_python(
                self.rpn_rois, self.gt_classes, self.gt_boxes, self.im_scales,
                self.batch_size_per_im, self.fg_fraction,
                self.fg_thresh, self.bg_thresh_hi, self.bg_thresh_lo,
                self.bbox_reg_weights, self.class_nums
            )


def _generate_proposals(images_shape, proposal_nums):
    """Create ``proposal_nums`` random boxes per image.

    Returns:
        ``(rpn_rois, lod)`` where ``rpn_rois`` is a list with one box array
        per image and ``lod`` is ``[cumulative box counts]``.
    """
    rpn_rois = []
    rpn_rois_lod = []
    num_proposals = 0
    for image_shape in images_shape:
        proposals = _generate_boxes(image_shape, proposal_nums)
        rpn_rois.append(proposals)
        num_proposals += len(proposals)
        rpn_rois_lod.append(num_proposals)
    return rpn_rois, [rpn_rois_lod]


def _generate_groundtruth(images_shape, class_nums, gt_nums):
    """Create ``gt_nums`` random labelled boxes per image.

    Returns:
        ``(ground_truth, lod)`` where ``ground_truth`` is a list of dicts with
        keys ``gt_classes`` and ``boxes`` and ``lod`` is
        ``[cumulative gt counts]``.
    """
    ground_truth = []
    gts_lod = []
    num_gts = 0
    for image_shape in images_shape:
        # Labels are drawn from [1, class_nums), so the background label 0 is
        # never generated.
        gt_classes = np.random.randint(
            low=1, high=class_nums, size=gt_nums).astype(np.int32)
        gt_boxes = _generate_boxes(image_shape, gt_nums)
        ground_truth.append(dict(gt_classes=gt_classes, boxes=gt_boxes))
        num_gts += len(gt_classes)
        gts_lod.append(num_gts)
    return ground_truth, [gts_lod]


def _generate_boxes(image_size, box_nums):
    """Return ``box_nums`` random ``[x1, y1, x2, y2]`` float32 boxes.

    Coordinates are clipped to ``[0, image_size[0] - 1]`` on the x axis and
    ``[0, image_size[1] - 1]`` on the y axis.
    """
    width = image_size[0]
    height = image_size[1]
    xywh = np.random.rand(box_nums, 4)
    xy1 = xywh[:, [0, 1]] * image_size
    wh = xywh[:, [2, 3]] * (image_size - xy1)
    xy2 = xy1 + wh
    boxes = np.hstack([xy1, xy2])
    boxes[:, [0, 2]] = np.minimum(width - 1., np.maximum(0., boxes[:, [0, 2]]))
    boxes[:, [1, 3]] = np.minimum(height - 1., np.maximum(0., boxes[:, [1, 3]]))
    return boxes.astype(np.float32)


if __name__ == '__main__':
    unittest.main()