提交 0a97d24b 编写于 作者: X Xingyuan Bu 提交者: qingqing01

Faster RCNN Generate Proposal Labels (#12616)

* Add generate_proposal_labels for Faster-RCNN.
上级 cfa6bbb7
......@@ -303,6 +303,7 @@ paddle.fluid.layers.ssd_loss ArgSpec(args=['location', 'confidence', 'gt_box', '
paddle.fluid.layers.detection_map ArgSpec(args=['detect_res', 'label', 'class_num', 'background_label', 'overlap_threshold', 'evaluate_difficult', 'has_state', 'input_states', 'out_states', 'ap_version'], varargs=None, keywords=None, defaults=(0, 0.3, True, None, None, None, 'integral'))
paddle.fluid.layers.rpn_target_assign ArgSpec(args=['loc', 'scores', 'anchor_box', 'gt_box', 'rpn_batch_size_per_im', 'fg_fraction', 'rpn_positive_overlap', 'rpn_negative_overlap'], varargs=None, keywords=None, defaults=(256, 0.25, 0.7, 0.3))
paddle.fluid.layers.anchor_generator ArgSpec(args=['input', 'anchor_sizes', 'aspect_ratios', 'variance', 'stride', 'offset', 'name'], varargs=None, keywords=None, defaults=(None, None, [0.1, 0.1, 0.2, 0.2], None, 0.5, None))
paddle.fluid.layers.generate_proposal_labels ArgSpec(args=['rpn_rois', 'gt_classes', 'gt_boxes', 'im_scales', 'batch_size_per_im', 'fg_fraction', 'fg_thresh', 'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights', 'class_nums'], varargs=None, keywords=None, defaults=(256, 0.25, 0.25, 0.5, 0.0, [0.1, 0.1, 0.2, 0.2], None))
paddle.fluid.layers.generate_proposals ArgSpec(args=['scores', 'bbox_deltas', 'im_info', 'anchors', 'variances', 'pre_nms_top_n', 'post_nms_top_n', 'nms_thresh', 'min_size', 'eta', 'name'], varargs=None, keywords=None, defaults=(6000, 1000, 0.5, 0.1, 1.0, None))
paddle.fluid.layers.iou_similarity ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.box_coder ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
......
......@@ -29,6 +29,7 @@ target_assign_op.cu)
detection_library(polygon_box_transform_op SRCS polygon_box_transform_op.cc
polygon_box_transform_op.cu)
detection_library(rpn_target_assign_op SRCS rpn_target_assign_op.cc)
detection_library(generate_proposal_labels_op SRCS generate_proposal_labels_op.cc)
detection_library(generate_proposals_op SRCS generate_proposals_op.cc)
#Export local libraries to parent
set(DETECTION_LIBRARY ${LOCAL_DETECTION_LIBS} PARENT_SCOPE)
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <math.h>
#include <algorithm>
#include <string>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/gather.h"
#include "paddle/fluid/operators/math/concat.h"
#include "paddle/fluid/operators/math/math_function.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;
const int kBoxDim = 4;
// Copies all elements of `to_add` into the data buffer of `out`, starting
// `offset` elements (of type T) past the beginning of that buffer.
// No bounds checking happens here; the caller is responsible for having
// allocated `out` large enough to receive the copied values.
template <typename T>
void AppendRois(LoDTensor* out, int64_t offset, Tensor* to_add) {
  T* dst = out->data<T>() + offset;
  const T* src = to_add->data<T>();
  const size_t byte_count = to_add->numel() * sizeof(T);
  memcpy(dst, src, byte_count);
}
// Operator definition for generate_proposal_labels.
//
// InferShape verifies that all four inputs and all five outputs are wired,
// checks the input ranks, and declares the output shapes with an unknown
// (-1) leading dimension. GetExpectedKernelType pins the kernel to CPU and
// takes the data type from Input(RpnRois).
class GenerateProposalLabelsOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("RpnRois"),
                   "Input(RpnRois) shouldn't be null.");
    PADDLE_ENFORCE(ctx->HasInput("GtClasses"),
                   "Input(GtClasses) shouldn't be null.");
    PADDLE_ENFORCE(ctx->HasInput("GtBoxes"),
                   "Input(GtBoxes) shouldn't be null.");
    PADDLE_ENFORCE(ctx->HasInput("ImScales"),
                   "Input(ImScales) shouldn't be null.");

    // These messages previously named RpnTargetAssignOp (copy-paste
    // leftover); they now point at the operator that actually failed.
    PADDLE_ENFORCE(
        ctx->HasOutput("Rois"),
        "Output(Rois) of GenerateProposalLabelsOp should not be null");
    PADDLE_ENFORCE(
        ctx->HasOutput("LabelsInt32"),
        "Output(LabelsInt32) of GenerateProposalLabelsOp should not be null");
    PADDLE_ENFORCE(
        ctx->HasOutput("BboxTargets"),
        "Output(BboxTargets) of GenerateProposalLabelsOp should not be null");
    PADDLE_ENFORCE(ctx->HasOutput("BboxInsideWeights"),
                   "Output(BboxInsideWeights) of GenerateProposalLabelsOp "
                   "should not be null");
    PADDLE_ENFORCE(ctx->HasOutput("BboxOutsideWeights"),
                   "Output(BboxOutsideWeights) of GenerateProposalLabelsOp "
                   "should not be null");

    auto rpn_rois_dims = ctx->GetInputDim("RpnRois");
    auto gt_classes_dims = ctx->GetInputDim("GtClasses");
    auto gt_boxes_dims = ctx->GetInputDim("GtBoxes");
    auto im_scales_dims = ctx->GetInputDim("ImScales");

    PADDLE_ENFORCE_EQ(rpn_rois_dims.size(), 2,
                      "The rank of Input(RpnRois) must be 2.");
    PADDLE_ENFORCE_EQ(gt_classes_dims.size(), 1,
                      "The rank of Input(GtClasses) must be 1.");
    PADDLE_ENFORCE_EQ(gt_boxes_dims.size(), 2,
                      "The rank of Input(GtBoxes) must be 2.");
    PADDLE_ENFORCE_EQ(im_scales_dims.size(), 1,
                      "The rank of Input(ImScales) must be 1.");

    int class_nums = ctx->Attrs().Get<int>("class_nums");

    // The number of sampled rois is only known at run time, hence -1.
    ctx->SetOutputDim("Rois", {-1, 4});
    ctx->SetOutputDim("LabelsInt32", {-1});
    ctx->SetOutputDim("BboxTargets", {-1, 4 * class_nums});
    ctx->SetOutputDim("BboxInsideWeights", {-1, 4 * class_nums});
    ctx->SetOutputDim("BboxOutsideWeights", {-1, 4 * class_nums});
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    auto data_type = framework::GetDataTypeOfVar(ctx.InputVar("RpnRois"));
    return framework::OpKernelType(data_type, platform::CPUPlace());
  }
};
// Concatenates `in_tensor_a` followed by `in_tensor_b` along axis 0 and
// writes the result into `out_tensor` using math::ConcatFunctor on CPU.
template <typename T>
void Concat(const platform::CPUDeviceContext& context,
            const Tensor& in_tensor_a, const Tensor& in_tensor_b,
            Tensor* out_tensor) {
  const int concat_axis = 0;
  std::vector<Tensor> pieces;
  pieces.reserve(2);
  pieces.push_back(in_tensor_a);
  pieces.push_back(in_tensor_b);
  math::ConcatFunctor<platform::CPUDeviceContext, T> do_concat;
  do_concat(context, pieces, concat_axis, out_tensor);
}
// Fills `overlaps(i, j)` with the intersection-over-union of row i of
// `r_boxes` and row j of `c_boxes`.
//
// Columns 0..3 of each box row are read as (x_min, y_min, x_max, y_max).
// Widths and heights use the inclusive "+ 1" convention, and a negative
// intersection extent is clamped to zero.
template <typename T>
void BboxOverlaps(const Tensor& r_boxes, const Tensor& c_boxes,
                  Tensor* overlaps) {
  auto rows = framework::EigenTensor<T, 2>::From(r_boxes);
  auto cols = framework::EigenTensor<T, 2>::From(c_boxes);
  auto iou = framework::EigenTensor<T, 2>::From(*overlaps);
  const int row_count = r_boxes.dims()[0];
  const int col_count = c_boxes.dims()[0];
  const T kZero = static_cast<T>(0.0);

  for (int r = 0; r < row_count; ++r) {
    const T row_area =
        (rows(r, 2) - rows(r, 0) + 1) * (rows(r, 3) - rows(r, 1) + 1);
    for (int c = 0; c < col_count; ++c) {
      const T col_area =
          (cols(c, 2) - cols(c, 0) + 1) * (cols(c, 3) - cols(c, 1) + 1);
      // Corners of the intersection rectangle.
      const T left = std::max(rows(r, 0), cols(c, 0));
      const T top = std::max(rows(r, 1), cols(c, 1));
      const T right = std::min(rows(r, 2), cols(c, 2));
      const T bottom = std::min(rows(r, 3), cols(c, 3));
      const T span_w = std::max(right - left + 1, kZero);
      const T span_h = std::max(bottom - top + 1, kZero);
      const T shared = span_w * span_h;
      iou(r, c) = shared / (row_area + col_area - shared);
    }
  }
}
// Encodes, for each of the first `box_num` rows, the offset from an example
// box (`ex_boxes`) to its paired ground-truth box (`gt_boxes`) and stores the
// four values (dx, dy, dw, dh) in the matching row of `box_delta`.
//
// Box rows are read as (x_min, y_min, x_max, y_max) with inclusive "+ 1"
// extents; `weights[k]` divides the k-th component.
//
// NOTE(review): dw and dh are additionally divided by ex_w / ex_h. The usual
// Faster R-CNN encoding is log(gt / ex) / weight without that extra division.
// The Python reference used by the op test applies the same formula, so both
// would need to change together - confirm the intended definition.
template <typename T>
void BoxToDelta(int box_num, const Tensor& ex_boxes, const Tensor& gt_boxes,
                const std::vector<float>& weights, Tensor* box_delta) {
  auto src = framework::EigenTensor<T, 2>::From(ex_boxes);
  auto dst = framework::EigenTensor<T, 2>::From(gt_boxes);
  auto delta = framework::EigenTensor<T, 2>::From(*box_delta);

  for (int64_t row = 0; row < box_num; ++row) {
    const T ex_w = src(row, 2) - src(row, 0) + 1;
    const T ex_h = src(row, 3) - src(row, 1) + 1;
    const T ex_ctr_x = src(row, 0) + 0.5 * ex_w;
    const T ex_ctr_y = src(row, 1) + 0.5 * ex_h;

    const T gt_w = dst(row, 2) - dst(row, 0) + 1;
    const T gt_h = dst(row, 3) - dst(row, 1) + 1;
    const T gt_ctr_x = dst(row, 0) + 0.5 * gt_w;
    const T gt_ctr_y = dst(row, 1) + 0.5 * gt_h;

    delta(row, 0) = (gt_ctr_x - ex_ctr_x) / ex_w / weights[0];
    delta(row, 1) = (gt_ctr_y - ex_ctr_y) / ex_h / weights[1];
    delta(row, 2) = log(gt_w / ex_w) / ex_w / weights[2];
    delta(row, 3) = log(gt_h / ex_h) / ex_h / weights[3];
  }
}
// Classifies every proposal as foreground or background from its IoU with
// the ground-truth boxes and subsamples both sets.
//
// `iou` is read as a row-major [proposals x gt] matrix.
//  * A row whose maximum overlap is > fg_thresh is foreground; its matched
//    ground truth is the first column within 1e-5 of that maximum.
//  * Otherwise the row is background when the maximum lies in
//    [bg_thresh_lo, bg_thresh_hi).
// At most floor(batch_size_per_im * fg_fraction) foregrounds are kept, and
// backgrounds fill the rest of the batch_size_per_im budget. When a set is
// larger than its quota, a reservoir-style pass picks the survivors.
//
// `engine` is taken by value, so the caller's generator is not advanced.
//
// Returns three index lists: [0] kept foreground rows, [1] kept background
// rows, [2] the matched ground-truth column for each kept foreground row.
template <typename T>
std::vector<std::vector<int>> SampleFgBgGt(
    const platform::CPUDeviceContext& context, Tensor* iou,
    const int batch_size_per_im, const float fg_fraction, const float fg_thresh,
    const float bg_thresh_hi, const float bg_thresh_lo,
    std::minstd_rand engine) {
  std::vector<int> fg_inds;
  std::vector<int> bg_inds;
  std::vector<int> gt_inds;
  T* proposal_to_gt_overlaps = iou->mutable_data<T>(context.GetPlace());
  int64_t row = iou->dims()[0];
  int64_t col = iou->dims()[1];
  float epsilon = 0.00001;

  // Follow the Faster RCNN's implementation
  for (int64_t i = 0; i < row; ++i) {
    const T* v = proposal_to_gt_overlaps + i * col;
    T max_overlap = *std::max_element(v, v + col);
    if (max_overlap > fg_thresh) {
      for (int64_t j = 0; j < col; ++j) {
        T val = proposal_to_gt_overlaps[i * col + j];
        auto diff = std::abs(max_overlap - val);
        if (diff < epsilon) {
          fg_inds.emplace_back(i);
          gt_inds.emplace_back(j);
          break;
        }
      }
    } else {
      if ((max_overlap >= bg_thresh_lo) && (max_overlap < bg_thresh_hi)) {
        bg_inds.emplace_back(i);
      }
    }
  }

  // Reservoir Sampling (foreground): fg_inds and gt_inds are swapped in
  // lock-step so each kept foreground keeps its matched ground truth.
  int fg_rois_per_im = std::floor(batch_size_per_im * fg_fraction);
  int fg_rois_this_image = fg_inds.size();
  int fg_rois_per_this_image = std::min(fg_rois_per_im, fg_rois_this_image);
  std::uniform_real_distribution<float> uniform(0, 1);
  const int64_t fg_size = static_cast<int64_t>(fg_inds.size());
  if (fg_size > fg_rois_per_this_image) {
    for (int64_t i = fg_rois_per_this_image; i < fg_size; ++i) {
      int rng_ind = std::floor(uniform(engine) * i);
      if (rng_ind < fg_rois_per_this_image) {
        std::iter_swap(fg_inds.begin() + rng_ind, fg_inds.begin() + i);
        std::iter_swap(gt_inds.begin() + rng_ind, gt_inds.begin() + i);
      }
    }
  }
  std::vector<int> new_fg_inds(fg_inds.begin(),
                               fg_inds.begin() + fg_rois_per_this_image);
  std::vector<int> new_gt_inds(gt_inds.begin(),
                               gt_inds.begin() + fg_rois_per_this_image);

  // Reservoir Sampling (background).
  int bg_rois_per_image = batch_size_per_im - fg_rois_per_this_image;
  int bg_rois_this_image = bg_inds.size();
  int bg_rois_per_this_image = std::min(bg_rois_per_image, bg_rois_this_image);
  const int64_t bg_size = static_cast<int64_t>(bg_inds.size());
  if (bg_size > bg_rois_per_this_image) {
    for (int64_t i = bg_rois_per_this_image; i < bg_size; ++i) {
      int rng_ind = std::floor(uniform(engine) * i);
      // The reservoir is the first bg_rois_per_this_image slots. Comparing
      // against the foreground count (as before) left most of the background
      // reservoir unreachable, and made the sample a plain prefix whenever
      // no foreground was kept.
      if (rng_ind < bg_rois_per_this_image) {
        std::iter_swap(bg_inds.begin() + rng_ind, bg_inds.begin() + i);
      }
    }
  }
  std::vector<int> new_bg_inds(bg_inds.begin(),
                               bg_inds.begin() + bg_rois_per_this_image);

  std::vector<std::vector<int>> res;
  res.emplace_back(new_fg_inds);
  res.emplace_back(new_bg_inds);
  res.emplace_back(new_gt_inds);
  return res;
}
// Gathers the sampled boxes, their class labels and their matched
// ground-truth boxes.
//
//  * sampled_boxes  : rows `fg_inds` of `boxes` followed by rows `bg_inds`.
//  * sampled_labels : `gt_classes[gt_inds]` for the foreground rows followed
//                     by 0 for every background row.
//  * sampled_gts    : `gt_boxes[gt_inds]` for the foreground rows followed by
//                     row 0 of `gt_boxes` for every background row.
//
// `gt_inds` is expected to have one entry per foreground index, and the
// output tensors must already be allocated with fg_num + bg_num rows.
template <typename T>
void GatherBoxesLabels(const platform::CPUDeviceContext& context,
                       const Tensor& boxes, const Tensor& gt_boxes,
                       const Tensor& gt_classes,
                       const std::vector<int>& fg_inds,
                       const std::vector<int>& bg_inds,
                       const std::vector<int>& gt_inds, Tensor* sampled_boxes,
                       Tensor* sampled_labels, Tensor* sampled_gts) {
  int fg_num = fg_inds.size();
  int bg_num = bg_inds.size();
  int gt_num = fg_num + bg_num;
  Tensor fg_inds_t, bg_inds_t, gt_box_inds_t, gt_label_inds_t;
  int* fg_inds_data = fg_inds_t.mutable_data<int>({fg_num}, context.GetPlace());
  int* bg_inds_data = bg_inds_t.mutable_data<int>({bg_num}, context.GetPlace());
  int* gt_box_inds_data =
      gt_box_inds_t.mutable_data<int>({gt_num}, context.GetPlace());
  int* gt_label_inds_data =
      gt_label_inds_t.mutable_data<int>({fg_num}, context.GetPlace());
  std::copy(fg_inds.begin(), fg_inds.end(), fg_inds_data);
  std::copy(bg_inds.begin(), bg_inds.end(), bg_inds_data);
  std::copy(gt_inds.begin(), gt_inds.end(), gt_box_inds_data);
  // Only the first fg_num entries were written above; the background tail
  // used to be left uninitialised, so the gather below read arbitrary
  // (possibly out-of-range) rows. Background rows have no matched ground
  // truth, so point them at row 0 of gt_boxes.
  std::fill(gt_box_inds_data + fg_num, gt_box_inds_data + gt_num, 0);
  std::copy(gt_inds.begin(), gt_inds.end(), gt_label_inds_data);

  Tensor fg_boxes, bg_boxes, fg_labels, bg_labels;
  fg_boxes.mutable_data<T>({fg_num, kBoxDim}, context.GetPlace());
  CPUGather<T>(context, boxes, fg_inds_t, &fg_boxes);
  bg_boxes.mutable_data<T>({bg_num, kBoxDim}, context.GetPlace());
  CPUGather<T>(context, boxes, bg_inds_t, &bg_boxes);
  Concat<T>(context, fg_boxes, bg_boxes, sampled_boxes);
  CPUGather<T>(context, gt_boxes, gt_box_inds_t, sampled_gts);

  fg_labels.mutable_data<int>({fg_num}, context.GetPlace());
  CPUGather<int>(context, gt_classes, gt_label_inds_t, &fg_labels);
  bg_labels.mutable_data<int>({bg_num}, context.GetPlace());
  // Label 0 marks background.
  math::set_constant(context, &bg_labels, 0);
  Concat<int>(context, fg_labels, bg_labels, sampled_labels);
}
// Builds the training targets for a single image.
//
// Steps, in order:
//   1. divide the RPN rois by the image scale,
//   2. stack ground-truth boxes and rois into one candidate list,
//   3. compute candidate-vs-ground-truth IoU,
//   4. sample foreground / background candidates (SampleFgBgGt),
//   5. gather the sampled boxes, labels and matched ground truths,
//   6. encode regression deltas (BoxToDelta),
//   7. multiply the sampled boxes by the image scale again,
//   8. expand deltas and weights to kBoxDim * class_nums columns, writing
//      only into the 4-column group selected by each positive label.
//
// Returns five tensors in this order: sampled rois, sampled labels, expanded
// bbox targets, bbox inside weights, bbox outside weights.
//
// `engine` is received by value, so the caller's generator state is not
// advanced by the sampling performed here.
template <typename T>
std::vector<Tensor> SampleRoisForOneImage(
const platform::CPUDeviceContext& context, Tensor* rpn_rois,
Tensor* gt_classes, Tensor* gt_boxes, Tensor* im_scale,
const int batch_size_per_im, const float fg_fraction, const float fg_thresh,
const float bg_thresh_hi, const float bg_thresh_lo,
const std::vector<float>& bbox_reg_weights, const int class_nums,
std::minstd_rand engine) {
auto rpn_rois_et = framework::EigenTensor<T, 2>::From(*rpn_rois);
auto im_scale_data = im_scale->data<T>()[0];
// The division is written back through the Eigen map, i.e. into the buffer
// behind `rpn_rois`.
// NOTE(review): if that buffer is shared with the operator's input tensor,
// the input is rescaled in place - confirm this side effect is intended.
rpn_rois_et = rpn_rois_et / im_scale_data;
// Candidate list: ground-truth boxes first, then the rescaled rois.
Tensor boxes;
int proposals_num = gt_boxes->dims()[0] + rpn_rois->dims()[0];
boxes.mutable_data<T>({proposals_num, kBoxDim}, context.GetPlace());
Concat<T>(context, *gt_boxes, *rpn_rois, &boxes);
// IoU of every candidate against every ground-truth box.
Tensor proposal_to_gt_overlaps;
proposal_to_gt_overlaps.mutable_data<T>({proposals_num, gt_boxes->dims()[0]},
context.GetPlace());
BboxOverlaps<T>(boxes, *gt_boxes, &proposal_to_gt_overlaps);
// Sample candidate indices: [0] foreground, [1] background, [2] matched gt.
std::vector<std::vector<int>> fg_bg_gt = SampleFgBgGt<T>(
context, &proposal_to_gt_overlaps, batch_size_per_im, fg_fraction,
fg_thresh, bg_thresh_hi, bg_thresh_lo, engine);
std::vector<int> fg_inds = fg_bg_gt[0];
std::vector<int> bg_inds = fg_bg_gt[1];
std::vector<int> gt_inds = fg_bg_gt[2];
// Gather boxes and labels
Tensor sampled_boxes, sampled_labels, sampled_gts;
int boxes_num = fg_inds.size() + bg_inds.size();
framework::DDim bbox_dim({boxes_num, kBoxDim});
sampled_boxes.mutable_data<T>(bbox_dim, context.GetPlace());
sampled_labels.mutable_data<int>({boxes_num}, context.GetPlace());
sampled_gts.mutable_data<T>(bbox_dim, context.GetPlace());
GatherBoxesLabels<T>(context, boxes, *gt_boxes, *gt_classes, fg_inds, bg_inds,
gt_inds, &sampled_boxes, &sampled_labels, &sampled_gts);
// Compute targets: one (dx, dy, dw, dh) row per sampled box.
Tensor bbox_targets_single;
bbox_targets_single.mutable_data<T>(bbox_dim, context.GetPlace());
BoxToDelta<T>(boxes_num, sampled_boxes, sampled_gts, bbox_reg_weights,
&bbox_targets_single);
// Scale rois: undo the division by im_scale applied at the top.
Tensor sampled_rois;
sampled_rois.mutable_data<T>(sampled_boxes.dims(), context.GetPlace());
auto sampled_rois_et = framework::EigenTensor<T, 2>::From(sampled_rois);
auto sampled_boxes_et = framework::EigenTensor<T, 2>::From(sampled_boxes);
sampled_rois_et = sampled_boxes_et * im_scale_data;
// Expand box targets to kBoxDim * class_nums columns, zero everywhere
// except the column group of each positive label.
Tensor bbox_targets, bbox_inside_weights, bbox_outside_weights;
framework::DDim bbox_expand_dim({boxes_num, kBoxDim * class_nums});
bbox_targets.mutable_data<T>(bbox_expand_dim, context.GetPlace());
bbox_inside_weights.mutable_data<T>(bbox_expand_dim, context.GetPlace());
bbox_outside_weights.mutable_data<T>(bbox_expand_dim, context.GetPlace());
math::set_constant(context, &bbox_targets, 0.0);
math::set_constant(context, &bbox_inside_weights, 0.0);
math::set_constant(context, &bbox_outside_weights, 0.0);
auto* bbox_targets_single_data = bbox_targets_single.data<T>();
auto* sampled_labels_data = sampled_labels.data<int>();
auto* bbox_targets_data = bbox_targets.data<T>();
auto* bbox_inside_weights_data = bbox_inside_weights.data<T>();
auto* bbox_outside_weights_data = bbox_outside_weights.data<T>();
int width = kBoxDim * class_nums;
for (int64_t i = 0; i < boxes_num; ++i) {
int label = sampled_labels_data[i];
// Rows with label 0 (background) keep all-zero targets and weights.
// NOTE(review): `label` is not checked against class_nums here; a label
// >= class_nums would index past the row - confirm inputs guarantee this.
if (label > 0) {
int dst_idx = i * width + kBoxDim * label;
int src_idx = kBoxDim * i;
bbox_targets_data[dst_idx] = bbox_targets_single_data[src_idx];
bbox_targets_data[dst_idx + 1] = bbox_targets_single_data[src_idx + 1];
bbox_targets_data[dst_idx + 2] = bbox_targets_single_data[src_idx + 2];
bbox_targets_data[dst_idx + 3] = bbox_targets_single_data[src_idx + 3];
bbox_inside_weights_data[dst_idx] = 1;
bbox_inside_weights_data[dst_idx + 1] = 1;
bbox_inside_weights_data[dst_idx + 2] = 1;
bbox_inside_weights_data[dst_idx + 3] = 1;
bbox_outside_weights_data[dst_idx] = 1;
bbox_outside_weights_data[dst_idx + 1] = 1;
bbox_outside_weights_data[dst_idx + 2] = 1;
bbox_outside_weights_data[dst_idx + 3] = 1;
}
}
// Output order is relied upon by the caller, which indexes 0..4.
std::vector<Tensor> res;
res.emplace_back(sampled_rois);
res.emplace_back(sampled_labels);
res.emplace_back(bbox_targets);
res.emplace_back(bbox_inside_weights);
res.emplace_back(bbox_outside_weights);
return res;
}
// CPU kernel for generate_proposal_labels.
//
// The three LoD inputs (RpnRois, GtClasses, GtBoxes) must each carry exactly
// one LoD level. The last LoD level of RpnRois determines the number of
// images `n`; each image is processed by SampleRoisForOneImage and its
// results are appended to the outputs. Afterwards every output gets a
// one-level LoD with the per-image row offsets and is resized to the number
// of rows actually produced.
template <typename T>
class GenerateProposalLabelsKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* rpn_rois = context.Input<LoDTensor>("RpnRois");
auto* gt_classes = context.Input<LoDTensor>("GtClasses");
auto* gt_boxes = context.Input<LoDTensor>("GtBoxes");
auto* im_scales = context.Input<LoDTensor>("ImScales");
auto* rois = context.Output<LoDTensor>("Rois");
auto* labels_int32 = context.Output<LoDTensor>("LabelsInt32");
auto* bbox_targets = context.Output<LoDTensor>("BboxTargets");
auto* bbox_inside_weights = context.Output<LoDTensor>("BboxInsideWeights");
auto* bbox_outside_weights =
context.Output<LoDTensor>("BboxOutsideWeights");
int batch_size_per_im = context.Attr<int>("batch_size_per_im");
float fg_fraction = context.Attr<float>("fg_fraction");
float fg_thresh = context.Attr<float>("fg_thresh");
float bg_thresh_hi = context.Attr<float>("bg_thresh_hi");
float bg_thresh_lo = context.Attr<float>("bg_thresh_lo");
std::vector<float> bbox_reg_weights =
context.Attr<std::vector<float>>("bbox_reg_weights");
int class_nums = context.Attr<int>("class_nums");
PADDLE_ENFORCE_EQ(rpn_rois->lod().size(), 1UL,
"GenerateProposalLabelsOp rpn_rois needs 1 level of LoD");
PADDLE_ENFORCE_EQ(
gt_classes->lod().size(), 1UL,
"GenerateProposalLabelsOp gt_classes needs 1 level of LoD");
PADDLE_ENFORCE_EQ(gt_boxes->lod().size(), 1UL,
"GenerateProposalLabelsOp gt_boxes needs 1 level of LoD");
// Number of images = number of LoD segments in RpnRois.
int64_t n = static_cast<int64_t>(rpn_rois->lod().back().size() - 1);
// Allocate for the maximum of batch_size_per_im rows per image; the
// outputs are shrunk to the real row count at the end of Compute.
rois->mutable_data<T>({n * batch_size_per_im, kBoxDim}, context.GetPlace());
labels_int32->mutable_data<int>({n * batch_size_per_im},
context.GetPlace());
bbox_targets->mutable_data<T>({n * batch_size_per_im, kBoxDim * class_nums},
context.GetPlace());
bbox_inside_weights->mutable_data<T>(
{n * batch_size_per_im, kBoxDim * class_nums}, context.GetPlace());
bbox_outside_weights->mutable_data<T>(
{n * batch_size_per_im, kBoxDim * class_nums}, context.GetPlace());
// Seed from the "seed" attribute when "fix_seed" is set, otherwise from
// std::random_device.
std::random_device rnd;
std::minstd_rand engine;
int seed =
context.Attr<bool>("fix_seed") ? context.Attr<int>("seed") : rnd();
engine.seed(seed);
framework::LoD lod;
std::vector<size_t> lod0(1, 0);
// Running count of rows appended so far; also the next write position.
int64_t num_rois = 0;
auto& dev_ctx = context.device_context<platform::CPUDeviceContext>();
auto rpn_rois_lod = rpn_rois->lod().back();
auto gt_classes_lod = gt_classes->lod().back();
auto gt_boxes_lod = gt_boxes->lod().back();
// NOTE(review): the LoDs of GtClasses / GtBoxes are indexed with the image
// count taken from RpnRois; nothing here verifies that they have the same
// number of segments - confirm callers guarantee it.
for (size_t i = 0; i < n; ++i) {
Tensor rpn_rois_slice =
rpn_rois->Slice(rpn_rois_lod[i], rpn_rois_lod[i + 1]);
Tensor gt_classes_slice =
gt_classes->Slice(gt_classes_lod[i], gt_classes_lod[i + 1]);
Tensor gt_boxes_slice =
gt_boxes->Slice(gt_boxes_lod[i], gt_boxes_lod[i + 1]);
// ImScales is indexed by image number: one scale entry per image.
Tensor im_scales_slice = im_scales->Slice(i, i + 1);
// `engine` is passed by value, so every image starts sampling from the
// same generator state.
// NOTE(review): confirm identical random streams per image are intended.
std::vector<Tensor> tensor_output = SampleRoisForOneImage<T>(
dev_ctx, &rpn_rois_slice, &gt_classes_slice, &gt_boxes_slice,
&im_scales_slice, batch_size_per_im, fg_fraction, fg_thresh,
bg_thresh_hi, bg_thresh_lo, bbox_reg_weights, class_nums, engine);
Tensor sampled_rois = tensor_output[0];
Tensor sampled_labels_int32 = tensor_output[1];
Tensor sampled_bbox_targets = tensor_output[2];
Tensor sampled_bbox_inside_weights = tensor_output[3];
Tensor sampled_bbox_outside_weights = tensor_output[4];
// Offsets are element counts: rows already written times row width.
AppendRois<T>(rois, kBoxDim * num_rois, &sampled_rois);
AppendRois<int>(labels_int32, num_rois, &sampled_labels_int32);
AppendRois<T>(bbox_targets, kBoxDim * num_rois * class_nums,
&sampled_bbox_targets);
AppendRois<T>(bbox_inside_weights, kBoxDim * num_rois * class_nums,
&sampled_bbox_inside_weights);
AppendRois<T>(bbox_outside_weights, kBoxDim * num_rois * class_nums,
&sampled_bbox_outside_weights);
num_rois += sampled_rois.dims()[0];
lod0.emplace_back(num_rois);
}
// All outputs share the same single-level LoD of per-image row offsets.
lod.emplace_back(lod0);
rois->set_lod(lod);
labels_int32->set_lod(lod);
bbox_targets->set_lod(lod);
bbox_inside_weights->set_lod(lod);
bbox_outside_weights->set_lod(lod);
// Shrink the over-allocated outputs to the rows actually produced.
rois->Resize({num_rois, kBoxDim});
labels_int32->Resize({num_rois});
bbox_targets->Resize({num_rois, kBoxDim * class_nums});
bbox_inside_weights->Resize({num_rois, kBoxDim * class_nums});
bbox_outside_weights->Resize({num_rois, kBoxDim * class_nums});
}
};
// Declares the proto of generate_proposal_labels: four inputs, five outputs
// and the sampling attributes read by the kernel.
//
// Only "fix_seed" (false) and "seed" (0) have defaults here; every other
// attribute has none declared. The description strings are still
// placeholders that repeat the name.
class GenerateProposalLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
// TODO(buxingyuan): Add Document
// Inputs.
AddInput("RpnRois", "RpnRois.");
AddInput("GtClasses", "GtClasses.");
AddInput("GtBoxes", "GtBoxes.");
AddInput("ImScales", "ImScales.");
// Outputs.
AddOutput("Rois", "Rois.");
AddOutput("LabelsInt32", "LabelsInt32.");
AddOutput("BboxTargets", "BboxTargets.");
AddOutput("BboxInsideWeights", "BboxInsideWeights.");
AddOutput("BboxOutsideWeights", "BboxOutsideWeights.");
// Sampling and encoding attributes (no defaults).
AddAttr<int>("batch_size_per_im", "batch_size_per_im");
AddAttr<float>("fg_fraction", "fg_fraction");
AddAttr<float>("fg_thresh", "fg_thresh");
AddAttr<float>("bg_thresh_hi", "bg_thresh_hi");
AddAttr<float>("bg_thresh_lo", "bg_thresh_lo");
AddAttr<std::vector<float>>("bbox_reg_weights", "bbox_reg_weights");
AddAttr<int>("class_nums", "class_nums");
// Random-number control: when fix_seed is true the kernel seeds its
// generator with `seed`.
AddAttr<bool>("fix_seed", "fix_seed").SetDefault(false);
AddAttr<int>("seed", "seed").SetDefault(0);
AddComment(R"DOC(
Generate Proposals Labels Operator.
)DOC");
}
};
} // namespace operators
} // namespace paddle
// Register generate_proposal_labels with an empty gradient maker, and provide
// CPU kernels for float and double.
namespace ops = paddle::operators;
REGISTER_OPERATOR(generate_proposal_labels, ops::GenerateProposalLabelsOp,
ops::GenerateProposalLabelsOpMaker,
paddle::framework::EmptyGradOpMaker);
REGISTER_OP_CPU_KERNEL(generate_proposal_labels,
ops::GenerateProposalLabelsKernel<float>,
ops::GenerateProposalLabelsKernel<double>);
......@@ -86,7 +86,7 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> {
std::minstd_rand engine,
std::vector<int>* inds) const {
std::uniform_real_distribution<float> uniform(0, 1);
const int64_t size = static_cast<int64_t>(inds->size());
const int64_t size = static_cast<int64_t>(inds->size() - offset);
if (size > num) {
for (int64_t i = num; i < size; ++i) {
int rng_ind = std::floor(uniform(engine) * i);
......@@ -126,7 +126,7 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> {
neg_threshold, target_label_data, fg_inds, bg_inds);
// Reservoir Sampling
ReservoirSampling(fg_num, fg_offset, engine, fg_inds);
int bg_num = rpn_batch_size - fg_inds->size();
int bg_num = rpn_batch_size - (fg_inds->size() - fg_offset);
ReservoirSampling(bg_num, bg_offset, engine, bg_inds);
}
......
......@@ -101,5 +101,8 @@ namespace ops = paddle::operators;
REGISTER_OPERATOR(gather, ops::GatherOp, ops::GatherOpMaker,
paddle::framework::DefaultGradOpDescMaker<true>);
REGISTER_OPERATOR(gather_grad, ops::GatherGradOp);
REGISTER_OP_CPU_KERNEL(gather, ops::GatherOpKernel<float>);
REGISTER_OP_CPU_KERNEL(gather_grad, ops::GatherGradientOpKernel<float>);
REGISTER_OP_CPU_KERNEL(gather, ops::GatherOpKernel<float>,
ops::GatherOpKernel<int>, ops::GatherOpKernel<double>);
REGISTER_OP_CPU_KERNEL(gather_grad, ops::GatherGradientOpKernel<float>,
ops::GatherGradientOpKernel<int>,
ops::GatherGradientOpKernel<double>);
......@@ -39,6 +39,7 @@ __all__ = [
'detection_map',
'rpn_target_assign',
'anchor_generator',
'generate_proposal_labels',
'generate_proposals',
]
......@@ -1256,6 +1257,64 @@ def anchor_generator(input,
return anchor, var
def generate_proposal_labels(rpn_rois,
                             gt_classes,
                             gt_boxes,
                             im_scales,
                             batch_size_per_im=256,
                             fg_fraction=0.25,
                             fg_thresh=0.25,
                             bg_thresh_hi=0.5,
                             bg_thresh_lo=0.0,
                             bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
                             class_nums=None):
    """
    ** Generate proposal labels Faster-RCNN **

    Appends a ``generate_proposal_labels`` operator to the program.

    The four variables ``rpn_rois``, ``gt_classes``, ``gt_boxes`` and
    ``im_scales`` are wired to the operator inputs ``RpnRois``,
    ``GtClasses``, ``GtBoxes`` and ``ImScales``; the remaining arguments are
    forwarded unchanged as operator attributes of the same name.

    Returns:
        tuple: ``(rois, labels_int32, bbox_targets, bbox_inside_weights,
        bbox_outside_weights)``. ``labels_int32`` takes the dtype of
        ``gt_classes``, the other four take the dtype of ``rpn_rois``. All
        five have ``stop_gradient`` set to True.

    TODO(buxingyuan): document the meaning of each argument.
    """
    # Must stay the first statement: locals() has to hold only the arguments.
    helper = LayerHelper('generate_proposal_labels', **locals())

    box_dtype = rpn_rois.dtype
    rois = helper.create_tmp_variable(dtype=box_dtype)
    labels_int32 = helper.create_tmp_variable(dtype=gt_classes.dtype)
    bbox_targets = helper.create_tmp_variable(dtype=box_dtype)
    bbox_inside_weights = helper.create_tmp_variable(dtype=box_dtype)
    bbox_outside_weights = helper.create_tmp_variable(dtype=box_dtype)

    op_inputs = {
        'RpnRois': rpn_rois,
        'GtClasses': gt_classes,
        'GtBoxes': gt_boxes,
        'ImScales': im_scales
    }
    op_outputs = {
        'Rois': rois,
        'LabelsInt32': labels_int32,
        'BboxTargets': bbox_targets,
        'BboxInsideWeights': bbox_inside_weights,
        'BboxOutsideWeights': bbox_outside_weights
    }
    op_attrs = {
        'batch_size_per_im': batch_size_per_im,
        'fg_fraction': fg_fraction,
        'fg_thresh': fg_thresh,
        'bg_thresh_hi': bg_thresh_hi,
        'bg_thresh_lo': bg_thresh_lo,
        'bbox_reg_weights': bbox_reg_weights,
        'class_nums': class_nums
    }
    helper.append_op(
        type="generate_proposal_labels",
        inputs=op_inputs,
        outputs=op_outputs,
        attrs=op_attrs)

    results = (rois, labels_int32, bbox_targets, bbox_inside_weights,
               bbox_outside_weights)
    # The outputs are training targets; no gradient flows through them.
    for out_var in results:
        out_var.stop_gradient = True
    return results
def generate_proposals(scores,
bbox_deltas,
im_info,
......
......@@ -146,6 +146,55 @@ class TestAnchorGenerator(unittest.TestCase):
assert anchor.shape[3] == 4
class TestGenerateProposalLabels(unittest.TestCase):
    """Checks the static output shapes of layers.generate_proposal_labels."""

    def test_generate_proposal_labels(self):
        def lod_input(name, shape, dtype):
            # Every input is a level-1 LoD variable without a batch axis.
            return layers.data(
                name=name,
                shape=shape,
                dtype=dtype,
                lod_level=1,
                append_batch_size=False)

        rpn_rois = lod_input('rpn_rois', [4, 4], 'float32')
        gt_classes = lod_input('gt_classes', [6], 'int32')
        gt_boxes = lod_input('gt_boxes', [6, 4], 'float32')
        im_scales = lod_input('im_scales', [1], 'float32')
        class_nums = 5

        outputs = fluid.layers.generate_proposal_labels(
            rpn_rois=rpn_rois,
            gt_classes=gt_classes,
            gt_boxes=gt_boxes,
            im_scales=im_scales,
            batch_size_per_im=2,
            fg_fraction=0.5,
            fg_thresh=0.5,
            bg_thresh_hi=0.5,
            bg_thresh_lo=0.0,
            bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
            class_nums=class_nums)
        (rois, labels_int32, bbox_targets, bbox_inside_weights,
         bbox_outside_weights) = outputs

        assert rois.shape[1] == 4
        # Every output has the same leading dimension as rois.
        for other in (labels_int32, bbox_targets, bbox_inside_weights,
                      bbox_outside_weights):
            assert rois.shape[0] == other.shape[0]
        # Targets and weights carry four columns per class.
        for expanded in (bbox_targets, bbox_inside_weights,
                         bbox_outside_weights):
            assert expanded.shape[1] == 4 * class_nums
class TestMultiBoxHead(unittest.TestCase):
def test_multi_box_head(self):
data_shape = [3, 224, 224]
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
import sys
import math
import paddle.fluid as fluid
from op_test import OpTest
def generate_proposal_labels_in_python(
        rpn_rois, gt_classes, gt_boxes, im_scales, batch_size_per_im,
        fg_fraction, fg_thresh, bg_thresh_hi, bg_thresh_lo, bbox_reg_weights,
        class_nums):
    """Reference implementation: run _sample_rois once per image.

    The first four arguments are indexed by image number. Returns five lists
    with one entry per image (rois, labels_int32, bbox_targets,
    bbox_inside_weights, bbox_outside_weights) and a sixth list holding the
    number of sampled rois of each image.
    """
    assert len(rpn_rois) == len(
        im_scales), 'batch size of rpn_rois and ground_truth is not matched'

    blob_keys = ('rois', 'labels_int32', 'bbox_targets',
                 'bbox_inside_weights', 'bbox_outside_weights')
    collected = dict((key, []) for key in blob_keys)
    lod = []

    for im_i, im_scale in enumerate(im_scales):
        frcn_blobs = _sample_rois(
            rpn_rois[im_i], gt_classes[im_i], gt_boxes[im_i], im_scale,
            batch_size_per_im, fg_fraction, fg_thresh, bg_thresh_hi,
            bg_thresh_lo, bbox_reg_weights, class_nums)
        lod.append(frcn_blobs['rois'].shape[0])
        for key in blob_keys:
            collected[key].append(frcn_blobs[key])

    return (collected['rois'], collected['labels_int32'],
            collected['bbox_targets'], collected['bbox_inside_weights'],
            collected['bbox_outside_weights'], lod)
def _sample_rois(rpn_rois, gt_classes, gt_boxes, im_scale, batch_size_per_im,
                 fg_fraction, fg_thresh, bg_thresh_hi, bg_thresh_lo,
                 bbox_reg_weights, class_nums):
    """Sample foreground / background rois for one image and build targets.

    Candidates are the ground-truth boxes followed by the rpn rois divided
    by ``im_scale``. A candidate is foreground when its best overlap is
    ``>= fg_thresh`` and background when it lies in
    ``[bg_thresh_lo, bg_thresh_hi)``; each set is subsampled with
    ``np.random.choice`` when it exceeds its quota.

    Returns a dict with keys ``rois``, ``labels_int32``, ``bbox_targets``,
    ``bbox_inside_weights`` and ``bbox_outside_weights``.
    """
    roi_budget = int(batch_size_per_im)
    fg_budget = int(np.round(fg_fraction * roi_budget))

    # Roidb: bring the rois back to the unscaled image and prepend the gts.
    rpn_rois = rpn_rois * (1. / im_scale)
    boxes = np.vstack([gt_boxes, rpn_rois])
    num_boxes = boxes.shape[0]
    gt_overlaps = np.zeros((num_boxes, class_nums))
    box_to_gt_ind_map = np.zeros((num_boxes), dtype=np.int32)
    if len(gt_boxes) > 0:
        pairwise_iou = _bbox_overlaps(boxes, gt_boxes)
        best_gt = pairwise_iou.argmax(axis=1)
        best_iou = pairwise_iou.max(axis=1)
        # Boxes with a non-zero overlap with some gt box
        touching = np.where(best_iou > 0)[0]
        touching_classes = gt_classes[best_gt[touching]]
        gt_overlaps[touching, touching_classes] = best_iou[touching]
        box_to_gt_ind_map[touching] = best_gt[touching]

    max_overlaps = gt_overlaps.max(axis=1)
    max_classes = gt_overlaps.argmax(axis=1)

    # Foreground, subsampled if there are too many
    fg_inds = np.where(max_overlaps >= fg_thresh)[0]
    fg_rois_per_this_image = np.minimum(fg_budget, fg_inds.shape[0])
    if fg_inds.shape[0] > fg_rois_per_this_image:
        fg_inds = np.random.choice(
            fg_inds, size=fg_rois_per_this_image, replace=False)

    # Background fills what is left of the budget, subsampled if needed
    bg_mask = (max_overlaps < bg_thresh_hi) & (max_overlaps >= bg_thresh_lo)
    bg_inds = np.where(bg_mask)[0]
    bg_rois_per_this_image = np.minimum(roi_budget - fg_rois_per_this_image,
                                        bg_inds.shape[0])
    if bg_inds.shape[0] > bg_rois_per_this_image:
        bg_inds = np.random.choice(
            bg_inds, size=bg_rois_per_this_image, replace=False)

    keep_inds = np.append(fg_inds, bg_inds)
    sampled_labels = max_classes[keep_inds]
    # Everything after the foreground rows is background: label 0 and, as a
    # placeholder ground truth, the first gt box.
    sampled_labels[fg_rois_per_this_image:] = 0
    sampled_boxes = boxes[keep_inds]
    sampled_gts = gt_boxes[box_to_gt_ind_map[keep_inds]]
    sampled_gts[fg_rois_per_this_image:, :] = gt_boxes[0]

    bbox_label_targets = _compute_targets(sampled_boxes, sampled_gts,
                                          sampled_labels, bbox_reg_weights)
    bbox_targets, bbox_inside_weights = _expand_bbox_targets(
        bbox_label_targets, class_nums)
    bbox_outside_weights = np.array(
        bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype)

    # Faster RCNN blobs; rois are scaled back to the network input size.
    return {
        'rois': sampled_boxes * im_scale,
        'labels_int32': sampled_labels,
        'bbox_targets': bbox_targets,
        'bbox_inside_weights': bbox_inside_weights,
        'bbox_outside_weights': bbox_outside_weights,
    }
def _bbox_overlaps(roi_boxes, gt_boxes):
w1 = np.maximum(roi_boxes[:, 2] - roi_boxes[:, 0] + 1, 0)
h1 = np.maximum(roi_boxes[:, 3] - roi_boxes[:, 1] + 1, 0)
w2 = np.maximum(gt_boxes[:, 2] - gt_boxes[:, 0] + 1, 0)
h2 = np.maximum(gt_boxes[:, 3] - gt_boxes[:, 1] + 1, 0)
area1 = w1 * h1
area2 = w2 * h2
overlaps = np.zeros((roi_boxes.shape[0], gt_boxes.shape[0]))
for ind1 in range(roi_boxes.shape[0]):
for ind2 in range(gt_boxes.shape[0]):
inter_x1 = np.maximum(roi_boxes[ind1, 0], gt_boxes[ind2, 0])
inter_y1 = np.maximum(roi_boxes[ind1, 1], gt_boxes[ind2, 1])
inter_x2 = np.minimum(roi_boxes[ind1, 2], gt_boxes[ind2, 2])
inter_y2 = np.minimum(roi_boxes[ind1, 3], gt_boxes[ind2, 3])
inter_w = np.maximum(inter_x2 - inter_x1 + 1, 0)
inter_h = np.maximum(inter_y2 - inter_y1 + 1, 0)
inter_area = inter_w * inter_h
iou = inter_area / (area1[ind1] + area2[ind2] - inter_area)
overlaps[ind1, ind2] = iou
return overlaps
def _compute_targets(roi_boxes, gt_boxes, labels, bbox_reg_weights):
    """Build the per-roi regression target rows ``[label, dx, dy, dw, dh]``.

    Args:
        roi_boxes: array with one row of four coordinates per roi.
        gt_boxes: array with the matched ground-truth box for each roi; must
            have the same number of rows as ``roi_boxes``.
        labels: 1-D array of class labels, one per roi.
        bbox_reg_weights: sequence of four weights forwarded to
            ``_box_to_delta``.

    Returns:
        A float32 array with five columns: the label followed by the four
        deltas produced by ``_box_to_delta``.
    """
    assert roi_boxes.shape[0] == gt_boxes.shape[0]
    assert roi_boxes.shape[1] == 4
    assert gt_boxes.shape[1] == 4

    # The deltas come straight from _box_to_delta; no placeholder array is
    # needed beforehand.
    deltas = _box_to_delta(
        ex_boxes=roi_boxes,
        gt_boxes=gt_boxes,
        weights=np.asarray(bbox_reg_weights))
    return np.hstack([labels[:, np.newaxis], deltas]).astype(
        np.float32, copy=False)
def _box_to_delta(ex_boxes, gt_boxes, weights):
ex_w = ex_boxes[:, 2] - ex_boxes[:, 0] + 1
ex_h = ex_boxes[:, 3] - ex_boxes[:, 1] + 1
ex_ctr_x = ex_boxes[:, 0] + 0.5 * ex_w
ex_ctr_y = ex_boxes[:, 1] + 0.5 * ex_h
gt_w = gt_boxes[:, 2] - gt_boxes[:, 0] + 1
gt_h = gt_boxes[:, 3] - gt_boxes[:, 1] + 1
gt_ctr_x = gt_boxes[:, 0] + 0.5 * gt_w
gt_ctr_y = gt_boxes[:, 1] + 0.5 * gt_h
dx = (gt_ctr_x - ex_ctr_x) / ex_w / weights[0]
dy = (gt_ctr_y - ex_ctr_y) / ex_h / weights[1]
dw = (np.log(gt_w / ex_w)) / ex_w / weights[2]
dh = (np.log(gt_h / ex_h)) / ex_h / weights[3]
targets = np.vstack([dx, dy, dw, dh]).transpose()
return targets
def _expand_bbox_targets(bbox_targets_input, class_nums):
class_labels = bbox_targets_input[:, 0]
fg_inds = np.where(class_labels > 0)[0]
bbox_targets = np.zeros((class_labels.shape[0], 4 * class_nums))
bbox_inside_weights = np.zeros(bbox_targets.shape)
for ind in fg_inds:
class_label = int(class_labels[ind])
start_ind = class_label * 4
end_ind = class_label * 4 + 4
bbox_targets[ind, start_ind:end_ind] = bbox_targets_input[ind, 1:]
bbox_inside_weights[ind, start_ind:end_ind] = (1.0, 1.0, 1.0, 1.0)
return bbox_targets, bbox_inside_weights
class TestGenerateProposalLabelsOp(OpTest):
    """Test case for the ``generate_proposal_labels`` operator.

    Random proposals and ground truth are generated for one image, the
    expected outputs are computed with ``generate_proposal_labels_in_python``
    and the result is handed to ``check_output`` (inherited from ``OpTest``;
    presumably it runs the operator and compares against ``self.outputs`` --
    not visible in this file section).
    """

    def setUp(self):
        self.op_type = 'generate_proposal_labels'
        self.set_data()

    def test_check_output(self):
        self.check_output()

    def set_data(self):
        """Populate ``self.inputs``, ``self.attrs`` and ``self.outputs``."""
        self.init_test_params()
        self.init_test_input()
        self.init_test_output()

        # Only the first (and only) image of each per-image list is fed in.
        self.inputs = {
            'RpnRois': (self.rpn_rois[0], self.rpn_rois_lod),
            'GtClasses': (self.gt_classes[0], self.gts_lod),
            'GtBoxes': (self.gt_boxes[0], self.gts_lod),
            'ImScales': self.im_scales[0]
        }

        attr_names = ('batch_size_per_im', 'fg_fraction', 'fg_thresh',
                      'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights',
                      'class_nums')
        # Every operator attribute mirrors the instance attribute of the same
        # name set in init_test_params.
        self.attrs = {name: getattr(self, name) for name in attr_names}

        expected_per_image = (
            ('Rois', self.rois),
            ('LabelsInt32', self.labels_int32),
            ('BboxTargets', self.bbox_targets),
            ('BboxInsideWeights', self.bbox_inside_weights),
            ('BboxOutsideWeights', self.bbox_outside_weights),
        )
        # Each output gets its own freshly built LoD list.
        self.outputs = {
            name: (per_image[0], [self.lod])
            for name, per_image in expected_per_image
        }

    def init_test_params(self):
        """Set the operator attributes used by this test."""
        self.batch_size_per_im = 10
        self.fg_fraction = 1.0
        self.fg_thresh = 0.5
        self.bg_thresh_hi = 0.5
        self.bg_thresh_lo = 0.0
        self.bbox_reg_weights = [0.1, 0.1, 0.2, 0.2]
        self.class_nums = 81

    def init_test_input(self):
        """Generate seeded random image shapes, proposals and ground truth."""
        np.random.seed(0)
        image_nums = 1
        # gt_nums + proposal_nums adds up to batch_size_per_im.
        gt_nums = 6
        proposal_nums = self.batch_size_per_im - gt_nums

        # Image shapes are drawn first so the random sequence is consumed in
        # the same order on every run.
        images_shape = [
            np.random.randint(
                200, size=2) for _ in range(image_nums)
        ]
        self.im_scales = [
            np.ones((1)).astype(np.float32) for _ in range(image_nums)
        ]

        self.rpn_rois, self.rpn_rois_lod = _generate_proposals(images_shape,
                                                               proposal_nums)
        ground_truth, self.gts_lod = _generate_groundtruth(
            images_shape, self.class_nums, gt_nums)
        self.gt_classes = [entry['gt_classes'] for entry in ground_truth]
        self.gt_boxes = [entry['boxes'] for entry in ground_truth]

    def init_test_output(self):
        """Compute the expected outputs with the Python reference."""
        expected = generate_proposal_labels_in_python(
            self.rpn_rois, self.gt_classes, self.gt_boxes, self.im_scales,
            self.batch_size_per_im, self.fg_fraction, self.fg_thresh,
            self.bg_thresh_hi, self.bg_thresh_lo, self.bbox_reg_weights,
            self.class_nums)
        (self.rois, self.labels_int32, self.bbox_targets,
         self.bbox_inside_weights, self.bbox_outside_weights,
         self.lod) = expected
def _generate_proposals(images_shape, proposal_nums):
    """Generate ``proposal_nums`` random boxes for every image shape.

    Returns:
        ``(rpn_rois, lod)`` where ``rpn_rois`` is a list with one box array
        per image and ``lod`` is a single-level list holding the running
        total of boxes after each image.
    """
    rpn_rois = [
        _generate_boxes(image_shape, proposal_nums)
        for image_shape in images_shape
    ]

    cumulative_counts = []
    running_total = 0
    for proposals in rpn_rois:
        running_total += len(proposals)
        cumulative_counts.append(running_total)
    return rpn_rois, [cumulative_counts]
def _generate_groundtruth(images_shape, class_nums, gt_nums):
    """Generate ``gt_nums`` random labelled boxes for every image shape.

    Returns:
        ``(ground_truth, lod)`` where ``ground_truth`` is a list of dicts with
        keys ``'gt_classes'`` (int32 labels) and ``'boxes'``, and ``lod`` is a
        single-level list holding the running total of ground-truth entries
        after each image.
    """
    ground_truth = []
    cumulative_counts = []
    for image_shape in images_shape:
        # Labels start at 1 so that class 0 (background) is never drawn.
        classes = np.random.randint(
            low=1, high=class_nums, size=gt_nums).astype(np.int32)
        boxes = _generate_boxes(image_shape, gt_nums)
        ground_truth.append({'gt_classes': classes, 'boxes': boxes})

        seen_so_far = cumulative_counts[-1] if cumulative_counts else 0
        cumulative_counts.append(seen_so_far + len(classes))
    return ground_truth, [cumulative_counts]
def _generate_boxes(image_size, box_nums):
width = image_size[0]
height = image_size[1]
xywh = np.random.rand(box_nums, 4)
xy1 = xywh[:, [0, 1]] * image_size
wh = xywh[:, [2, 3]] * (image_size - xy1)
xy2 = xy1 + wh
boxes = np.hstack([xy1, xy2])
boxes[:, [0, 2]] = np.minimum(width - 1., np.maximum(0., boxes[:, [0, 2]]))
boxes[:, [1, 3]] = np.minimum(height - 1., np.maximum(0., boxes[:, [1, 3]]))
return boxes.astype(np.float32)
# Run the test cases in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册