提交 9e2e893f 编写于 作者: X Xingyuan Bu 提交者: qingqing01

Enhence generate_proposal_labels_op and fix some bug. (#13239)

* Enhence generate_proposal_labels_op
* Fix bug in generate_proposals_op
* Refine rpn_target_assign_op.
* by Bu Xingyuan, Wang Guanzhong and Dang Qingqing
上级 23ec966c
......@@ -305,9 +305,9 @@ paddle.fluid.layers.target_assign ArgSpec(args=['input', 'matched_indices', 'neg
paddle.fluid.layers.detection_output ArgSpec(args=['loc', 'scores', 'prior_box', 'prior_box_var', 'background_label', 'nms_threshold', 'nms_top_k', 'keep_top_k', 'score_threshold', 'nms_eta'], varargs=None, keywords=None, defaults=(0, 0.3, 400, 200, 0.01, 1.0))
paddle.fluid.layers.ssd_loss ArgSpec(args=['location', 'confidence', 'gt_box', 'gt_label', 'prior_box', 'prior_box_var', 'background_label', 'overlap_threshold', 'neg_pos_ratio', 'neg_overlap', 'loc_loss_weight', 'conf_loss_weight', 'match_type', 'mining_type', 'normalize', 'sample_size'], varargs=None, keywords=None, defaults=(None, 0, 0.5, 3.0, 0.5, 1.0, 1.0, 'per_prediction', 'max_negative', True, None))
paddle.fluid.layers.detection_map ArgSpec(args=['detect_res', 'label', 'class_num', 'background_label', 'overlap_threshold', 'evaluate_difficult', 'has_state', 'input_states', 'out_states', 'ap_version'], varargs=None, keywords=None, defaults=(0, 0.3, True, None, None, None, 'integral'))
paddle.fluid.layers.rpn_target_assign ArgSpec(args=['loc', 'scores', 'anchor_box', 'anchor_var', 'gt_box', 'rpn_batch_size_per_im', 'fg_fraction', 'rpn_positive_overlap', 'rpn_negative_overlap'], varargs=None, keywords=None, defaults=(256, 0.25, 0.7, 0.3))
paddle.fluid.layers.rpn_target_assign ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'is_crowd', 'im_info', 'rpn_batch_size_per_im', 'rpn_straddle_thresh', 'rpn_fg_fraction', 'rpn_positive_overlap', 'rpn_negative_overlap', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.0, 0.5, 0.7, 0.3, True))
paddle.fluid.layers.anchor_generator ArgSpec(args=['input', 'anchor_sizes', 'aspect_ratios', 'variance', 'stride', 'offset', 'name'], varargs=None, keywords=None, defaults=(None, None, [0.1, 0.1, 0.2, 0.2], None, 0.5, None))
paddle.fluid.layers.generate_proposal_labels ArgSpec(args=['rpn_rois', 'gt_classes', 'gt_boxes', 'im_scales', 'batch_size_per_im', 'fg_fraction', 'fg_thresh', 'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights', 'class_nums'], varargs=None, keywords=None, defaults=(256, 0.25, 0.25, 0.5, 0.0, [0.1, 0.1, 0.2, 0.2], None))
paddle.fluid.layers.generate_proposal_labels ArgSpec(args=['rpn_rois', 'gt_classes', 'is_crowd', 'gt_boxes', 'im_info', 'batch_size_per_im', 'fg_fraction', 'fg_thresh', 'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights', 'class_nums', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.25, 0.25, 0.5, 0.0, [0.1, 0.1, 0.2, 0.2], None, True))
paddle.fluid.layers.generate_proposals ArgSpec(args=['scores', 'bbox_deltas', 'im_info', 'anchors', 'variances', 'pre_nms_top_n', 'post_nms_top_n', 'nms_thresh', 'min_size', 'eta', 'name'], varargs=None, keywords=None, defaults=(6000, 1000, 0.5, 0.1, 1.0, None))
paddle.fluid.layers.iou_similarity ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.box_coder ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
......
......@@ -9,6 +9,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <algorithm>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/tensor.h"
......@@ -21,7 +22,7 @@ namespace operators {
*/
template <typename T>
inline void BoxToDelta(const int box_num, const framework::Tensor& ex_boxes,
const framework::Tensor& gt_boxes, const T* weights,
const framework::Tensor& gt_boxes, const float* weights,
const bool normalized, framework::Tensor* box_delta) {
auto ex_boxes_et = framework::EigenTensor<T, 2>::From(ex_boxes);
auto gt_boxes_et = framework::EigenTensor<T, 2>::From(gt_boxes);
......@@ -62,5 +63,35 @@ void Gather(const T* in, const int in_stride, const int* index, const int num,
}
}
template <typename T>
void BboxOverlaps(const framework::Tensor& r_boxes,
const framework::Tensor& c_boxes,
framework::Tensor* overlaps) {
auto r_boxes_et = framework::EigenTensor<T, 2>::From(r_boxes);
auto c_boxes_et = framework::EigenTensor<T, 2>::From(c_boxes);
auto overlaps_et = framework::EigenTensor<T, 2>::From(*overlaps);
int r_num = r_boxes.dims()[0];
int c_num = c_boxes.dims()[0];
auto zero = static_cast<T>(0.0);
T r_box_area, c_box_area, x_min, y_min, x_max, y_max, inter_w, inter_h,
inter_area;
for (int i = 0; i < r_num; ++i) {
r_box_area = (r_boxes_et(i, 2) - r_boxes_et(i, 0) + 1) *
(r_boxes_et(i, 3) - r_boxes_et(i, 1) + 1);
for (int j = 0; j < c_num; ++j) {
c_box_area = (c_boxes_et(j, 2) - c_boxes_et(j, 0) + 1) *
(c_boxes_et(j, 3) - c_boxes_et(j, 1) + 1);
x_min = std::max(r_boxes_et(i, 0), c_boxes_et(j, 0));
y_min = std::max(r_boxes_et(i, 1), c_boxes_et(j, 1));
x_max = std::min(r_boxes_et(i, 2), c_boxes_et(j, 2));
y_max = std::min(r_boxes_et(i, 3), c_boxes_et(j, 3));
inter_w = std::max(x_max - x_min + 1, zero);
inter_h = std::max(y_max - y_min + 1, zero);
inter_area = inter_w * inter_h;
overlaps_et(i, j) = inter_area / (r_box_area + c_box_area - inter_area);
}
}
}
} // namespace operators
} // namespace paddle
......@@ -42,10 +42,11 @@ class GenerateProposalLabelsOp : public framework::OperatorWithKernel {
"Input(RpnRois) shouldn't be null.");
PADDLE_ENFORCE(ctx->HasInput("GtClasses"),
"Input(GtClasses) shouldn't be null.");
PADDLE_ENFORCE(ctx->HasInput("IsCrowd"),
"Input(IsCrowd) shouldn't be null.");
PADDLE_ENFORCE(ctx->HasInput("GtBoxes"),
"Input(GtBoxes) shouldn't be null.");
PADDLE_ENFORCE(ctx->HasInput("ImScales"),
"Input(ImScales) shouldn't be null.");
PADDLE_ENFORCE(ctx->HasInput("ImInfo"), "Input(ImInfo) shouldn't be null.");
PADDLE_ENFORCE(ctx->HasOutput("Rois"),
"Output(Rois) of RpnTargetAssignOp should not be null");
......@@ -64,22 +65,21 @@ class GenerateProposalLabelsOp : public framework::OperatorWithKernel {
auto rpn_rois_dims = ctx->GetInputDim("RpnRois");
auto gt_classes_dims = ctx->GetInputDim("GtClasses");
auto is_crowd_dims = ctx->GetInputDim("IsCrowd");
auto gt_boxes_dims = ctx->GetInputDim("GtBoxes");
auto im_scales_dims = ctx->GetInputDim("ImScales");
auto im_info_dims = ctx->GetInputDim("ImInfo");
PADDLE_ENFORCE_EQ(rpn_rois_dims.size(), 2,
"The rank of Input(RpnRois) must be 2.");
PADDLE_ENFORCE_EQ(gt_classes_dims.size(), 1,
"The rank of Input(GtClasses) must be 1.");
PADDLE_ENFORCE_EQ(gt_boxes_dims.size(), 2,
"The rank of Input(GtBoxes) must be 2.");
PADDLE_ENFORCE_EQ(im_scales_dims.size(), 1,
"The rank of Input(ImScales) must be 1.");
PADDLE_ENFORCE_EQ(im_info_dims.size(), 2,
"The rank of Input(ImInfo) must be 2.");
int class_nums = ctx->Attrs().Get<int>("class_nums");
ctx->SetOutputDim("Rois", {-1, 4});
ctx->SetOutputDim("LabelsInt32", {-1});
ctx->SetOutputDim("LabelsInt32", {-1, 1});
ctx->SetOutputDim("BboxTargets", {-1, 4 * class_nums});
ctx->SetOutputDim("BboxInsideWeights", {-1, 4 * class_nums});
ctx->SetOutputDim("BboxOutsideWeights", {-1, 4 * class_nums});
......@@ -105,45 +105,18 @@ void Concat(const platform::CPUDeviceContext& context,
concat_functor(context, inputs, axis, out_tensor);
}
template <typename T>
void BboxOverlaps(const Tensor& r_boxes, const Tensor& c_boxes,
Tensor* overlaps) {
auto r_boxes_et = framework::EigenTensor<T, 2>::From(r_boxes);
auto c_boxes_et = framework::EigenTensor<T, 2>::From(c_boxes);
auto overlaps_et = framework::EigenTensor<T, 2>::From(*overlaps);
int r_num = r_boxes.dims()[0];
int c_num = c_boxes.dims()[0];
auto zero = static_cast<T>(0.0);
T r_box_area, c_box_area, x_min, y_min, x_max, y_max, inter_w, inter_h,
inter_area;
for (int i = 0; i < r_num; ++i) {
r_box_area = (r_boxes_et(i, 2) - r_boxes_et(i, 0) + 1) *
(r_boxes_et(i, 3) - r_boxes_et(i, 1) + 1);
for (int j = 0; j < c_num; ++j) {
c_box_area = (c_boxes_et(j, 2) - c_boxes_et(j, 0) + 1) *
(c_boxes_et(j, 3) - c_boxes_et(j, 1) + 1);
x_min = std::max(r_boxes_et(i, 0), c_boxes_et(j, 0));
y_min = std::max(r_boxes_et(i, 1), c_boxes_et(j, 1));
x_max = std::min(r_boxes_et(i, 2), c_boxes_et(j, 2));
y_max = std::min(r_boxes_et(i, 3), c_boxes_et(j, 3));
inter_w = std::max(x_max - x_min + 1, zero);
inter_h = std::max(y_max - y_min + 1, zero);
inter_area = inter_w * inter_h;
overlaps_et(i, j) = inter_area / (r_box_area + c_box_area - inter_area);
}
}
}
template <typename T>
std::vector<std::vector<int>> SampleFgBgGt(
const platform::CPUDeviceContext& context, Tensor* iou,
const int batch_size_per_im, const float fg_fraction, const float fg_thresh,
const float bg_thresh_hi, const float bg_thresh_lo,
std::minstd_rand engine) {
const Tensor& is_crowd, const int batch_size_per_im,
const float fg_fraction, const float fg_thresh, const float bg_thresh_hi,
const float bg_thresh_lo, std::minstd_rand engine, const bool use_random) {
std::vector<int> fg_inds;
std::vector<int> bg_inds;
std::vector<int> gt_inds;
T* proposal_to_gt_overlaps = iou->mutable_data<T>(context.GetPlace());
int64_t gt_num = is_crowd.numel();
const int* crowd_data = is_crowd.data<int>();
T* proposal_to_gt_overlaps = iou->data<T>();
int64_t row = iou->dims()[0];
int64_t col = iou->dims()[1];
float epsilon = 0.00001;
......@@ -152,6 +125,9 @@ std::vector<std::vector<int>> SampleFgBgGt(
for (int64_t i = 0; i < row; ++i) {
const T* v = proposal_to_gt_overlaps + i * col;
T max_overlap = *std::max_element(v, v + col);
if ((i < gt_num) && (crowd_data[i])) {
max_overlap = -1.0;
}
if (max_overlap > fg_thresh) {
for (int64_t j = 0; j < col; ++j) {
T val = proposal_to_gt_overlaps[i * col + j];
......@@ -170,17 +146,19 @@ std::vector<std::vector<int>> SampleFgBgGt(
}
// Reservoir Sampling
std::uniform_real_distribution<float> uniform(0, 1);
int fg_rois_per_im = std::floor(batch_size_per_im * fg_fraction);
int fg_rois_this_image = fg_inds.size();
int fg_rois_per_this_image = std::min(fg_rois_per_im, fg_rois_this_image);
std::uniform_real_distribution<float> uniform(0, 1);
const int64_t fg_size = static_cast<int64_t>(fg_inds.size());
if (fg_size > fg_rois_per_this_image) {
for (int64_t i = fg_rois_per_this_image; i < fg_size; ++i) {
int rng_ind = std::floor(uniform(engine) * i);
if (rng_ind < fg_rois_per_this_image) {
std::iter_swap(fg_inds.begin() + rng_ind, fg_inds.begin() + i);
std::iter_swap(gt_inds.begin() + rng_ind, gt_inds.begin() + i);
if (use_random) {
const int64_t fg_size = static_cast<int64_t>(fg_inds.size());
if (fg_size > fg_rois_per_this_image) {
for (int64_t i = fg_rois_per_this_image; i < fg_size; ++i) {
int rng_ind = std::floor(uniform(engine) * i);
if (rng_ind < fg_rois_per_this_image) {
std::iter_swap(fg_inds.begin() + rng_ind, fg_inds.begin() + i);
std::iter_swap(gt_inds.begin() + rng_ind, gt_inds.begin() + i);
}
}
}
}
......@@ -192,12 +170,14 @@ std::vector<std::vector<int>> SampleFgBgGt(
int bg_rois_per_image = batch_size_per_im - fg_rois_per_this_image;
int bg_rois_this_image = bg_inds.size();
int bg_rois_per_this_image = std::min(bg_rois_per_image, bg_rois_this_image);
const int64_t bg_size = static_cast<int64_t>(bg_inds.size());
if (bg_size > bg_rois_per_this_image) {
for (int64_t i = bg_rois_per_this_image; i < bg_size; ++i) {
int rng_ind = std::floor(uniform(engine) * i);
if (rng_ind < fg_rois_per_this_image)
std::iter_swap(bg_inds.begin() + rng_ind, bg_inds.begin() + i);
if (use_random) {
const int64_t bg_size = static_cast<int64_t>(bg_inds.size());
if (bg_size > bg_rois_per_this_image) {
for (int64_t i = bg_rois_per_this_image; i < bg_size; ++i) {
int rng_ind = std::floor(uniform(engine) * i);
if (rng_ind < fg_rois_per_this_image)
std::iter_swap(bg_inds.begin() + rng_ind, bg_inds.begin() + i);
}
}
}
std::vector<int> new_bg_inds(bg_inds.begin(),
......@@ -248,14 +228,14 @@ void GatherBoxesLabels(const platform::CPUDeviceContext& context,
template <typename T>
std::vector<Tensor> SampleRoisForOneImage(
const platform::CPUDeviceContext& context, Tensor* rpn_rois,
Tensor* gt_classes, Tensor* gt_boxes, Tensor* im_scale,
Tensor* gt_classes, Tensor* is_crowd, Tensor* gt_boxes, Tensor* im_info,
const int batch_size_per_im, const float fg_fraction, const float fg_thresh,
const float bg_thresh_hi, const float bg_thresh_lo,
const std::vector<float>& bbox_reg_weights, const int class_nums,
std::minstd_rand engine) {
std::minstd_rand engine, bool use_random) {
auto rpn_rois_et = framework::EigenTensor<T, 2>::From(*rpn_rois);
auto im_scale_data = im_scale->data<T>()[0];
rpn_rois_et = rpn_rois_et / im_scale_data;
auto im_scale = im_info->data<T>()[2];
rpn_rois_et = rpn_rois_et / im_scale;
Tensor boxes;
int proposals_num = gt_boxes->dims()[0] + rpn_rois->dims()[0];
......@@ -270,8 +250,8 @@ std::vector<Tensor> SampleRoisForOneImage(
// Generate proposal index
std::vector<std::vector<int>> fg_bg_gt = SampleFgBgGt<T>(
context, &proposal_to_gt_overlaps, batch_size_per_im, fg_fraction,
fg_thresh, bg_thresh_hi, bg_thresh_lo, engine);
context, &proposal_to_gt_overlaps, *is_crowd, batch_size_per_im,
fg_fraction, fg_thresh, bg_thresh_hi, bg_thresh_lo, engine, use_random);
std::vector<int> fg_inds = fg_bg_gt[0];
std::vector<int> bg_inds = fg_bg_gt[1];
std::vector<int> gt_inds = fg_bg_gt[2];
......@@ -291,15 +271,15 @@ std::vector<Tensor> SampleRoisForOneImage(
// Compute targets
Tensor bbox_targets_single;
bbox_targets_single.mutable_data<T>(bbox_dim, context.GetPlace());
BoxToDelta<T>(fg_num, sampled_boxes, sampled_gts, nullptr, false,
&bbox_targets_single);
BoxToDelta<T>(fg_num, sampled_boxes, sampled_gts, bbox_reg_weights.data(),
false, &bbox_targets_single);
// Scale rois
Tensor sampled_rois;
sampled_rois.mutable_data<T>(sampled_boxes.dims(), context.GetPlace());
auto sampled_rois_et = framework::EigenTensor<T, 2>::From(sampled_rois);
auto sampled_boxes_et = framework::EigenTensor<T, 2>::From(sampled_boxes);
sampled_rois_et = sampled_boxes_et * im_scale_data;
sampled_rois_et = sampled_boxes_et * im_scale;
// Expand box targets
Tensor bbox_targets, bbox_inside_weights, bbox_outside_weights;
......@@ -351,8 +331,9 @@ class GenerateProposalLabelsKernel : public framework::OpKernel<T> {
void Compute(const framework::ExecutionContext& context) const override {
auto* rpn_rois = context.Input<LoDTensor>("RpnRois");
auto* gt_classes = context.Input<LoDTensor>("GtClasses");
auto* is_crowd = context.Input<LoDTensor>("IsCrowd");
auto* gt_boxes = context.Input<LoDTensor>("GtBoxes");
auto* im_scales = context.Input<LoDTensor>("ImScales");
auto* im_info = context.Input<LoDTensor>("ImInfo");
auto* rois = context.Output<LoDTensor>("Rois");
auto* labels_int32 = context.Output<LoDTensor>("LabelsInt32");
......@@ -369,18 +350,21 @@ class GenerateProposalLabelsKernel : public framework::OpKernel<T> {
std::vector<float> bbox_reg_weights =
context.Attr<std::vector<float>>("bbox_reg_weights");
int class_nums = context.Attr<int>("class_nums");
bool use_random = context.Attr<bool>("use_random");
PADDLE_ENFORCE_EQ(rpn_rois->lod().size(), 1UL,
"GenerateProposalLabelsOp rpn_rois needs 1 level of LoD");
PADDLE_ENFORCE_EQ(
gt_classes->lod().size(), 1UL,
"GenerateProposalLabelsOp gt_classes needs 1 level of LoD");
PADDLE_ENFORCE_EQ(is_crowd->lod().size(), 1UL,
"GenerateProposalLabelsOp is_crowd needs 1 level of LoD");
PADDLE_ENFORCE_EQ(gt_boxes->lod().size(), 1UL,
"GenerateProposalLabelsOp gt_boxes needs 1 level of LoD");
int64_t n = static_cast<int64_t>(rpn_rois->lod().back().size() - 1);
rois->mutable_data<T>({n * batch_size_per_im, kBoxDim}, context.GetPlace());
labels_int32->mutable_data<int>({n * batch_size_per_im},
labels_int32->mutable_data<int>({n * batch_size_per_im, 1},
context.GetPlace());
bbox_targets->mutable_data<T>({n * batch_size_per_im, kBoxDim * class_nums},
context.GetPlace());
......@@ -391,8 +375,7 @@ class GenerateProposalLabelsKernel : public framework::OpKernel<T> {
std::random_device rnd;
std::minstd_rand engine;
int seed =
context.Attr<bool>("fix_seed") ? context.Attr<int>("seed") : rnd();
int seed = rnd();
engine.seed(seed);
framework::LoD lod;
......@@ -403,19 +386,23 @@ class GenerateProposalLabelsKernel : public framework::OpKernel<T> {
auto rpn_rois_lod = rpn_rois->lod().back();
auto gt_classes_lod = gt_classes->lod().back();
auto is_crowd_lod = is_crowd->lod().back();
auto gt_boxes_lod = gt_boxes->lod().back();
for (int i = 0; i < n; ++i) {
Tensor rpn_rois_slice =
rpn_rois->Slice(rpn_rois_lod[i], rpn_rois_lod[i + 1]);
Tensor gt_classes_slice =
gt_classes->Slice(gt_classes_lod[i], gt_classes_lod[i + 1]);
Tensor is_crowd_slice =
is_crowd->Slice(is_crowd_lod[i], is_crowd_lod[i + 1]);
Tensor gt_boxes_slice =
gt_boxes->Slice(gt_boxes_lod[i], gt_boxes_lod[i + 1]);
Tensor im_scales_slice = im_scales->Slice(i, i + 1);
Tensor im_info_slice = im_info->Slice(i, i + 1);
std::vector<Tensor> tensor_output = SampleRoisForOneImage<T>(
dev_ctx, &rpn_rois_slice, &gt_classes_slice, &gt_boxes_slice,
&im_scales_slice, batch_size_per_im, fg_fraction, fg_thresh,
bg_thresh_hi, bg_thresh_lo, bbox_reg_weights, class_nums, engine);
dev_ctx, &rpn_rois_slice, &gt_classes_slice, &is_crowd_slice,
&gt_boxes_slice, &im_info_slice, batch_size_per_im, fg_fraction,
fg_thresh, bg_thresh_hi, bg_thresh_lo, bbox_reg_weights, class_nums,
engine, use_random);
Tensor sampled_rois = tensor_output[0];
Tensor sampled_labels_int32 = tensor_output[1];
Tensor sampled_bbox_targets = tensor_output[2];
......@@ -442,7 +429,7 @@ class GenerateProposalLabelsKernel : public framework::OpKernel<T> {
bbox_inside_weights->set_lod(lod);
bbox_outside_weights->set_lod(lod);
rois->Resize({num_rois, kBoxDim});
labels_int32->Resize({num_rois});
labels_int32->Resize({num_rois, 1});
bbox_targets->Resize({num_rois, kBoxDim * class_nums});
bbox_inside_weights->Resize({num_rois, kBoxDim * class_nums});
bbox_outside_weights->Resize({num_rois, kBoxDim * class_nums});
......@@ -455,8 +442,9 @@ class GenerateProposalLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
// TODO(buxingyuan): Add Document
AddInput("RpnRois", "RpnRois.");
AddInput("GtClasses", "GtClasses.");
AddInput("IsCrowd", "IsCrowd.");
AddInput("GtBoxes", "GtBoxes.");
AddInput("ImScales", "ImScales.");
AddInput("ImInfo", "ImInfo.");
AddOutput("Rois", "Rois.");
AddOutput("LabelsInt32", "LabelsInt32.");
......@@ -471,8 +459,7 @@ class GenerateProposalLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr<float>("bg_thresh_lo", "bg_thresh_lo");
AddAttr<std::vector<float>>("bbox_reg_weights", "bbox_reg_weights");
AddAttr<int>("class_nums", "class_nums");
AddAttr<bool>("fix_seed", "fix_seed").SetDefault(false);
AddAttr<int>("seed", "seed").SetDefault(0);
AddAttr<bool>("use_random", "use_random").SetDefault(true);
AddComment(R"DOC(
Generate Proposals Labels Operator.
......
......@@ -89,12 +89,11 @@ void BoxCoder(const platform::DeviceContext &ctx, Tensor *all_anchors,
}
for (int64_t i = 0; i < row; ++i) {
T anchor_width = anchor_data[i * len + 2] - anchor_data[i * len];
T anchor_height = anchor_data[i * len + 3] - anchor_data[i * len + 1];
T anchor_width = anchor_data[i * len + 2] - anchor_data[i * len] + 1.0;
T anchor_height = anchor_data[i * len + 3] - anchor_data[i * len + 1] + 1.0;
T anchor_center_x = (anchor_data[i * len + 2] + anchor_data[i * len]) / 2;
T anchor_center_y =
(anchor_data[i * len + 3] + anchor_data[i * len + 1]) / 2;
T anchor_center_x = anchor_data[i * len] + 0.5 * anchor_width;
T anchor_center_y = anchor_data[i * len + 1] + 0.5 * anchor_height;
T bbox_center_x = 0, bbox_center_y = 0;
T bbox_width = 0, bbox_height = 0;
......@@ -106,25 +105,31 @@ void BoxCoder(const platform::DeviceContext &ctx, Tensor *all_anchors,
bbox_center_y = variances_data[i * len + 1] *
bbox_deltas_data[i * len + 1] * anchor_height +
anchor_center_y;
bbox_width = std::exp(variances_data[i * len + 2] *
bbox_deltas_data[i * len + 2]) *
bbox_width = std::exp(std::min<T>(variances_data[i * len + 2] *
bbox_deltas_data[i * len + 2],
std::log(1000.0 / 16.0))) *
anchor_width;
bbox_height = std::exp(variances_data[i * len + 3] *
bbox_deltas_data[i * len + 3]) *
bbox_height = std::exp(std::min<T>(variances_data[i * len + 3] *
bbox_deltas_data[i * len + 3],
std::log(1000.0 / 16.0))) *
anchor_height;
} else {
bbox_center_x =
bbox_deltas_data[i * len] * anchor_width + anchor_center_x;
bbox_center_y =
bbox_deltas_data[i * len + 1] * anchor_height + anchor_center_y;
bbox_width = std::exp(bbox_deltas_data[i * len + 2]) * anchor_width;
bbox_height = std::exp(bbox_deltas_data[i * len + 3]) * anchor_height;
bbox_width = std::exp(std::min<T>(bbox_deltas_data[i * len + 2],
std::log(1000.0 / 16.0))) *
anchor_width;
bbox_height = std::exp(std::min<T>(bbox_deltas_data[i * len + 3],
std::log(1000.0 / 16.0))) *
anchor_height;
}
proposals_data[i * len] = bbox_center_x - bbox_width / 2;
proposals_data[i * len + 1] = bbox_center_y - bbox_height / 2;
proposals_data[i * len + 2] = bbox_center_x + bbox_width / 2;
proposals_data[i * len + 3] = bbox_center_y + bbox_height / 2;
proposals_data[i * len + 2] = bbox_center_x + bbox_width / 2 - 1;
proposals_data[i * len + 3] = bbox_center_y + bbox_height / 2 - 1;
}
// return proposals;
}
......@@ -156,18 +161,23 @@ void FilterBoxes(const platform::DeviceContext &ctx, Tensor *boxes,
float min_size, const Tensor &im_info, Tensor *keep) {
const T *im_info_data = im_info.data<T>();
T *boxes_data = boxes->mutable_data<T>(ctx.GetPlace());
min_size *= im_info_data[2];
T im_scale = im_info_data[2];
keep->Resize({boxes->dims()[0], 1});
min_size = std::max(min_size, 1.0f);
int *keep_data = keep->mutable_data<int>(ctx.GetPlace());
int keep_len = 0;
for (int i = 0; i < boxes->dims()[0]; ++i) {
T ws = boxes_data[4 * i + 2] - boxes_data[4 * i] + 1;
T hs = boxes_data[4 * i + 3] - boxes_data[4 * i + 1] + 1;
T ws_origin_scale =
(boxes_data[4 * i + 2] - boxes_data[4 * i]) / im_scale + 1;
T hs_origin_scale =
(boxes_data[4 * i + 3] - boxes_data[4 * i + 1]) / im_scale + 1;
T x_ctr = boxes_data[4 * i] + ws / 2;
T y_ctr = boxes_data[4 * i + 1] + hs / 2;
if (ws >= min_size && hs >= min_size && x_ctr <= im_info_data[1] &&
y_ctr <= im_info_data[0]) {
if (ws_origin_scale >= min_size && hs_origin_scale >= min_size &&
x_ctr <= im_info_data[1] && y_ctr <= im_info_data[0]) {
keep_data[keep_len++] = i;
}
}
......@@ -218,8 +228,8 @@ T JaccardOverlap(const T *box1, const T *box2, const bool normalized) {
const T inter_ymin = std::max(box1[1], box2[1]);
const T inter_xmax = std::min(box1[2], box2[2]);
const T inter_ymax = std::min(box1[3], box2[3]);
const T inter_w = inter_xmax - inter_xmin;
const T inter_h = inter_ymax - inter_ymin;
const T inter_w = std::max(0.0f, inter_xmax - inter_xmin + 1);
const T inter_h = std::max(0.0f, inter_ymax - inter_ymin + 1);
const T inter_area = inter_w * inter_h;
const T bbox1_area = BBoxArea<T>(box1, normalized);
const T bbox2_area = BBoxArea<T>(box2, normalized);
......
......@@ -31,8 +31,14 @@ class RpnTargetAssignOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("DistMat"),
"Input(DistMat) of RpnTargetAssignOp should not be null");
PADDLE_ENFORCE(ctx->HasInput("Anchor"),
"Input(Anchor) of RpnTargetAssignOp should not be null");
PADDLE_ENFORCE(ctx->HasInput("GtBoxes"),
"Input(GtBoxes) of RpnTargetAssignOp should not be null");
PADDLE_ENFORCE(ctx->HasInput("IsCrowd"),
"Input(Anchor) of RpnTargetAssignOp should not be null");
PADDLE_ENFORCE(ctx->HasInput("ImInfo"),
"Input(ImInfo) of RpnTargetAssignOp should not be null");
PADDLE_ENFORCE(
ctx->HasOutput("LocationIndex"),
......@@ -43,10 +49,20 @@ class RpnTargetAssignOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE(
ctx->HasOutput("TargetLabel"),
"Output(TargetLabel) of RpnTargetAssignOp should not be null");
auto in_dims = ctx->GetInputDim("DistMat");
PADDLE_ENFORCE_EQ(in_dims.size(), 2,
"The rank of Input(DistMat) must be 2.");
PADDLE_ENFORCE(
ctx->HasOutput("TargetBBox"),
"Output(TargetBBox) of RpnTargetAssignOp should not be null");
auto anchor_dims = ctx->GetInputDim("Anchor");
auto gt_boxes_dims = ctx->GetInputDim("GtBoxes");
auto is_crowd_dims = ctx->GetInputDim("IsCrowd");
auto im_info_dims = ctx->GetInputDim("ImInfo");
PADDLE_ENFORCE_EQ(anchor_dims.size(), 2,
"The rank of Input(Anchor) must be 2.");
PADDLE_ENFORCE_EQ(gt_boxes_dims.size(), 2,
"The rank of Input(GtBoxes) must be 2.");
PADDLE_ENFORCE_EQ(im_info_dims.size(), 2,
"The rank of Input(ImInfo) must be 2.");
ctx->SetOutputDim("LocationIndex", {-1});
ctx->SetOutputDim("ScoreIndex", {-1});
......@@ -59,198 +75,383 @@ class RpnTargetAssignOp : public framework::OperatorWithKernel {
const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(
framework::ToDataType(
ctx.Input<framework::LoDTensor>("DistMat")->type()),
ctx.Input<framework::LoDTensor>("Anchor")->type()),
platform::CPUPlace());
}
};
template <typename T>
class RpnTargetAssignKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* anchor_t = context.Input<Tensor>("Anchor"); // (H*W*A) * 4
auto* gt_bbox_t = context.Input<Tensor>("GtBox");
auto* dist_t = context.Input<LoDTensor>("DistMat");
void AppendRpns(LoDTensor* out, int64_t offset, Tensor* to_add) {
auto* out_data = out->data<T>();
auto* to_add_data = to_add->data<T>();
memcpy(out_data + offset, to_add_data, to_add->numel() * sizeof(T));
}
template <typename T>
std::vector<Tensor> FilterStraddleAnchor(
const platform::CPUDeviceContext& context, const Tensor* anchor,
const float rpn_straddle_thresh, T im_height, T im_width) {
std::vector<int> inds_inside;
int anchor_num = anchor->dims()[0];
auto* anchor_data = anchor->data<T>();
if (rpn_straddle_thresh >= 0) {
int index;
for (int i = 0; i < anchor_num; ++i) {
index = i * 4;
if ((anchor_data[index + 0] >= -rpn_straddle_thresh) &&
(anchor_data[index + 1] >= -rpn_straddle_thresh) &&
(anchor_data[index + 2] < im_width + rpn_straddle_thresh) &&
(anchor_data[index + 3] < im_height + rpn_straddle_thresh)) {
inds_inside.emplace_back(i);
}
}
} else {
for (int i = 0; i < anchor_num; ++i) {
inds_inside.emplace_back(i);
}
}
int inside_num = inds_inside.size();
Tensor inds_inside_t;
int* inds_inside_data =
inds_inside_t.mutable_data<int>({inside_num}, context.GetPlace());
std::copy(inds_inside.begin(), inds_inside.end(), inds_inside_data);
Tensor inside_anchor_t;
T* inside_anchor_data =
inside_anchor_t.mutable_data<T>({inside_num, 4}, context.GetPlace());
Gather<T>(anchor->data<T>(), 4, inds_inside_data, inside_num,
inside_anchor_data);
std::vector<Tensor> res;
res.emplace_back(inds_inside_t);
res.emplace_back(inside_anchor_t);
return res;
}
template <typename T>
Tensor FilterCrowdGt(const platform::CPUDeviceContext& context,
Tensor* gt_boxes, Tensor* is_crowd) {
int gt_num = gt_boxes->dims()[0];
std::vector<int> not_crowd_inds;
auto* is_crowd_data = is_crowd->data<int>();
for (int i = 0; i < gt_num; ++i) {
if (is_crowd_data[i] == 0) {
not_crowd_inds.emplace_back(i);
}
}
int ncrowd_num = not_crowd_inds.size();
Tensor ncrowd_gt_boxes;
T* ncrowd_gt_boxes_data =
ncrowd_gt_boxes.mutable_data<T>({ncrowd_num, 4}, context.GetPlace());
Gather<T>(gt_boxes->data<T>(), 4, not_crowd_inds.data(), ncrowd_num,
ncrowd_gt_boxes_data);
return ncrowd_gt_boxes;
}
void ReservoirSampling(const int num, std::vector<int>* inds,
std::minstd_rand engine, bool use_random) {
std::uniform_real_distribution<float> uniform(0, 1);
size_t len = inds->size();
if (len > static_cast<size_t>(num)) {
if (use_random) {
for (size_t i = num; i < len; ++i) {
int rng_ind = std::floor(uniform(engine) * i);
if (rng_ind < num)
std::iter_swap(inds->begin() + rng_ind, inds->begin() + i);
}
}
inds->resize(num);
}
}
template <typename T>
void ScoreAssign(const T* anchor_by_gt_overlap_data,
const Tensor& anchor_to_gt_max, const Tensor& gt_to_anchor_max,
const int rpn_batch_size_per_im, const float rpn_fg_fraction,
const float rpn_positive_overlap,
const float rpn_negative_overlap, std::vector<int>* fg_inds,
std::vector<int>* bg_inds, std::vector<int>* tgt_lbl,
std::minstd_rand engine, bool use_random) {
float epsilon = 0.00001;
int anchor_num = anchor_to_gt_max.dims()[0];
int gt_num = gt_to_anchor_max.dims()[0];
std::vector<int> target_label(anchor_num, -1);
std::vector<int> fg_inds_fake;
std::vector<int> bg_inds_fake;
const T* anchor_to_gt_max_data = anchor_to_gt_max.data<T>();
const T* gt_to_anchor_max_data = gt_to_anchor_max.data<T>();
// TODO(buxingyuan): Match with Detectron now
// but it seems here is a bug in two directions assignment
// in which the later one may overwrites the former one.
for (int64_t i = 0; i < anchor_num; ++i) {
bool is_anchors_with_max_overlap = false;
for (int64_t j = 0; j < gt_num; ++j) {
T value = anchor_by_gt_overlap_data[i * gt_num + j];
T diff = std::abs(value - gt_to_anchor_max_data[j]);
if (diff < epsilon) {
is_anchors_with_max_overlap = true;
break;
}
}
bool is_anchor_great_than_thresh =
(anchor_to_gt_max_data[i] >= rpn_positive_overlap);
if (is_anchors_with_max_overlap || is_anchor_great_than_thresh) {
fg_inds_fake.push_back(i);
}
}
auto* loc_index_t = context.Output<Tensor>("LocationIndex");
auto* score_index_t = context.Output<Tensor>("ScoreIndex");
auto* tgt_bbox_t = context.Output<Tensor>("TargetBBox");
auto* tgt_lbl_t = context.Output<Tensor>("TargetLabel");
// Reservoir Sampling
int fg_num = static_cast<int>(rpn_fg_fraction * rpn_batch_size_per_im);
ReservoirSampling(fg_num, &fg_inds_fake, engine, use_random);
fg_num = static_cast<int>(fg_inds_fake.size());
for (int64_t i = 0; i < fg_num; ++i) {
target_label[fg_inds_fake[i]] = 1;
}
auto lod = dist_t->lod().back();
int64_t batch_num = static_cast<int64_t>(lod.size() - 1);
int64_t anchor_num = dist_t->dims()[1];
PADDLE_ENFORCE_EQ(anchor_num, anchor_t->dims()[0]);
int bg_num = rpn_batch_size_per_im - fg_num;
for (int64_t i = 0; i < anchor_num; ++i) {
if (anchor_to_gt_max_data[i] < rpn_negative_overlap) {
bg_inds_fake.push_back(i);
}
}
ReservoirSampling(bg_num, &bg_inds_fake, engine, use_random);
bg_num = static_cast<int>(bg_inds_fake.size());
for (int64_t i = 0; i < bg_num; ++i) {
target_label[bg_inds_fake[i]] = 0;
}
int rpn_batch_size = context.Attr<int>("rpn_batch_size_per_im");
float pos_threshold = context.Attr<float>("rpn_positive_overlap");
float neg_threshold = context.Attr<float>("rpn_negative_overlap");
float fg_fraction = context.Attr<float>("fg_fraction");
for (int64_t i = 0; i < anchor_num; ++i) {
if (target_label[i] == 1) fg_inds->emplace_back(i);
if (target_label[i] == 0) bg_inds->emplace_back(i);
}
fg_num = fg_inds->size();
bg_num = bg_inds->size();
tgt_lbl->resize(fg_num + bg_num, 0);
std::vector<int> fg_lbl(fg_num, 1);
std::vector<int> bg_lbl(bg_num, 0);
std::copy(fg_lbl.begin(), fg_lbl.end(), tgt_lbl->data());
std::copy(bg_lbl.begin(), bg_lbl.end(), tgt_lbl->data() + fg_num);
}
template <typename T>
std::vector<Tensor> SampleRpnFgBgGt(const platform::CPUDeviceContext& ctx,
const Tensor& anchor_by_gt_overlap,
const int rpn_batch_size_per_im,
const float rpn_positive_overlap,
const float rpn_negative_overlap,
const float rpn_fg_fraction,
std::minstd_rand engine, bool use_random) {
auto* anchor_by_gt_overlap_data = anchor_by_gt_overlap.data<T>();
int anchor_num = anchor_by_gt_overlap.dims()[0];
int gt_num = anchor_by_gt_overlap.dims()[1];
std::vector<int> fg_inds;
std::vector<int> bg_inds;
std::vector<int> gt_inds;
std::vector<int> tgt_lbl;
// Calculate the max IoU between anchors and gt boxes
// Map from anchor to gt box that has highest overlap
auto place = ctx.GetPlace();
Tensor anchor_to_gt_max, anchor_to_gt_argmax, gt_to_anchor_max;
anchor_to_gt_max.mutable_data<T>({anchor_num}, place);
int* argmax = anchor_to_gt_argmax.mutable_data<int>({anchor_num}, place);
gt_to_anchor_max.mutable_data<T>({gt_num}, place);
auto anchor_by_gt_overlap_et =
framework::EigenMatrix<T>::From(anchor_by_gt_overlap);
auto anchor_to_gt_max_et =
framework::EigenVector<T>::Flatten(anchor_to_gt_max);
auto gt_to_anchor_max_et =
framework::EigenVector<T>::Flatten(gt_to_anchor_max);
auto anchor_to_gt_argmax_et =
framework::EigenVector<int>::Flatten(anchor_to_gt_argmax);
anchor_to_gt_max_et =
anchor_by_gt_overlap_et.maximum(Eigen::DSizes<int, 1>(1));
anchor_to_gt_argmax_et =
anchor_by_gt_overlap_et.argmax(1).template cast<int>();
gt_to_anchor_max_et =
anchor_by_gt_overlap_et.maximum(Eigen::DSizes<int, 1>(0));
// Follow the Faster RCNN's implementation
ScoreAssign(anchor_by_gt_overlap_data, anchor_to_gt_max, gt_to_anchor_max,
rpn_batch_size_per_im, rpn_fg_fraction, rpn_positive_overlap,
rpn_negative_overlap, &fg_inds, &bg_inds, &tgt_lbl, engine,
use_random);
int fg_num = fg_inds.size();
int bg_num = bg_inds.size();
gt_inds.reserve(fg_num);
for (int i = 0; i < fg_num; ++i) {
gt_inds.emplace_back(argmax[fg_inds[i]]);
}
int fg_num_per_batch = static_cast<int>(rpn_batch_size * fg_fraction);
Tensor loc_index_t, score_index_t, tgt_lbl_t, gt_inds_t;
int* loc_index_data = loc_index_t.mutable_data<int>({fg_num}, place);
int* score_index_data =
score_index_t.mutable_data<int>({fg_num + bg_num}, place);
int* tgt_lbl_data = tgt_lbl_t.mutable_data<int>({fg_num + bg_num}, place);
int* gt_inds_data = gt_inds_t.mutable_data<int>({fg_num}, place);
std::copy(fg_inds.begin(), fg_inds.end(), loc_index_data);
std::copy(fg_inds.begin(), fg_inds.end(), score_index_data);
std::copy(bg_inds.begin(), bg_inds.end(), score_index_data + fg_num);
std::copy(tgt_lbl.begin(), tgt_lbl.end(), tgt_lbl_data);
std::copy(gt_inds.begin(), gt_inds.end(), gt_inds_data);
std::vector<Tensor> loc_score_tgtlbl_gt;
loc_score_tgtlbl_gt.emplace_back(loc_index_t);
loc_score_tgtlbl_gt.emplace_back(score_index_t);
loc_score_tgtlbl_gt.emplace_back(tgt_lbl_t);
loc_score_tgtlbl_gt.emplace_back(gt_inds_t);
return loc_score_tgtlbl_gt;
}
int64_t max_num = batch_num * anchor_num;
template <typename T>
class RpnTargetAssignKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* anchor = context.Input<Tensor>("Anchor"); // (H*W*A) * 4
auto* gt_boxes = context.Input<LoDTensor>("GtBoxes");
auto* is_crowd = context.Input<LoDTensor>("IsCrowd");
auto* im_info = context.Input<LoDTensor>("ImInfo");
auto* loc_index = context.Output<LoDTensor>("LocationIndex");
auto* score_index = context.Output<LoDTensor>("ScoreIndex");
auto* tgt_bbox = context.Output<LoDTensor>("TargetBBox");
auto* tgt_lbl = context.Output<LoDTensor>("TargetLabel");
PADDLE_ENFORCE_EQ(gt_boxes->lod().size(), 1UL,
"RpnTargetAssignOp gt_boxes needs 1 level of LoD");
PADDLE_ENFORCE_EQ(is_crowd->lod().size(), 1UL,
"RpnTargetAssignOp is_crowd needs 1 level of LoD");
int64_t anchor_num = static_cast<int64_t>(anchor->dims()[0]);
int64_t batch_num = static_cast<int64_t>(gt_boxes->lod().back().size() - 1);
int rpn_batch_size_per_im = context.Attr<int>("rpn_batch_size_per_im");
float rpn_straddle_thresh = context.Attr<float>("rpn_straddle_thresh");
float rpn_positive_overlap = context.Attr<float>("rpn_positive_overlap");
float rpn_negative_overlap = context.Attr<float>("rpn_negative_overlap");
float rpn_fg_fraction = context.Attr<float>("rpn_fg_fraction");
bool use_random = context.Attr<bool>("use_random");
int64_t max_num = batch_num * rpn_batch_size_per_im;
auto place = context.GetPlace();
tgt_bbox_t->mutable_data<T>({max_num, 4}, place);
auto* loc_index = loc_index_t->mutable_data<int>({max_num}, place);
auto* score_index = score_index_t->mutable_data<int>({max_num}, place);
loc_index->mutable_data<int>({max_num}, place);
score_index->mutable_data<int>({max_num}, place);
tgt_bbox->mutable_data<T>({max_num, 4}, place);
tgt_lbl->mutable_data<int>({max_num, 1}, place);
Tensor tmp_tgt_lbl;
auto* tmp_lbl_data = tmp_tgt_lbl.mutable_data<int64_t>({max_num}, place);
auto& dev_ctx = context.device_context<platform::CPUDeviceContext>();
math::SetConstant<platform::CPUDeviceContext, int64_t> iset;
iset(dev_ctx, &tmp_tgt_lbl, static_cast<int64_t>(-1));
std::random_device rnd;
std::minstd_rand engine;
int seed =
context.Attr<bool>("fix_seed") ? context.Attr<int>("seed") : rnd();
int seed = rnd();
engine.seed(seed);
int fg_num = 0;
int bg_num = 0;
framework::LoD lod_loc, loc_score;
std::vector<size_t> lod0_loc(1, 0);
std::vector<size_t> lod0_score(1, 0);
int total_loc_num = 0;
int total_score_num = 0;
auto gt_boxes_lod = gt_boxes->lod().back();
auto is_crowd_lod = is_crowd->lod().back();
for (int i = 0; i < batch_num; ++i) {
Tensor dist = dist_t->Slice(lod[i], lod[i + 1]);
Tensor gt_bbox = gt_bbox_t->Slice(lod[i], lod[i + 1]);
auto fg_bg_gt = SampleFgBgGt(dev_ctx, dist, pos_threshold, neg_threshold,
rpn_batch_size, fg_num_per_batch, engine,
tmp_lbl_data + i * anchor_num);
int cur_fg_num = fg_bg_gt[0].size();
int cur_bg_num = fg_bg_gt[1].size();
std::transform(fg_bg_gt[0].begin(), fg_bg_gt[0].end(), loc_index,
[i, anchor_num](int d) { return d + i * anchor_num; });
memcpy(score_index, loc_index, cur_fg_num * sizeof(int));
std::transform(fg_bg_gt[1].begin(), fg_bg_gt[1].end(),
score_index + cur_fg_num,
[i, anchor_num](int d) { return d + i * anchor_num; });
Tensor gt_boxes_slice =
gt_boxes->Slice(gt_boxes_lod[i], gt_boxes_lod[i + 1]);
Tensor is_crowd_slice =
is_crowd->Slice(is_crowd_lod[i], is_crowd_lod[i + 1]);
Tensor im_info_slice = im_info->Slice(i, i + 1);
auto* im_info_data = im_info_slice.data<T>();
auto im_height = im_info_data[0];
auto im_width = im_info_data[1];
auto im_scale = im_info_data[2];
// Filter straddle anchor
std::vector<Tensor> filter_output = FilterStraddleAnchor<T>(
dev_ctx, anchor, rpn_straddle_thresh, im_height, im_width);
Tensor inds_inside = filter_output[0];
Tensor inside_anchor = filter_output[1];
// Filter crowd gt
Tensor ncrowd_gt_boxes =
FilterCrowdGt<T>(dev_ctx, &gt_boxes_slice, &is_crowd_slice);
auto ncrowd_gt_boxes_et =
framework::EigenTensor<T, 2>::From(ncrowd_gt_boxes);
ncrowd_gt_boxes_et = ncrowd_gt_boxes_et * im_scale;
Tensor anchor_by_gt_overlap;
anchor_by_gt_overlap.mutable_data<T>(
{inside_anchor.dims()[0], ncrowd_gt_boxes.dims()[0]}, place);
BboxOverlaps<T>(inside_anchor, ncrowd_gt_boxes, &anchor_by_gt_overlap);
auto loc_score_tgtlbl_gt = SampleRpnFgBgGt<T>(
dev_ctx, anchor_by_gt_overlap, rpn_batch_size_per_im,
rpn_positive_overlap, rpn_negative_overlap, rpn_fg_fraction, engine,
use_random);
Tensor sampled_loc_index = loc_score_tgtlbl_gt[0];
Tensor sampled_score_index = loc_score_tgtlbl_gt[1];
Tensor sampled_tgtlbl = loc_score_tgtlbl_gt[2];
Tensor sampled_gt_index = loc_score_tgtlbl_gt[3];
int loc_num = sampled_loc_index.dims()[0];
int score_num = sampled_score_index.dims()[0];
// unmap to all anchor
Tensor sampled_loc_index_unmap, sampled_score_index_unmap;
sampled_loc_index_unmap.mutable_data<int>({loc_num}, place);
sampled_score_index_unmap.mutable_data<int>({score_num}, place);
Gather<int>(inds_inside.data<int>(), 1, sampled_loc_index.data<int>(),
loc_num, sampled_loc_index_unmap.data<int>());
Gather<int>(inds_inside.data<int>(), 1, sampled_score_index.data<int>(),
score_num, sampled_score_index_unmap.data<int>());
// get target bbox deltas
if (cur_fg_num) {
Tensor fg_gt;
T* gt_data = fg_gt.mutable_data<T>({cur_fg_num, 4}, place);
Tensor tgt_bbox = tgt_bbox_t->Slice(fg_num, fg_num + cur_fg_num);
T* tgt_data = tgt_bbox.data<T>();
Gather<T>(anchor_t->data<T>(), 4,
reinterpret_cast<int*>(&fg_bg_gt[0][0]), cur_fg_num,
tgt_data);
Gather<T>(gt_bbox.data<T>(), 4, reinterpret_cast<int*>(&fg_bg_gt[2][0]),
cur_fg_num, gt_data);
BoxToDelta<T>(cur_fg_num, tgt_bbox, fg_gt, nullptr, false, &tgt_bbox);
}
loc_index += cur_fg_num;
score_index += cur_fg_num + cur_bg_num;
fg_num += cur_fg_num;
bg_num += cur_bg_num;
}
int lbl_num = fg_num + bg_num;
PADDLE_ENFORCE_LE(fg_num, max_num);
PADDLE_ENFORCE_LE(lbl_num, max_num);
tgt_bbox_t->Resize({fg_num, 4});
loc_index_t->Resize({fg_num});
score_index_t->Resize({lbl_num});
auto* lbl_data = tgt_lbl_t->mutable_data<int64_t>({lbl_num, 1}, place);
Gather<int64_t>(tmp_lbl_data, 1, score_index_t->data<int>(), lbl_num,
lbl_data);
}
private:
void ScoreAssign(const T* dist_data, const Tensor& anchor_to_gt_max,
const int row, const int col, const float pos_threshold,
const float neg_threshold, int64_t* target_label,
std::vector<int>* fg_inds, std::vector<int>* bg_inds) const {
float epsilon = 0.0001;
for (int64_t i = 0; i < row; ++i) {
const T* v = dist_data + i * col;
T max = *std::max_element(v, v + col);
for (int64_t j = 0; j < col; ++j) {
if (std::abs(max - v[j]) < epsilon) {
target_label[j] = 1;
}
}
}
// Pick the fg/bg
const T* anchor_to_gt_max_data = anchor_to_gt_max.data<T>();
for (int64_t j = 0; j < col; ++j) {
if (anchor_to_gt_max_data[j] >= pos_threshold) {
target_label[j] = 1;
} else if (anchor_to_gt_max_data[j] < neg_threshold) {
target_label[j] = 0;
}
if (target_label[j] == 1) {
fg_inds->push_back(j);
} else if (target_label[j] == 0) {
bg_inds->push_back(j);
}
Tensor sampled_anchor, sampled_gt, sampled_tgt_bbox;
auto* sampled_anchor_data =
sampled_anchor.mutable_data<T>({loc_num, 4}, place);
auto* sampled_gt_data = sampled_gt.mutable_data<T>({loc_num, 4}, place);
Gather<T>(anchor->data<T>(), 4, sampled_loc_index_unmap.data<int>(),
loc_num, sampled_anchor_data);
Gather<T>(ncrowd_gt_boxes.data<T>(), 4, sampled_gt_index.data<int>(),
loc_num, sampled_gt_data);
sampled_tgt_bbox.mutable_data<T>({loc_num, 4}, place);
BoxToDelta<T>(loc_num, sampled_anchor, sampled_gt, nullptr, false,
&sampled_tgt_bbox);
// Add anchor offset
int anchor_offset = i * anchor_num;
auto sampled_loc_index_unmap_et =
framework::EigenTensor<int, 1>::From(sampled_loc_index_unmap);
sampled_loc_index_unmap_et = sampled_loc_index_unmap_et + anchor_offset;
auto sampled_score_index_unmap_et =
framework::EigenTensor<int, 1>::From(sampled_score_index_unmap);
sampled_score_index_unmap_et =
sampled_score_index_unmap_et + anchor_offset;
AppendRpns<int>(loc_index, total_loc_num, &sampled_loc_index_unmap);
AppendRpns<int>(score_index, total_score_num, &sampled_score_index_unmap);
AppendRpns<T>(tgt_bbox, total_loc_num * 4, &sampled_tgt_bbox);
AppendRpns<int>(tgt_lbl, total_score_num, &sampled_tgtlbl);
total_loc_num += loc_num;
total_score_num += score_num;
lod0_loc.emplace_back(total_loc_num);
lod0_score.emplace_back(total_score_num);
}
}
void ReservoirSampling(const int num, std::minstd_rand engine,
std::vector<int>* inds) const {
std::uniform_real_distribution<float> uniform(0, 1);
size_t len = inds->size();
if (len > static_cast<size_t>(num)) {
for (size_t i = num; i < len; ++i) {
int rng_ind = std::floor(uniform(engine) * i);
if (rng_ind < num)
std::iter_swap(inds->begin() + rng_ind, inds->begin() + i);
}
inds->resize(num);
}
}
// std::vector<std::vector<int>> RpnTargetAssign(
std::vector<std::vector<int>> SampleFgBgGt(
const platform::CPUDeviceContext& ctx, const Tensor& dist,
const float pos_threshold, const float neg_threshold,
const int rpn_batch_size, const int fg_num, std::minstd_rand engine,
int64_t* target_label) const {
auto* dist_data = dist.data<T>();
int row = dist.dims()[0];
int col = dist.dims()[1];
std::vector<int> fg_inds;
std::vector<int> bg_inds;
std::vector<int> gt_inds;
// Calculate the max IoU between anchors and gt boxes
// Map from anchor to gt box that has highest overlap
auto place = ctx.GetPlace();
Tensor anchor_to_gt_max, anchor_to_gt_argmax;
anchor_to_gt_max.mutable_data<T>({col}, place);
int* argmax = anchor_to_gt_argmax.mutable_data<int>({col}, place);
auto x = framework::EigenMatrix<T>::From(dist);
auto x_col_max = framework::EigenVector<T>::Flatten(anchor_to_gt_max);
auto x_col_argmax =
framework::EigenVector<int>::Flatten(anchor_to_gt_argmax);
x_col_max = x.maximum(Eigen::DSizes<int, 1>(0));
x_col_argmax = x.argmax(0).template cast<int>();
// Follow the Faster RCNN's implementation
ScoreAssign(dist_data, anchor_to_gt_max, row, col, pos_threshold,
neg_threshold, target_label, &fg_inds, &bg_inds);
// Reservoir Sampling
ReservoirSampling(fg_num, engine, &fg_inds);
int fg_num2 = static_cast<int>(fg_inds.size());
int bg_num = rpn_batch_size - fg_num2;
ReservoirSampling(bg_num, engine, &bg_inds);
gt_inds.reserve(fg_num2);
for (int i = 0; i < fg_num2; ++i) {
gt_inds.emplace_back(argmax[fg_inds[i]]);
}
std::vector<std::vector<int>> fg_bg_gt;
fg_bg_gt.emplace_back(fg_inds);
fg_bg_gt.emplace_back(bg_inds);
fg_bg_gt.emplace_back(gt_inds);
return fg_bg_gt;
PADDLE_ENFORCE_LE(total_loc_num, max_num);
PADDLE_ENFORCE_LE(total_score_num, max_num);
lod_loc.emplace_back(lod0_loc);
loc_score.emplace_back(lod0_score);
loc_index->set_lod(lod_loc);
score_index->set_lod(loc_score);
tgt_bbox->set_lod(lod_loc);
tgt_lbl->set_lod(loc_score);
loc_index->Resize({total_loc_num});
score_index->Resize({total_score_num});
tgt_bbox->Resize({total_loc_num, 4});
tgt_lbl->Resize({total_score_num, 1});
}
};
......@@ -259,18 +460,22 @@ class RpnTargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
void Make() override {
AddInput("Anchor",
"(Tensor) input anchor is a 2-D Tensor with shape [H*W*A, 4].");
AddInput("GtBox", "(LoDTensor) input groud-truth bbox with shape [K, 4].");
AddInput(
"DistMat",
"(LoDTensor or Tensor) this input is a 2-D LoDTensor with shape "
"[K, M]. It is pair-wise distance matrix between the entities "
"represented by each row and each column. For example, assumed one "
"entity is A with shape [K], another entity is B with shape [M]. The "
"DistMat[i][j] is the distance between A[i] and B[j]. The bigger "
"the distance is, the better macthing the pairs are. Please note, "
"This tensor can contain LoD information to represent a batch of "
"inputs. One instance of this batch can contain different numbers of "
"entities.");
AddInput("GtBoxes",
"(LoDTensor) input groud-truth bbox with shape [K, 4].");
AddInput("IsCrowd",
"(LoDTensor) input which indicates groud-truth is crowd.");
AddInput("ImInfo",
"(LoDTensor) input image information with shape [N, 3]. "
"N is the batch size, each image information includes height, "
"width and scale.");
AddAttr<int>("rpn_batch_size_per_im",
"Total number of RPN examples per image.")
.SetDefault(256);
AddAttr<float>(
"rpn_straddle_thresh",
"Remove RPN anchors that go outside the image by straddle_thresh "
"pixels, "
"Set to -1 or a large value, e.g. 100000, to disable pruning anchors.");
AddAttr<float>(
"rpn_positive_overlap",
"Minimum overlap required between an anchor and ground-truth "
......@@ -282,20 +487,15 @@ class RpnTargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
"box for the (anchor, gt box) pair to be a negative examples.")
.SetDefault(0.3);
AddAttr<float>(
"fg_fraction",
"rpn_fg_fraction",
"Target fraction of RoI minibatch that "
"is labeled foreground (i.e. class > 0), 0-th class is background.")
.SetDefault(0.25);
AddAttr<int>("rpn_batch_size_per_im",
"Total number of RPN examples per image.")
.SetDefault(256);
AddAttr<bool>("fix_seed",
"A flag indicating whether to use a fixed seed to generate "
"random mask. NOTE: DO NOT set this flag to true in "
"training. Setting this flag to true is only useful in "
"unittest.")
.SetDefault(false);
AddAttr<int>("seed", "RpnTargetAssign random seed.").SetDefault(0);
AddAttr<bool>("use_random",
"A flag indicating whether to use a ReservoirSampling. "
"NOTE: DO NOT set this flag to false in training. "
"Setting this flag to false is only useful in unittest.")
.SetDefault(true);
AddOutput(
"LocationIndex",
"(Tensor), The indexes of foreground anchors in all RPN anchors, the "
......@@ -308,16 +508,16 @@ class RpnTargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
"ScoreIndex is [F + B], F and B are sampled foreground and backgroud "
" number.");
AddOutput("TargetBBox",
"(Tensor<int64_t>), The target bbox deltas with shape "
"(Tensor), The target bbox deltas with shape "
"[F, 4], F is the sampled foreground number.");
AddOutput(
"TargetLabel",
"(Tensor<int64_t>), The target labels of each anchor with shape "
"(Tensor<int>), The target labels of each anchor with shape "
"[F + B, 1], F and B are sampled foreground and backgroud number.");
AddComment(R"DOC(
This operator can be, for given the IoU between the ground truth bboxes and the
This operator can be, for a given set of ground truth bboxes and the
anchors, to assign classification and regression targets to each prediction.
The Score index and LocationIndex will be generated according to the DistMat.
The ScoreIndex and LocationIndex will be generated according to the anchor-groundtruth IOU.
The rest anchors would not contibute to the RPN training loss
ScoreIndex is composed of foreground anchor indexes(positive labels) and
......
......@@ -55,15 +55,19 @@ for _OP in set(__auto__):
globals()[_OP] = generate_layer_fn(_OP)
def rpn_target_assign(loc,
scores,
def rpn_target_assign(bbox_pred,
cls_logits,
anchor_box,
anchor_var,
gt_box,
gt_boxes,
is_crowd,
im_info,
rpn_batch_size_per_im=256,
fg_fraction=0.25,
rpn_straddle_thresh=0.0,
rpn_fg_fraction=0.5,
rpn_positive_overlap=0.7,
rpn_negative_overlap=0.3):
rpn_negative_overlap=0.3,
use_random=True):
"""
** Target Assign Layer for region proposal network (RPN) in Faster-RCNN detection. **
......@@ -83,14 +87,13 @@ def rpn_target_assign(loc,
the positive anchors.
Args:
loc(Variable): A 3-D Tensor with shape [N, M, 4] represents the
bbox_pred(Variable): A 3-D Tensor with shape [N, M, 4] represents the
predicted locations of M bounding bboxes. N is the batch size,
and each bounding box has four coordinate values and the layout
is [xmin, ymin, xmax, ymax].
scores(Variable): A 3-D Tensor with shape [N, M, C] represents the
predicted confidence predictions. N is the batch size, C is the
class number, M is number of bounding boxes. For each category
there are total M scores which corresponding M bounding boxes.
cls_logits(Variable): A 3-D Tensor with shape [N, M, 1] represents the
predicted confidence predictions. N is the batch size, 1 is the
frontground and background sigmoid, M is number of bounding boxes.
anchor_box(Variable): A 2-D Tensor with shape [M, 4] holds M boxes,
each box is represented as [xmin, ymin, xmax, ymax],
[xmin, ymin] is the left top coordinate of the anchor box,
......@@ -99,11 +102,16 @@ def rpn_target_assign(loc,
coordinate of the anchor box.
anchor_var(Variable): A 2-D Tensor with shape [M,4] holds expanded
variances of anchors.
gt_box (Variable): The ground-truth boudding boxes (bboxes) are a 2D
gt_boxes (Variable): The ground-truth boudding boxes (bboxes) are a 2D
LoDTensor with shape [Ng, 4], Ng is the total number of ground-truth
bboxes of mini-batch input.
is_crowd (Variable): A 1-D LoDTensor which indicates groud-truth is crowd.
im_info (Variable): A 2-D LoDTensor with shape [N, 3]. N is the batch size,
3 is the height, width and scale.
rpn_batch_size_per_im(int): Total number of RPN examples per image.
fg_fraction(float): Target fraction of RoI minibatch that is labeled
rpn_straddle_thresh(float): Remove RPN anchors that go outside the image
by straddle_thresh pixels.
rpn_fg_fraction(float): Target fraction of RoI minibatch that is labeled
foreground (i.e. class > 0), 0-th class is background.
rpn_positive_overlap(float): Minimum overlap required between an anchor
and ground-truth box for the (anchor, gt box) pair to be a positive
......@@ -129,45 +137,48 @@ def rpn_target_assign(loc,
Examples:
.. code-block:: python
loc = layers.data(name='location', shape=[2, 80],
bbox_pred = layers.data(name='bbox_pred', shape=[100, 4],
append_batch_size=False, dtype='float32')
scores = layers.data(name='scores', shape=[2, 40],
cls_logits = layers.data(name='cls_logits', shape=[100, 1],
append_batch_size=False, dtype='float32')
anchor_box = layers.data(name='anchor_box', shape=[20, 4],
append_batch_size=False, dtype='float32')
gt_box = layers.data(name='gt_box', shape=[10, 4],
gt_boxes = layers.data(name='gt_boxes', shape=[10, 4],
append_batch_size=False, dtype='float32')
loc_pred, score_pred, loc_target, score_target =
fluid.layers.detection_output(loc=location,
scores=scores,
fluid.layers.rpn_target_assign(bbox_pred=bbox_pred,
cls_logits=cls_logits,
anchor_box=anchor_box,
gt_box=gt_box)
gt_boxes=gt_boxes)
"""
helper = LayerHelper('rpn_target_assign', **locals())
# Compute overlaps between the prior boxes and the gt boxes overlaps
iou = iou_similarity(x=gt_box, y=anchor_box)
# Assign target label to anchors
loc_index = helper.create_tmp_variable(dtype='int32')
score_index = helper.create_tmp_variable(dtype='int32')
target_label = helper.create_tmp_variable(dtype='int64')
target_label = helper.create_tmp_variable(dtype='int32')
target_bbox = helper.create_tmp_variable(dtype=anchor_box.dtype)
helper.append_op(
type="rpn_target_assign",
inputs={'Anchor': anchor_box,
'GtBox': gt_box,
'DistMat': iou},
inputs={
'Anchor': anchor_box,
'GtBoxes': gt_boxes,
'IsCrowd': is_crowd,
'ImInfo': im_info
},
outputs={
'LocationIndex': loc_index,
'ScoreIndex': score_index,
'TargetLabel': target_label,
'TargetBBox': target_bbox,
'TargetBBox': target_bbox
},
attrs={
'rpn_batch_size_per_im': rpn_batch_size_per_im,
'rpn_straddle_thresh': rpn_straddle_thresh,
'rpn_positive_overlap': rpn_positive_overlap,
'rpn_negative_overlap': rpn_negative_overlap,
'fg_fraction': fg_fraction
'rpn_fg_fraction': rpn_fg_fraction,
'use_random': use_random
})
loc_index.stop_gradient = True
......@@ -175,12 +186,12 @@ def rpn_target_assign(loc,
target_label.stop_gradient = True
target_bbox.stop_gradient = True
scores = nn.reshape(x=scores, shape=(-1, 1))
loc = nn.reshape(x=loc, shape=(-1, 4))
predicted_scores = nn.gather(scores, score_index)
predicted_location = nn.gather(loc, loc_index)
cls_logits = nn.reshape(x=cls_logits, shape=(-1, 1))
bbox_pred = nn.reshape(x=bbox_pred, shape=(-1, 4))
predicted_cls_logits = nn.gather(cls_logits, score_index)
predicted_bbox_pred = nn.gather(bbox_pred, loc_index)
return predicted_scores, predicted_location, target_label, target_bbox
return predicted_cls_logits, predicted_bbox_pred, target_label, target_bbox
def detection_output(loc,
......@@ -1258,15 +1269,17 @@ def anchor_generator(input,
def generate_proposal_labels(rpn_rois,
gt_classes,
is_crowd,
gt_boxes,
im_scales,
im_info,
batch_size_per_im=256,
fg_fraction=0.25,
fg_thresh=0.25,
bg_thresh_hi=0.5,
bg_thresh_lo=0.0,
bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
class_nums=None):
class_nums=None,
use_random=True):
"""
** Generate proposal labels Faster-RCNN **
TODO(buxingyuan): Add Document
......@@ -1285,8 +1298,9 @@ def generate_proposal_labels(rpn_rois,
inputs={
'RpnRois': rpn_rois,
'GtClasses': gt_classes,
'IsCrowd': is_crowd,
'GtBoxes': gt_boxes,
'ImScales': im_scales
'ImInfo': im_info
},
outputs={
'Rois': rois,
......@@ -1302,7 +1316,8 @@ def generate_proposal_labels(rpn_rois,
'bg_thresh_hi': bg_thresh_hi,
'bg_thresh_lo': bg_thresh_lo,
'bbox_reg_weights': bbox_reg_weights,
'class_nums': class_nums
'class_nums': class_nums,
'use_random': use_random
})
rois.stop_gradient = True
......
......@@ -148,51 +148,60 @@ class TestAnchorGenerator(unittest.TestCase):
class TestGenerateProposalLabels(unittest.TestCase):
def test_generate_proposal_labels(self):
rpn_rois = layers.data(
name='rpn_rois',
shape=[4, 4],
dtype='float32',
lod_level=1,
append_batch_size=False)
gt_classes = layers.data(
name='gt_classes',
shape=[6],
dtype='int32',
lod_level=1,
append_batch_size=False)
gt_boxes = layers.data(
name='gt_boxes',
shape=[6, 4],
dtype='float32',
lod_level=1,
append_batch_size=False)
im_scales = layers.data(
name='im_scales',
shape=[1],
dtype='float32',
lod_level=1,
append_batch_size=False)
class_nums = 5
rois, labels_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights = fluid.layers.generate_proposal_labels(
rpn_rois=rpn_rois,
gt_classes=gt_classes,
gt_boxes=gt_boxes,
im_scales=im_scales,
batch_size_per_im=2,
fg_fraction=0.5,
fg_thresh=0.5,
bg_thresh_hi=0.5,
bg_thresh_lo=0.0,
bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
class_nums=class_nums)
assert rois.shape[1] == 4
assert rois.shape[0] == labels_int32.shape[0]
assert rois.shape[0] == bbox_targets.shape[0]
assert rois.shape[0] == bbox_inside_weights.shape[0]
assert rois.shape[0] == bbox_outside_weights.shape[0]
assert bbox_targets.shape[1] == 4 * class_nums
assert bbox_inside_weights.shape[1] == 4 * class_nums
assert bbox_outside_weights.shape[1] == 4 * class_nums
program = Program()
with program_guard(program):
rpn_rois = layers.data(
name='rpn_rois',
shape=[4, 4],
dtype='float32',
lod_level=1,
append_batch_size=False)
gt_classes = layers.data(
name='gt_classes',
shape=[6],
dtype='int32',
lod_level=1,
append_batch_size=False)
is_crowd = layers.data(
name='is_crowd',
shape=[6],
dtype='int32',
lod_level=1,
append_batch_size=False)
gt_boxes = layers.data(
name='gt_boxes',
shape=[6, 4],
dtype='float32',
lod_level=1,
append_batch_size=False)
im_info = layers.data(
name='im_info',
shape=[1, 3],
dtype='float32',
lod_level=1,
append_batch_size=False)
class_nums = 5
rois, labels_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights = fluid.layers.generate_proposal_labels(
rpn_rois=rpn_rois,
gt_classes=gt_classes,
is_crowd=is_crowd,
gt_boxes=gt_boxes,
im_info=im_info,
batch_size_per_im=2,
fg_fraction=0.5,
fg_thresh=0.5,
bg_thresh_hi=0.5,
bg_thresh_lo=0.0,
bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
class_nums=class_nums)
assert rois.shape[1] == 4
assert rois.shape[0] == labels_int32.shape[0]
assert rois.shape[0] == bbox_targets.shape[0]
assert rois.shape[0] == bbox_inside_weights.shape[0]
assert rois.shape[0] == bbox_outside_weights.shape[0]
assert bbox_targets.shape[1] == 4 * class_nums
assert bbox_inside_weights.shape[1] == 4 * class_nums
assert bbox_outside_weights.shape[1] == 4 * class_nums
class TestMultiBoxHead(unittest.TestCase):
......@@ -254,18 +263,18 @@ class TestRpnTargetAssign(unittest.TestCase):
def test_rpn_target_assign(self):
program = Program()
with program_guard(program):
loc_shape = [10, 50, 4]
score_shape = [10, 50, 2]
bbox_pred_shape = [10, 50, 4]
cls_logits_shape = [10, 50, 2]
anchor_shape = [50, 4]
loc = layers.data(
name='loc',
shape=loc_shape,
bbox_pred = layers.data(
name='bbox_pred',
shape=bbox_pred_shape,
append_batch_size=False,
dtype='float32')
scores = layers.data(
name='scores',
shape=score_shape,
cls_logits = layers.data(
name='cls_logits',
shape=cls_logits_shape,
append_batch_size=False,
dtype='float32')
anchor_box = layers.data(
......@@ -278,17 +287,31 @@ class TestRpnTargetAssign(unittest.TestCase):
shape=anchor_shape,
append_batch_size=False,
dtype='float32')
gt_box = layers.data(
name='gt_box', shape=[4], lod_level=1, dtype='float32')
gt_boxes = layers.data(
name='gt_boxes', shape=[4], lod_level=1, dtype='float32')
is_crowd = layers.data(
name='is_crowd',
shape=[10],
dtype='int32',
lod_level=1,
append_batch_size=False)
im_info = layers.data(
name='im_info',
shape=[1, 3],
dtype='float32',
lod_level=1,
append_batch_size=False)
pred_scores, pred_loc, tgt_lbl, tgt_bbox = layers.rpn_target_assign(
loc=loc,
scores=scores,
bbox_pred=bbox_pred,
cls_logits=cls_logits,
anchor_box=anchor_box,
anchor_var=anchor_var,
gt_box=gt_box,
gt_boxes=gt_boxes,
is_crowd=is_crowd,
im_info=im_info,
rpn_batch_size_per_im=256,
fg_fraction=0.25,
rpn_straddle_thresh=0.0,
rpn_fg_fraction=0.5,
rpn_positive_overlap=0.7,
rpn_negative_overlap=0.3)
......
......@@ -20,10 +20,10 @@ import paddle.fluid as fluid
from op_test import OpTest
def generate_proposal_labels_in_python(
rpn_rois, gt_classes, gt_boxes, im_scales, batch_size_per_im,
fg_fraction, fg_thresh, bg_thresh_hi, bg_thresh_lo, bbox_reg_weights,
class_nums):
def generate_proposal_labels_in_python(rpn_rois, gt_classes, is_crowd, gt_boxes,
im_info, batch_size_per_im, fg_fraction,
fg_thresh, bg_thresh_hi, bg_thresh_lo,
bbox_reg_weights, class_nums):
rois = []
labels_int32 = []
bbox_targets = []
......@@ -31,13 +31,13 @@ def generate_proposal_labels_in_python(
bbox_outside_weights = []
lod = []
assert len(rpn_rois) == len(
im_scales), 'batch size of rpn_rois and ground_truth is not matched'
im_info), 'batch size of rpn_rois and ground_truth is not matched'
for im_i in range(len(im_scales)):
for im_i in range(len(im_info)):
frcn_blobs = _sample_rois(
rpn_rois[im_i], gt_classes[im_i], gt_boxes[im_i], im_scales[im_i],
batch_size_per_im, fg_fraction, fg_thresh, bg_thresh_hi,
bg_thresh_lo, bbox_reg_weights, class_nums)
rpn_rois[im_i], gt_classes[im_i], is_crowd[im_i], gt_boxes[im_i],
im_info[im_i], batch_size_per_im, fg_fraction, fg_thresh,
bg_thresh_hi, bg_thresh_lo, bbox_reg_weights, class_nums)
lod.append(frcn_blobs['rois'].shape[0])
......@@ -50,13 +50,14 @@ def generate_proposal_labels_in_python(
return rois, labels_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights, lod
def _sample_rois(rpn_rois, gt_classes, gt_boxes, im_scale, batch_size_per_im,
fg_fraction, fg_thresh, bg_thresh_hi, bg_thresh_lo,
bbox_reg_weights, class_nums):
def _sample_rois(rpn_rois, gt_classes, is_crowd, gt_boxes, im_info,
batch_size_per_im, fg_fraction, fg_thresh, bg_thresh_hi,
bg_thresh_lo, bbox_reg_weights, class_nums):
rois_per_image = int(batch_size_per_im)
fg_rois_per_im = int(np.round(fg_fraction * rois_per_image))
# Roidb
im_scale = im_info[2]
inv_im_scale = 1. / im_scale
rpn_rois = rpn_rois * inv_im_scale
......@@ -78,6 +79,9 @@ def _sample_rois(rpn_rois, gt_classes, gt_boxes, im_scale, batch_size_per_im,
box_to_gt_ind_map[overlapped_boxes_ind] = overlaps_argmax[
overlapped_boxes_ind]
crowd_ind = np.where(is_crowd)[0]
gt_overlaps[crowd_ind] = -1
max_overlaps = gt_overlaps.max(axis=1)
max_classes = gt_overlaps.argmax(axis=1)
......@@ -85,9 +89,10 @@ def _sample_rois(rpn_rois, gt_classes, gt_boxes, im_scale, batch_size_per_im,
fg_inds = np.where(max_overlaps >= fg_thresh)[0]
fg_rois_per_this_image = np.minimum(fg_rois_per_im, fg_inds.shape[0])
# Sample foreground if there are too many
if fg_inds.shape[0] > fg_rois_per_this_image:
fg_inds = np.random.choice(
fg_inds, size=fg_rois_per_this_image, replace=False)
# if fg_inds.shape[0] > fg_rois_per_this_image:
# fg_inds = np.random.choice(
# fg_inds, size=fg_rois_per_this_image, replace=False)
fg_inds = fg_inds[:fg_rois_per_this_image]
# Background
bg_inds = np.where((max_overlaps < bg_thresh_hi) & (max_overlaps >=
......@@ -96,9 +101,10 @@ def _sample_rois(rpn_rois, gt_classes, gt_boxes, im_scale, batch_size_per_im,
bg_rois_per_this_image = np.minimum(bg_rois_per_this_image,
bg_inds.shape[0])
# Sample background if there are too many
if bg_inds.shape[0] > bg_rois_per_this_image:
bg_inds = np.random.choice(
bg_inds, size=bg_rois_per_this_image, replace=False)
# if bg_inds.shape[0] > bg_rois_per_this_image:
# bg_inds = np.random.choice(
# bg_inds, size=bg_rois_per_this_image, replace=False)
bg_inds = bg_inds[:bg_rois_per_this_image]
keep_inds = np.append(fg_inds, bg_inds)
sampled_labels = max_classes[keep_inds]
......@@ -208,8 +214,9 @@ class TestGenerateProposalLabelsOp(OpTest):
self.inputs = {
'RpnRois': (self.rpn_rois[0], self.rpn_rois_lod),
'GtClasses': (self.gt_classes[0], self.gts_lod),
'IsCrowd': (self.is_crowd[0], self.gts_lod),
'GtBoxes': (self.gt_boxes[0], self.gts_lod),
'ImScales': self.im_scales[0]
'ImInfo': self.im_info
}
self.attrs = {
'batch_size_per_im': self.batch_size_per_im,
......@@ -218,14 +225,15 @@ class TestGenerateProposalLabelsOp(OpTest):
'bg_thresh_hi': self.bg_thresh_hi,
'bg_thresh_lo': self.bg_thresh_lo,
'bbox_reg_weights': self.bbox_reg_weights,
'class_nums': self.class_nums
'class_nums': self.class_nums,
'use_random': False
}
self.outputs = {
'Rois': (self.rois[0], [self.lod]),
'LabelsInt32': (self.labels_int32[0], [self.lod]),
'BboxTargets': (self.bbox_targets[0], [self.lod]),
'BboxInsideWeights': (self.bbox_inside_weights[0], [self.lod]),
'BboxOutsideWeights': (self.bbox_outside_weights[0], [self.lod]),
'Rois': (self.rois, [self.lod]),
'LabelsInt32': (self.labels_int32, [self.lod]),
'BboxTargets': (self.bbox_targets, [self.lod]),
'BboxInsideWeights': (self.bbox_inside_weights, [self.lod]),
'BboxOutsideWeights': (self.bbox_outside_weights, [self.lod]),
}
def test_check_output(self):
......@@ -236,8 +244,8 @@ class TestGenerateProposalLabelsOp(OpTest):
self.set_data()
def init_test_params(self):
self.batch_size_per_im = 10
self.fg_fraction = 1.0
self.batch_size_per_im = 512
self.fg_fraction = 0.25
self.fg_thresh = 0.5
self.bg_thresh_hi = 0.5
self.bg_thresh_lo = 0.0
......@@ -246,14 +254,14 @@ class TestGenerateProposalLabelsOp(OpTest):
def init_test_input(self):
np.random.seed(0)
image_nums = 1
gt_nums = 6 # Keep same with batch_size_per_im for unittest
proposal_nums = self.batch_size_per_im - gt_nums
images_shape = []
self.im_scales = []
for i in range(image_nums):
images_shape.append(np.random.randint(200, size=2))
self.im_scales.append(np.ones((1)).astype(np.float32))
proposal_nums = 2000 #self.batch_size_per_im - gt_nums
images_shape = [[64, 64]]
self.im_info = np.ones((len(images_shape), 3)).astype(np.float32)
for i in range(len(images_shape)):
self.im_info[i, 0] = images_shape[i][0]
self.im_info[i, 1] = images_shape[i][1]
self.im_info[i, 2] = 0.8 #scale
self.rpn_rois, self.rpn_rois_lod = _generate_proposals(images_shape,
proposal_nums)
......@@ -261,16 +269,23 @@ class TestGenerateProposalLabelsOp(OpTest):
images_shape, self.class_nums, gt_nums)
self.gt_classes = [gt['gt_classes'] for gt in ground_truth]
self.gt_boxes = [gt['boxes'] for gt in ground_truth]
self.is_crowd = [gt['is_crowd'] for gt in ground_truth]
def init_test_output(self):
self.rois, self.labels_int32, self.bbox_targets, \
self.bbox_inside_weights, self.bbox_outside_weights, \
self.lod = generate_proposal_labels_in_python(
self.rpn_rois, self.gt_classes, self.gt_boxes, self.im_scales,
self.rpn_rois, self.gt_classes, self.is_crowd, self.gt_boxes, self.im_info,
self.batch_size_per_im, self.fg_fraction,
self.fg_thresh, self.bg_thresh_hi, self.bg_thresh_lo,
self.bbox_reg_weights, self.class_nums
)
self.rois = np.vstack(self.rois)
self.labels_int32 = np.hstack(self.labels_int32)
self.labels_int32 = self.labels_int32[:, np.newaxis]
self.bbox_targets = np.vstack(self.bbox_targets)
self.bbox_inside_weights = np.vstack(self.bbox_inside_weights)
self.bbox_outside_weights = np.vstack(self.bbox_outside_weights)
def _generate_proposals(images_shape, proposal_nums):
......@@ -280,7 +295,7 @@ def _generate_proposals(images_shape, proposal_nums):
for i, image_shape in enumerate(images_shape):
proposals = _generate_boxes(image_shape, proposal_nums)
rpn_rois.append(proposals)
num_proposals += len(proposals)
num_proposals = len(proposals)
rpn_rois_lod.append(num_proposals)
return rpn_rois, [rpn_rois_lod]
......@@ -294,7 +309,11 @@ def _generate_groundtruth(images_shape, class_nums, gt_nums):
gt_classes = np.random.randint(
low=1, high=class_nums, size=gt_nums).astype(np.int32)
gt_boxes = _generate_boxes(image_shape, gt_nums)
ground_truth.append(dict(gt_classes=gt_classes, boxes=gt_boxes))
is_crowd = np.zeros((gt_nums), dtype=np.int32)
is_crowd[0] = 1
ground_truth.append(
dict(
gt_classes=gt_classes, boxes=gt_boxes, is_crowd=is_crowd))
num_gts += len(gt_classes)
gts_lod.append(num_gts)
return ground_truth, [gts_lod]
......
......@@ -114,10 +114,10 @@ def box_coder(all_anchors, bbox_deltas, variances):
#anchor_loc: width, height, center_x, center_y
anchor_loc = np.zeros_like(bbox_deltas, dtype=np.float32)
anchor_loc[:, 0] = all_anchors[:, 2] - all_anchors[:, 0]
anchor_loc[:, 1] = all_anchors[:, 3] - all_anchors[:, 1]
anchor_loc[:, 2] = (all_anchors[:, 2] + all_anchors[:, 0]) / 2
anchor_loc[:, 3] = (all_anchors[:, 3] + all_anchors[:, 1]) / 2
anchor_loc[:, 0] = all_anchors[:, 2] - all_anchors[:, 0] + 1
anchor_loc[:, 1] = all_anchors[:, 3] - all_anchors[:, 1] + 1
anchor_loc[:, 2] = all_anchors[:, 0] + 0.5 * anchor_loc[:, 0]
anchor_loc[:, 3] = all_anchors[:, 1] + 0.5 * anchor_loc[:, 1]
#predicted bbox: bbox_center_x, bbox_center_y, bbox_width, bbox_height
pred_bbox = np.zeros_like(bbox_deltas, dtype=np.float32)
......@@ -127,23 +127,29 @@ def box_coder(all_anchors, bbox_deltas, variances):
i, 0] + anchor_loc[i, 2]
pred_bbox[i, 1] = variances[i, 1] * bbox_deltas[i, 1] * anchor_loc[
i, 1] + anchor_loc[i, 3]
pred_bbox[i, 2] = math.exp(variances[i, 2] *
bbox_deltas[i, 2]) * anchor_loc[i, 0]
pred_bbox[i, 3] = math.exp(variances[i, 3] *
bbox_deltas[i, 3]) * anchor_loc[i, 1]
pred_bbox[i, 2] = math.exp(
min(variances[i, 2] * bbox_deltas[i, 2], math.log(
1000 / 16.0))) * anchor_loc[i, 0]
pred_bbox[i, 3] = math.exp(
min(variances[i, 3] * bbox_deltas[i, 3], math.log(
1000 / 16.0))) * anchor_loc[i, 1]
else:
for i in range(bbox_deltas.shape[0]):
pred_bbox[i, 0] = bbox_deltas[i, 0] * anchor_loc[i, 0] + anchor_loc[
i, 2]
pred_bbox[i, 1] = bbox_deltas[i, 1] * anchor_loc[i, 1] + anchor_loc[
i, 3]
pred_bbox[i, 2] = math.exp(bbox_deltas[i, 2]) * anchor_loc[i, 0]
pred_bbox[i, 3] = math.exp(bbox_deltas[i, 3]) * anchor_loc[i, 1]
pred_bbox[i, 2] = math.exp(
min(bbox_deltas[i, 2], math.log(1000 / 16.0))) * anchor_loc[i,
0]
pred_bbox[i, 3] = math.exp(
min(bbox_deltas[i, 3], math.log(1000 / 16.0))) * anchor_loc[i,
1]
proposals[:, 0] = pred_bbox[:, 0] - pred_bbox[:, 2] / 2
proposals[:, 1] = pred_bbox[:, 1] - pred_bbox[:, 3] / 2
proposals[:, 2] = pred_bbox[:, 0] + pred_bbox[:, 2] / 2
proposals[:, 3] = pred_bbox[:, 1] + pred_bbox[:, 3] / 2
proposals[:, 2] = pred_bbox[:, 0] + pred_bbox[:, 2] / 2 - 1
proposals[:, 3] = pred_bbox[:, 1] + pred_bbox[:, 3] / 2 - 1
return proposals
......@@ -170,13 +176,16 @@ def filter_boxes(boxes, min_size, im_info):
"""Only keep boxes with both sides >= min_size and center within the image.
"""
# Scale min_size to match image scale
min_size *= im_info[2]
im_scale = im_info[2]
min_size = max(min_size, 1.0)
ws = boxes[:, 2] - boxes[:, 0] + 1
hs = boxes[:, 3] - boxes[:, 1] + 1
ws_orig_scale = (boxes[:, 2] - boxes[:, 0]) / im_scale + 1
hs_orig_scale = (boxes[:, 3] - boxes[:, 1]) / im_scale + 1
x_ctr = boxes[:, 0] + ws / 2.
y_ctr = boxes[:, 1] + hs / 2.
keep = np.where((ws >= min_size) & (hs >= min_size) & (x_ctr < im_info[1]) &
(y_ctr < im_info[0]))[0]
keep = np.where((ws_orig_scale >= min_size) & (hs_orig_scale >= min_size) &
(x_ctr < im_info[1]) & (y_ctr < im_info[0]))[0]
return keep
......@@ -204,7 +213,7 @@ def iou(box_a, box_b):
xb = min(xmax_a, xmax_b)
yb = min(ymax_a, ymax_b)
inter_area = max(xb - xa, 0.0) * max(yb - ya, 0.0)
inter_area = max(xb - xa + 1, 0.0) * max(yb - ya + 1, 0.0)
iou_ratio = inter_area / (area_a + area_b - inter_area)
......
......@@ -19,48 +19,58 @@ import numpy as np
import paddle.fluid.core as core
from op_test import OpTest
from test_anchor_generator_op import anchor_generator_in_python
from test_generate_proposal_labels import _generate_groundtruth
from test_generate_proposal_labels import _bbox_overlaps, _box_to_delta
def rpn_target_assign(gt_anchor_iou, rpn_batch_size_per_im,
rpn_positive_overlap, rpn_negative_overlap, fg_fraction):
iou = np.transpose(gt_anchor_iou)
anchor_to_gt_max = iou.max(axis=1)
anchor_to_gt_argmax = iou.argmax(axis=1)
gt_to_anchor_argmax = iou.argmax(axis=0)
gt_to_anchor_max = iou[gt_to_anchor_argmax, np.arange(iou.shape[1])]
anchors_with_max_overlap = np.where(iou == gt_to_anchor_max)[0]
tgt_lbl = np.ones((iou.shape[0], ), dtype=np.int32) * -1
tgt_lbl[anchors_with_max_overlap] = 1
tgt_lbl[anchor_to_gt_max >= rpn_positive_overlap] = 1
num_fg = int(fg_fraction * rpn_batch_size_per_im)
fg_inds = np.where(tgt_lbl == 1)[0]
if len(fg_inds) > num_fg:
from test_generate_proposal_labels_op import _generate_groundtruth
from test_generate_proposal_labels_op import _bbox_overlaps, _box_to_delta
def rpn_target_assign(anchor_by_gt_overlap,
rpn_batch_size_per_im,
rpn_positive_overlap,
rpn_negative_overlap,
rpn_fg_fraction,
use_random=True):
anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1)
anchor_to_gt_max = anchor_by_gt_overlap[np.arange(
anchor_by_gt_overlap.shape[0]), anchor_to_gt_argmax]
gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(axis=0)
gt_to_anchor_max = anchor_by_gt_overlap[gt_to_anchor_argmax, np.arange(
anchor_by_gt_overlap.shape[1])]
anchors_with_max_overlap = np.where(
anchor_by_gt_overlap == gt_to_anchor_max)[0]
labels = np.ones((anchor_by_gt_overlap.shape[0], ), dtype=np.int32) * -1
labels[anchors_with_max_overlap] = 1
labels[anchor_to_gt_max >= rpn_positive_overlap] = 1
num_fg = int(rpn_fg_fraction * rpn_batch_size_per_im)
fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg and use_random:
disable_inds = np.random.choice(
fg_inds, size=(len(fg_inds) - num_fg), replace=False)
tgt_lbl[disable_inds] = -1
fg_inds = np.where(tgt_lbl == 1)[0]
else:
disable_inds = fg_inds[num_fg:]
labels[disable_inds] = -1
fg_inds = np.where(labels == 1)[0]
num_bg = rpn_batch_size_per_im - np.sum(tgt_lbl == 1)
num_bg = rpn_batch_size_per_im - np.sum(labels == 1)
bg_inds = np.where(anchor_to_gt_max < rpn_negative_overlap)[0]
tgt_lbl[bg_inds] = 0
if len(bg_inds) > num_bg:
if len(bg_inds) > num_bg and use_random:
enable_inds = bg_inds[np.random.randint(len(bg_inds), size=num_bg)]
tgt_lbl[enable_inds] = 0
bg_inds = np.where(tgt_lbl == 0)[0]
tgt_lbl[bg_inds] = 0
else:
enable_inds = bg_inds[:num_bg]
labels[enable_inds] = 0
fg_inds = np.where(labels == 1)[0]
bg_inds = np.where(labels == 0)[0]
loc_index = fg_inds
score_index = np.hstack((fg_inds, bg_inds))
tgt_lbl = np.expand_dims(tgt_lbl, axis=1)
labels = labels[score_index]
assert not np.any(labels == -1), "Wrong labels with -1"
gt_inds = anchor_to_gt_argmax[fg_inds]
return loc_index, score_index, tgt_lbl, gt_inds
return loc_index, score_index, labels, gt_inds
def get_anchor(n, c, h, w):
......@@ -75,85 +85,129 @@ def get_anchor(n, c, h, w):
return anchors
def rpn_blob(anchor, gt_boxes, iou, lod, rpn_batch_size_per_im,
rpn_positive_overlap, rpn_negative_overlap, fg_fraction):
loc_indexes = []
score_indexes = []
tmp_tgt_labels = []
tgt_bboxes = []
anchor_num = anchor.shape[0]
def rpn_target_assign_in_python(all_anchors,
gt_boxes,
is_crowd,
im_info,
lod,
rpn_straddle_thresh,
rpn_batch_size_per_im,
rpn_positive_overlap,
rpn_negative_overlap,
rpn_fg_fraction,
use_random=True):
anchor_num = all_anchors.shape[0]
batch_size = len(lod) - 1
for i in range(batch_size):
im_height = im_info[i][0]
im_width = im_info[i][1]
im_scale = im_info[i][2]
if rpn_straddle_thresh >= 0:
# Only keep anchors inside the image by a margin of straddle_thresh
inds_inside = np.where(
(all_anchors[:, 0] >= -rpn_straddle_thresh) &
(all_anchors[:, 1] >= -rpn_straddle_thresh) & (
all_anchors[:, 2] < im_width + rpn_straddle_thresh) & (
all_anchors[:, 3] < im_height + rpn_straddle_thresh))[0]
# keep only inside anchors
inside_anchors = all_anchors[inds_inside, :]
else:
inds_inside = np.arange(all_anchors.shape[0])
inside_anchors = all_anchors
b, e = lod[i], lod[i + 1]
iou_slice = iou[b:e, :]
bboxes_slice = gt_boxes[b:e, :]
gt_boxes_slice = gt_boxes[b:e, :] * im_scale
is_crowd_slice = is_crowd[b:e]
loc_idx, score_idx, tgt_lbl, gt_inds = rpn_target_assign(
iou_slice, rpn_batch_size_per_im, rpn_positive_overlap,
rpn_negative_overlap, fg_fraction)
not_crowd_inds = np.where(is_crowd_slice == 0)[0]
gt_boxes_slice = gt_boxes_slice[not_crowd_inds]
iou = _bbox_overlaps(inside_anchors, gt_boxes_slice)
fg_bboxes = bboxes_slice[gt_inds]
fg_anchors = anchor[loc_idx]
box_deltas = _box_to_delta(fg_anchors, fg_bboxes, [1., 1., 1., 1.])
loc_inds, score_inds, labels, gt_inds = rpn_target_assign(
iou, rpn_batch_size_per_im, rpn_positive_overlap,
rpn_negative_overlap, rpn_fg_fraction, use_random)
# unmap to all anchor
loc_inds = inds_inside[loc_inds]
score_inds = inds_inside[score_inds]
sampled_gt = gt_boxes_slice[gt_inds]
sampled_anchor = all_anchors[loc_inds]
box_deltas = _box_to_delta(sampled_anchor, sampled_gt, [1., 1., 1., 1.])
if i == 0:
loc_indexes = loc_idx
score_indexes = score_idx
tmp_tgt_labels = tgt_lbl
loc_indexes = loc_inds
score_indexes = score_inds
tgt_labels = labels
tgt_bboxes = box_deltas
else:
loc_indexes = np.concatenate(
[loc_indexes, loc_idx + i * anchor_num])
[loc_indexes, loc_inds + i * anchor_num])
score_indexes = np.concatenate(
[score_indexes, score_idx + i * anchor_num])
tmp_tgt_labels = np.concatenate([tmp_tgt_labels, tgt_lbl])
[score_indexes, score_inds + i * anchor_num])
tgt_labels = np.concatenate([tgt_labels, labels])
tgt_bboxes = np.vstack([tgt_bboxes, box_deltas])
tgt_labels = tmp_tgt_labels[score_indexes]
return loc_indexes, score_indexes, tgt_bboxes, tgt_labels
class TestRpnTargetAssignOp(OpTest):
def setUp(self):
n, c, h, w = 2, 4, 14, 14
anchor = get_anchor(n, c, h, w)
all_anchors = get_anchor(n, c, h, w)
gt_num = 10
anchor = anchor.reshape(-1, 4)
anchor_num = anchor.shape[0]
im_shapes = [[64, 64], [64, 64]]
gt_box, lod = _generate_groundtruth(im_shapes, 3, 4)
bbox = np.vstack([v['boxes'] for v in gt_box])
iou = _bbox_overlaps(bbox, anchor)
anchor = anchor.astype('float32')
bbox = bbox.astype('float32')
iou = iou.astype('float32')
loc_index, score_index, tgt_bbox, tgt_lbl = rpn_blob(
anchor, bbox, iou, [0, 4, 8], 25600, 0.95, 0.03, 0.25)
all_anchors = all_anchors.reshape(-1, 4)
anchor_num = all_anchors.shape[0]
images_shape = [[64, 64], [64, 64]]
#images_shape = [[64, 64]]
groundtruth, lod = _generate_groundtruth(images_shape, 3, 4)
lod = [0, 4, 8]
#lod = [0, 4]
im_info = np.ones((len(images_shape), 3)).astype(np.float32)
for i in range(len(images_shape)):
im_info[i, 0] = images_shape[i][0]
im_info[i, 1] = images_shape[i][1]
im_info[i, 2] = 0.8 #scale
gt_boxes = np.vstack([v['boxes'] for v in groundtruth])
is_crowd = np.hstack([v['is_crowd'] for v in groundtruth])
all_anchors = all_anchors.astype('float32')
gt_boxes = gt_boxes.astype('float32')
rpn_straddle_thresh = 0.0
rpn_batch_size_per_im = 256
rpn_positive_overlap = 0.7
rpn_negative_overlap = 0.3
rpn_fg_fraction = 0.5
use_random = False
loc_index, score_index, tgt_bbox, labels = rpn_target_assign_in_python(
all_anchors, gt_boxes, is_crowd, im_info, lod, rpn_straddle_thresh,
rpn_batch_size_per_im, rpn_positive_overlap, rpn_negative_overlap,
rpn_fg_fraction, use_random)
labels = labels[:, np.newaxis]
self.op_type = "rpn_target_assign"
self.inputs = {
'Anchor': anchor,
'GtBox': (bbox, [[4, 4]]),
'DistMat': (iou, [[4, 4]]),
'Anchor': all_anchors,
'GtBoxes': (gt_boxes, [[4, 4]]),
'IsCrowd': (is_crowd, [[4, 4]]),
'ImInfo': (im_info, [[1, 1]])
}
self.attrs = {
'rpn_batch_size_per_im': 25600,
'rpn_positive_overlap': 0.95,
'rpn_negative_overlap': 0.03,
'fg_fraction': 0.25,
'fix_seed': True
'rpn_batch_size_per_im': rpn_batch_size_per_im,
'rpn_straddle_thresh': rpn_straddle_thresh,
'rpn_positive_overlap': rpn_positive_overlap,
'rpn_negative_overlap': rpn_negative_overlap,
'rpn_fg_fraction': rpn_fg_fraction,
'use_random': use_random
}
self.outputs = {
'LocationIndex': loc_index.astype('int32'),
'ScoreIndex': score_index.astype('int32'),
'TargetBBox': tgt_bbox.astype('float32'),
'TargetLabel': tgt_lbl.astype('int64'),
'TargetLabel': labels.astype('int32')
}
def test_check_output(self):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册