Commit 9e2e893f authored by Xingyuan Bu, committed by qingqing01

Enhance generate_proposal_labels_op and fix some bugs. (#13239)

* Enhance generate_proposal_labels_op
* Fix bug in generate_proposals_op
* Refine rpn_target_assign_op.
* by Bu Xingyuan, Wang Guanzhong and Dang Qingqing
Parent 23ec966c
@@ -305,9 +305,9 @@ paddle.fluid.layers.target_assign ArgSpec(args=['input', 'matched_indices', 'neg
paddle.fluid.layers.detection_output ArgSpec(args=['loc', 'scores', 'prior_box', 'prior_box_var', 'background_label', 'nms_threshold', 'nms_top_k', 'keep_top_k', 'score_threshold', 'nms_eta'], varargs=None, keywords=None, defaults=(0, 0.3, 400, 200, 0.01, 1.0))
paddle.fluid.layers.ssd_loss ArgSpec(args=['location', 'confidence', 'gt_box', 'gt_label', 'prior_box', 'prior_box_var', 'background_label', 'overlap_threshold', 'neg_pos_ratio', 'neg_overlap', 'loc_loss_weight', 'conf_loss_weight', 'match_type', 'mining_type', 'normalize', 'sample_size'], varargs=None, keywords=None, defaults=(None, 0, 0.5, 3.0, 0.5, 1.0, 1.0, 'per_prediction', 'max_negative', True, None))
paddle.fluid.layers.detection_map ArgSpec(args=['detect_res', 'label', 'class_num', 'background_label', 'overlap_threshold', 'evaluate_difficult', 'has_state', 'input_states', 'out_states', 'ap_version'], varargs=None, keywords=None, defaults=(0, 0.3, True, None, None, None, 'integral'))
-paddle.fluid.layers.rpn_target_assign ArgSpec(args=['loc', 'scores', 'anchor_box', 'anchor_var', 'gt_box', 'rpn_batch_size_per_im', 'fg_fraction', 'rpn_positive_overlap', 'rpn_negative_overlap'], varargs=None, keywords=None, defaults=(256, 0.25, 0.7, 0.3))
+paddle.fluid.layers.rpn_target_assign ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'is_crowd', 'im_info', 'rpn_batch_size_per_im', 'rpn_straddle_thresh', 'rpn_fg_fraction', 'rpn_positive_overlap', 'rpn_negative_overlap', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.0, 0.5, 0.7, 0.3, True))
paddle.fluid.layers.anchor_generator ArgSpec(args=['input', 'anchor_sizes', 'aspect_ratios', 'variance', 'stride', 'offset', 'name'], varargs=None, keywords=None, defaults=(None, None, [0.1, 0.1, 0.2, 0.2], None, 0.5, None))
-paddle.fluid.layers.generate_proposal_labels ArgSpec(args=['rpn_rois', 'gt_classes', 'gt_boxes', 'im_scales', 'batch_size_per_im', 'fg_fraction', 'fg_thresh', 'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights', 'class_nums'], varargs=None, keywords=None, defaults=(256, 0.25, 0.25, 0.5, 0.0, [0.1, 0.1, 0.2, 0.2], None))
+paddle.fluid.layers.generate_proposal_labels ArgSpec(args=['rpn_rois', 'gt_classes', 'is_crowd', 'gt_boxes', 'im_info', 'batch_size_per_im', 'fg_fraction', 'fg_thresh', 'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights', 'class_nums', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.25, 0.25, 0.5, 0.0, [0.1, 0.1, 0.2, 0.2], None, True))
paddle.fluid.layers.generate_proposals ArgSpec(args=['scores', 'bbox_deltas', 'im_info', 'anchors', 'variances', 'pre_nms_top_n', 'post_nms_top_n', 'nms_thresh', 'min_size', 'eta', 'name'], varargs=None, keywords=None, defaults=(6000, 1000, 0.5, 0.1, 1.0, None))
paddle.fluid.layers.iou_similarity ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.box_coder ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
......
@@ -9,6 +9,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
+#include <algorithm>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/tensor.h"
@@ -21,7 +22,7 @@ namespace operators {
*/
template <typename T>
inline void BoxToDelta(const int box_num, const framework::Tensor& ex_boxes,
-const framework::Tensor& gt_boxes, const T* weights,
+const framework::Tensor& gt_boxes, const float* weights,
const bool normalized, framework::Tensor* box_delta) {
auto ex_boxes_et = framework::EigenTensor<T, 2>::From(ex_boxes);
auto gt_boxes_et = framework::EigenTensor<T, 2>::From(gt_boxes);
@@ -62,5 +63,35 @@ void Gather(const T* in, const int in_stride, const int* index, const int num,
}
}
template <typename T>
void BboxOverlaps(const framework::Tensor& r_boxes,
const framework::Tensor& c_boxes,
framework::Tensor* overlaps) {
auto r_boxes_et = framework::EigenTensor<T, 2>::From(r_boxes);
auto c_boxes_et = framework::EigenTensor<T, 2>::From(c_boxes);
auto overlaps_et = framework::EigenTensor<T, 2>::From(*overlaps);
int r_num = r_boxes.dims()[0];
int c_num = c_boxes.dims()[0];
auto zero = static_cast<T>(0.0);
T r_box_area, c_box_area, x_min, y_min, x_max, y_max, inter_w, inter_h,
inter_area;
for (int i = 0; i < r_num; ++i) {
r_box_area = (r_boxes_et(i, 2) - r_boxes_et(i, 0) + 1) *
(r_boxes_et(i, 3) - r_boxes_et(i, 1) + 1);
for (int j = 0; j < c_num; ++j) {
c_box_area = (c_boxes_et(j, 2) - c_boxes_et(j, 0) + 1) *
(c_boxes_et(j, 3) - c_boxes_et(j, 1) + 1);
x_min = std::max(r_boxes_et(i, 0), c_boxes_et(j, 0));
y_min = std::max(r_boxes_et(i, 1), c_boxes_et(j, 1));
x_max = std::min(r_boxes_et(i, 2), c_boxes_et(j, 2));
y_max = std::min(r_boxes_et(i, 3), c_boxes_et(j, 3));
inter_w = std::max(x_max - x_min + 1, zero);
inter_h = std::max(y_max - y_min + 1, zero);
inter_area = inter_w * inter_h;
overlaps_et(i, j) = inter_area / (r_box_area + c_box_area - inter_area);
}
}
}
} // namespace operators
} // namespace paddle
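For reference, the `BboxOverlaps` helper added above computes a pairwise IoU matrix using the Detectron-style "+1" pixel convention. The standalone NumPy sketch below mirrors that logic; `bbox_overlaps` and its variable names are illustrative only and are not part of the Paddle sources.

```python
# Standalone NumPy sketch of the pairwise IoU computed by BboxOverlaps above.
# Boxes are [x1, y1, x2, y2]; the "+ 1" follows the integer-pixel convention
# used by the C++ helper.
import numpy as np

def bbox_overlaps(r_boxes, c_boxes):
    r_area = (r_boxes[:, 2] - r_boxes[:, 0] + 1) * (r_boxes[:, 3] - r_boxes[:, 1] + 1)
    c_area = (c_boxes[:, 2] - c_boxes[:, 0] + 1) * (c_boxes[:, 3] - c_boxes[:, 1] + 1)
    x_min = np.maximum(r_boxes[:, None, 0], c_boxes[None, :, 0])
    y_min = np.maximum(r_boxes[:, None, 1], c_boxes[None, :, 1])
    x_max = np.minimum(r_boxes[:, None, 2], c_boxes[None, :, 2])
    y_max = np.minimum(r_boxes[:, None, 3], c_boxes[None, :, 3])
    inter_w = np.maximum(x_max - x_min + 1, 0)
    inter_h = np.maximum(y_max - y_min + 1, 0)
    inter = inter_w * inter_h
    return inter / (r_area[:, None] + c_area[None, :] - inter)

if __name__ == "__main__":
    rois = np.array([[0, 0, 9, 9], [5, 5, 14, 14]], dtype=np.float32)
    gts = np.array([[0, 0, 9, 9]], dtype=np.float32)
    print(bbox_overlaps(rois, gts))  # first row 1.0, second row ~0.14
```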
@@ -42,10 +42,11 @@ class GenerateProposalLabelsOp : public framework::OperatorWithKernel {
"Input(RpnRois) shouldn't be null.");
PADDLE_ENFORCE(ctx->HasInput("GtClasses"),
"Input(GtClasses) shouldn't be null.");
+PADDLE_ENFORCE(ctx->HasInput("IsCrowd"),
+"Input(IsCrowd) shouldn't be null.");
PADDLE_ENFORCE(ctx->HasInput("GtBoxes"),
"Input(GtBoxes) shouldn't be null.");
-PADDLE_ENFORCE(ctx->HasInput("ImScales"),
-"Input(ImScales) shouldn't be null.");
+PADDLE_ENFORCE(ctx->HasInput("ImInfo"), "Input(ImInfo) shouldn't be null.");
PADDLE_ENFORCE(ctx->HasOutput("Rois"),
"Output(Rois) of RpnTargetAssignOp should not be null");
@@ -64,22 +65,21 @@ class GenerateProposalLabelsOp : public framework::OperatorWithKernel {
auto rpn_rois_dims = ctx->GetInputDim("RpnRois");
auto gt_classes_dims = ctx->GetInputDim("GtClasses");
+auto is_crowd_dims = ctx->GetInputDim("IsCrowd");
auto gt_boxes_dims = ctx->GetInputDim("GtBoxes");
-auto im_scales_dims = ctx->GetInputDim("ImScales");
+auto im_info_dims = ctx->GetInputDim("ImInfo");
PADDLE_ENFORCE_EQ(rpn_rois_dims.size(), 2,
"The rank of Input(RpnRois) must be 2.");
-PADDLE_ENFORCE_EQ(gt_classes_dims.size(), 1,
-"The rank of Input(GtClasses) must be 1.");
PADDLE_ENFORCE_EQ(gt_boxes_dims.size(), 2,
"The rank of Input(GtBoxes) must be 2.");
-PADDLE_ENFORCE_EQ(im_scales_dims.size(), 1,
-"The rank of Input(ImScales) must be 1.");
+PADDLE_ENFORCE_EQ(im_info_dims.size(), 2,
+"The rank of Input(ImInfo) must be 2.");
int class_nums = ctx->Attrs().Get<int>("class_nums");
ctx->SetOutputDim("Rois", {-1, 4});
-ctx->SetOutputDim("LabelsInt32", {-1});
+ctx->SetOutputDim("LabelsInt32", {-1, 1});
ctx->SetOutputDim("BboxTargets", {-1, 4 * class_nums});
ctx->SetOutputDim("BboxInsideWeights", {-1, 4 * class_nums});
ctx->SetOutputDim("BboxOutsideWeights", {-1, 4 * class_nums});
@@ -105,45 +105,18 @@ void Concat(const platform::CPUDeviceContext& context,
concat_functor(context, inputs, axis, out_tensor);
}
template <typename T>
void BboxOverlaps(const Tensor& r_boxes, const Tensor& c_boxes,
Tensor* overlaps) {
auto r_boxes_et = framework::EigenTensor<T, 2>::From(r_boxes);
auto c_boxes_et = framework::EigenTensor<T, 2>::From(c_boxes);
auto overlaps_et = framework::EigenTensor<T, 2>::From(*overlaps);
int r_num = r_boxes.dims()[0];
int c_num = c_boxes.dims()[0];
auto zero = static_cast<T>(0.0);
T r_box_area, c_box_area, x_min, y_min, x_max, y_max, inter_w, inter_h,
inter_area;
for (int i = 0; i < r_num; ++i) {
r_box_area = (r_boxes_et(i, 2) - r_boxes_et(i, 0) + 1) *
(r_boxes_et(i, 3) - r_boxes_et(i, 1) + 1);
for (int j = 0; j < c_num; ++j) {
c_box_area = (c_boxes_et(j, 2) - c_boxes_et(j, 0) + 1) *
(c_boxes_et(j, 3) - c_boxes_et(j, 1) + 1);
x_min = std::max(r_boxes_et(i, 0), c_boxes_et(j, 0));
y_min = std::max(r_boxes_et(i, 1), c_boxes_et(j, 1));
x_max = std::min(r_boxes_et(i, 2), c_boxes_et(j, 2));
y_max = std::min(r_boxes_et(i, 3), c_boxes_et(j, 3));
inter_w = std::max(x_max - x_min + 1, zero);
inter_h = std::max(y_max - y_min + 1, zero);
inter_area = inter_w * inter_h;
overlaps_et(i, j) = inter_area / (r_box_area + c_box_area - inter_area);
}
}
}
template <typename T>
std::vector<std::vector<int>> SampleFgBgGt(
const platform::CPUDeviceContext& context, Tensor* iou,
-const int batch_size_per_im, const float fg_fraction, const float fg_thresh,
-const float bg_thresh_hi, const float bg_thresh_lo,
-std::minstd_rand engine) {
+const Tensor& is_crowd, const int batch_size_per_im,
+const float fg_fraction, const float fg_thresh, const float bg_thresh_hi,
+const float bg_thresh_lo, std::minstd_rand engine, const bool use_random) {
std::vector<int> fg_inds;
std::vector<int> bg_inds;
std::vector<int> gt_inds;
-T* proposal_to_gt_overlaps = iou->mutable_data<T>(context.GetPlace());
+int64_t gt_num = is_crowd.numel();
+const int* crowd_data = is_crowd.data<int>();
+T* proposal_to_gt_overlaps = iou->data<T>();
int64_t row = iou->dims()[0];
int64_t col = iou->dims()[1];
float epsilon = 0.00001;
@@ -152,6 +125,9 @@ std::vector<std::vector<int>> SampleFgBgGt(
for (int64_t i = 0; i < row; ++i) {
const T* v = proposal_to_gt_overlaps + i * col;
T max_overlap = *std::max_element(v, v + col);
+if ((i < gt_num) && (crowd_data[i])) {
+max_overlap = -1.0;
+}
if (max_overlap > fg_thresh) {
for (int64_t j = 0; j < col; ++j) {
T val = proposal_to_gt_overlaps[i * col + j];
@@ -170,17 +146,19 @@ std::vector<std::vector<int>> SampleFgBgGt(
}
// Reservoir Sampling
+std::uniform_real_distribution<float> uniform(0, 1);
int fg_rois_per_im = std::floor(batch_size_per_im * fg_fraction);
int fg_rois_this_image = fg_inds.size();
int fg_rois_per_this_image = std::min(fg_rois_per_im, fg_rois_this_image);
-std::uniform_real_distribution<float> uniform(0, 1);
+if (use_random) {
const int64_t fg_size = static_cast<int64_t>(fg_inds.size());
if (fg_size > fg_rois_per_this_image) {
for (int64_t i = fg_rois_per_this_image; i < fg_size; ++i) {
int rng_ind = std::floor(uniform(engine) * i);
if (rng_ind < fg_rois_per_this_image) {
std::iter_swap(fg_inds.begin() + rng_ind, fg_inds.begin() + i);
std::iter_swap(gt_inds.begin() + rng_ind, gt_inds.begin() + i);
+}
}
}
}
@@ -192,12 +170,14 @@ std::vector<std::vector<int>> SampleFgBgGt(
int bg_rois_per_image = batch_size_per_im - fg_rois_per_this_image;
int bg_rois_this_image = bg_inds.size();
int bg_rois_per_this_image = std::min(bg_rois_per_image, bg_rois_this_image);
-const int64_t bg_size = static_cast<int64_t>(bg_inds.size());
-if (bg_size > bg_rois_per_this_image) {
-for (int64_t i = bg_rois_per_this_image; i < bg_size; ++i) {
-int rng_ind = std::floor(uniform(engine) * i);
-if (rng_ind < fg_rois_per_this_image)
-std::iter_swap(bg_inds.begin() + rng_ind, bg_inds.begin() + i);
+if (use_random) {
+const int64_t bg_size = static_cast<int64_t>(bg_inds.size());
+if (bg_size > bg_rois_per_this_image) {
+for (int64_t i = bg_rois_per_this_image; i < bg_size; ++i) {
+int rng_ind = std::floor(uniform(engine) * i);
+if (rng_ind < fg_rois_per_this_image)
+std::iter_swap(bg_inds.begin() + rng_ind, bg_inds.begin() + i);
+}
}
}
std::vector<int> new_bg_inds(bg_inds.begin(),
@@ -248,14 +228,14 @@ void GatherBoxesLabels(const platform::CPUDeviceContext& context,
template <typename T>
std::vector<Tensor> SampleRoisForOneImage(
const platform::CPUDeviceContext& context, Tensor* rpn_rois,
-Tensor* gt_classes, Tensor* gt_boxes, Tensor* im_scale,
+Tensor* gt_classes, Tensor* is_crowd, Tensor* gt_boxes, Tensor* im_info,
const int batch_size_per_im, const float fg_fraction, const float fg_thresh,
const float bg_thresh_hi, const float bg_thresh_lo,
const std::vector<float>& bbox_reg_weights, const int class_nums,
-std::minstd_rand engine) {
+std::minstd_rand engine, bool use_random) {
auto rpn_rois_et = framework::EigenTensor<T, 2>::From(*rpn_rois);
-auto im_scale_data = im_scale->data<T>()[0];
-rpn_rois_et = rpn_rois_et / im_scale_data;
+auto im_scale = im_info->data<T>()[2];
+rpn_rois_et = rpn_rois_et / im_scale;
Tensor boxes;
int proposals_num = gt_boxes->dims()[0] + rpn_rois->dims()[0];
@@ -270,8 +250,8 @@ std::vector<Tensor> SampleRoisForOneImage(
// Generate proposal index
std::vector<std::vector<int>> fg_bg_gt = SampleFgBgGt<T>(
-context, &proposal_to_gt_overlaps, batch_size_per_im, fg_fraction,
-fg_thresh, bg_thresh_hi, bg_thresh_lo, engine);
+context, &proposal_to_gt_overlaps, *is_crowd, batch_size_per_im,
+fg_fraction, fg_thresh, bg_thresh_hi, bg_thresh_lo, engine, use_random);
std::vector<int> fg_inds = fg_bg_gt[0];
std::vector<int> bg_inds = fg_bg_gt[1];
std::vector<int> gt_inds = fg_bg_gt[2];
@@ -291,15 +271,15 @@ std::vector<Tensor> SampleRoisForOneImage(
// Compute targets
Tensor bbox_targets_single;
bbox_targets_single.mutable_data<T>(bbox_dim, context.GetPlace());
-BoxToDelta<T>(fg_num, sampled_boxes, sampled_gts, nullptr, false,
-&bbox_targets_single);
+BoxToDelta<T>(fg_num, sampled_boxes, sampled_gts, bbox_reg_weights.data(),
+false, &bbox_targets_single);
// Scale rois
Tensor sampled_rois;
sampled_rois.mutable_data<T>(sampled_boxes.dims(), context.GetPlace());
auto sampled_rois_et = framework::EigenTensor<T, 2>::From(sampled_rois);
auto sampled_boxes_et = framework::EigenTensor<T, 2>::From(sampled_boxes);
-sampled_rois_et = sampled_boxes_et * im_scale_data;
+sampled_rois_et = sampled_boxes_et * im_scale;
// Expand box targets
Tensor bbox_targets, bbox_inside_weights, bbox_outside_weights;
@@ -351,8 +331,9 @@ class GenerateProposalLabelsKernel : public framework::OpKernel<T> {
void Compute(const framework::ExecutionContext& context) const override {
auto* rpn_rois = context.Input<LoDTensor>("RpnRois");
auto* gt_classes = context.Input<LoDTensor>("GtClasses");
+auto* is_crowd = context.Input<LoDTensor>("IsCrowd");
auto* gt_boxes = context.Input<LoDTensor>("GtBoxes");
-auto* im_scales = context.Input<LoDTensor>("ImScales");
+auto* im_info = context.Input<LoDTensor>("ImInfo");
auto* rois = context.Output<LoDTensor>("Rois");
auto* labels_int32 = context.Output<LoDTensor>("LabelsInt32");
@@ -369,18 +350,21 @@ class GenerateProposalLabelsKernel : public framework::OpKernel<T> {
std::vector<float> bbox_reg_weights =
context.Attr<std::vector<float>>("bbox_reg_weights");
int class_nums = context.Attr<int>("class_nums");
+bool use_random = context.Attr<bool>("use_random");
PADDLE_ENFORCE_EQ(rpn_rois->lod().size(), 1UL,
"GenerateProposalLabelsOp rpn_rois needs 1 level of LoD");
PADDLE_ENFORCE_EQ(
gt_classes->lod().size(), 1UL,
"GenerateProposalLabelsOp gt_classes needs 1 level of LoD");
+PADDLE_ENFORCE_EQ(is_crowd->lod().size(), 1UL,
+"GenerateProposalLabelsOp is_crowd needs 1 level of LoD");
PADDLE_ENFORCE_EQ(gt_boxes->lod().size(), 1UL,
"GenerateProposalLabelsOp gt_boxes needs 1 level of LoD");
int64_t n = static_cast<int64_t>(rpn_rois->lod().back().size() - 1);
rois->mutable_data<T>({n * batch_size_per_im, kBoxDim}, context.GetPlace());
-labels_int32->mutable_data<int>({n * batch_size_per_im},
+labels_int32->mutable_data<int>({n * batch_size_per_im, 1},
context.GetPlace());
bbox_targets->mutable_data<T>({n * batch_size_per_im, kBoxDim * class_nums},
context.GetPlace());
@@ -391,8 +375,7 @@ class GenerateProposalLabelsKernel : public framework::OpKernel<T> {
std::random_device rnd;
std::minstd_rand engine;
-int seed =
-context.Attr<bool>("fix_seed") ? context.Attr<int>("seed") : rnd();
+int seed = rnd();
engine.seed(seed);
framework::LoD lod;
@@ -403,19 +386,23 @@ class GenerateProposalLabelsKernel : public framework::OpKernel<T> {
auto rpn_rois_lod = rpn_rois->lod().back();
auto gt_classes_lod = gt_classes->lod().back();
+auto is_crowd_lod = is_crowd->lod().back();
auto gt_boxes_lod = gt_boxes->lod().back();
for (int i = 0; i < n; ++i) {
Tensor rpn_rois_slice =
rpn_rois->Slice(rpn_rois_lod[i], rpn_rois_lod[i + 1]);
Tensor gt_classes_slice =
gt_classes->Slice(gt_classes_lod[i], gt_classes_lod[i + 1]);
+Tensor is_crowd_slice =
+is_crowd->Slice(is_crowd_lod[i], is_crowd_lod[i + 1]);
Tensor gt_boxes_slice =
gt_boxes->Slice(gt_boxes_lod[i], gt_boxes_lod[i + 1]);
-Tensor im_scales_slice = im_scales->Slice(i, i + 1);
+Tensor im_info_slice = im_info->Slice(i, i + 1);
std::vector<Tensor> tensor_output = SampleRoisForOneImage<T>(
-dev_ctx, &rpn_rois_slice, &gt_classes_slice, &gt_boxes_slice,
-&im_scales_slice, batch_size_per_im, fg_fraction, fg_thresh,
-bg_thresh_hi, bg_thresh_lo, bbox_reg_weights, class_nums, engine);
+dev_ctx, &rpn_rois_slice, &gt_classes_slice, &is_crowd_slice,
+&gt_boxes_slice, &im_info_slice, batch_size_per_im, fg_fraction,
+fg_thresh, bg_thresh_hi, bg_thresh_lo, bbox_reg_weights, class_nums,
+engine, use_random);
Tensor sampled_rois = tensor_output[0];
Tensor sampled_labels_int32 = tensor_output[1];
Tensor sampled_bbox_targets = tensor_output[2];
@@ -442,7 +429,7 @@ class GenerateProposalLabelsKernel : public framework::OpKernel<T> {
bbox_inside_weights->set_lod(lod);
bbox_outside_weights->set_lod(lod);
rois->Resize({num_rois, kBoxDim});
-labels_int32->Resize({num_rois});
+labels_int32->Resize({num_rois, 1});
bbox_targets->Resize({num_rois, kBoxDim * class_nums});
bbox_inside_weights->Resize({num_rois, kBoxDim * class_nums});
bbox_outside_weights->Resize({num_rois, kBoxDim * class_nums});
@@ -455,8 +442,9 @@ class GenerateProposalLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
// TODO(buxingyuan): Add Document
AddInput("RpnRois", "RpnRois.");
AddInput("GtClasses", "GtClasses.");
+AddInput("IsCrowd", "IsCrowd.");
AddInput("GtBoxes", "GtBoxes.");
-AddInput("ImScales", "ImScales.");
+AddInput("ImInfo", "ImInfo.");
AddOutput("Rois", "Rois.");
AddOutput("LabelsInt32", "LabelsInt32.");
@@ -471,8 +459,7 @@ class GenerateProposalLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr<float>("bg_thresh_lo", "bg_thresh_lo");
AddAttr<std::vector<float>>("bbox_reg_weights", "bbox_reg_weights");
AddAttr<int>("class_nums", "class_nums");
-AddAttr<bool>("fix_seed", "fix_seed").SetDefault(false);
-AddAttr<int>("seed", "seed").SetDefault(0);
+AddAttr<bool>("use_random", "use_random").SetDefault(true);
AddComment(R"DOC(
Generate Proposals Labels Operator.
......
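One behavioral change above is that `SampleRoisForOneImage` now forwards `bbox_reg_weights.data()` to `BoxToDelta` instead of `nullptr`. `BoxToDelta`'s body is not part of this diff, so the sketch below only illustrates the standard Faster R-CNN/Detectron weighted box-to-delta encoding that such a helper typically performs; dividing by weights such as `[0.1, 0.1, 0.2, 0.2]` (the layer default shown in the API spec) is my assumption and may not match Paddle's exact convention. The function name `box_to_delta` is illustrative only.

```python
# Illustrative NumPy sketch of weighted box-to-delta encoding (Faster R-CNN /
# Detectron style). Assumption: targets are divided by the per-coordinate
# weights, so 0.1 acts like Detectron's weight of 10.
import numpy as np

def box_to_delta(ex_boxes, gt_boxes, weights=(0.1, 0.1, 0.2, 0.2)):
    ex_w = ex_boxes[:, 2] - ex_boxes[:, 0] + 1
    ex_h = ex_boxes[:, 3] - ex_boxes[:, 1] + 1
    ex_cx = ex_boxes[:, 0] + 0.5 * ex_w
    ex_cy = ex_boxes[:, 1] + 0.5 * ex_h

    gt_w = gt_boxes[:, 2] - gt_boxes[:, 0] + 1
    gt_h = gt_boxes[:, 3] - gt_boxes[:, 1] + 1
    gt_cx = gt_boxes[:, 0] + 0.5 * gt_w
    gt_cy = gt_boxes[:, 1] + 0.5 * gt_h

    wx, wy, ww, wh = weights
    dx = (gt_cx - ex_cx) / ex_w / wx
    dy = (gt_cy - ex_cy) / ex_h / wy
    dw = np.log(gt_w / ex_w) / ww
    dh = np.log(gt_h / ex_h) / wh
    return np.stack([dx, dy, dw, dh], axis=1)
```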
@@ -89,12 +89,11 @@ void BoxCoder(const platform::DeviceContext &ctx, Tensor *all_anchors,
}
for (int64_t i = 0; i < row; ++i) {
-T anchor_width = anchor_data[i * len + 2] - anchor_data[i * len];
-T anchor_height = anchor_data[i * len + 3] - anchor_data[i * len + 1];
-T anchor_center_x = (anchor_data[i * len + 2] + anchor_data[i * len]) / 2;
-T anchor_center_y =
-(anchor_data[i * len + 3] + anchor_data[i * len + 1]) / 2;
+T anchor_width = anchor_data[i * len + 2] - anchor_data[i * len] + 1.0;
+T anchor_height = anchor_data[i * len + 3] - anchor_data[i * len + 1] + 1.0;
+T anchor_center_x = anchor_data[i * len] + 0.5 * anchor_width;
+T anchor_center_y = anchor_data[i * len + 1] + 0.5 * anchor_height;
T bbox_center_x = 0, bbox_center_y = 0;
T bbox_width = 0, bbox_height = 0;
@@ -106,25 +105,31 @@ void BoxCoder(const platform::DeviceContext &ctx, Tensor *all_anchors,
bbox_center_y = variances_data[i * len + 1] *
bbox_deltas_data[i * len + 1] * anchor_height +
anchor_center_y;
-bbox_width = std::exp(variances_data[i * len + 2] *
-bbox_deltas_data[i * len + 2]) *
+bbox_width = std::exp(std::min<T>(variances_data[i * len + 2] *
+bbox_deltas_data[i * len + 2],
+std::log(1000.0 / 16.0))) *
anchor_width;
-bbox_height = std::exp(variances_data[i * len + 3] *
-bbox_deltas_data[i * len + 3]) *
+bbox_height = std::exp(std::min<T>(variances_data[i * len + 3] *
+bbox_deltas_data[i * len + 3],
+std::log(1000.0 / 16.0))) *
anchor_height;
} else {
bbox_center_x =
bbox_deltas_data[i * len] * anchor_width + anchor_center_x;
bbox_center_y =
bbox_deltas_data[i * len + 1] * anchor_height + anchor_center_y;
-bbox_width = std::exp(bbox_deltas_data[i * len + 2]) * anchor_width;
-bbox_height = std::exp(bbox_deltas_data[i * len + 3]) * anchor_height;
+bbox_width = std::exp(std::min<T>(bbox_deltas_data[i * len + 2],
+std::log(1000.0 / 16.0))) *
+anchor_width;
+bbox_height = std::exp(std::min<T>(bbox_deltas_data[i * len + 3],
+std::log(1000.0 / 16.0))) *
+anchor_height;
}
proposals_data[i * len] = bbox_center_x - bbox_width / 2;
proposals_data[i * len + 1] = bbox_center_y - bbox_height / 2;
-proposals_data[i * len + 2] = bbox_center_x + bbox_width / 2;
-proposals_data[i * len + 3] = bbox_center_y + bbox_height / 2;
+proposals_data[i * len + 2] = bbox_center_x + bbox_width / 2 - 1;
+proposals_data[i * len + 3] = bbox_center_y + bbox_height / 2 - 1;
}
// return proposals;
}
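The `BoxCoder` change above clamps the width/height deltas at `log(1000/16)` before exponentiation and adopts the "+1"/"-1" pixel conventions for anchor sizes and proposal corners. Below is a minimal NumPy sketch of that decoding step, ignoring the per-anchor variances for brevity; `decode` is an illustrative name, not a Paddle API.

```python
# Minimal NumPy sketch of the clipped box decoding done in BoxCoder above
# (deltas -> proposals), without variances. The log(1000/16) clamp prevents
# overflow in np.exp for large predicted dw/dh.
import numpy as np

BBOX_XFORM_CLIP = np.log(1000.0 / 16.0)

def decode(anchors, deltas):
    aw = anchors[:, 2] - anchors[:, 0] + 1.0
    ah = anchors[:, 3] - anchors[:, 1] + 1.0
    acx = anchors[:, 0] + 0.5 * aw
    acy = anchors[:, 1] + 0.5 * ah

    dx, dy, dw, dh = deltas[:, 0], deltas[:, 1], deltas[:, 2], deltas[:, 3]
    dw = np.minimum(dw, BBOX_XFORM_CLIP)
    dh = np.minimum(dh, BBOX_XFORM_CLIP)

    cx = dx * aw + acx
    cy = dy * ah + acy
    w = np.exp(dw) * aw
    h = np.exp(dh) * ah

    x1 = cx - w / 2
    y1 = cy - h / 2
    x2 = cx + w / 2 - 1
    y2 = cy + h / 2 - 1
    return np.stack([x1, y1, x2, y2], axis=1)
```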
@@ -156,18 +161,23 @@ void FilterBoxes(const platform::DeviceContext &ctx, Tensor *boxes,
float min_size, const Tensor &im_info, Tensor *keep) {
const T *im_info_data = im_info.data<T>();
T *boxes_data = boxes->mutable_data<T>(ctx.GetPlace());
-min_size *= im_info_data[2];
+T im_scale = im_info_data[2];
keep->Resize({boxes->dims()[0], 1});
+min_size = std::max(min_size, 1.0f);
int *keep_data = keep->mutable_data<int>(ctx.GetPlace());
int keep_len = 0;
for (int i = 0; i < boxes->dims()[0]; ++i) {
T ws = boxes_data[4 * i + 2] - boxes_data[4 * i] + 1;
T hs = boxes_data[4 * i + 3] - boxes_data[4 * i + 1] + 1;
+T ws_origin_scale =
+(boxes_data[4 * i + 2] - boxes_data[4 * i]) / im_scale + 1;
+T hs_origin_scale =
+(boxes_data[4 * i + 3] - boxes_data[4 * i + 1]) / im_scale + 1;
T x_ctr = boxes_data[4 * i] + ws / 2;
T y_ctr = boxes_data[4 * i + 1] + hs / 2;
-if (ws >= min_size && hs >= min_size && x_ctr <= im_info_data[1] &&
-y_ctr <= im_info_data[0]) {
+if (ws_origin_scale >= min_size && hs_origin_scale >= min_size &&
+x_ctr <= im_info_data[1] && y_ctr <= im_info_data[0]) {
keep_data[keep_len++] = i;
}
}
@@ -218,8 +228,8 @@ T JaccardOverlap(const T *box1, const T *box2, const bool normalized) {
const T inter_ymin = std::max(box1[1], box2[1]);
const T inter_xmax = std::min(box1[2], box2[2]);
const T inter_ymax = std::min(box1[3], box2[3]);
-const T inter_w = inter_xmax - inter_xmin;
-const T inter_h = inter_ymax - inter_ymin;
+const T inter_w = std::max(0.0f, inter_xmax - inter_xmin + 1);
+const T inter_h = std::max(0.0f, inter_ymax - inter_ymin + 1);
const T inter_area = inter_w * inter_h;
const T bbox1_area = BBoxArea<T>(box1, normalized);
const T bbox2_area = BBoxArea<T>(box2, normalized);
......
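`FilterBoxes` above now applies the minimum-size test in the original image scale (dividing widths and heights by `im_info[2]`) and clamps `min_size` to at least 1, instead of scaling `min_size` itself. A standalone NumPy sketch of the new rule, assuming boxes as `[x1, y1, x2, y2]` and `im_info = [height, width, scale]` (`filter_boxes` is an illustrative name):

```python
# NumPy sketch of the updated FilterBoxes rule: the width/height test is done
# at the original image scale, while the center-inside-image test stays in the
# scaled image.
import numpy as np

def filter_boxes(boxes, min_size, im_info):
    im_h, im_w, im_scale = im_info
    min_size = max(min_size, 1.0)
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    ws_orig = (boxes[:, 2] - boxes[:, 0]) / im_scale + 1
    hs_orig = (boxes[:, 3] - boxes[:, 1]) / im_scale + 1
    x_ctr = boxes[:, 0] + ws / 2
    y_ctr = boxes[:, 1] + hs / 2
    keep = ((ws_orig >= min_size) & (hs_orig >= min_size) &
            (x_ctr <= im_w) & (y_ctr <= im_h))
    return np.where(keep)[0]
```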
@@ -31,8 +31,14 @@ class RpnTargetAssignOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
-PADDLE_ENFORCE(ctx->HasInput("DistMat"),
-"Input(DistMat) of RpnTargetAssignOp should not be null");
+PADDLE_ENFORCE(ctx->HasInput("Anchor"),
+"Input(Anchor) of RpnTargetAssignOp should not be null");
+PADDLE_ENFORCE(ctx->HasInput("GtBoxes"),
+"Input(GtBoxes) of RpnTargetAssignOp should not be null");
+PADDLE_ENFORCE(ctx->HasInput("IsCrowd"),
+"Input(Anchor) of RpnTargetAssignOp should not be null");
+PADDLE_ENFORCE(ctx->HasInput("ImInfo"),
+"Input(ImInfo) of RpnTargetAssignOp should not be null");
PADDLE_ENFORCE(
ctx->HasOutput("LocationIndex"),
@@ -43,10 +49,20 @@ class RpnTargetAssignOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE(
ctx->HasOutput("TargetLabel"),
"Output(TargetLabel) of RpnTargetAssignOp should not be null");
+PADDLE_ENFORCE(
+ctx->HasOutput("TargetBBox"),
+"Output(TargetBBox) of RpnTargetAssignOp should not be null");
-auto in_dims = ctx->GetInputDim("DistMat");
-PADDLE_ENFORCE_EQ(in_dims.size(), 2,
-"The rank of Input(DistMat) must be 2.");
+auto anchor_dims = ctx->GetInputDim("Anchor");
+auto gt_boxes_dims = ctx->GetInputDim("GtBoxes");
+auto is_crowd_dims = ctx->GetInputDim("IsCrowd");
+auto im_info_dims = ctx->GetInputDim("ImInfo");
+PADDLE_ENFORCE_EQ(anchor_dims.size(), 2,
+"The rank of Input(Anchor) must be 2.");
+PADDLE_ENFORCE_EQ(gt_boxes_dims.size(), 2,
+"The rank of Input(GtBoxes) must be 2.");
+PADDLE_ENFORCE_EQ(im_info_dims.size(), 2,
+"The rank of Input(ImInfo) must be 2.");
ctx->SetOutputDim("LocationIndex", {-1});
ctx->SetOutputDim("ScoreIndex", {-1});
@@ -59,198 +75,383 @@ class RpnTargetAssignOp : public framework::OperatorWithKernel {
const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(
framework::ToDataType(
-ctx.Input<framework::LoDTensor>("DistMat")->type()),
+ctx.Input<framework::LoDTensor>("Anchor")->type()),
platform::CPUPlace());
}
};
template <typename T>
-class RpnTargetAssignKernel : public framework::OpKernel<T> {
-public:
-void Compute(const framework::ExecutionContext& context) const override {
-auto* anchor_t = context.Input<Tensor>("Anchor");  // (H*W*A) * 4
-auto* gt_bbox_t = context.Input<Tensor>("GtBox");
-auto* dist_t = context.Input<LoDTensor>("DistMat");
+void AppendRpns(LoDTensor* out, int64_t offset, Tensor* to_add) {
+auto* out_data = out->data<T>();
+auto* to_add_data = to_add->data<T>();
+memcpy(out_data + offset, to_add_data, to_add->numel() * sizeof(T));
+}
template <typename T>
std::vector<Tensor> FilterStraddleAnchor(
const platform::CPUDeviceContext& context, const Tensor* anchor,
const float rpn_straddle_thresh, T im_height, T im_width) {
std::vector<int> inds_inside;
int anchor_num = anchor->dims()[0];
auto* anchor_data = anchor->data<T>();
if (rpn_straddle_thresh >= 0) {
int index;
for (int i = 0; i < anchor_num; ++i) {
index = i * 4;
if ((anchor_data[index + 0] >= -rpn_straddle_thresh) &&
(anchor_data[index + 1] >= -rpn_straddle_thresh) &&
(anchor_data[index + 2] < im_width + rpn_straddle_thresh) &&
(anchor_data[index + 3] < im_height + rpn_straddle_thresh)) {
inds_inside.emplace_back(i);
}
}
} else {
for (int i = 0; i < anchor_num; ++i) {
inds_inside.emplace_back(i);
}
}
int inside_num = inds_inside.size();
Tensor inds_inside_t;
int* inds_inside_data =
inds_inside_t.mutable_data<int>({inside_num}, context.GetPlace());
std::copy(inds_inside.begin(), inds_inside.end(), inds_inside_data);
Tensor inside_anchor_t;
T* inside_anchor_data =
inside_anchor_t.mutable_data<T>({inside_num, 4}, context.GetPlace());
Gather<T>(anchor->data<T>(), 4, inds_inside_data, inside_num,
inside_anchor_data);
std::vector<Tensor> res;
res.emplace_back(inds_inside_t);
res.emplace_back(inside_anchor_t);
return res;
}
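`FilterStraddleAnchor` above keeps only the anchors that stay within `rpn_straddle_thresh` pixels of the image border, or all anchors when the threshold is negative. An equivalent standalone NumPy sketch (the function name is illustrative only):

```python
# NumPy sketch of the straddle-anchor filter above: keep anchors that do not
# stick out of the image by more than straddle_thresh pixels; a negative
# threshold disables the filter.
import numpy as np

def filter_straddle_anchors(anchors, straddle_thresh, im_height, im_width):
    if straddle_thresh < 0:
        return np.arange(anchors.shape[0])
    return np.where(
        (anchors[:, 0] >= -straddle_thresh)
        & (anchors[:, 1] >= -straddle_thresh)
        & (anchors[:, 2] < im_width + straddle_thresh)
        & (anchors[:, 3] < im_height + straddle_thresh)
    )[0]
```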
template <typename T>
Tensor FilterCrowdGt(const platform::CPUDeviceContext& context,
Tensor* gt_boxes, Tensor* is_crowd) {
int gt_num = gt_boxes->dims()[0];
std::vector<int> not_crowd_inds;
auto* is_crowd_data = is_crowd->data<int>();
for (int i = 0; i < gt_num; ++i) {
if (is_crowd_data[i] == 0) {
not_crowd_inds.emplace_back(i);
}
}
int ncrowd_num = not_crowd_inds.size();
Tensor ncrowd_gt_boxes;
T* ncrowd_gt_boxes_data =
ncrowd_gt_boxes.mutable_data<T>({ncrowd_num, 4}, context.GetPlace());
Gather<T>(gt_boxes->data<T>(), 4, not_crowd_inds.data(), ncrowd_num,
ncrowd_gt_boxes_data);
return ncrowd_gt_boxes;
}
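`FilterCrowdGt` above removes ground-truth boxes whose `is_crowd` flag is set before overlaps are computed, so crowd regions never produce positive anchors. The same idea in NumPy (illustrative only):

```python
# NumPy sketch of FilterCrowdGt: drop gt boxes flagged as crowd so they never
# become positive RPN targets.
import numpy as np

def filter_crowd_gt(gt_boxes, is_crowd):
    keep = np.where(is_crowd == 0)[0]
    return gt_boxes[keep]
```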
void ReservoirSampling(const int num, std::vector<int>* inds,
std::minstd_rand engine, bool use_random) {
std::uniform_real_distribution<float> uniform(0, 1);
size_t len = inds->size();
if (len > static_cast<size_t>(num)) {
if (use_random) {
for (size_t i = num; i < len; ++i) {
int rng_ind = std::floor(uniform(engine) * i);
if (rng_ind < num)
std::iter_swap(inds->begin() + rng_ind, inds->begin() + i);
}
}
inds->resize(num);
}
}
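`ReservoirSampling` above keeps at most `num` of the candidate indices, swapping later candidates into the kept prefix when `use_random` is true and simply truncating otherwise. A behaviour-level Python sketch follows; the names are mine, and the random draw mirrors the operator's `uniform(engine) * i` rather than a textbook reservoir sampler.

```python
# Python sketch of the ReservoirSampling helper above: keep at most `num`
# entries of the index list in place.
import math
import random

def reservoir_sampling(num, inds, use_random=True, seed=0):
    rng = random.Random(seed)
    if len(inds) > num:
        if use_random:
            for i in range(num, len(inds)):
                rng_ind = math.floor(rng.random() * i)  # same [0, i) draw as the operator
                if rng_ind < num:
                    inds[rng_ind], inds[i] = inds[i], inds[rng_ind]
        del inds[num:]
    return inds
```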
template <typename T>
void ScoreAssign(const T* anchor_by_gt_overlap_data,
const Tensor& anchor_to_gt_max, const Tensor& gt_to_anchor_max,
const int rpn_batch_size_per_im, const float rpn_fg_fraction,
const float rpn_positive_overlap,
const float rpn_negative_overlap, std::vector<int>* fg_inds,
std::vector<int>* bg_inds, std::vector<int>* tgt_lbl,
std::minstd_rand engine, bool use_random) {
float epsilon = 0.00001;
int anchor_num = anchor_to_gt_max.dims()[0];
int gt_num = gt_to_anchor_max.dims()[0];
std::vector<int> target_label(anchor_num, -1);
std::vector<int> fg_inds_fake;
std::vector<int> bg_inds_fake;
const T* anchor_to_gt_max_data = anchor_to_gt_max.data<T>();
const T* gt_to_anchor_max_data = gt_to_anchor_max.data<T>();
// TODO(buxingyuan): Match with Detectron now
// but it seems here is a bug in two directions assignment
// in which the later one may overwrites the former one.
for (int64_t i = 0; i < anchor_num; ++i) {
bool is_anchors_with_max_overlap = false;
for (int64_t j = 0; j < gt_num; ++j) {
T value = anchor_by_gt_overlap_data[i * gt_num + j];
T diff = std::abs(value - gt_to_anchor_max_data[j]);
if (diff < epsilon) {
is_anchors_with_max_overlap = true;
break;
}
}
bool is_anchor_great_than_thresh =
(anchor_to_gt_max_data[i] >= rpn_positive_overlap);
if (is_anchors_with_max_overlap || is_anchor_great_than_thresh) {
fg_inds_fake.push_back(i);
}
}
-auto* loc_index_t = context.Output<Tensor>("LocationIndex");
-auto* score_index_t = context.Output<Tensor>("ScoreIndex");
-auto* tgt_bbox_t = context.Output<Tensor>("TargetBBox");
-auto* tgt_lbl_t = context.Output<Tensor>("TargetLabel");
-auto lod = dist_t->lod().back();
-int64_t batch_num = static_cast<int64_t>(lod.size() - 1);
-int64_t anchor_num = dist_t->dims()[1];
-PADDLE_ENFORCE_EQ(anchor_num, anchor_t->dims()[0]);
-int rpn_batch_size = context.Attr<int>("rpn_batch_size_per_im");
-float pos_threshold = context.Attr<float>("rpn_positive_overlap");
-float neg_threshold = context.Attr<float>("rpn_negative_overlap");
-float fg_fraction = context.Attr<float>("fg_fraction");
+// Reservoir Sampling
+int fg_num = static_cast<int>(rpn_fg_fraction * rpn_batch_size_per_im);
+ReservoirSampling(fg_num, &fg_inds_fake, engine, use_random);
+fg_num = static_cast<int>(fg_inds_fake.size());
+for (int64_t i = 0; i < fg_num; ++i) {
+target_label[fg_inds_fake[i]] = 1;
+}
+int bg_num = rpn_batch_size_per_im - fg_num;
+for (int64_t i = 0; i < anchor_num; ++i) {
+if (anchor_to_gt_max_data[i] < rpn_negative_overlap) {
+bg_inds_fake.push_back(i);
+}
+}
+ReservoirSampling(bg_num, &bg_inds_fake, engine, use_random);
+bg_num = static_cast<int>(bg_inds_fake.size());
+for (int64_t i = 0; i < bg_num; ++i) {
+target_label[bg_inds_fake[i]] = 0;
+}
+for (int64_t i = 0; i < anchor_num; ++i) {
+if (target_label[i] == 1) fg_inds->emplace_back(i);
+if (target_label[i] == 0) bg_inds->emplace_back(i);
+}
fg_num = fg_inds->size();
bg_num = bg_inds->size();
tgt_lbl->resize(fg_num + bg_num, 0);
std::vector<int> fg_lbl(fg_num, 1);
std::vector<int> bg_lbl(bg_num, 0);
std::copy(fg_lbl.begin(), fg_lbl.end(), tgt_lbl->data());
std::copy(bg_lbl.begin(), bg_lbl.end(), tgt_lbl->data() + fg_num);
}
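`ScoreAssign` above follows the Detectron RPN rule: an anchor is labeled foreground if it ties the best overlap for some gt box or if its best IoU reaches `rpn_positive_overlap`, background if its best IoU is below `rpn_negative_overlap`, and ignored otherwise; the TODO notes that the two passes can overwrite each other. Below is a vectorized NumPy sketch of the labeling before fg/bg subsampling; here foreground deliberately takes precedence, which is a simplification of the sequential C++ passes, and `label_anchors` is an illustrative name.

```python
# NumPy sketch of the ScoreAssign labeling rule above (before subsampling):
# label 1 = foreground, 0 = background, -1 = ignored.
import numpy as np

def label_anchors(overlaps, pos_thresh=0.7, neg_thresh=0.3):
    # overlaps: [num_anchors, num_gt] IoU matrix
    labels = np.full(overlaps.shape[0], -1, dtype=np.int64)
    anchor_to_gt_max = overlaps.max(axis=1)
    gt_to_anchor_max = overlaps.max(axis=0)
    # anchors that tie the per-gt maximum overlap are foreground candidates
    is_best_for_some_gt = (overlaps == gt_to_anchor_max[None, :]).any(axis=1)
    labels[anchor_to_gt_max < neg_thresh] = 0
    labels[is_best_for_some_gt | (anchor_to_gt_max >= pos_thresh)] = 1
    return labels, overlaps.argmax(axis=1)
```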
template <typename T>
std::vector<Tensor> SampleRpnFgBgGt(const platform::CPUDeviceContext& ctx,
const Tensor& anchor_by_gt_overlap,
const int rpn_batch_size_per_im,
const float rpn_positive_overlap,
const float rpn_negative_overlap,
const float rpn_fg_fraction,
std::minstd_rand engine, bool use_random) {
auto* anchor_by_gt_overlap_data = anchor_by_gt_overlap.data<T>();
int anchor_num = anchor_by_gt_overlap.dims()[0];
int gt_num = anchor_by_gt_overlap.dims()[1];
std::vector<int> fg_inds;
std::vector<int> bg_inds;
std::vector<int> gt_inds;
std::vector<int> tgt_lbl;
// Calculate the max IoU between anchors and gt boxes
// Map from anchor to gt box that has highest overlap
auto place = ctx.GetPlace();
Tensor anchor_to_gt_max, anchor_to_gt_argmax, gt_to_anchor_max;
anchor_to_gt_max.mutable_data<T>({anchor_num}, place);
int* argmax = anchor_to_gt_argmax.mutable_data<int>({anchor_num}, place);
gt_to_anchor_max.mutable_data<T>({gt_num}, place);
auto anchor_by_gt_overlap_et =
framework::EigenMatrix<T>::From(anchor_by_gt_overlap);
auto anchor_to_gt_max_et =
framework::EigenVector<T>::Flatten(anchor_to_gt_max);
auto gt_to_anchor_max_et =
framework::EigenVector<T>::Flatten(gt_to_anchor_max);
auto anchor_to_gt_argmax_et =
framework::EigenVector<int>::Flatten(anchor_to_gt_argmax);
anchor_to_gt_max_et =
anchor_by_gt_overlap_et.maximum(Eigen::DSizes<int, 1>(1));
anchor_to_gt_argmax_et =
anchor_by_gt_overlap_et.argmax(1).template cast<int>();
gt_to_anchor_max_et =
anchor_by_gt_overlap_et.maximum(Eigen::DSizes<int, 1>(0));
// Follow the Faster RCNN's implementation
ScoreAssign(anchor_by_gt_overlap_data, anchor_to_gt_max, gt_to_anchor_max,
rpn_batch_size_per_im, rpn_fg_fraction, rpn_positive_overlap,
rpn_negative_overlap, &fg_inds, &bg_inds, &tgt_lbl, engine,
use_random);
int fg_num = fg_inds.size();
int bg_num = bg_inds.size();
gt_inds.reserve(fg_num);
for (int i = 0; i < fg_num; ++i) {
gt_inds.emplace_back(argmax[fg_inds[i]]);
}
-int fg_num_per_batch = static_cast<int>(rpn_batch_size * fg_fraction);
+Tensor loc_index_t, score_index_t, tgt_lbl_t, gt_inds_t;
int* loc_index_data = loc_index_t.mutable_data<int>({fg_num}, place);
int* score_index_data =
score_index_t.mutable_data<int>({fg_num + bg_num}, place);
int* tgt_lbl_data = tgt_lbl_t.mutable_data<int>({fg_num + bg_num}, place);
int* gt_inds_data = gt_inds_t.mutable_data<int>({fg_num}, place);
std::copy(fg_inds.begin(), fg_inds.end(), loc_index_data);
std::copy(fg_inds.begin(), fg_inds.end(), score_index_data);
std::copy(bg_inds.begin(), bg_inds.end(), score_index_data + fg_num);
std::copy(tgt_lbl.begin(), tgt_lbl.end(), tgt_lbl_data);
std::copy(gt_inds.begin(), gt_inds.end(), gt_inds_data);
std::vector<Tensor> loc_score_tgtlbl_gt;
loc_score_tgtlbl_gt.emplace_back(loc_index_t);
loc_score_tgtlbl_gt.emplace_back(score_index_t);
loc_score_tgtlbl_gt.emplace_back(tgt_lbl_t);
loc_score_tgtlbl_gt.emplace_back(gt_inds_t);
return loc_score_tgtlbl_gt;
}
-int64_t max_num = batch_num * anchor_num;
+template <typename T>
class RpnTargetAssignKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* anchor = context.Input<Tensor>("Anchor"); // (H*W*A) * 4
auto* gt_boxes = context.Input<LoDTensor>("GtBoxes");
auto* is_crowd = context.Input<LoDTensor>("IsCrowd");
auto* im_info = context.Input<LoDTensor>("ImInfo");
auto* loc_index = context.Output<LoDTensor>("LocationIndex");
auto* score_index = context.Output<LoDTensor>("ScoreIndex");
auto* tgt_bbox = context.Output<LoDTensor>("TargetBBox");
auto* tgt_lbl = context.Output<LoDTensor>("TargetLabel");
PADDLE_ENFORCE_EQ(gt_boxes->lod().size(), 1UL,
"RpnTargetAssignOp gt_boxes needs 1 level of LoD");
PADDLE_ENFORCE_EQ(is_crowd->lod().size(), 1UL,
"RpnTargetAssignOp is_crowd needs 1 level of LoD");
int64_t anchor_num = static_cast<int64_t>(anchor->dims()[0]);
int64_t batch_num = static_cast<int64_t>(gt_boxes->lod().back().size() - 1);
int rpn_batch_size_per_im = context.Attr<int>("rpn_batch_size_per_im");
float rpn_straddle_thresh = context.Attr<float>("rpn_straddle_thresh");
float rpn_positive_overlap = context.Attr<float>("rpn_positive_overlap");
float rpn_negative_overlap = context.Attr<float>("rpn_negative_overlap");
float rpn_fg_fraction = context.Attr<float>("rpn_fg_fraction");
bool use_random = context.Attr<bool>("use_random");
int64_t max_num = batch_num * rpn_batch_size_per_im;
auto place = context.GetPlace();
-tgt_bbox_t->mutable_data<T>({max_num, 4}, place);
-auto* loc_index = loc_index_t->mutable_data<int>({max_num}, place);
-auto* score_index = score_index_t->mutable_data<int>({max_num}, place);
-Tensor tmp_tgt_lbl;
-auto* tmp_lbl_data = tmp_tgt_lbl.mutable_data<int64_t>({max_num}, place);
+loc_index->mutable_data<int>({max_num}, place);
+score_index->mutable_data<int>({max_num}, place);
+tgt_bbox->mutable_data<T>({max_num, 4}, place);
+tgt_lbl->mutable_data<int>({max_num, 1}, place);
auto& dev_ctx = context.device_context<platform::CPUDeviceContext>();
-math::SetConstant<platform::CPUDeviceContext, int64_t> iset;
-iset(dev_ctx, &tmp_tgt_lbl, static_cast<int64_t>(-1));
std::random_device rnd;
std::minstd_rand engine;
-int seed =
-context.Attr<bool>("fix_seed") ? context.Attr<int>("seed") : rnd();
+int seed = rnd();
engine.seed(seed);
-int fg_num = 0;
-int bg_num = 0;
+framework::LoD lod_loc, loc_score;
+std::vector<size_t> lod0_loc(1, 0);
+std::vector<size_t> lod0_score(1, 0);
+int total_loc_num = 0;
+int total_score_num = 0;
+auto gt_boxes_lod = gt_boxes->lod().back();
+auto is_crowd_lod = is_crowd->lod().back();
for (int i = 0; i < batch_num; ++i) {
-Tensor dist = dist_t->Slice(lod[i], lod[i + 1]);
-Tensor gt_bbox = gt_bbox_t->Slice(lod[i], lod[i + 1]);
-auto fg_bg_gt = SampleFgBgGt(dev_ctx, dist, pos_threshold, neg_threshold,
-rpn_batch_size, fg_num_per_batch, engine,
-tmp_lbl_data + i * anchor_num);
-int cur_fg_num = fg_bg_gt[0].size();
-int cur_bg_num = fg_bg_gt[1].size();
-std::transform(fg_bg_gt[0].begin(), fg_bg_gt[0].end(), loc_index,
-[i, anchor_num](int d) { return d + i * anchor_num; });
-memcpy(score_index, loc_index, cur_fg_num * sizeof(int));
-std::transform(fg_bg_gt[1].begin(), fg_bg_gt[1].end(),
-score_index + cur_fg_num,
-[i, anchor_num](int d) { return d + i * anchor_num; });
+Tensor gt_boxes_slice =
+gt_boxes->Slice(gt_boxes_lod[i], gt_boxes_lod[i + 1]);
+Tensor is_crowd_slice =
+is_crowd->Slice(is_crowd_lod[i], is_crowd_lod[i + 1]);
+Tensor im_info_slice = im_info->Slice(i, i + 1);
+auto* im_info_data = im_info_slice.data<T>();
+auto im_height = im_info_data[0];
+auto im_width = im_info_data[1];
+auto im_scale = im_info_data[2];
+// Filter straddle anchor
+std::vector<Tensor> filter_output = FilterStraddleAnchor<T>(
+dev_ctx, anchor, rpn_straddle_thresh, im_height, im_width);
+Tensor inds_inside = filter_output[0];
Tensor inside_anchor = filter_output[1];
// Filter crowd gt
Tensor ncrowd_gt_boxes =
FilterCrowdGt<T>(dev_ctx, &gt_boxes_slice, &is_crowd_slice);
auto ncrowd_gt_boxes_et =
framework::EigenTensor<T, 2>::From(ncrowd_gt_boxes);
ncrowd_gt_boxes_et = ncrowd_gt_boxes_et * im_scale;
Tensor anchor_by_gt_overlap;
anchor_by_gt_overlap.mutable_data<T>(
{inside_anchor.dims()[0], ncrowd_gt_boxes.dims()[0]}, place);
BboxOverlaps<T>(inside_anchor, ncrowd_gt_boxes, &anchor_by_gt_overlap);
auto loc_score_tgtlbl_gt = SampleRpnFgBgGt<T>(
dev_ctx, anchor_by_gt_overlap, rpn_batch_size_per_im,
rpn_positive_overlap, rpn_negative_overlap, rpn_fg_fraction, engine,
use_random);
Tensor sampled_loc_index = loc_score_tgtlbl_gt[0];
Tensor sampled_score_index = loc_score_tgtlbl_gt[1];
Tensor sampled_tgtlbl = loc_score_tgtlbl_gt[2];
Tensor sampled_gt_index = loc_score_tgtlbl_gt[3];
int loc_num = sampled_loc_index.dims()[0];
int score_num = sampled_score_index.dims()[0];
// unmap to all anchor
Tensor sampled_loc_index_unmap, sampled_score_index_unmap;
sampled_loc_index_unmap.mutable_data<int>({loc_num}, place);
sampled_score_index_unmap.mutable_data<int>({score_num}, place);
Gather<int>(inds_inside.data<int>(), 1, sampled_loc_index.data<int>(),
loc_num, sampled_loc_index_unmap.data<int>());
Gather<int>(inds_inside.data<int>(), 1, sampled_score_index.data<int>(),
score_num, sampled_score_index_unmap.data<int>());
// get target bbox deltas
-if (cur_fg_num) {
-Tensor fg_gt;
-T* gt_data = fg_gt.mutable_data<T>({cur_fg_num, 4}, place);
-Tensor tgt_bbox = tgt_bbox_t->Slice(fg_num, fg_num + cur_fg_num);
-T* tgt_data = tgt_bbox.data<T>();
-Gather<T>(anchor_t->data<T>(), 4,
-reinterpret_cast<int*>(&fg_bg_gt[0][0]), cur_fg_num,
-tgt_data);
-Gather<T>(gt_bbox.data<T>(), 4, reinterpret_cast<int*>(&fg_bg_gt[2][0]),
-cur_fg_num, gt_data);
-BoxToDelta<T>(cur_fg_num, tgt_bbox, fg_gt, nullptr, false, &tgt_bbox);
-}
-loc_index += cur_fg_num;
-score_index += cur_fg_num + cur_bg_num;
-fg_num += cur_fg_num;
-bg_num += cur_bg_num;
-}
-int lbl_num = fg_num + bg_num;
-PADDLE_ENFORCE_LE(fg_num, max_num);
-PADDLE_ENFORCE_LE(lbl_num, max_num);
-tgt_bbox_t->Resize({fg_num, 4});
-loc_index_t->Resize({fg_num});
-score_index_t->Resize({lbl_num});
-auto* lbl_data = tgt_lbl_t->mutable_data<int64_t>({lbl_num, 1}, place);
-Gather<int64_t>(tmp_lbl_data, 1, score_index_t->data<int>(), lbl_num,
-lbl_data);
-}
+Tensor sampled_anchor, sampled_gt, sampled_tgt_bbox;
+auto* sampled_anchor_data =
+sampled_anchor.mutable_data<T>({loc_num, 4}, place);
+auto* sampled_gt_data = sampled_gt.mutable_data<T>({loc_num, 4}, place);
+Gather<T>(anchor->data<T>(), 4, sampled_loc_index_unmap.data<int>(),
+loc_num, sampled_anchor_data);
+Gather<T>(ncrowd_gt_boxes.data<T>(), 4, sampled_gt_index.data<int>(),
+loc_num, sampled_gt_data);
+sampled_tgt_bbox.mutable_data<T>({loc_num, 4}, place);
+BoxToDelta<T>(loc_num, sampled_anchor, sampled_gt, nullptr, false,
+&sampled_tgt_bbox);
+// Add anchor offset
+int anchor_offset = i * anchor_num;
+auto sampled_loc_index_unmap_et =
+framework::EigenTensor<int, 1>::From(sampled_loc_index_unmap);
+sampled_loc_index_unmap_et = sampled_loc_index_unmap_et + anchor_offset;
+auto sampled_score_index_unmap_et =
+framework::EigenTensor<int, 1>::From(sampled_score_index_unmap);
+sampled_score_index_unmap_et =
+sampled_score_index_unmap_et + anchor_offset;
+AppendRpns<int>(loc_index, total_loc_num, &sampled_loc_index_unmap);
+AppendRpns<int>(score_index, total_score_num, &sampled_score_index_unmap);
+AppendRpns<T>(tgt_bbox, total_loc_num * 4, &sampled_tgt_bbox);
+AppendRpns<int>(tgt_lbl, total_score_num, &sampled_tgtlbl);
+total_loc_num += loc_num;
+total_score_num += score_num;
+lod0_loc.emplace_back(total_loc_num);
+lod0_score.emplace_back(total_score_num);
private:
void ScoreAssign(const T* dist_data, const Tensor& anchor_to_gt_max,
const int row, const int col, const float pos_threshold,
const float neg_threshold, int64_t* target_label,
std::vector<int>* fg_inds, std::vector<int>* bg_inds) const {
float epsilon = 0.0001;
for (int64_t i = 0; i < row; ++i) {
const T* v = dist_data + i * col;
T max = *std::max_element(v, v + col);
for (int64_t j = 0; j < col; ++j) {
if (std::abs(max - v[j]) < epsilon) {
target_label[j] = 1;
}
}
}
// Pick the fg/bg
const T* anchor_to_gt_max_data = anchor_to_gt_max.data<T>();
for (int64_t j = 0; j < col; ++j) {
if (anchor_to_gt_max_data[j] >= pos_threshold) {
target_label[j] = 1;
} else if (anchor_to_gt_max_data[j] < neg_threshold) {
target_label[j] = 0;
}
if (target_label[j] == 1) {
fg_inds->push_back(j);
} else if (target_label[j] == 0) {
bg_inds->push_back(j);
}
} }
}
void ReservoirSampling(const int num, std::minstd_rand engine,
std::vector<int>* inds) const {
std::uniform_real_distribution<float> uniform(0, 1);
size_t len = inds->size();
if (len > static_cast<size_t>(num)) {
for (size_t i = num; i < len; ++i) {
int rng_ind = std::floor(uniform(engine) * i);
if (rng_ind < num)
std::iter_swap(inds->begin() + rng_ind, inds->begin() + i);
}
inds->resize(num);
}
}
-// std::vector<std::vector<int>> RpnTargetAssign(
-std::vector<std::vector<int>> SampleFgBgGt(
-const platform::CPUDeviceContext& ctx, const Tensor& dist,
-const float pos_threshold, const float neg_threshold,
-const int rpn_batch_size, const int fg_num, std::minstd_rand engine,
-int64_t* target_label) const {
-auto* dist_data = dist.data<T>();
-int row = dist.dims()[0];
-int col = dist.dims()[1];
-std::vector<int> fg_inds;
-std::vector<int> bg_inds;
-std::vector<int> gt_inds;
+PADDLE_ENFORCE_LE(total_loc_num, max_num);
+PADDLE_ENFORCE_LE(total_score_num, max_num);
+lod_loc.emplace_back(lod0_loc);
+loc_score.emplace_back(lod0_score);
+loc_index->set_lod(lod_loc);
+score_index->set_lod(loc_score);
+tgt_bbox->set_lod(lod_loc);
+tgt_lbl->set_lod(loc_score);
+loc_index->Resize({total_loc_num});
+score_index->Resize({total_score_num});
+tgt_bbox->Resize({total_loc_num, 4});
+tgt_lbl->Resize({total_score_num, 1});
// Calculate the max IoU between anchors and gt boxes
// Map from anchor to gt box that has highest overlap
auto place = ctx.GetPlace();
Tensor anchor_to_gt_max, anchor_to_gt_argmax;
anchor_to_gt_max.mutable_data<T>({col}, place);
int* argmax = anchor_to_gt_argmax.mutable_data<int>({col}, place);
auto x = framework::EigenMatrix<T>::From(dist);
auto x_col_max = framework::EigenVector<T>::Flatten(anchor_to_gt_max);
auto x_col_argmax =
framework::EigenVector<int>::Flatten(anchor_to_gt_argmax);
x_col_max = x.maximum(Eigen::DSizes<int, 1>(0));
x_col_argmax = x.argmax(0).template cast<int>();
// Follow the Faster RCNN's implementation
ScoreAssign(dist_data, anchor_to_gt_max, row, col, pos_threshold,
neg_threshold, target_label, &fg_inds, &bg_inds);
// Reservoir Sampling
ReservoirSampling(fg_num, engine, &fg_inds);
int fg_num2 = static_cast<int>(fg_inds.size());
int bg_num = rpn_batch_size - fg_num2;
ReservoirSampling(bg_num, engine, &bg_inds);
gt_inds.reserve(fg_num2);
for (int i = 0; i < fg_num2; ++i) {
gt_inds.emplace_back(argmax[fg_inds[i]]);
}
std::vector<std::vector<int>> fg_bg_gt;
fg_bg_gt.emplace_back(fg_inds);
fg_bg_gt.emplace_back(bg_inds);
fg_bg_gt.emplace_back(gt_inds);
return fg_bg_gt;
} }
}; };
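The sampling policy implemented by ScoreAssign/SampleFgBgGt can be summarized in a small NumPy sketch (a simplification: the "anchor with the highest IoU for each gt is always positive" rule handled by ScoreAssign's first loop is omitted; names are illustrative):

import numpy as np

def sample_fg_bg(anchor_to_gt_max, pos_thresh, neg_thresh,
                 rpn_batch_size, fg_num, rng):
    # Label anchors by their best IoU with any gt, then subsample so that
    # at most fg_num foregrounds and rpn_batch_size samples in total remain.
    labels = np.full(anchor_to_gt_max.shape[0], -1, dtype=np.int64)
    labels[anchor_to_gt_max >= pos_thresh] = 1
    labels[anchor_to_gt_max < neg_thresh] = 0
    fg_inds = np.where(labels == 1)[0]
    bg_inds = np.where(labels == 0)[0]
    if len(fg_inds) > fg_num:
        fg_inds = rng.choice(fg_inds, size=fg_num, replace=False)
    bg_num = rpn_batch_size - len(fg_inds)
    if len(bg_inds) > bg_num:
        bg_inds = rng.choice(bg_inds, size=bg_num, replace=False)
    return fg_inds, bg_inds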
...@@ -259,18 +460,22 @@ class RpnTargetAssignOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -259,18 +460,22 @@ class RpnTargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
void Make() override { void Make() override {
AddInput("Anchor", AddInput("Anchor",
"(Tensor) input anchor is a 2-D Tensor with shape [H*W*A, 4]."); "(Tensor) input anchor is a 2-D Tensor with shape [H*W*A, 4].");
AddInput("GtBox", "(LoDTensor) input groud-truth bbox with shape [K, 4]."); AddInput("GtBoxes",
AddInput( "(LoDTensor) input groud-truth bbox with shape [K, 4].");
"DistMat", AddInput("IsCrowd",
"(LoDTensor or Tensor) this input is a 2-D LoDTensor with shape " "(LoDTensor) input which indicates groud-truth is crowd.");
"[K, M]. It is pair-wise distance matrix between the entities " AddInput("ImInfo",
"represented by each row and each column. For example, assumed one " "(LoDTensor) input image information with shape [N, 3]. "
"entity is A with shape [K], another entity is B with shape [M]. The " "N is the batch size, each image information includes height, "
"DistMat[i][j] is the distance between A[i] and B[j]. The bigger " "width and scale.");
"the distance is, the better macthing the pairs are. Please note, " AddAttr<int>("rpn_batch_size_per_im",
"This tensor can contain LoD information to represent a batch of " "Total number of RPN examples per image.")
"inputs. One instance of this batch can contain different numbers of " .SetDefault(256);
"entities."); AddAttr<float>(
"rpn_straddle_thresh",
"Remove RPN anchors that go outside the image by straddle_thresh "
"pixels, "
"Set to -1 or a large value, e.g. 100000, to disable pruning anchors.");
AddAttr<float>( AddAttr<float>(
"rpn_positive_overlap", "rpn_positive_overlap",
"Minimum overlap required between an anchor and ground-truth " "Minimum overlap required between an anchor and ground-truth "
...@@ -282,20 +487,15 @@ class RpnTargetAssignOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -282,20 +487,15 @@ class RpnTargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
"box for the (anchor, gt box) pair to be a negative examples.") "box for the (anchor, gt box) pair to be a negative examples.")
.SetDefault(0.3); .SetDefault(0.3);
AddAttr<float>( AddAttr<float>(
"fg_fraction", "rpn_fg_fraction",
"Target fraction of RoI minibatch that " "Target fraction of RoI minibatch that "
"is labeled foreground (i.e. class > 0), 0-th class is background.") "is labeled foreground (i.e. class > 0), 0-th class is background.")
.SetDefault(0.25); .SetDefault(0.25);
AddAttr<int>("rpn_batch_size_per_im", AddAttr<bool>("use_random",
"Total number of RPN examples per image.") "A flag indicating whether to use a ReservoirSampling. "
.SetDefault(256); "NOTE: DO NOT set this flag to false in training. "
AddAttr<bool>("fix_seed", "Setting this flag to false is only useful in unittest.")
"A flag indicating whether to use a fixed seed to generate " .SetDefault(true);
"random mask. NOTE: DO NOT set this flag to true in "
"training. Setting this flag to true is only useful in "
"unittest.")
.SetDefault(false);
AddAttr<int>("seed", "RpnTargetAssign random seed.").SetDefault(0);
AddOutput( AddOutput(
"LocationIndex", "LocationIndex",
"(Tensor), The indexes of foreground anchors in all RPN anchors, the " "(Tensor), The indexes of foreground anchors in all RPN anchors, the "
...@@ -308,16 +508,16 @@ class RpnTargetAssignOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -308,16 +508,16 @@ class RpnTargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
"ScoreIndex is [F + B], F and B are sampled foreground and backgroud " "ScoreIndex is [F + B], F and B are sampled foreground and backgroud "
" number."); " number.");
AddOutput("TargetBBox", AddOutput("TargetBBox",
"(Tensor<int64_t>), The target bbox deltas with shape " "(Tensor), The target bbox deltas with shape "
"[F, 4], F is the sampled foreground number."); "[F, 4], F is the sampled foreground number.");
AddOutput( AddOutput(
"TargetLabel", "TargetLabel",
"(Tensor<int64_t>), The target labels of each anchor with shape " "(Tensor<int>), The target labels of each anchor with shape "
"[F + B, 1], F and B are sampled foreground and backgroud number."); "[F + B, 1], F and B are sampled foreground and backgroud number.");
AddComment(R"DOC( AddComment(R"DOC(
This operator can be, for given the IoU between the ground truth bboxes and the This operator, given a set of ground-truth bboxes and the
anchors, to assign classification and regression targets to each prediction. anchors, assigns classification and regression targets to each prediction.
The Score index and LocationIndex will be generated according to the DistMat. The ScoreIndex and LocationIndex will be generated according to the anchor-groundtruth IoU.
The rest anchors would not contribute to the RPN training loss The remaining anchors do not contribute to the RPN training loss.
ScoreIndex is composed of foreground anchor indexes(positive labels) and ScoreIndex is composed of foreground anchor indexes (positive labels) and
......
...@@ -55,15 +55,19 @@ for _OP in set(__auto__): ...@@ -55,15 +55,19 @@ for _OP in set(__auto__):
globals()[_OP] = generate_layer_fn(_OP) globals()[_OP] = generate_layer_fn(_OP)
def rpn_target_assign(loc, def rpn_target_assign(bbox_pred,
scores, cls_logits,
anchor_box, anchor_box,
anchor_var, anchor_var,
gt_box, gt_boxes,
is_crowd,
im_info,
rpn_batch_size_per_im=256, rpn_batch_size_per_im=256,
fg_fraction=0.25, rpn_straddle_thresh=0.0,
rpn_fg_fraction=0.5,
rpn_positive_overlap=0.7, rpn_positive_overlap=0.7,
rpn_negative_overlap=0.3): rpn_negative_overlap=0.3,
use_random=True):
""" """
** Target Assign Layer for region proposal network (RPN) in Faster-RCNN detection. ** ** Target Assign Layer for region proposal network (RPN) in Faster-RCNN detection. **
...@@ -83,14 +87,13 @@ def rpn_target_assign(loc, ...@@ -83,14 +87,13 @@ def rpn_target_assign(loc,
the positive anchors. the positive anchors.
Args: Args:
loc(Variable): A 3-D Tensor with shape [N, M, 4] represents the bbox_pred(Variable): A 3-D Tensor with shape [N, M, 4] represents the
predicted locations of M bounding bboxes. N is the batch size, predicted locations of M bounding bboxes. N is the batch size,
and each bounding box has four coordinate values and the layout and each bounding box has four coordinate values and the layout
is [xmin, ymin, xmax, ymax]. is [xmin, ymin, xmax, ymax].
        scores(Variable): A 3-D Tensor with shape [N, M, C] represents the cls_logits(Variable): A 3-D Tensor with shape [N, M, 1] represents the
            predicted confidence predictions. N is the batch size, C is the predicted confidence scores. N is the batch size, 1 is the
            class number, M is number of bounding boxes. For each category foreground/background sigmoid score, M is the number of bounding boxes.
there are total M scores which corresponding M bounding boxes.
anchor_box(Variable): A 2-D Tensor with shape [M, 4] holds M boxes, anchor_box(Variable): A 2-D Tensor with shape [M, 4] holds M boxes,
each box is represented as [xmin, ymin, xmax, ymax], each box is represented as [xmin, ymin, xmax, ymax],
[xmin, ymin] is the left top coordinate of the anchor box, [xmin, ymin] is the left top coordinate of the anchor box,
...@@ -99,11 +102,16 @@ def rpn_target_assign(loc, ...@@ -99,11 +102,16 @@ def rpn_target_assign(loc,
coordinate of the anchor box. coordinate of the anchor box.
anchor_var(Variable): A 2-D Tensor with shape [M,4] holds expanded anchor_var(Variable): A 2-D Tensor with shape [M,4] holds expanded
variances of anchors. variances of anchors.
        gt_box (Variable): The ground-truth bounding boxes (bboxes) are a 2D gt_boxes (Variable): The ground-truth bounding boxes (bboxes) are a 2D
LoDTensor with shape [Ng, 4], Ng is the total number of ground-truth LoDTensor with shape [Ng, 4], Ng is the total number of ground-truth
bboxes of mini-batch input. bboxes of mini-batch input.
        is_crowd (Variable): A 1-D LoDTensor which indicates whether a ground-truth box is crowd.
        im_info (Variable): A 2-D LoDTensor with shape [N, 3]. N is the batch size,
            the 3 columns are the image height, width and scale.
rpn_batch_size_per_im(int): Total number of RPN examples per image. rpn_batch_size_per_im(int): Total number of RPN examples per image.
fg_fraction(float): Target fraction of RoI minibatch that is labeled rpn_straddle_thresh(float): Remove RPN anchors that go outside the image
by straddle_thresh pixels.
rpn_fg_fraction(float): Target fraction of RoI minibatch that is labeled
foreground (i.e. class > 0), 0-th class is background. foreground (i.e. class > 0), 0-th class is background.
rpn_positive_overlap(float): Minimum overlap required between an anchor rpn_positive_overlap(float): Minimum overlap required between an anchor
and ground-truth box for the (anchor, gt box) pair to be a positive and ground-truth box for the (anchor, gt box) pair to be a positive
...@@ -129,45 +137,48 @@ def rpn_target_assign(loc, ...@@ -129,45 +137,48 @@ def rpn_target_assign(loc,
Examples: Examples:
.. code-block:: python .. code-block:: python
loc = layers.data(name='location', shape=[2, 80], bbox_pred = layers.data(name='bbox_pred', shape=[100, 4],
append_batch_size=False, dtype='float32') append_batch_size=False, dtype='float32')
scores = layers.data(name='scores', shape=[2, 40], cls_logits = layers.data(name='cls_logits', shape=[100, 1],
append_batch_size=False, dtype='float32') append_batch_size=False, dtype='float32')
anchor_box = layers.data(name='anchor_box', shape=[20, 4], anchor_box = layers.data(name='anchor_box', shape=[20, 4],
append_batch_size=False, dtype='float32') append_batch_size=False, dtype='float32')
gt_box = layers.data(name='gt_box', shape=[10, 4], gt_boxes = layers.data(name='gt_boxes', shape=[10, 4],
append_batch_size=False, dtype='float32') append_batch_size=False, dtype='float32')
loc_pred, score_pred, loc_target, score_target = loc_pred, score_pred, loc_target, score_target =
fluid.layers.detection_output(loc=location, fluid.layers.rpn_target_assign(bbox_pred=bbox_pred,
scores=scores, cls_logits=cls_logits,
anchor_box=anchor_box, anchor_box=anchor_box,
gt_box=gt_box) gt_boxes=gt_boxes)
""" """
helper = LayerHelper('rpn_target_assign', **locals()) helper = LayerHelper('rpn_target_assign', **locals())
# Compute overlaps between the prior boxes and the gt boxes overlaps
iou = iou_similarity(x=gt_box, y=anchor_box)
# Assign target label to anchors # Assign target label to anchors
loc_index = helper.create_tmp_variable(dtype='int32') loc_index = helper.create_tmp_variable(dtype='int32')
score_index = helper.create_tmp_variable(dtype='int32') score_index = helper.create_tmp_variable(dtype='int32')
target_label = helper.create_tmp_variable(dtype='int64') target_label = helper.create_tmp_variable(dtype='int32')
target_bbox = helper.create_tmp_variable(dtype=anchor_box.dtype) target_bbox = helper.create_tmp_variable(dtype=anchor_box.dtype)
helper.append_op( helper.append_op(
type="rpn_target_assign", type="rpn_target_assign",
inputs={'Anchor': anchor_box, inputs={
'GtBox': gt_box, 'Anchor': anchor_box,
'DistMat': iou}, 'GtBoxes': gt_boxes,
'IsCrowd': is_crowd,
'ImInfo': im_info
},
outputs={ outputs={
'LocationIndex': loc_index, 'LocationIndex': loc_index,
'ScoreIndex': score_index, 'ScoreIndex': score_index,
'TargetLabel': target_label, 'TargetLabel': target_label,
'TargetBBox': target_bbox, 'TargetBBox': target_bbox
}, },
attrs={ attrs={
'rpn_batch_size_per_im': rpn_batch_size_per_im, 'rpn_batch_size_per_im': rpn_batch_size_per_im,
'rpn_straddle_thresh': rpn_straddle_thresh,
'rpn_positive_overlap': rpn_positive_overlap, 'rpn_positive_overlap': rpn_positive_overlap,
'rpn_negative_overlap': rpn_negative_overlap, 'rpn_negative_overlap': rpn_negative_overlap,
'fg_fraction': fg_fraction 'rpn_fg_fraction': rpn_fg_fraction,
'use_random': use_random
}) })
loc_index.stop_gradient = True loc_index.stop_gradient = True
...@@ -175,12 +186,12 @@ def rpn_target_assign(loc, ...@@ -175,12 +186,12 @@ def rpn_target_assign(loc,
target_label.stop_gradient = True target_label.stop_gradient = True
target_bbox.stop_gradient = True target_bbox.stop_gradient = True
scores = nn.reshape(x=scores, shape=(-1, 1)) cls_logits = nn.reshape(x=cls_logits, shape=(-1, 1))
loc = nn.reshape(x=loc, shape=(-1, 4)) bbox_pred = nn.reshape(x=bbox_pred, shape=(-1, 4))
predicted_scores = nn.gather(scores, score_index) predicted_cls_logits = nn.gather(cls_logits, score_index)
predicted_location = nn.gather(loc, loc_index) predicted_bbox_pred = nn.gather(bbox_pred, loc_index)
return predicted_scores, predicted_location, target_label, target_bbox return predicted_cls_logits, predicted_bbox_pred, target_label, target_bbox
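For context, a minimal sketch (not part of this patch) of how the four return values are typically fed into an RPN loss. bbox_pred, cls_logits, anchor_box, anchor_var, gt_boxes, is_crowd and im_info are assumed to be data layers as in the docstring example, and the loss layers used are existing fluid APIs:

score_pred, loc_pred, score_tgt, loc_tgt = fluid.layers.rpn_target_assign(
    bbox_pred=bbox_pred, cls_logits=cls_logits,
    anchor_box=anchor_box, anchor_var=anchor_var,
    gt_boxes=gt_boxes, is_crowd=is_crowd, im_info=im_info)
# TargetLabel is int32 now, so cast it before the sigmoid cross-entropy.
score_tgt = fluid.layers.cast(x=score_tgt, dtype='float32')
rpn_cls_loss = fluid.layers.reduce_mean(
    fluid.layers.sigmoid_cross_entropy_with_logits(x=score_pred, label=score_tgt))
rpn_reg_loss = fluid.layers.reduce_sum(
    fluid.layers.smooth_l1(x=loc_pred, y=loc_tgt))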
def detection_output(loc, def detection_output(loc,
...@@ -1258,15 +1269,17 @@ def anchor_generator(input, ...@@ -1258,15 +1269,17 @@ def anchor_generator(input,
def generate_proposal_labels(rpn_rois, def generate_proposal_labels(rpn_rois,
gt_classes, gt_classes,
is_crowd,
gt_boxes, gt_boxes,
im_scales, im_info,
batch_size_per_im=256, batch_size_per_im=256,
fg_fraction=0.25, fg_fraction=0.25,
fg_thresh=0.25, fg_thresh=0.25,
bg_thresh_hi=0.5, bg_thresh_hi=0.5,
bg_thresh_lo=0.0, bg_thresh_lo=0.0,
bbox_reg_weights=[0.1, 0.1, 0.2, 0.2], bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
class_nums=None): class_nums=None,
use_random=True):
""" """
** Generate proposal labels Faster-RCNN ** ** Generate proposal labels Faster-RCNN **
TODO(buxingyuan): Add Document TODO(buxingyuan): Add Document
...@@ -1285,8 +1298,9 @@ def generate_proposal_labels(rpn_rois, ...@@ -1285,8 +1298,9 @@ def generate_proposal_labels(rpn_rois,
inputs={ inputs={
'RpnRois': rpn_rois, 'RpnRois': rpn_rois,
'GtClasses': gt_classes, 'GtClasses': gt_classes,
'IsCrowd': is_crowd,
'GtBoxes': gt_boxes, 'GtBoxes': gt_boxes,
'ImScales': im_scales 'ImInfo': im_info
}, },
outputs={ outputs={
'Rois': rois, 'Rois': rois,
...@@ -1302,7 +1316,8 @@ def generate_proposal_labels(rpn_rois, ...@@ -1302,7 +1316,8 @@ def generate_proposal_labels(rpn_rois,
'bg_thresh_hi': bg_thresh_hi, 'bg_thresh_hi': bg_thresh_hi,
'bg_thresh_lo': bg_thresh_lo, 'bg_thresh_lo': bg_thresh_lo,
'bbox_reg_weights': bbox_reg_weights, 'bbox_reg_weights': bbox_reg_weights,
'class_nums': class_nums 'class_nums': class_nums,
'use_random': use_random
}) })
rois.stop_gradient = True rois.stop_gradient = True
......
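Since the docstring above is still a TODO, here is a minimal usage sketch of the updated signature (values are illustrative; the inputs are assumed to be LoDTensors created the same way as in the unit test below):

rois, labels_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights = \
    fluid.layers.generate_proposal_labels(
        rpn_rois=rpn_rois,
        gt_classes=gt_classes,
        is_crowd=is_crowd,
        gt_boxes=gt_boxes,
        im_info=im_info,
        batch_size_per_im=512,
        fg_fraction=0.25,
        fg_thresh=0.5,
        bg_thresh_hi=0.5,
        bg_thresh_lo=0.0,
        bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
        class_nums=81)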
...@@ -148,51 +148,60 @@ class TestAnchorGenerator(unittest.TestCase): ...@@ -148,51 +148,60 @@ class TestAnchorGenerator(unittest.TestCase):
class TestGenerateProposalLabels(unittest.TestCase): class TestGenerateProposalLabels(unittest.TestCase):
def test_generate_proposal_labels(self): def test_generate_proposal_labels(self):
rpn_rois = layers.data( program = Program()
name='rpn_rois', with program_guard(program):
shape=[4, 4], rpn_rois = layers.data(
dtype='float32', name='rpn_rois',
lod_level=1, shape=[4, 4],
append_batch_size=False) dtype='float32',
gt_classes = layers.data( lod_level=1,
name='gt_classes', append_batch_size=False)
shape=[6], gt_classes = layers.data(
dtype='int32', name='gt_classes',
lod_level=1, shape=[6],
append_batch_size=False) dtype='int32',
gt_boxes = layers.data( lod_level=1,
name='gt_boxes', append_batch_size=False)
shape=[6, 4], is_crowd = layers.data(
dtype='float32', name='is_crowd',
lod_level=1, shape=[6],
append_batch_size=False) dtype='int32',
im_scales = layers.data( lod_level=1,
name='im_scales', append_batch_size=False)
shape=[1], gt_boxes = layers.data(
dtype='float32', name='gt_boxes',
lod_level=1, shape=[6, 4],
append_batch_size=False) dtype='float32',
class_nums = 5 lod_level=1,
rois, labels_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights = fluid.layers.generate_proposal_labels( append_batch_size=False)
rpn_rois=rpn_rois, im_info = layers.data(
gt_classes=gt_classes, name='im_info',
gt_boxes=gt_boxes, shape=[1, 3],
im_scales=im_scales, dtype='float32',
batch_size_per_im=2, lod_level=1,
fg_fraction=0.5, append_batch_size=False)
fg_thresh=0.5, class_nums = 5
bg_thresh_hi=0.5, rois, labels_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights = fluid.layers.generate_proposal_labels(
bg_thresh_lo=0.0, rpn_rois=rpn_rois,
bbox_reg_weights=[0.1, 0.1, 0.2, 0.2], gt_classes=gt_classes,
class_nums=class_nums) is_crowd=is_crowd,
assert rois.shape[1] == 4 gt_boxes=gt_boxes,
assert rois.shape[0] == labels_int32.shape[0] im_info=im_info,
assert rois.shape[0] == bbox_targets.shape[0] batch_size_per_im=2,
assert rois.shape[0] == bbox_inside_weights.shape[0] fg_fraction=0.5,
assert rois.shape[0] == bbox_outside_weights.shape[0] fg_thresh=0.5,
assert bbox_targets.shape[1] == 4 * class_nums bg_thresh_hi=0.5,
assert bbox_inside_weights.shape[1] == 4 * class_nums bg_thresh_lo=0.0,
assert bbox_outside_weights.shape[1] == 4 * class_nums bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
class_nums=class_nums)
assert rois.shape[1] == 4
assert rois.shape[0] == labels_int32.shape[0]
assert rois.shape[0] == bbox_targets.shape[0]
assert rois.shape[0] == bbox_inside_weights.shape[0]
assert rois.shape[0] == bbox_outside_weights.shape[0]
assert bbox_targets.shape[1] == 4 * class_nums
assert bbox_inside_weights.shape[1] == 4 * class_nums
assert bbox_outside_weights.shape[1] == 4 * class_nums
class TestMultiBoxHead(unittest.TestCase): class TestMultiBoxHead(unittest.TestCase):
...@@ -254,18 +263,18 @@ class TestRpnTargetAssign(unittest.TestCase): ...@@ -254,18 +263,18 @@ class TestRpnTargetAssign(unittest.TestCase):
def test_rpn_target_assign(self): def test_rpn_target_assign(self):
program = Program() program = Program()
with program_guard(program): with program_guard(program):
loc_shape = [10, 50, 4] bbox_pred_shape = [10, 50, 4]
score_shape = [10, 50, 2] cls_logits_shape = [10, 50, 2]
anchor_shape = [50, 4] anchor_shape = [50, 4]
loc = layers.data( bbox_pred = layers.data(
name='loc', name='bbox_pred',
shape=loc_shape, shape=bbox_pred_shape,
append_batch_size=False, append_batch_size=False,
dtype='float32') dtype='float32')
scores = layers.data( cls_logits = layers.data(
name='scores', name='cls_logits',
shape=score_shape, shape=cls_logits_shape,
append_batch_size=False, append_batch_size=False,
dtype='float32') dtype='float32')
anchor_box = layers.data( anchor_box = layers.data(
...@@ -278,17 +287,31 @@ class TestRpnTargetAssign(unittest.TestCase): ...@@ -278,17 +287,31 @@ class TestRpnTargetAssign(unittest.TestCase):
shape=anchor_shape, shape=anchor_shape,
append_batch_size=False, append_batch_size=False,
dtype='float32') dtype='float32')
gt_box = layers.data( gt_boxes = layers.data(
name='gt_box', shape=[4], lod_level=1, dtype='float32') name='gt_boxes', shape=[4], lod_level=1, dtype='float32')
is_crowd = layers.data(
name='is_crowd',
shape=[10],
dtype='int32',
lod_level=1,
append_batch_size=False)
im_info = layers.data(
name='im_info',
shape=[1, 3],
dtype='float32',
lod_level=1,
append_batch_size=False)
pred_scores, pred_loc, tgt_lbl, tgt_bbox = layers.rpn_target_assign( pred_scores, pred_loc, tgt_lbl, tgt_bbox = layers.rpn_target_assign(
loc=loc, bbox_pred=bbox_pred,
scores=scores, cls_logits=cls_logits,
anchor_box=anchor_box, anchor_box=anchor_box,
anchor_var=anchor_var, anchor_var=anchor_var,
gt_box=gt_box, gt_boxes=gt_boxes,
is_crowd=is_crowd,
im_info=im_info,
rpn_batch_size_per_im=256, rpn_batch_size_per_im=256,
fg_fraction=0.25, rpn_straddle_thresh=0.0,
rpn_fg_fraction=0.5,
rpn_positive_overlap=0.7, rpn_positive_overlap=0.7,
rpn_negative_overlap=0.3) rpn_negative_overlap=0.3)
......
...@@ -20,10 +20,10 @@ import paddle.fluid as fluid ...@@ -20,10 +20,10 @@ import paddle.fluid as fluid
from op_test import OpTest from op_test import OpTest
def generate_proposal_labels_in_python( def generate_proposal_labels_in_python(rpn_rois, gt_classes, is_crowd, gt_boxes,
rpn_rois, gt_classes, gt_boxes, im_scales, batch_size_per_im, im_info, batch_size_per_im, fg_fraction,
fg_fraction, fg_thresh, bg_thresh_hi, bg_thresh_lo, bbox_reg_weights, fg_thresh, bg_thresh_hi, bg_thresh_lo,
class_nums): bbox_reg_weights, class_nums):
rois = [] rois = []
labels_int32 = [] labels_int32 = []
bbox_targets = [] bbox_targets = []
...@@ -31,13 +31,13 @@ def generate_proposal_labels_in_python( ...@@ -31,13 +31,13 @@ def generate_proposal_labels_in_python(
bbox_outside_weights = [] bbox_outside_weights = []
lod = [] lod = []
assert len(rpn_rois) == len( assert len(rpn_rois) == len(
im_scales), 'batch size of rpn_rois and ground_truth is not matched' im_info), 'batch size of rpn_rois and ground_truth is not matched'
for im_i in range(len(im_scales)): for im_i in range(len(im_info)):
frcn_blobs = _sample_rois( frcn_blobs = _sample_rois(
rpn_rois[im_i], gt_classes[im_i], gt_boxes[im_i], im_scales[im_i], rpn_rois[im_i], gt_classes[im_i], is_crowd[im_i], gt_boxes[im_i],
batch_size_per_im, fg_fraction, fg_thresh, bg_thresh_hi, im_info[im_i], batch_size_per_im, fg_fraction, fg_thresh,
bg_thresh_lo, bbox_reg_weights, class_nums) bg_thresh_hi, bg_thresh_lo, bbox_reg_weights, class_nums)
lod.append(frcn_blobs['rois'].shape[0]) lod.append(frcn_blobs['rois'].shape[0])
...@@ -50,13 +50,14 @@ def generate_proposal_labels_in_python( ...@@ -50,13 +50,14 @@ def generate_proposal_labels_in_python(
return rois, labels_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights, lod return rois, labels_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights, lod
def _sample_rois(rpn_rois, gt_classes, gt_boxes, im_scale, batch_size_per_im, def _sample_rois(rpn_rois, gt_classes, is_crowd, gt_boxes, im_info,
fg_fraction, fg_thresh, bg_thresh_hi, bg_thresh_lo, batch_size_per_im, fg_fraction, fg_thresh, bg_thresh_hi,
bbox_reg_weights, class_nums): bg_thresh_lo, bbox_reg_weights, class_nums):
rois_per_image = int(batch_size_per_im) rois_per_image = int(batch_size_per_im)
fg_rois_per_im = int(np.round(fg_fraction * rois_per_image)) fg_rois_per_im = int(np.round(fg_fraction * rois_per_image))
# Roidb # Roidb
im_scale = im_info[2]
inv_im_scale = 1. / im_scale inv_im_scale = 1. / im_scale
rpn_rois = rpn_rois * inv_im_scale rpn_rois = rpn_rois * inv_im_scale
...@@ -78,6 +79,9 @@ def _sample_rois(rpn_rois, gt_classes, gt_boxes, im_scale, batch_size_per_im, ...@@ -78,6 +79,9 @@ def _sample_rois(rpn_rois, gt_classes, gt_boxes, im_scale, batch_size_per_im,
box_to_gt_ind_map[overlapped_boxes_ind] = overlaps_argmax[ box_to_gt_ind_map[overlapped_boxes_ind] = overlaps_argmax[
overlapped_boxes_ind] overlapped_boxes_ind]
crowd_ind = np.where(is_crowd)[0]
gt_overlaps[crowd_ind] = -1
max_overlaps = gt_overlaps.max(axis=1) max_overlaps = gt_overlaps.max(axis=1)
max_classes = gt_overlaps.argmax(axis=1) max_classes = gt_overlaps.argmax(axis=1)
...@@ -85,9 +89,10 @@ def _sample_rois(rpn_rois, gt_classes, gt_boxes, im_scale, batch_size_per_im, ...@@ -85,9 +89,10 @@ def _sample_rois(rpn_rois, gt_classes, gt_boxes, im_scale, batch_size_per_im,
fg_inds = np.where(max_overlaps >= fg_thresh)[0] fg_inds = np.where(max_overlaps >= fg_thresh)[0]
fg_rois_per_this_image = np.minimum(fg_rois_per_im, fg_inds.shape[0]) fg_rois_per_this_image = np.minimum(fg_rois_per_im, fg_inds.shape[0])
# Sample foreground if there are too many # Sample foreground if there are too many
if fg_inds.shape[0] > fg_rois_per_this_image: # if fg_inds.shape[0] > fg_rois_per_this_image:
fg_inds = np.random.choice( # fg_inds = np.random.choice(
fg_inds, size=fg_rois_per_this_image, replace=False) # fg_inds, size=fg_rois_per_this_image, replace=False)
fg_inds = fg_inds[:fg_rois_per_this_image]
# Background # Background
bg_inds = np.where((max_overlaps < bg_thresh_hi) & (max_overlaps >= bg_inds = np.where((max_overlaps < bg_thresh_hi) & (max_overlaps >=
...@@ -96,9 +101,10 @@ def _sample_rois(rpn_rois, gt_classes, gt_boxes, im_scale, batch_size_per_im, ...@@ -96,9 +101,10 @@ def _sample_rois(rpn_rois, gt_classes, gt_boxes, im_scale, batch_size_per_im,
bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_rois_per_this_image = np.minimum(bg_rois_per_this_image,
bg_inds.shape[0]) bg_inds.shape[0])
# Sample background if there are too many # Sample background if there are too many
if bg_inds.shape[0] > bg_rois_per_this_image: # if bg_inds.shape[0] > bg_rois_per_this_image:
bg_inds = np.random.choice( # bg_inds = np.random.choice(
bg_inds, size=bg_rois_per_this_image, replace=False) # bg_inds, size=bg_rois_per_this_image, replace=False)
bg_inds = bg_inds[:bg_rois_per_this_image]
keep_inds = np.append(fg_inds, bg_inds) keep_inds = np.append(fg_inds, bg_inds)
sampled_labels = max_classes[keep_inds] sampled_labels = max_classes[keep_inds]
...@@ -208,8 +214,9 @@ class TestGenerateProposalLabelsOp(OpTest): ...@@ -208,8 +214,9 @@ class TestGenerateProposalLabelsOp(OpTest):
self.inputs = { self.inputs = {
'RpnRois': (self.rpn_rois[0], self.rpn_rois_lod), 'RpnRois': (self.rpn_rois[0], self.rpn_rois_lod),
'GtClasses': (self.gt_classes[0], self.gts_lod), 'GtClasses': (self.gt_classes[0], self.gts_lod),
'IsCrowd': (self.is_crowd[0], self.gts_lod),
'GtBoxes': (self.gt_boxes[0], self.gts_lod), 'GtBoxes': (self.gt_boxes[0], self.gts_lod),
'ImScales': self.im_scales[0] 'ImInfo': self.im_info
} }
self.attrs = { self.attrs = {
'batch_size_per_im': self.batch_size_per_im, 'batch_size_per_im': self.batch_size_per_im,
...@@ -218,14 +225,15 @@ class TestGenerateProposalLabelsOp(OpTest): ...@@ -218,14 +225,15 @@ class TestGenerateProposalLabelsOp(OpTest):
'bg_thresh_hi': self.bg_thresh_hi, 'bg_thresh_hi': self.bg_thresh_hi,
'bg_thresh_lo': self.bg_thresh_lo, 'bg_thresh_lo': self.bg_thresh_lo,
'bbox_reg_weights': self.bbox_reg_weights, 'bbox_reg_weights': self.bbox_reg_weights,
'class_nums': self.class_nums 'class_nums': self.class_nums,
'use_random': False
} }
self.outputs = { self.outputs = {
'Rois': (self.rois[0], [self.lod]), 'Rois': (self.rois, [self.lod]),
'LabelsInt32': (self.labels_int32[0], [self.lod]), 'LabelsInt32': (self.labels_int32, [self.lod]),
'BboxTargets': (self.bbox_targets[0], [self.lod]), 'BboxTargets': (self.bbox_targets, [self.lod]),
'BboxInsideWeights': (self.bbox_inside_weights[0], [self.lod]), 'BboxInsideWeights': (self.bbox_inside_weights, [self.lod]),
'BboxOutsideWeights': (self.bbox_outside_weights[0], [self.lod]), 'BboxOutsideWeights': (self.bbox_outside_weights, [self.lod]),
} }
def test_check_output(self): def test_check_output(self):
...@@ -236,8 +244,8 @@ class TestGenerateProposalLabelsOp(OpTest): ...@@ -236,8 +244,8 @@ class TestGenerateProposalLabelsOp(OpTest):
self.set_data() self.set_data()
def init_test_params(self): def init_test_params(self):
self.batch_size_per_im = 10 self.batch_size_per_im = 512
self.fg_fraction = 1.0 self.fg_fraction = 0.25
self.fg_thresh = 0.5 self.fg_thresh = 0.5
self.bg_thresh_hi = 0.5 self.bg_thresh_hi = 0.5
self.bg_thresh_lo = 0.0 self.bg_thresh_lo = 0.0
...@@ -246,14 +254,14 @@ class TestGenerateProposalLabelsOp(OpTest): ...@@ -246,14 +254,14 @@ class TestGenerateProposalLabelsOp(OpTest):
def init_test_input(self): def init_test_input(self):
np.random.seed(0) np.random.seed(0)
image_nums = 1
gt_nums = 6 # Keep same with batch_size_per_im for unittest gt_nums = 6 # Keep same with batch_size_per_im for unittest
proposal_nums = self.batch_size_per_im - gt_nums proposal_nums = 2000 #self.batch_size_per_im - gt_nums
images_shape = [] images_shape = [[64, 64]]
self.im_scales = [] self.im_info = np.ones((len(images_shape), 3)).astype(np.float32)
for i in range(image_nums): for i in range(len(images_shape)):
images_shape.append(np.random.randint(200, size=2)) self.im_info[i, 0] = images_shape[i][0]
self.im_scales.append(np.ones((1)).astype(np.float32)) self.im_info[i, 1] = images_shape[i][1]
self.im_info[i, 2] = 0.8 #scale
self.rpn_rois, self.rpn_rois_lod = _generate_proposals(images_shape, self.rpn_rois, self.rpn_rois_lod = _generate_proposals(images_shape,
proposal_nums) proposal_nums)
...@@ -261,16 +269,23 @@ class TestGenerateProposalLabelsOp(OpTest): ...@@ -261,16 +269,23 @@ class TestGenerateProposalLabelsOp(OpTest):
images_shape, self.class_nums, gt_nums) images_shape, self.class_nums, gt_nums)
self.gt_classes = [gt['gt_classes'] for gt in ground_truth] self.gt_classes = [gt['gt_classes'] for gt in ground_truth]
self.gt_boxes = [gt['boxes'] for gt in ground_truth] self.gt_boxes = [gt['boxes'] for gt in ground_truth]
self.is_crowd = [gt['is_crowd'] for gt in ground_truth]
def init_test_output(self): def init_test_output(self):
self.rois, self.labels_int32, self.bbox_targets, \ self.rois, self.labels_int32, self.bbox_targets, \
self.bbox_inside_weights, self.bbox_outside_weights, \ self.bbox_inside_weights, self.bbox_outside_weights, \
self.lod = generate_proposal_labels_in_python( self.lod = generate_proposal_labels_in_python(
self.rpn_rois, self.gt_classes, self.gt_boxes, self.im_scales, self.rpn_rois, self.gt_classes, self.is_crowd, self.gt_boxes, self.im_info,
self.batch_size_per_im, self.fg_fraction, self.batch_size_per_im, self.fg_fraction,
self.fg_thresh, self.bg_thresh_hi, self.bg_thresh_lo, self.fg_thresh, self.bg_thresh_hi, self.bg_thresh_lo,
self.bbox_reg_weights, self.class_nums self.bbox_reg_weights, self.class_nums
) )
self.rois = np.vstack(self.rois)
self.labels_int32 = np.hstack(self.labels_int32)
self.labels_int32 = self.labels_int32[:, np.newaxis]
self.bbox_targets = np.vstack(self.bbox_targets)
self.bbox_inside_weights = np.vstack(self.bbox_inside_weights)
self.bbox_outside_weights = np.vstack(self.bbox_outside_weights)
def _generate_proposals(images_shape, proposal_nums): def _generate_proposals(images_shape, proposal_nums):
...@@ -280,7 +295,7 @@ def _generate_proposals(images_shape, proposal_nums): ...@@ -280,7 +295,7 @@ def _generate_proposals(images_shape, proposal_nums):
for i, image_shape in enumerate(images_shape): for i, image_shape in enumerate(images_shape):
proposals = _generate_boxes(image_shape, proposal_nums) proposals = _generate_boxes(image_shape, proposal_nums)
rpn_rois.append(proposals) rpn_rois.append(proposals)
num_proposals += len(proposals) num_proposals = len(proposals)
rpn_rois_lod.append(num_proposals) rpn_rois_lod.append(num_proposals)
return rpn_rois, [rpn_rois_lod] return rpn_rois, [rpn_rois_lod]
...@@ -294,7 +309,11 @@ def _generate_groundtruth(images_shape, class_nums, gt_nums): ...@@ -294,7 +309,11 @@ def _generate_groundtruth(images_shape, class_nums, gt_nums):
gt_classes = np.random.randint( gt_classes = np.random.randint(
low=1, high=class_nums, size=gt_nums).astype(np.int32) low=1, high=class_nums, size=gt_nums).astype(np.int32)
gt_boxes = _generate_boxes(image_shape, gt_nums) gt_boxes = _generate_boxes(image_shape, gt_nums)
ground_truth.append(dict(gt_classes=gt_classes, boxes=gt_boxes)) is_crowd = np.zeros((gt_nums), dtype=np.int32)
is_crowd[0] = 1
ground_truth.append(
dict(
gt_classes=gt_classes, boxes=gt_boxes, is_crowd=is_crowd))
num_gts += len(gt_classes) num_gts += len(gt_classes)
gts_lod.append(num_gts) gts_lod.append(num_gts)
return ground_truth, [gts_lod] return ground_truth, [gts_lod]
......
...@@ -114,10 +114,10 @@ def box_coder(all_anchors, bbox_deltas, variances): ...@@ -114,10 +114,10 @@ def box_coder(all_anchors, bbox_deltas, variances):
#anchor_loc: width, height, center_x, center_y #anchor_loc: width, height, center_x, center_y
anchor_loc = np.zeros_like(bbox_deltas, dtype=np.float32) anchor_loc = np.zeros_like(bbox_deltas, dtype=np.float32)
anchor_loc[:, 0] = all_anchors[:, 2] - all_anchors[:, 0] anchor_loc[:, 0] = all_anchors[:, 2] - all_anchors[:, 0] + 1
anchor_loc[:, 1] = all_anchors[:, 3] - all_anchors[:, 1] anchor_loc[:, 1] = all_anchors[:, 3] - all_anchors[:, 1] + 1
anchor_loc[:, 2] = (all_anchors[:, 2] + all_anchors[:, 0]) / 2 anchor_loc[:, 2] = all_anchors[:, 0] + 0.5 * anchor_loc[:, 0]
anchor_loc[:, 3] = (all_anchors[:, 3] + all_anchors[:, 1]) / 2 anchor_loc[:, 3] = all_anchors[:, 1] + 0.5 * anchor_loc[:, 1]
#predicted bbox: bbox_center_x, bbox_center_y, bbox_width, bbox_height #predicted bbox: bbox_center_x, bbox_center_y, bbox_width, bbox_height
pred_bbox = np.zeros_like(bbox_deltas, dtype=np.float32) pred_bbox = np.zeros_like(bbox_deltas, dtype=np.float32)
...@@ -127,23 +127,29 @@ def box_coder(all_anchors, bbox_deltas, variances): ...@@ -127,23 +127,29 @@ def box_coder(all_anchors, bbox_deltas, variances):
i, 0] + anchor_loc[i, 2] i, 0] + anchor_loc[i, 2]
pred_bbox[i, 1] = variances[i, 1] * bbox_deltas[i, 1] * anchor_loc[ pred_bbox[i, 1] = variances[i, 1] * bbox_deltas[i, 1] * anchor_loc[
i, 1] + anchor_loc[i, 3] i, 1] + anchor_loc[i, 3]
pred_bbox[i, 2] = math.exp(variances[i, 2] * pred_bbox[i, 2] = math.exp(
bbox_deltas[i, 2]) * anchor_loc[i, 0] min(variances[i, 2] * bbox_deltas[i, 2], math.log(
pred_bbox[i, 3] = math.exp(variances[i, 3] * 1000 / 16.0))) * anchor_loc[i, 0]
bbox_deltas[i, 3]) * anchor_loc[i, 1] pred_bbox[i, 3] = math.exp(
min(variances[i, 3] * bbox_deltas[i, 3], math.log(
1000 / 16.0))) * anchor_loc[i, 1]
else: else:
for i in range(bbox_deltas.shape[0]): for i in range(bbox_deltas.shape[0]):
pred_bbox[i, 0] = bbox_deltas[i, 0] * anchor_loc[i, 0] + anchor_loc[ pred_bbox[i, 0] = bbox_deltas[i, 0] * anchor_loc[i, 0] + anchor_loc[
i, 2] i, 2]
pred_bbox[i, 1] = bbox_deltas[i, 1] * anchor_loc[i, 1] + anchor_loc[ pred_bbox[i, 1] = bbox_deltas[i, 1] * anchor_loc[i, 1] + anchor_loc[
i, 3] i, 3]
pred_bbox[i, 2] = math.exp(bbox_deltas[i, 2]) * anchor_loc[i, 0] pred_bbox[i, 2] = math.exp(
pred_bbox[i, 3] = math.exp(bbox_deltas[i, 3]) * anchor_loc[i, 1] min(bbox_deltas[i, 2], math.log(1000 / 16.0))) * anchor_loc[i,
0]
pred_bbox[i, 3] = math.exp(
min(bbox_deltas[i, 3], math.log(1000 / 16.0))) * anchor_loc[i,
1]
proposals[:, 0] = pred_bbox[:, 0] - pred_bbox[:, 2] / 2 proposals[:, 0] = pred_bbox[:, 0] - pred_bbox[:, 2] / 2
proposals[:, 1] = pred_bbox[:, 1] - pred_bbox[:, 3] / 2 proposals[:, 1] = pred_bbox[:, 1] - pred_bbox[:, 3] / 2
proposals[:, 2] = pred_bbox[:, 0] + pred_bbox[:, 2] / 2 proposals[:, 2] = pred_bbox[:, 0] + pred_bbox[:, 2] / 2 - 1
proposals[:, 3] = pred_bbox[:, 1] + pred_bbox[:, 3] / 2 proposals[:, 3] = pred_bbox[:, 1] + pred_bbox[:, 3] / 2 - 1
return proposals return proposals
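The change above clamps the width/height deltas at log(1000/16) before exponentiation so that large regression outputs cannot overflow. A small vectorized NumPy sketch of the same idea (function name is illustrative, variances assumed already folded into the deltas):

import numpy as np

BBOX_XFORM_CLIP = np.log(1000.0 / 16.0)  # same clip value as above

def decode_wh(anchor_w, anchor_h, dw, dh):
    # Clamp the deltas before exponentiation, as the patched box_coder does.
    dw = np.minimum(dw, BBOX_XFORM_CLIP)
    dh = np.minimum(dh, BBOX_XFORM_CLIP)
    return np.exp(dw) * anchor_w, np.exp(dh) * anchor_h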
...@@ -170,13 +176,16 @@ def filter_boxes(boxes, min_size, im_info): ...@@ -170,13 +176,16 @@ def filter_boxes(boxes, min_size, im_info):
"""Only keep boxes with both sides >= min_size and center within the image. """Only keep boxes with both sides >= min_size and center within the image.
""" """
# Scale min_size to match image scale # Scale min_size to match image scale
min_size *= im_info[2] im_scale = im_info[2]
min_size = max(min_size, 1.0)
ws = boxes[:, 2] - boxes[:, 0] + 1 ws = boxes[:, 2] - boxes[:, 0] + 1
hs = boxes[:, 3] - boxes[:, 1] + 1 hs = boxes[:, 3] - boxes[:, 1] + 1
ws_orig_scale = (boxes[:, 2] - boxes[:, 0]) / im_scale + 1
hs_orig_scale = (boxes[:, 3] - boxes[:, 1]) / im_scale + 1
x_ctr = boxes[:, 0] + ws / 2. x_ctr = boxes[:, 0] + ws / 2.
y_ctr = boxes[:, 1] + hs / 2. y_ctr = boxes[:, 1] + hs / 2.
keep = np.where((ws >= min_size) & (hs >= min_size) & (x_ctr < im_info[1]) & keep = np.where((ws_orig_scale >= min_size) & (hs_orig_scale >= min_size) &
(y_ctr < im_info[0]))[0] (x_ctr < im_info[1]) & (y_ctr < im_info[0]))[0]
return keep return keep
...@@ -204,7 +213,7 @@ def iou(box_a, box_b): ...@@ -204,7 +213,7 @@ def iou(box_a, box_b):
xb = min(xmax_a, xmax_b) xb = min(xmax_a, xmax_b)
yb = min(ymax_a, ymax_b) yb = min(ymax_a, ymax_b)
inter_area = max(xb - xa, 0.0) * max(yb - ya, 0.0) inter_area = max(xb - xa + 1, 0.0) * max(yb - ya + 1, 0.0)
iou_ratio = inter_area / (area_a + area_b - inter_area) iou_ratio = inter_area / (area_a + area_b - inter_area)
......
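The +1 terms above switch the test to the pixel-inclusive box convention, in which a box [x1, y1, x2, y2] has width x2 - x1 + 1. A self-contained sketch of IoU under that convention, assuming the box areas elsewhere in the test use the same +1 widths and heights:

def iou_plus_one(box_a, box_b):
    # Boxes are [xmin, ymin, xmax, ymax] in the pixel-inclusive convention.
    xa, ya = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    xb, yb = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    inter = max(xb - xa + 1, 0.0) * max(yb - ya + 1, 0.0)
    area_a = (box_a[2] - box_a[0] + 1) * (box_a[3] - box_a[1] + 1)
    area_b = (box_b[2] - box_b[0] + 1) * (box_b[3] - box_b[1] + 1)
    return inter / (area_a + area_b - inter)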
...@@ -19,48 +19,58 @@ import numpy as np ...@@ -19,48 +19,58 @@ import numpy as np
import paddle.fluid.core as core import paddle.fluid.core as core
from op_test import OpTest from op_test import OpTest
from test_anchor_generator_op import anchor_generator_in_python from test_anchor_generator_op import anchor_generator_in_python
from test_generate_proposal_labels import _generate_groundtruth from test_generate_proposal_labels_op import _generate_groundtruth
from test_generate_proposal_labels import _bbox_overlaps, _box_to_delta from test_generate_proposal_labels_op import _bbox_overlaps, _box_to_delta
def rpn_target_assign(gt_anchor_iou, rpn_batch_size_per_im, def rpn_target_assign(anchor_by_gt_overlap,
rpn_positive_overlap, rpn_negative_overlap, fg_fraction): rpn_batch_size_per_im,
iou = np.transpose(gt_anchor_iou) rpn_positive_overlap,
anchor_to_gt_max = iou.max(axis=1) rpn_negative_overlap,
anchor_to_gt_argmax = iou.argmax(axis=1) rpn_fg_fraction,
use_random=True):
gt_to_anchor_argmax = iou.argmax(axis=0) anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1)
gt_to_anchor_max = iou[gt_to_anchor_argmax, np.arange(iou.shape[1])] anchor_to_gt_max = anchor_by_gt_overlap[np.arange(
anchors_with_max_overlap = np.where(iou == gt_to_anchor_max)[0] anchor_by_gt_overlap.shape[0]), anchor_to_gt_argmax]
tgt_lbl = np.ones((iou.shape[0], ), dtype=np.int32) * -1 gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(axis=0)
tgt_lbl[anchors_with_max_overlap] = 1 gt_to_anchor_max = anchor_by_gt_overlap[gt_to_anchor_argmax, np.arange(
tgt_lbl[anchor_to_gt_max >= rpn_positive_overlap] = 1 anchor_by_gt_overlap.shape[1])]
anchors_with_max_overlap = np.where(
num_fg = int(fg_fraction * rpn_batch_size_per_im) anchor_by_gt_overlap == gt_to_anchor_max)[0]
fg_inds = np.where(tgt_lbl == 1)[0]
if len(fg_inds) > num_fg: labels = np.ones((anchor_by_gt_overlap.shape[0], ), dtype=np.int32) * -1
labels[anchors_with_max_overlap] = 1
labels[anchor_to_gt_max >= rpn_positive_overlap] = 1
num_fg = int(rpn_fg_fraction * rpn_batch_size_per_im)
fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg and use_random:
disable_inds = np.random.choice( disable_inds = np.random.choice(
fg_inds, size=(len(fg_inds) - num_fg), replace=False) fg_inds, size=(len(fg_inds) - num_fg), replace=False)
tgt_lbl[disable_inds] = -1 else:
fg_inds = np.where(tgt_lbl == 1)[0] disable_inds = fg_inds[num_fg:]
labels[disable_inds] = -1
fg_inds = np.where(labels == 1)[0]
num_bg = rpn_batch_size_per_im - np.sum(tgt_lbl == 1) num_bg = rpn_batch_size_per_im - np.sum(labels == 1)
bg_inds = np.where(anchor_to_gt_max < rpn_negative_overlap)[0] bg_inds = np.where(anchor_to_gt_max < rpn_negative_overlap)[0]
tgt_lbl[bg_inds] = 0 if len(bg_inds) > num_bg and use_random:
if len(bg_inds) > num_bg:
enable_inds = bg_inds[np.random.randint(len(bg_inds), size=num_bg)] enable_inds = bg_inds[np.random.randint(len(bg_inds), size=num_bg)]
tgt_lbl[enable_inds] = 0 else:
bg_inds = np.where(tgt_lbl == 0)[0] enable_inds = bg_inds[:num_bg]
tgt_lbl[bg_inds] = 0 labels[enable_inds] = 0
fg_inds = np.where(labels == 1)[0]
bg_inds = np.where(labels == 0)[0]
loc_index = fg_inds loc_index = fg_inds
score_index = np.hstack((fg_inds, bg_inds)) score_index = np.hstack((fg_inds, bg_inds))
tgt_lbl = np.expand_dims(tgt_lbl, axis=1) labels = labels[score_index]
assert not np.any(labels == -1), "Wrong labels with -1"
gt_inds = anchor_to_gt_argmax[fg_inds] gt_inds = anchor_to_gt_argmax[fg_inds]
return loc_index, score_index, tgt_lbl, gt_inds return loc_index, score_index, labels, gt_inds
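A tiny worked example of the matching rule above (it assumes the reference rpn_target_assign defined in this test file is importable): the anchor with the highest IoU for each gt box is labeled positive even when it falls below rpn_positive_overlap, while anchors between the two thresholds stay at -1 and are ignored.

import numpy as np

overlap = np.array([[0.55, 0.10],   # anchor 0: best for gt 0
                    [0.40, 0.30],   # anchor 1: between thresholds -> ignored
                    [0.20, 0.65]],  # anchor 2: best for gt 1
                   dtype=np.float32)
loc_i, score_i, labels, gt_i = rpn_target_assign(
    overlap, rpn_batch_size_per_im=4, rpn_positive_overlap=0.7,
    rpn_negative_overlap=0.3, rpn_fg_fraction=0.5, use_random=False)
# loc_i == [0, 2], labels == [1, 1], gt_i == [0, 1]; anchor 1 is sampled
# neither as foreground nor as background.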
def get_anchor(n, c, h, w): def get_anchor(n, c, h, w):
...@@ -75,85 +85,129 @@ def get_anchor(n, c, h, w): ...@@ -75,85 +85,129 @@ def get_anchor(n, c, h, w):
return anchors return anchors
def rpn_blob(anchor, gt_boxes, iou, lod, rpn_batch_size_per_im, def rpn_target_assign_in_python(all_anchors,
rpn_positive_overlap, rpn_negative_overlap, fg_fraction): gt_boxes,
is_crowd,
loc_indexes = [] im_info,
score_indexes = [] lod,
tmp_tgt_labels = [] rpn_straddle_thresh,
tgt_bboxes = [] rpn_batch_size_per_im,
anchor_num = anchor.shape[0] rpn_positive_overlap,
rpn_negative_overlap,
rpn_fg_fraction,
use_random=True):
anchor_num = all_anchors.shape[0]
batch_size = len(lod) - 1 batch_size = len(lod) - 1
for i in range(batch_size): for i in range(batch_size):
im_height = im_info[i][0]
im_width = im_info[i][1]
im_scale = im_info[i][2]
if rpn_straddle_thresh >= 0:
# Only keep anchors inside the image by a margin of straddle_thresh
inds_inside = np.where(
(all_anchors[:, 0] >= -rpn_straddle_thresh) &
(all_anchors[:, 1] >= -rpn_straddle_thresh) & (
all_anchors[:, 2] < im_width + rpn_straddle_thresh) & (
all_anchors[:, 3] < im_height + rpn_straddle_thresh))[0]
# keep only inside anchors
inside_anchors = all_anchors[inds_inside, :]
else:
inds_inside = np.arange(all_anchors.shape[0])
inside_anchors = all_anchors
b, e = lod[i], lod[i + 1] b, e = lod[i], lod[i + 1]
iou_slice = iou[b:e, :] gt_boxes_slice = gt_boxes[b:e, :] * im_scale
bboxes_slice = gt_boxes[b:e, :] is_crowd_slice = is_crowd[b:e]
loc_idx, score_idx, tgt_lbl, gt_inds = rpn_target_assign( not_crowd_inds = np.where(is_crowd_slice == 0)[0]
iou_slice, rpn_batch_size_per_im, rpn_positive_overlap, gt_boxes_slice = gt_boxes_slice[not_crowd_inds]
rpn_negative_overlap, fg_fraction) iou = _bbox_overlaps(inside_anchors, gt_boxes_slice)
fg_bboxes = bboxes_slice[gt_inds] loc_inds, score_inds, labels, gt_inds = rpn_target_assign(
fg_anchors = anchor[loc_idx] iou, rpn_batch_size_per_im, rpn_positive_overlap,
box_deltas = _box_to_delta(fg_anchors, fg_bboxes, [1., 1., 1., 1.]) rpn_negative_overlap, rpn_fg_fraction, use_random)
# unmap to all anchor
loc_inds = inds_inside[loc_inds]
score_inds = inds_inside[score_inds]
sampled_gt = gt_boxes_slice[gt_inds]
sampled_anchor = all_anchors[loc_inds]
box_deltas = _box_to_delta(sampled_anchor, sampled_gt, [1., 1., 1., 1.])
if i == 0: if i == 0:
loc_indexes = loc_idx loc_indexes = loc_inds
score_indexes = score_idx score_indexes = score_inds
tmp_tgt_labels = tgt_lbl tgt_labels = labels
tgt_bboxes = box_deltas tgt_bboxes = box_deltas
else: else:
loc_indexes = np.concatenate( loc_indexes = np.concatenate(
[loc_indexes, loc_idx + i * anchor_num]) [loc_indexes, loc_inds + i * anchor_num])
score_indexes = np.concatenate( score_indexes = np.concatenate(
[score_indexes, score_idx + i * anchor_num]) [score_indexes, score_inds + i * anchor_num])
tmp_tgt_labels = np.concatenate([tmp_tgt_labels, tgt_lbl]) tgt_labels = np.concatenate([tgt_labels, labels])
tgt_bboxes = np.vstack([tgt_bboxes, box_deltas]) tgt_bboxes = np.vstack([tgt_bboxes, box_deltas])
tgt_labels = tmp_tgt_labels[score_indexes]
return loc_indexes, score_indexes, tgt_bboxes, tgt_labels return loc_indexes, score_indexes, tgt_bboxes, tgt_labels
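The straddle-threshold pruning above can be read in isolation as the following sketch (function name is illustrative): anchors are kept only if all four coordinates lie inside the image by a margin of rpn_straddle_thresh pixels, and a negative threshold disables the pruning.

import numpy as np

def keep_inside(all_anchors, im_height, im_width, straddle_thresh=0.0):
    if straddle_thresh < 0:
        return np.arange(all_anchors.shape[0])
    return np.where(
        (all_anchors[:, 0] >= -straddle_thresh) &
        (all_anchors[:, 1] >= -straddle_thresh) &
        (all_anchors[:, 2] < im_width + straddle_thresh) &
        (all_anchors[:, 3] < im_height + straddle_thresh))[0]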
class TestRpnTargetAssignOp(OpTest): class TestRpnTargetAssignOp(OpTest):
def setUp(self): def setUp(self):
n, c, h, w = 2, 4, 14, 14 n, c, h, w = 2, 4, 14, 14
anchor = get_anchor(n, c, h, w) all_anchors = get_anchor(n, c, h, w)
gt_num = 10 gt_num = 10
anchor = anchor.reshape(-1, 4) all_anchors = all_anchors.reshape(-1, 4)
anchor_num = anchor.shape[0] anchor_num = all_anchors.shape[0]
im_shapes = [[64, 64], [64, 64]] images_shape = [[64, 64], [64, 64]]
gt_box, lod = _generate_groundtruth(im_shapes, 3, 4) #images_shape = [[64, 64]]
bbox = np.vstack([v['boxes'] for v in gt_box]) groundtruth, lod = _generate_groundtruth(images_shape, 3, 4)
lod = [0, 4, 8]
iou = _bbox_overlaps(bbox, anchor) #lod = [0, 4]
anchor = anchor.astype('float32') im_info = np.ones((len(images_shape), 3)).astype(np.float32)
bbox = bbox.astype('float32') for i in range(len(images_shape)):
iou = iou.astype('float32') im_info[i, 0] = images_shape[i][0]
im_info[i, 1] = images_shape[i][1]
loc_index, score_index, tgt_bbox, tgt_lbl = rpn_blob( im_info[i, 2] = 0.8 #scale
anchor, bbox, iou, [0, 4, 8], 25600, 0.95, 0.03, 0.25) gt_boxes = np.vstack([v['boxes'] for v in groundtruth])
is_crowd = np.hstack([v['is_crowd'] for v in groundtruth])
all_anchors = all_anchors.astype('float32')
gt_boxes = gt_boxes.astype('float32')
rpn_straddle_thresh = 0.0
rpn_batch_size_per_im = 256
rpn_positive_overlap = 0.7
rpn_negative_overlap = 0.3
rpn_fg_fraction = 0.5
use_random = False
loc_index, score_index, tgt_bbox, labels = rpn_target_assign_in_python(
all_anchors, gt_boxes, is_crowd, im_info, lod, rpn_straddle_thresh,
rpn_batch_size_per_im, rpn_positive_overlap, rpn_negative_overlap,
rpn_fg_fraction, use_random)
labels = labels[:, np.newaxis]
self.op_type = "rpn_target_assign" self.op_type = "rpn_target_assign"
self.inputs = { self.inputs = {
'Anchor': anchor, 'Anchor': all_anchors,
'GtBox': (bbox, [[4, 4]]), 'GtBoxes': (gt_boxes, [[4, 4]]),
'DistMat': (iou, [[4, 4]]), 'IsCrowd': (is_crowd, [[4, 4]]),
'ImInfo': (im_info, [[1, 1]])
} }
self.attrs = { self.attrs = {
'rpn_batch_size_per_im': 25600, 'rpn_batch_size_per_im': rpn_batch_size_per_im,
'rpn_positive_overlap': 0.95, 'rpn_straddle_thresh': rpn_straddle_thresh,
'rpn_negative_overlap': 0.03, 'rpn_positive_overlap': rpn_positive_overlap,
'fg_fraction': 0.25, 'rpn_negative_overlap': rpn_negative_overlap,
'fix_seed': True 'rpn_fg_fraction': rpn_fg_fraction,
'use_random': use_random
} }
self.outputs = { self.outputs = {
'LocationIndex': loc_index.astype('int32'), 'LocationIndex': loc_index.astype('int32'),
'ScoreIndex': score_index.astype('int32'), 'ScoreIndex': score_index.astype('int32'),
'TargetBBox': tgt_bbox.astype('float32'), 'TargetBBox': tgt_bbox.astype('float32'),
'TargetLabel': tgt_lbl.astype('int64'), 'TargetLabel': labels.astype('int32')
} }
def test_check_output(self): def test_check_output(self):
......