提交 9e4b9d97 编写于 作者: F FDInSky 提交者: qingqing01

Update generate_proposal_labels_op to support CascadeRCNN. (#17200)

* Update generate_proposal_labels_op to support CascadeRCNN.
上级 9ed2f936
...@@ -351,7 +351,7 @@ paddle.fluid.layers.rpn_target_assign (ArgSpec(args=['bbox_pred', 'cls_logits', ...@@ -351,7 +351,7 @@ paddle.fluid.layers.rpn_target_assign (ArgSpec(args=['bbox_pred', 'cls_logits',
paddle.fluid.layers.retinanet_target_assign (ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'gt_labels', 'is_crowd', 'im_info', 'num_classes', 'positive_overlap', 'negative_overlap'], varargs=None, keywords=None, defaults=(1, 0.5, 0.4)), ('document', 'fa1d1c9d5e0111684c0db705f86a2595')) paddle.fluid.layers.retinanet_target_assign (ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'gt_labels', 'is_crowd', 'im_info', 'num_classes', 'positive_overlap', 'negative_overlap'], varargs=None, keywords=None, defaults=(1, 0.5, 0.4)), ('document', 'fa1d1c9d5e0111684c0db705f86a2595'))
paddle.fluid.layers.anchor_generator (ArgSpec(args=['input', 'anchor_sizes', 'aspect_ratios', 'variance', 'stride', 'offset', 'name'], varargs=None, keywords=None, defaults=(None, None, [0.1, 0.1, 0.2, 0.2], None, 0.5, None)), ('document', '82b2aefeeb1b706bc4afec70928a259a')) paddle.fluid.layers.anchor_generator (ArgSpec(args=['input', 'anchor_sizes', 'aspect_ratios', 'variance', 'stride', 'offset', 'name'], varargs=None, keywords=None, defaults=(None, None, [0.1, 0.1, 0.2, 0.2], None, 0.5, None)), ('document', '82b2aefeeb1b706bc4afec70928a259a'))
paddle.fluid.layers.roi_perspective_transform (ArgSpec(args=['input', 'rois', 'transformed_height', 'transformed_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1.0,)), ('document', 'd1ddc75629fedee46f82e631e22c79dc')) paddle.fluid.layers.roi_perspective_transform (ArgSpec(args=['input', 'rois', 'transformed_height', 'transformed_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1.0,)), ('document', 'd1ddc75629fedee46f82e631e22c79dc'))
paddle.fluid.layers.generate_proposal_labels (ArgSpec(args=['rpn_rois', 'gt_classes', 'is_crowd', 'gt_boxes', 'im_info', 'batch_size_per_im', 'fg_fraction', 'fg_thresh', 'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights', 'class_nums', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.25, 0.25, 0.5, 0.0, [0.1, 0.1, 0.2, 0.2], None, True)), ('document', '9c601df88b251f22e9311c52939948cd')) paddle.fluid.layers.generate_proposal_labels (ArgSpec(args=['rpn_rois', 'gt_classes', 'is_crowd', 'gt_boxes', 'im_info', 'batch_size_per_im', 'fg_fraction', 'fg_thresh', 'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights', 'class_nums', 'use_random', 'is_cls_agnostic', 'is_cascade_rcnn'], varargs=None, keywords=None, defaults=(256, 0.25, 0.25, 0.5, 0.0, [0.1, 0.1, 0.2, 0.2], None, True, False, False)), ('document', 'c0d00acf724691ff3480d4207036a722'))
paddle.fluid.layers.generate_proposals (ArgSpec(args=['scores', 'bbox_deltas', 'im_info', 'anchors', 'variances', 'pre_nms_top_n', 'post_nms_top_n', 'nms_thresh', 'min_size', 'eta', 'name'], varargs=None, keywords=None, defaults=(6000, 1000, 0.5, 0.1, 1.0, None)), ('document', 'b7d707822b6af2a586bce608040235b1')) paddle.fluid.layers.generate_proposals (ArgSpec(args=['scores', 'bbox_deltas', 'im_info', 'anchors', 'variances', 'pre_nms_top_n', 'post_nms_top_n', 'nms_thresh', 'min_size', 'eta', 'name'], varargs=None, keywords=None, defaults=(6000, 1000, 0.5, 0.1, 1.0, None)), ('document', 'b7d707822b6af2a586bce608040235b1'))
paddle.fluid.layers.generate_mask_labels (ArgSpec(args=['im_info', 'gt_classes', 'is_crowd', 'gt_segms', 'rois', 'labels_int32', 'num_classes', 'resolution'], varargs=None, keywords=None, defaults=None), ('document', 'b319b10ddaf17fb4ddf03518685a17ef')) paddle.fluid.layers.generate_mask_labels (ArgSpec(args=['im_info', 'gt_classes', 'is_crowd', 'gt_segms', 'rois', 'labels_int32', 'num_classes', 'resolution'], varargs=None, keywords=None, defaults=None), ('document', 'b319b10ddaf17fb4ddf03518685a17ef'))
paddle.fluid.layers.iou_similarity (ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '72fca4a39ccf82d5c746ae62d1868a99')) paddle.fluid.layers.iou_similarity (ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '72fca4a39ccf82d5c746ae62d1868a99'))
......
...@@ -109,17 +109,18 @@ std::vector<std::vector<int>> SampleFgBgGt( ...@@ -109,17 +109,18 @@ std::vector<std::vector<int>> SampleFgBgGt(
const platform::CPUDeviceContext& context, Tensor* iou, const platform::CPUDeviceContext& context, Tensor* iou,
const Tensor& is_crowd, const int batch_size_per_im, const Tensor& is_crowd, const int batch_size_per_im,
const float fg_fraction, const float fg_thresh, const float bg_thresh_hi, const float fg_fraction, const float fg_thresh, const float bg_thresh_hi,
const float bg_thresh_lo, std::minstd_rand engine, const bool use_random) { const float bg_thresh_lo, std::minstd_rand engine, const bool use_random,
const bool is_cascade_rcnn, const Tensor& rpn_rois) {
std::vector<int> fg_inds; std::vector<int> fg_inds;
std::vector<int> bg_inds; std::vector<int> bg_inds;
std::vector<int> gt_inds; std::vector<int> mapped_gt_inds;
int64_t gt_num = is_crowd.numel(); int64_t gt_num = is_crowd.numel();
const int* crowd_data = is_crowd.data<int>(); const int* crowd_data = is_crowd.data<int>();
T* proposal_to_gt_overlaps = iou->data<T>(); T* proposal_to_gt_overlaps = iou->data<T>();
int64_t row = iou->dims()[0]; int64_t row = iou->dims()[0];
int64_t col = iou->dims()[1]; int64_t col = iou->dims()[1];
float epsilon = 0.00001; float epsilon = 0.00001;
const T* rpn_rois_dt = rpn_rois.data<T>();
// Follow the Faster RCNN's implementation // Follow the Faster RCNN's implementation
for (int64_t i = 0; i < row; ++i) { for (int64_t i = 0; i < row; ++i) {
const T* v = proposal_to_gt_overlaps + i * col; const T* v = proposal_to_gt_overlaps + i * col;
...@@ -127,64 +128,82 @@ std::vector<std::vector<int>> SampleFgBgGt( ...@@ -127,64 +128,82 @@ std::vector<std::vector<int>> SampleFgBgGt(
if ((i < gt_num) && (crowd_data[i])) { if ((i < gt_num) && (crowd_data[i])) {
max_overlap = -1.0; max_overlap = -1.0;
} }
if (max_overlap > fg_thresh) { if (is_cascade_rcnn &&
((rpn_rois_dt[i * 4 + 2] - rpn_rois_dt[i * 4 + 0] + 1) <= 0 ||
(rpn_rois_dt[i * 4 + 3] - rpn_rois_dt[i * 4 + 1] + 1) <= 0)) {
continue;
}
if (max_overlap >= fg_thresh) {
// fg mapped gt label index
for (int64_t j = 0; j < col; ++j) { for (int64_t j = 0; j < col; ++j) {
T val = proposal_to_gt_overlaps[i * col + j]; T val = proposal_to_gt_overlaps[i * col + j];
auto diff = std::abs(max_overlap - val); auto diff = std::abs(max_overlap - val);
if (diff < epsilon) { if (diff < epsilon) {
fg_inds.emplace_back(i); fg_inds.emplace_back(i);
gt_inds.emplace_back(j); mapped_gt_inds.emplace_back(j);
break; break;
} }
} }
} else if ((max_overlap >= bg_thresh_lo) && (max_overlap < bg_thresh_hi)) {
bg_inds.emplace_back(i);
} else { } else {
if ((max_overlap >= bg_thresh_lo) && (max_overlap < bg_thresh_hi)) { continue;
bg_inds.emplace_back(i);
}
} }
} }
// Reservoir Sampling std::vector<std::vector<int>> res;
std::uniform_real_distribution<float> uniform(0, 1); if (is_cascade_rcnn) {
int fg_rois_per_im = std::floor(batch_size_per_im * fg_fraction); res.emplace_back(fg_inds);
int fg_rois_this_image = fg_inds.size(); res.emplace_back(bg_inds);
int fg_rois_per_this_image = std::min(fg_rois_per_im, fg_rois_this_image); res.emplace_back(mapped_gt_inds);
if (use_random) { } else {
const int64_t fg_size = static_cast<int64_t>(fg_inds.size()); // Reservoir Sampling
if (fg_size > fg_rois_per_this_image) { // sampling fg
for (int64_t i = fg_rois_per_this_image; i < fg_size; ++i) { std::uniform_real_distribution<float> uniform(0, 1);
int rng_ind = std::floor(uniform(engine) * i); int fg_rois_per_im = std::floor(batch_size_per_im * fg_fraction);
if (rng_ind < fg_rois_per_this_image) { int fg_rois_this_image = fg_inds.size();
std::iter_swap(fg_inds.begin() + rng_ind, fg_inds.begin() + i); int fg_rois_per_this_image = std::min(fg_rois_per_im, fg_rois_this_image);
std::iter_swap(gt_inds.begin() + rng_ind, gt_inds.begin() + i); if (use_random) {
const int64_t fg_size = static_cast<int64_t>(fg_inds.size());
if (fg_size > fg_rois_per_this_image) {
for (int64_t i = fg_rois_per_this_image; i < fg_size; ++i) {
int rng_ind = std::floor(uniform(engine) * i);
if (rng_ind < fg_rois_per_this_image) {
std::iter_swap(fg_inds.begin() + rng_ind, fg_inds.begin() + i);
std::iter_swap(mapped_gt_inds.begin() + rng_ind,
mapped_gt_inds.begin() + i);
}
} }
} }
} }
} std::vector<int> new_fg_inds(fg_inds.begin(),
std::vector<int> new_fg_inds(fg_inds.begin(), fg_inds.begin() + fg_rois_per_this_image);
fg_inds.begin() + fg_rois_per_this_image); std::vector<int> new_gt_inds(
std::vector<int> new_gt_inds(gt_inds.begin(), mapped_gt_inds.begin(),
gt_inds.begin() + fg_rois_per_this_image); mapped_gt_inds.begin() + fg_rois_per_this_image);
// sampling bg
int bg_rois_per_image = batch_size_per_im - fg_rois_per_this_image; int bg_rois_per_image = batch_size_per_im - fg_rois_per_this_image;
int bg_rois_this_image = bg_inds.size(); int bg_rois_this_image = bg_inds.size();
int bg_rois_per_this_image = std::min(bg_rois_per_image, bg_rois_this_image); int bg_rois_per_this_image =
if (use_random) { std::min(bg_rois_per_image, bg_rois_this_image);
const int64_t bg_size = static_cast<int64_t>(bg_inds.size()); if (use_random) {
if (bg_size > bg_rois_per_this_image) { const int64_t bg_size = static_cast<int64_t>(bg_inds.size());
for (int64_t i = bg_rois_per_this_image; i < bg_size; ++i) { if (bg_size > bg_rois_per_this_image) {
int rng_ind = std::floor(uniform(engine) * i); for (int64_t i = bg_rois_per_this_image; i < bg_size; ++i) {
if (rng_ind < fg_rois_per_this_image) int rng_ind = std::floor(uniform(engine) * i);
std::iter_swap(bg_inds.begin() + rng_ind, bg_inds.begin() + i); if (rng_ind < fg_rois_per_this_image)
std::iter_swap(bg_inds.begin() + rng_ind, bg_inds.begin() + i);
}
} }
} }
std::vector<int> new_bg_inds(bg_inds.begin(),
bg_inds.begin() + bg_rois_per_this_image);
//
res.emplace_back(new_fg_inds);
res.emplace_back(new_bg_inds);
res.emplace_back(new_gt_inds);
} }
std::vector<int> new_bg_inds(bg_inds.begin(),
bg_inds.begin() + bg_rois_per_this_image);
std::vector<std::vector<int>> res;
res.emplace_back(new_fg_inds);
res.emplace_back(new_bg_inds);
res.emplace_back(new_gt_inds);
return res; return res;
} }
...@@ -231,35 +250,50 @@ std::vector<Tensor> SampleRoisForOneImage( ...@@ -231,35 +250,50 @@ std::vector<Tensor> SampleRoisForOneImage(
const Tensor& im_info, const int batch_size_per_im, const float fg_fraction, const Tensor& im_info, const int batch_size_per_im, const float fg_fraction,
const float fg_thresh, const float bg_thresh_hi, const float bg_thresh_lo, const float fg_thresh, const float bg_thresh_hi, const float bg_thresh_lo,
const std::vector<float>& bbox_reg_weights, const int class_nums, const std::vector<float>& bbox_reg_weights, const int class_nums,
std::minstd_rand engine, bool use_random) { std::minstd_rand engine, bool use_random, bool is_cascade_rcnn,
bool is_cls_agnostic) {
// 1.1 map to original image
auto im_scale = im_info.data<T>()[2]; auto im_scale = im_info.data<T>()[2];
Tensor rpn_rois_slice;
Tensor rpn_rois; Tensor rpn_rois;
rpn_rois.mutable_data<T>(rpn_rois_in.dims(), context.GetPlace());
T* rpn_rois_dt = rpn_rois.data<T>(); if (is_cascade_rcnn) {
const T* rpn_rois_in_dt = rpn_rois_in.data<T>(); // slice rpn_rois from gt_box_num refer to detectron
for (int i = 0; i < rpn_rois.numel(); ++i) { rpn_rois_slice =
rpn_rois_dt[i] = rpn_rois_in_dt[i] / im_scale; rpn_rois_in.Slice(gt_boxes.dims()[0], rpn_rois_in.dims()[0]);
rpn_rois.mutable_data<T>(rpn_rois_slice.dims(), context.GetPlace());
const T* rpn_rois_in_dt = rpn_rois_slice.data<T>();
T* rpn_rois_dt = rpn_rois.data<T>();
for (int i = 0; i < rpn_rois.numel(); ++i) {
rpn_rois_dt[i] = rpn_rois_in_dt[i] / im_scale;
}
} else {
rpn_rois.mutable_data<T>(rpn_rois_in.dims(), context.GetPlace());
const T* rpn_rois_in_dt = rpn_rois_in.data<T>();
T* rpn_rois_dt = rpn_rois.data<T>();
for (int i = 0; i < rpn_rois.numel(); ++i) {
rpn_rois_dt[i] = rpn_rois_in_dt[i] / im_scale;
}
} }
Tensor boxes; // 1.2 compute overlaps
int proposals_num = gt_boxes.dims()[0] + rpn_rois.dims()[0]; int proposals_num = gt_boxes.dims()[0] + rpn_rois.dims()[0];
Tensor boxes;
boxes.mutable_data<T>({proposals_num, kBoxDim}, context.GetPlace()); boxes.mutable_data<T>({proposals_num, kBoxDim}, context.GetPlace());
Concat<T>(context, gt_boxes, rpn_rois, &boxes); Concat<T>(context, gt_boxes, rpn_rois, &boxes);
// Overlaps
Tensor proposal_to_gt_overlaps; Tensor proposal_to_gt_overlaps;
proposal_to_gt_overlaps.mutable_data<T>({proposals_num, gt_boxes.dims()[0]}, proposal_to_gt_overlaps.mutable_data<T>({proposals_num, gt_boxes.dims()[0]},
context.GetPlace()); context.GetPlace());
BboxOverlaps<T>(boxes, gt_boxes, &proposal_to_gt_overlaps); BboxOverlaps<T>(boxes, gt_boxes, &proposal_to_gt_overlaps);
// Generate proposal index // Generate proposal index
std::vector<std::vector<int>> fg_bg_gt = SampleFgBgGt<T>( std::vector<std::vector<int>> fg_bg_gt =
context, &proposal_to_gt_overlaps, is_crowd, batch_size_per_im, SampleFgBgGt<T>(context, &proposal_to_gt_overlaps, is_crowd,
fg_fraction, fg_thresh, bg_thresh_hi, bg_thresh_lo, engine, use_random); batch_size_per_im, fg_fraction, fg_thresh, bg_thresh_hi,
bg_thresh_lo, engine, use_random, is_cascade_rcnn, boxes);
std::vector<int> fg_inds = fg_bg_gt[0]; std::vector<int> fg_inds = fg_bg_gt[0];
std::vector<int> bg_inds = fg_bg_gt[1]; std::vector<int> bg_inds = fg_bg_gt[1];
std::vector<int> gt_inds = fg_bg_gt[2]; std::vector<int> mapped_gt_inds = fg_bg_gt[2]; // mapped_gt_labels
// Gather boxes and labels // Gather boxes and labels
Tensor sampled_boxes, sampled_labels, sampled_gts; Tensor sampled_boxes, sampled_labels, sampled_gts;
...@@ -271,7 +305,8 @@ std::vector<Tensor> SampleRoisForOneImage( ...@@ -271,7 +305,8 @@ std::vector<Tensor> SampleRoisForOneImage(
sampled_labels.mutable_data<int>({boxes_num}, context.GetPlace()); sampled_labels.mutable_data<int>({boxes_num}, context.GetPlace());
sampled_gts.mutable_data<T>({fg_num, kBoxDim}, context.GetPlace()); sampled_gts.mutable_data<T>({fg_num, kBoxDim}, context.GetPlace());
GatherBoxesLabels<T>(context, boxes, gt_boxes, gt_classes, fg_inds, bg_inds, GatherBoxesLabels<T>(context, boxes, gt_boxes, gt_classes, fg_inds, bg_inds,
gt_inds, &sampled_boxes, &sampled_labels, &sampled_gts); mapped_gt_inds, &sampled_boxes, &sampled_labels,
&sampled_gts);
// Compute targets // Compute targets
Tensor bbox_targets_single; Tensor bbox_targets_single;
...@@ -305,6 +340,9 @@ std::vector<Tensor> SampleRoisForOneImage( ...@@ -305,6 +340,9 @@ std::vector<Tensor> SampleRoisForOneImage(
for (int64_t i = 0; i < boxes_num; ++i) { for (int64_t i = 0; i < boxes_num; ++i) {
int label = sampled_labels_data[i]; int label = sampled_labels_data[i];
if (label > 0) { if (label > 0) {
if (is_cls_agnostic) {
label = 1;
}
int dst_idx = i * width + kBoxDim * label; int dst_idx = i * width + kBoxDim * label;
int src_idx = kBoxDim * i; int src_idx = kBoxDim * i;
bbox_targets_data[dst_idx] = bbox_targets_single_data[src_idx]; bbox_targets_data[dst_idx] = bbox_targets_single_data[src_idx];
...@@ -356,7 +394,8 @@ class GenerateProposalLabelsKernel : public framework::OpKernel<T> { ...@@ -356,7 +394,8 @@ class GenerateProposalLabelsKernel : public framework::OpKernel<T> {
context.Attr<std::vector<float>>("bbox_reg_weights"); context.Attr<std::vector<float>>("bbox_reg_weights");
int class_nums = context.Attr<int>("class_nums"); int class_nums = context.Attr<int>("class_nums");
bool use_random = context.Attr<bool>("use_random"); bool use_random = context.Attr<bool>("use_random");
bool is_cascade_rcnn = context.Attr<bool>("is_cascade_rcnn");
bool is_cls_agnostic = context.Attr<bool>("is_cls_agnostic");
PADDLE_ENFORCE_EQ(rpn_rois->lod().size(), 1UL, PADDLE_ENFORCE_EQ(rpn_rois->lod().size(), 1UL,
"GenerateProposalLabelsOp rpn_rois needs 1 level of LoD"); "GenerateProposalLabelsOp rpn_rois needs 1 level of LoD");
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
...@@ -411,7 +450,7 @@ class GenerateProposalLabelsKernel : public framework::OpKernel<T> { ...@@ -411,7 +450,7 @@ class GenerateProposalLabelsKernel : public framework::OpKernel<T> {
dev_ctx, rpn_rois_slice, gt_classes_slice, is_crowd_slice, dev_ctx, rpn_rois_slice, gt_classes_slice, is_crowd_slice,
gt_boxes_slice, im_info_slice, batch_size_per_im, fg_fraction, gt_boxes_slice, im_info_slice, batch_size_per_im, fg_fraction,
fg_thresh, bg_thresh_hi, bg_thresh_lo, bbox_reg_weights, class_nums, fg_thresh, bg_thresh_hi, bg_thresh_lo, bbox_reg_weights, class_nums,
engine, use_random); engine, use_random, is_cascade_rcnn, is_cls_agnostic);
Tensor sampled_rois = tensor_output[0]; Tensor sampled_rois = tensor_output[0];
Tensor sampled_labels_int32 = tensor_output[1]; Tensor sampled_labels_int32 = tensor_output[1];
Tensor sampled_bbox_targets = tensor_output[2]; Tensor sampled_bbox_targets = tensor_output[2];
...@@ -513,6 +552,13 @@ class GenerateProposalLabelsOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -513,6 +552,13 @@ class GenerateProposalLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
"use_random", "use_random",
"Use random sampling to choose foreground and background boxes.") "Use random sampling to choose foreground and background boxes.")
.SetDefault(true); .SetDefault(true);
AddAttr<bool>("is_cascade_rcnn",
"cascade rcnn sampling policy changed from stage 2.")
.SetDefault(false);
AddAttr<bool>(
"is_cls_agnostic",
"the box regress will only include fg and bg locations if set true ")
.SetDefault(false);
AddComment(R"DOC( AddComment(R"DOC(
This operator can be, for given the GenerateProposalOp output bounding boxes and groundtruth, This operator can be, for given the GenerateProposalOp output bounding boxes and groundtruth,
......
...@@ -2075,9 +2075,13 @@ def generate_proposal_labels(rpn_rois, ...@@ -2075,9 +2075,13 @@ def generate_proposal_labels(rpn_rois,
bg_thresh_lo=0.0, bg_thresh_lo=0.0,
bbox_reg_weights=[0.1, 0.1, 0.2, 0.2], bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
class_nums=None, class_nums=None,
use_random=True): use_random=True,
is_cls_agnostic=False,
is_cascade_rcnn=False):
""" """
** Generate Proposal Labels of Faster-RCNN ** ** Generate Proposal Labels of Faster-RCNN **
This operator can be, for given the GenerateProposalOp output bounding boxes and groundtruth, This operator can be, for given the GenerateProposalOp output bounding boxes and groundtruth,
to sample foreground boxes and background boxes, and compute loss target. to sample foreground boxes and background boxes, and compute loss target.
...@@ -2108,6 +2112,8 @@ def generate_proposal_labels(rpn_rois, ...@@ -2108,6 +2112,8 @@ def generate_proposal_labels(rpn_rois,
bbox_reg_weights(list|tuple): Box regression weights. bbox_reg_weights(list|tuple): Box regression weights.
class_nums(int): Class number. class_nums(int): Class number.
use_random(bool): Use random sampling to choose foreground and background boxes. use_random(bool): Use random sampling to choose foreground and background boxes.
is_cls_agnostic(bool): Whether bbox regression is class-agnostic. If True, regression targets only distinguish foreground from background boxes instead of keeping one slot per class.
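is_cascade_rcnn(bool): Whether to use the Cascade RCNN sampling policy. If True, the leading ground-truth rows are sliced off rpn_rois, boxes whose width or height is not positive are filtered out, and all foreground and background boxes are kept without random sampling.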
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -2166,7 +2172,9 @@ def generate_proposal_labels(rpn_rois, ...@@ -2166,7 +2172,9 @@ def generate_proposal_labels(rpn_rois,
'bg_thresh_lo': bg_thresh_lo, 'bg_thresh_lo': bg_thresh_lo,
'bbox_reg_weights': bbox_reg_weights, 'bbox_reg_weights': bbox_reg_weights,
'class_nums': class_nums, 'class_nums': class_nums,
'use_random': use_random 'use_random': use_random,
'is_cls_agnostic': is_cls_agnostic,
'is_cascade_rcnn': is_cascade_rcnn
}) })
rois.stop_gradient = True rois.stop_gradient = True
......
...@@ -22,10 +22,10 @@ import paddle.fluid as fluid ...@@ -22,10 +22,10 @@ import paddle.fluid as fluid
from op_test import OpTest from op_test import OpTest
def generate_proposal_labels_in_python(rpn_rois, gt_classes, is_crowd, gt_boxes, def generate_proposal_labels_in_python(
im_info, batch_size_per_im, fg_fraction, rpn_rois, gt_classes, is_crowd, gt_boxes, im_info, batch_size_per_im,
fg_thresh, bg_thresh_hi, bg_thresh_lo, fg_fraction, fg_thresh, bg_thresh_hi, bg_thresh_lo, bbox_reg_weights,
bbox_reg_weights, class_nums): class_nums, is_cls_agnostic, is_cascade_rcnn):
rois = [] rois = []
labels_int32 = [] labels_int32 = []
bbox_targets = [] bbox_targets = []
...@@ -36,13 +36,12 @@ def generate_proposal_labels_in_python(rpn_rois, gt_classes, is_crowd, gt_boxes, ...@@ -36,13 +36,12 @@ def generate_proposal_labels_in_python(rpn_rois, gt_classes, is_crowd, gt_boxes,
im_info), 'batch size of rpn_rois and ground_truth is not matched' im_info), 'batch size of rpn_rois and ground_truth is not matched'
for im_i in range(len(im_info)): for im_i in range(len(im_info)):
frcn_blobs = _sample_rois( frcn_blobs = _sample_rois(rpn_rois[im_i], gt_classes[im_i],
rpn_rois[im_i], gt_classes[im_i], is_crowd[im_i], gt_boxes[im_i], is_crowd[im_i], gt_boxes[im_i], im_info[im_i],
im_info[im_i], batch_size_per_im, fg_fraction, fg_thresh, batch_size_per_im, fg_fraction, fg_thresh,
bg_thresh_hi, bg_thresh_lo, bbox_reg_weights, class_nums) bg_thresh_hi, bg_thresh_lo, bbox_reg_weights,
class_nums, is_cls_agnostic, is_cascade_rcnn)
lod.append(frcn_blobs['rois'].shape[0]) lod.append(frcn_blobs['rois'].shape[0])
rois.append(frcn_blobs['rois']) rois.append(frcn_blobs['rois'])
labels_int32.append(frcn_blobs['labels_int32']) labels_int32.append(frcn_blobs['labels_int32'])
bbox_targets.append(frcn_blobs['bbox_targets']) bbox_targets.append(frcn_blobs['bbox_targets'])
...@@ -54,7 +53,8 @@ def generate_proposal_labels_in_python(rpn_rois, gt_classes, is_crowd, gt_boxes, ...@@ -54,7 +53,8 @@ def generate_proposal_labels_in_python(rpn_rois, gt_classes, is_crowd, gt_boxes,
def _sample_rois(rpn_rois, gt_classes, is_crowd, gt_boxes, im_info, def _sample_rois(rpn_rois, gt_classes, is_crowd, gt_boxes, im_info,
batch_size_per_im, fg_fraction, fg_thresh, bg_thresh_hi, batch_size_per_im, fg_fraction, fg_thresh, bg_thresh_hi,
bg_thresh_lo, bbox_reg_weights, class_nums): bg_thresh_lo, bbox_reg_weights, class_nums, is_cls_agnostic,
is_cascade_rcnn):
rois_per_image = int(batch_size_per_im) rois_per_image = int(batch_size_per_im)
fg_rois_per_im = int(np.round(fg_fraction * rois_per_image)) fg_rois_per_im = int(np.round(fg_fraction * rois_per_image))
...@@ -62,7 +62,8 @@ def _sample_rois(rpn_rois, gt_classes, is_crowd, gt_boxes, im_info, ...@@ -62,7 +62,8 @@ def _sample_rois(rpn_rois, gt_classes, is_crowd, gt_boxes, im_info,
im_scale = im_info[2] im_scale = im_info[2]
inv_im_scale = 1. / im_scale inv_im_scale = 1. / im_scale
rpn_rois = rpn_rois * inv_im_scale rpn_rois = rpn_rois * inv_im_scale
if is_cascade_rcnn:
rpn_rois = rpn_rois[gt_boxes.shape[0]:, :]
boxes = np.vstack([gt_boxes, rpn_rois]) boxes = np.vstack([gt_boxes, rpn_rois])
gt_overlaps = np.zeros((boxes.shape[0], class_nums)) gt_overlaps = np.zeros((boxes.shape[0], class_nums))
box_to_gt_ind_map = np.zeros((boxes.shape[0]), dtype=np.int32) box_to_gt_ind_map = np.zeros((boxes.shape[0]), dtype=np.int32)
...@@ -87,26 +88,37 @@ def _sample_rois(rpn_rois, gt_classes, is_crowd, gt_boxes, im_info, ...@@ -87,26 +88,37 @@ def _sample_rois(rpn_rois, gt_classes, is_crowd, gt_boxes, im_info,
max_overlaps = gt_overlaps.max(axis=1) max_overlaps = gt_overlaps.max(axis=1)
max_classes = gt_overlaps.argmax(axis=1) max_classes = gt_overlaps.argmax(axis=1)
# Foreground # Cascade RCNN Decode Filter
fg_inds = np.where(max_overlaps >= fg_thresh)[0] if is_cascade_rcnn:
fg_rois_per_this_image = np.minimum(fg_rois_per_im, fg_inds.shape[0]) ws = boxes[:, 2] - boxes[:, 0] + 1
# Sample foreground if there are too many hs = boxes[:, 3] - boxes[:, 1] + 1
# if fg_inds.shape[0] > fg_rois_per_this_image: keep = np.where((ws > 0) & (hs > 0))[0]
# fg_inds = np.random.choice( boxes = boxes[keep]
# fg_inds, size=fg_rois_per_this_image, replace=False) fg_inds = np.where(max_overlaps >= fg_thresh)[0]
fg_inds = fg_inds[:fg_rois_per_this_image] bg_inds = np.where((max_overlaps < bg_thresh_hi) & (max_overlaps >=
bg_thresh_lo))[0]
# Background fg_rois_per_this_image = fg_inds.shape[0]
bg_inds = np.where((max_overlaps < bg_thresh_hi) & (max_overlaps >= bg_rois_per_this_image = bg_inds.shape[0]
bg_thresh_lo))[0] else:
bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image # Foreground
bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, fg_inds = np.where(max_overlaps >= fg_thresh)[0]
bg_inds.shape[0]) fg_rois_per_this_image = np.minimum(fg_rois_per_im, fg_inds.shape[0])
# Sample background if there are too many # Sample foreground if there are too many
# if bg_inds.shape[0] > bg_rois_per_this_image: if fg_inds.shape[0] > fg_rois_per_this_image:
# bg_inds = np.random.choice( fg_inds = np.random.choice(
# bg_inds, size=bg_rois_per_this_image, replace=False) fg_inds, size=fg_rois_per_this_image, replace=False)
bg_inds = bg_inds[:bg_rois_per_this_image] fg_inds = fg_inds[:fg_rois_per_this_image]
# Background
bg_inds = np.where((max_overlaps < bg_thresh_hi) & (max_overlaps >=
bg_thresh_lo))[0]
bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
bg_rois_per_this_image = np.minimum(bg_rois_per_this_image,
bg_inds.shape[0])
# Sample background if there are too many
if bg_inds.shape[0] > bg_rois_per_this_image:
bg_inds = np.random.choice(
bg_inds, size=bg_rois_per_this_image, replace=False)
bg_inds = bg_inds[:bg_rois_per_this_image]
keep_inds = np.append(fg_inds, bg_inds) keep_inds = np.append(fg_inds, bg_inds)
sampled_labels = max_classes[keep_inds] sampled_labels = max_classes[keep_inds]
...@@ -114,14 +126,12 @@ def _sample_rois(rpn_rois, gt_classes, is_crowd, gt_boxes, im_info, ...@@ -114,14 +126,12 @@ def _sample_rois(rpn_rois, gt_classes, is_crowd, gt_boxes, im_info,
sampled_boxes = boxes[keep_inds] sampled_boxes = boxes[keep_inds]
sampled_gts = gt_boxes[box_to_gt_ind_map[keep_inds]] sampled_gts = gt_boxes[box_to_gt_ind_map[keep_inds]]
sampled_gts[fg_rois_per_this_image:, :] = gt_boxes[0] sampled_gts[fg_rois_per_this_image:, :] = gt_boxes[0]
bbox_label_targets = _compute_targets(sampled_boxes, sampled_gts, bbox_label_targets = _compute_targets(sampled_boxes, sampled_gts,
sampled_labels, bbox_reg_weights) sampled_labels, bbox_reg_weights)
bbox_targets, bbox_inside_weights = _expand_bbox_targets(bbox_label_targets, bbox_targets, bbox_inside_weights = _expand_bbox_targets(
class_nums) bbox_label_targets, class_nums, is_cls_agnostic)
bbox_outside_weights = np.array( bbox_outside_weights = np.array(
bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype) bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype)
# Scale rois # Scale rois
sampled_rois = sampled_boxes * im_scale sampled_rois = sampled_boxes * im_scale
...@@ -192,19 +202,22 @@ def _box_to_delta(ex_boxes, gt_boxes, weights): ...@@ -192,19 +202,22 @@ def _box_to_delta(ex_boxes, gt_boxes, weights):
return targets return targets
def _expand_bbox_targets(bbox_targets_input, class_nums): def _expand_bbox_targets(bbox_targets_input, class_nums, is_cls_agnostic):
class_labels = bbox_targets_input[:, 0] class_labels = bbox_targets_input[:, 0]
fg_inds = np.where(class_labels > 0)[0] fg_inds = np.where(class_labels > 0)[0]
#if is_cls_agnostic:
bbox_targets = np.zeros((class_labels.shape[0], 4 * class_nums)) # class_labels = [1 if ll > 0 else 0 for ll in class_labels]
# class_labels = np.array(class_labels, dtype=np.int32)
# class_nums = 2
bbox_targets = np.zeros((class_labels.shape[0], 4 * class_nums
if not is_cls_agnostic else 4 * 2))
bbox_inside_weights = np.zeros(bbox_targets.shape) bbox_inside_weights = np.zeros(bbox_targets.shape)
for ind in fg_inds: for ind in fg_inds:
class_label = int(class_labels[ind]) class_label = int(class_labels[ind]) if not is_cls_agnostic else 1
start_ind = class_label * 4 start_ind = class_label * 4
end_ind = class_label * 4 + 4 end_ind = class_label * 4 + 4
bbox_targets[ind, start_ind:end_ind] = bbox_targets_input[ind, 1:] bbox_targets[ind, start_ind:end_ind] = bbox_targets_input[ind, 1:]
bbox_inside_weights[ind, start_ind:end_ind] = (1.0, 1.0, 1.0, 1.0) bbox_inside_weights[ind, start_ind:end_ind] = (1.0, 1.0, 1.0, 1.0)
return bbox_targets, bbox_inside_weights return bbox_targets, bbox_inside_weights
...@@ -228,7 +241,9 @@ class TestGenerateProposalLabelsOp(OpTest): ...@@ -228,7 +241,9 @@ class TestGenerateProposalLabelsOp(OpTest):
'bg_thresh_lo': self.bg_thresh_lo, 'bg_thresh_lo': self.bg_thresh_lo,
'bbox_reg_weights': self.bbox_reg_weights, 'bbox_reg_weights': self.bbox_reg_weights,
'class_nums': self.class_nums, 'class_nums': self.class_nums,
'use_random': False 'use_random': False,
'is_cls_agnostic': self.is_cls_agnostic,
'is_cascade_rcnn': self.is_cascade_rcnn
} }
self.outputs = { self.outputs = {
'Rois': (self.rois, [self.lod]), 'Rois': (self.rois, [self.lod]),
...@@ -252,12 +267,15 @@ class TestGenerateProposalLabelsOp(OpTest): ...@@ -252,12 +267,15 @@ class TestGenerateProposalLabelsOp(OpTest):
self.bg_thresh_hi = 0.5 self.bg_thresh_hi = 0.5
self.bg_thresh_lo = 0.0 self.bg_thresh_lo = 0.0
self.bbox_reg_weights = [0.1, 0.1, 0.2, 0.2] self.bbox_reg_weights = [0.1, 0.1, 0.2, 0.2]
self.class_nums = 81 #self.class_nums = 81
self.is_cls_agnostic = False #True
self.is_cascade_rcnn = True
self.class_nums = 2 if self.is_cls_agnostic else 81
def init_test_input(self): def init_test_input(self):
np.random.seed(0) np.random.seed(0)
gt_nums = 6 # Keep same with batch_size_per_im for unittest gt_nums = 6 # Keep same with batch_size_per_im for unittest
proposal_nums = 2000 #self.batch_size_per_im - gt_nums proposal_nums = 2000 if not self.is_cascade_rcnn else 512 #self.batch_size_per_im - gt_nums
images_shape = [[64, 64]] images_shape = [[64, 64]]
self.im_info = np.ones((len(images_shape), 3)).astype(np.float32) self.im_info = np.ones((len(images_shape), 3)).astype(np.float32)
for i in range(len(images_shape)): for i in range(len(images_shape)):
...@@ -280,7 +298,8 @@ class TestGenerateProposalLabelsOp(OpTest): ...@@ -280,7 +298,8 @@ class TestGenerateProposalLabelsOp(OpTest):
self.rpn_rois, self.gt_classes, self.is_crowd, self.gt_boxes, self.im_info, self.rpn_rois, self.gt_classes, self.is_crowd, self.gt_boxes, self.im_info,
self.batch_size_per_im, self.fg_fraction, self.batch_size_per_im, self.fg_fraction,
self.fg_thresh, self.bg_thresh_hi, self.bg_thresh_lo, self.fg_thresh, self.bg_thresh_hi, self.bg_thresh_lo,
self.bbox_reg_weights, self.class_nums self.bbox_reg_weights, self.class_nums,
self.is_cls_agnostic, self.is_cascade_rcnn
) )
self.rois = np.vstack(self.rois) self.rois = np.vstack(self.rois)
self.labels_int32 = np.hstack(self.labels_int32) self.labels_int32 = np.hstack(self.labels_int32)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册