Unverified · Commit 97dbf281 · Authored by Guanghua Yu · Committed via GitHub

[Cherry-pick] add offset parameter in roi_align, generate_proposals, etc. ops (#31030)

* add offset parameter in roi_align op

* fix compatibility of ops

* fix op test & cpu kernel

* fix JaccardOverlap in nms
Parent 6ec5f0fb
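The patch threads one boolean, pixel_offset, through the detection box utilities: when it is true (the default) widths, heights, areas and IoU keep the legacy end-point convention w = xmax - xmin + 1, and when it is false they use plain continuous coordinates w = xmax - xmin. A minimal NumPy sketch of the two conventions, mirroring the reference helpers updated in the Python tests below (function names here are illustrative, not Paddle API):

import numpy as np

def box_wh(box, pixel_offset=True):
    # With the pixel-offset convention a box [x1, y1, x2, y2] spans
    # x2 - x1 + 1 pixels per side; without it the width is just x2 - x1.
    offset = 1 if pixel_offset else 0
    return box[2] - box[0] + offset, box[3] - box[1] + offset

def box_iou(a, b, pixel_offset=True):
    offset = 1 if pixel_offset else 0
    area_a = (a[2] - a[0] + offset) * (a[3] - a[1] + offset)
    area_b = (b[2] - b[0] + offset) * (b[3] - b[1] + offset)
    iw = max(min(a[2], b[2]) - max(a[0], b[0]) + offset, 0.0)
    ih = max(min(a[3], b[3]) - max(a[1], b[1]) + offset, 0.0)
    inter = iw * ih
    return inter / (area_a + area_b - inter)

box = np.array([0.0, 0.0, 3.0, 3.0])
print(box_wh(box))                      # (4.0, 4.0) with the offset
print(box_wh(box, pixel_offset=False))  # (3.0, 3.0) without it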
@@ -77,17 +77,20 @@ struct BoxDecodeAndClipFunctor {
   const T *var;
   const int *index;
   const T *im_info;
+  const bool pixel_offset;
   T *proposals;

   BoxDecodeAndClipFunctor(const T *anchor, const T *deltas, const T *var,
-                          const int *index, const T *im_info, T *proposals)
+                          const int *index, const T *im_info, T *proposals,
+                          bool pixel_offset = true)
       : anchor(anchor),
        deltas(deltas),
        var(var),
        index(index),
        im_info(im_info),
-        proposals(proposals) {}
+        proposals(proposals),
+        pixel_offset(pixel_offset) {}

   T bbox_clip_default{static_cast<T>(kBBoxClipDefault)};

@@ -98,8 +101,9 @@ struct BoxDecodeAndClipFunctor {
     T axmax = anchor[k + 2];
     T aymax = anchor[k + 3];

-    T w = axmax - axmin + 1.0;
-    T h = aymax - aymin + 1.0;
+    T offset = pixel_offset ? static_cast<T>(1.0) : 0;
+    T w = axmax - axmin + offset;
+    T h = aymax - aymin + offset;
     T cx = axmin + 0.5 * w;
     T cy = aymin + 0.5 * h;

@@ -123,13 +127,13 @@ struct BoxDecodeAndClipFunctor {
     T oxmin = d_cx - d_w * 0.5;
     T oymin = d_cy - d_h * 0.5;
-    T oxmax = d_cx + d_w * 0.5 - 1.;
-    T oymax = d_cy + d_h * 0.5 - 1.;
+    T oxmax = d_cx + d_w * 0.5 - offset;
+    T oymax = d_cy + d_h * 0.5 - offset;

-    proposals[i * 4] = Max(Min(oxmin, im_info[1] - 1.), 0.);
-    proposals[i * 4 + 1] = Max(Min(oymin, im_info[0] - 1.), 0.);
-    proposals[i * 4 + 2] = Max(Min(oxmax, im_info[1] - 1.), 0.);
-    proposals[i * 4 + 3] = Max(Min(oymax, im_info[0] - 1.), 0.);
+    proposals[i * 4] = Max(Min(oxmin, im_info[1] - offset), 0.);
+    proposals[i * 4 + 1] = Max(Min(oymin, im_info[0] - offset), 0.);
+    proposals[i * 4 + 2] = Max(Min(oxmax, im_info[1] - offset), 0.);
+    proposals[i * 4 + 3] = Max(Min(oymax, im_info[0] - offset), 0.);
   }

   __device__ __forceinline__ T Min(T a, T b) const { return a > b ? b : a; }

@@ -141,7 +145,8 @@ template <typename T, int BlockSize>
 static __global__ void FilterBBoxes(const T *bboxes, const T *im_info,
                                     const T min_size, const int num,
                                     int *keep_num, int *keep,
-                                    bool is_scale = true) {
+                                    bool is_scale = true,
+                                    bool pixel_offset = true) {
   T im_h = im_info[0];
   T im_w = im_info[1];

@@ -157,19 +162,25 @@ static __global__ void FilterBBoxes(const T *bboxes, const T *im_info,
     T ymin = bboxes[k + 1];
     T xmax = bboxes[k + 2];
     T ymax = bboxes[k + 3];
-    T w = xmax - xmin + 1.0;
-    T h = ymax - ymin + 1.0;
-    T cx = xmin + w / 2.;
-    T cy = ymin + h / 2.;
-    if (is_scale) {
-      w = (xmax - xmin) / im_info[2] + 1.;
-      h = (ymax - ymin) / im_info[2] + 1.;
-    }
-    if (w >= min_size && h >= min_size && cx <= im_w && cy <= im_h) {
-      keep_index[threadIdx.x] = i;
+    T offset = pixel_offset ? static_cast<T>(1.0) : 0;
+    T w = xmax - xmin + offset;
+    T h = ymax - ymin + offset;
+    if (pixel_offset) {
+      T cx = xmin + w / 2.;
+      T cy = ymin + h / 2.;
+      if (is_scale) {
+        w = (xmax - xmin) / im_info[2] + 1.;
+        h = (ymax - ymin) / im_info[2] + 1.;
+      }
+      if (w >= min_size && h >= min_size && cx <= im_w && cy <= im_h) {
+        keep_index[threadIdx.x] = i;
+      }
+    } else {
+      if (w >= min_size && h >= min_size) {
+        keep_index[threadIdx.x] = i;
+      }
     }
   }
   __syncthreads();
   if (threadIdx.x == 0) {

@@ -187,19 +198,23 @@ static __global__ void FilterBBoxes(const T *bboxes, const T *im_info,
   }
 }

-static __device__ float IoU(const float *a, const float *b) {
+static __device__ float IoU(const float *a, const float *b,
+                            const bool pixel_offset = true) {
+  float offset = pixel_offset ? static_cast<float>(1.0) : 0;
   float left = max(a[0], b[0]), right = min(a[2], b[2]);
   float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
-  float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
+  float width = max(right - left + offset, 0.f),
+        height = max(bottom - top + offset, 0.f);
   float inter_s = width * height;
-  float s_a = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
-  float s_b = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
+  float s_a = (a[2] - a[0] + offset) * (a[3] - a[1] + offset);
+  float s_b = (b[2] - b[0] + offset) * (b[3] - b[1] + offset);
   return inter_s / (s_a + s_b - inter_s);
 }

 static __global__ void NMSKernel(const int n_boxes,
                                  const float nms_overlap_thresh,
-                                 const float *dev_boxes, uint64_t *dev_mask) {
+                                 const float *dev_boxes, uint64_t *dev_mask,
+                                 bool pixel_offset = true) {
   const int row_start = blockIdx.y;
   const int col_start = blockIdx.x;

@@ -231,7 +246,8 @@ static __global__ void NMSKernel(const int n_boxes,
       start = threadIdx.x + 1;
     }
     for (i = start; i < col_size; i++) {
-      if (IoU(cur_box, block_boxes + i * 4) > nms_overlap_thresh) {
+      if (IoU(cur_box, block_boxes + i * 4, pixel_offset) >
+          nms_overlap_thresh) {
         t |= 1ULL << i;
       }
     }

@@ -243,7 +259,7 @@ static __global__ void NMSKernel(const int n_boxes,
 template <typename T>
 static void NMS(const platform::CUDADeviceContext &ctx, const Tensor &proposals,
                 const Tensor &sorted_indices, const T nms_threshold,
-                Tensor *keep_out) {
+                Tensor *keep_out, bool pixel_offset = true) {
   int boxes_num = proposals.dims()[0];
   const int col_blocks = DIVUP(boxes_num, kThreadsPerBlock);
   dim3 blocks(DIVUP(boxes_num, kThreadsPerBlock),

@@ -255,7 +271,8 @@ static void NMS(const platform::CUDADeviceContext &ctx, const Tensor &proposals,
   framework::Vector<uint64_t> mask(boxes_num * col_blocks);
   NMSKernel<<<blocks, threads>>>(boxes_num, nms_threshold, boxes,
                                  mask.CUDAMutableData(BOOST_GET_CONST(
-                                     platform::CUDAPlace, ctx.GetPlace())));
+                                     platform::CUDAPlace, ctx.GetPlace())),
+                                 pixel_offset);
   std::vector<uint64_t> remv(col_blocks);
   memset(&remv[0], 0, sizeof(uint64_t) * col_blocks);
......
@@ -31,7 +31,7 @@ struct RangeInitFunctor {
 };

 template <typename T>
-inline HOSTDEVICE T RoIArea(const T* box, bool normalized) {
+inline HOSTDEVICE T RoIArea(const T* box, bool pixel_offset = true) {
   if (box[2] < box[0] || box[3] < box[1]) {
     // If coordinate values are is invalid
     // (e.g. xmax < xmin or ymax < ymin), return 0.

@@ -39,11 +39,11 @@ inline HOSTDEVICE T RoIArea(const T* box, bool normalized) {
   } else {
     const T w = box[2] - box[0];
     const T h = box[3] - box[1];
-    if (normalized) {
-      return w * h;
-    } else {
+    if (pixel_offset) {
       // If coordinate values are not within range [0, 1].
       return (w + 1) * (h + 1);
+    } else {
+      return w * h;
     }
   }
 }

@@ -157,10 +157,12 @@ template <class T>
 void ClipTiledBoxes(const platform::DeviceContext& ctx,
                     const framework::Tensor& im_info,
                     const framework::Tensor& input_boxes,
-                    framework::Tensor* out, bool is_scale = true) {
+                    framework::Tensor* out, bool is_scale = true,
+                    bool pixel_offset = true) {
   T* out_data = out->mutable_data<T>(ctx.GetPlace());
   const T* im_info_data = im_info.data<T>();
   const T* input_boxes_data = input_boxes.data<T>();
+  T offset = pixel_offset ? static_cast<T>(1.0) : 0;
   T zero(0);
   T im_w =
       is_scale ? round(im_info_data[1] / im_info_data[2]) : im_info_data[1];

@@ -168,13 +170,17 @@ void ClipTiledBoxes(const platform::DeviceContext& ctx,
       is_scale ? round(im_info_data[0] / im_info_data[2]) : im_info_data[0];
   for (int64_t i = 0; i < input_boxes.numel(); ++i) {
     if (i % 4 == 0) {
-      out_data[i] = std::max(std::min(input_boxes_data[i], im_w - 1), zero);
+      out_data[i] =
+          std::max(std::min(input_boxes_data[i], im_w - offset), zero);
     } else if (i % 4 == 1) {
-      out_data[i] = std::max(std::min(input_boxes_data[i], im_h - 1), zero);
+      out_data[i] =
+          std::max(std::min(input_boxes_data[i], im_h - offset), zero);
     } else if (i % 4 == 2) {
-      out_data[i] = std::max(std::min(input_boxes_data[i], im_w - 1), zero);
+      out_data[i] =
+          std::max(std::min(input_boxes_data[i], im_w - offset), zero);
     } else {
-      out_data[i] = std::max(std::min(input_boxes_data[i], im_h - 1), zero);
+      out_data[i] =
+          std::max(std::min(input_boxes_data[i], im_h - offset), zero);
     }
   }
 }

@@ -184,29 +190,35 @@ template <class T>
 void FilterBoxes(const platform::DeviceContext& ctx,
                  const framework::Tensor* boxes, float min_size,
                  const framework::Tensor& im_info, bool is_scale,
-                 framework::Tensor* keep) {
+                 framework::Tensor* keep, bool pixel_offset = true) {
   const T* im_info_data = im_info.data<T>();
   const T* boxes_data = boxes->data<T>();
   keep->Resize({boxes->dims()[0]});
   min_size = std::max(min_size, 1.0f);
   int* keep_data = keep->mutable_data<int>(ctx.GetPlace());
+  T offset = pixel_offset ? static_cast<T>(1.0) : 0;

   int keep_len = 0;
   for (int i = 0; i < boxes->dims()[0]; ++i) {
-    T ws = boxes_data[4 * i + 2] - boxes_data[4 * i] + 1;
-    T hs = boxes_data[4 * i + 3] - boxes_data[4 * i + 1] + 1;
-    T x_ctr = boxes_data[4 * i] + ws / 2;
-    T y_ctr = boxes_data[4 * i + 1] + hs / 2;
-    if (is_scale) {
-      ws = (boxes_data[4 * i + 2] - boxes_data[4 * i]) / im_info_data[2] + 1;
-      hs =
-          (boxes_data[4 * i + 3] - boxes_data[4 * i + 1]) / im_info_data[2] + 1;
-    }
-    if (ws >= min_size && hs >= min_size && x_ctr <= im_info_data[1] &&
-        y_ctr <= im_info_data[0]) {
-      keep_data[keep_len++] = i;
+    T ws = boxes_data[4 * i + 2] - boxes_data[4 * i] + offset;
+    T hs = boxes_data[4 * i + 3] - boxes_data[4 * i + 1] + offset;
+    if (pixel_offset) {
+      T x_ctr = boxes_data[4 * i] + ws / 2;
+      T y_ctr = boxes_data[4 * i + 1] + hs / 2;
+      if (is_scale) {
+        ws = (boxes_data[4 * i + 2] - boxes_data[4 * i]) / im_info_data[2] + 1;
+        hs = (boxes_data[4 * i + 3] - boxes_data[4 * i + 1]) / im_info_data[2] +
+             1;
+      }
+      if (ws >= min_size && hs >= min_size && x_ctr <= im_info_data[1] &&
+          y_ctr <= im_info_data[0]) {
+        keep_data[keep_len++] = i;
+      }
+    } else {
+      if (ws >= min_size && hs >= min_size) {
+        keep_data[keep_len++] = i;
+      }
     }
   }
   keep->Resize({keep_len});

@@ -216,8 +228,8 @@ template <class T>
 static void BoxCoder(const platform::DeviceContext& ctx,
                      framework::Tensor* all_anchors,
                      framework::Tensor* bbox_deltas,
-                     framework::Tensor* variances,
-                     framework::Tensor* proposals) {
+                     framework::Tensor* variances, framework::Tensor* proposals,
+                     const bool pixel_offset = true) {
   T* proposals_data = proposals->mutable_data<T>(ctx.GetPlace());

   int64_t row = all_anchors->dims()[0];

@@ -230,9 +242,11 @@ static void BoxCoder(const platform::DeviceContext& ctx,
     variances_data = variances->data<T>();
   }

+  T offset = pixel_offset ? static_cast<T>(1.0) : 0;
   for (int64_t i = 0; i < row; ++i) {
-    T anchor_width = anchor_data[i * len + 2] - anchor_data[i * len] + 1.0;
-    T anchor_height = anchor_data[i * len + 3] - anchor_data[i * len + 1] + 1.0;
+    T anchor_width = anchor_data[i * len + 2] - anchor_data[i * len] + offset;
+    T anchor_height =
+        anchor_data[i * len + 3] - anchor_data[i * len + 1] + offset;
     T anchor_center_x = anchor_data[i * len] + 0.5 * anchor_width;
     T anchor_center_y = anchor_data[i * len + 1] + 0.5 * anchor_height;

@@ -270,8 +284,8 @@ static void BoxCoder(const platform::DeviceContext& ctx,
     proposals_data[i * len] = bbox_center_x - bbox_width / 2;
     proposals_data[i * len + 1] = bbox_center_y - bbox_height / 2;
-    proposals_data[i * len + 2] = bbox_center_x + bbox_width / 2 - 1;
-    proposals_data[i * len + 3] = bbox_center_y + bbox_height / 2 - 1;
+    proposals_data[i * len + 2] = bbox_center_x + bbox_width / 2 - offset;
+    proposals_data[i * len + 3] = bbox_center_y + bbox_height / 2 - offset;
   }
   // return proposals;
 }
......
@@ -103,6 +103,9 @@ class DistributeFpnProposalsOpMaker : public framework::OpProtoAndCheckerMaker {
     AddAttr<int>("refer_scale",
                  "The referring scale of FPN layer with"
                  " specified level");
+    AddAttr<bool>("pixel_offset", "(bool, default True),",
+                  "If true, im_shape pixel offset is 1.")
+        .SetDefault(true);
     AddComment(R"DOC(
 This operator distribute all proposals into different fpn level,
 with respect to scale of the proposals, the referring scale and

@@ -134,4 +137,8 @@ REGISTER_OP_VERSION(distribute_fpn_proposals)
         .NewOutput("MultiLevelRoisNum",
                    "The RoIs' number of each image on multiple "
                    "levels. The number on each level has the shape of (B),"
-                   "B is the number of images."));
+                   "B is the number of images."))
+    .AddCheckpoint(
+        R"ROC(Register distribute_fpn_proposals for adding the attribute of pixel_offset)ROC",
+        paddle::framework::compatible::OpVersionDesc().NewAttr(
+            "pixel_offset", "If true, im_shape pixel offset is 1.", true));
@@ -43,15 +43,15 @@ __global__ void GPUDistFpnProposalsHelper(
     const int nthreads, const T* rois, const int lod_size,
     const int refer_level, const int refer_scale, const int max_level,
     const int min_level, int* roi_batch_id_data, int* sub_lod_list,
-    int* target_lvls) {
+    int* target_lvls, bool pixel_offset = true) {
   CUDA_KERNEL_LOOP(i, nthreads) {
     const T* offset_roi = rois + i * BBoxSize;
     int roi_batch_ind = roi_batch_id_data[i];
     // get the target level of current rois
-    T roi_area = RoIArea(offset_roi, false);
+    T roi_area = RoIArea(offset_roi, pixel_offset);
     T roi_scale = sqrt(roi_area);
     int tgt_lvl = floor(
-        log2(roi_scale / static_cast<T>(refer_scale) + (T)1e-6) + refer_level);
+        log2(roi_scale / static_cast<T>(refer_scale) + (T)1e-8) + refer_level);
     tgt_lvl = min(max_level, max(tgt_lvl, min_level));
     target_lvls[i] = tgt_lvl;
     // compute number of rois in the same batch and same target level

@@ -73,6 +73,7 @@ class GPUDistributeFpnProposalsOpKernel : public framework::OpKernel<T> {
     const int max_level = ctx.Attr<int>("max_level");
     const int refer_level = ctx.Attr<int>("refer_level");
     const int refer_scale = ctx.Attr<int>("refer_scale");
+    const bool pixel_offset = ctx.Attr<bool>("pixel_offset");
     int num_level = max_level - min_level + 1;
     // check that the fpn_rois is not empty

@@ -126,7 +127,7 @@ class GPUDistributeFpnProposalsOpKernel : public framework::OpKernel<T> {
     GPUDistFpnProposalsHelper<T><<<dist_blocks, threads>>>(
         roi_num, fpn_rois->data<T>(), lod_size, refer_level, refer_scale,
         max_level, min_level, roi_batch_id_list_gpu.data<int>(),
-        sub_lod_list_data, target_lvls_data);
+        sub_lod_list_data, target_lvls_data, pixel_offset);
     dev_ctx.Wait();
     auto place = BOOST_GET_CONST(platform::CUDAPlace, dev_ctx.GetPlace());
......
@@ -44,7 +44,7 @@ inline std::vector<size_t> GetLodFromRoisNum(const Tensor* rois_num) {
 }

 template <typename T>
-static inline T BBoxArea(const T* box, bool normalized) {
+static inline T BBoxArea(const T* box, bool pixel_offset) {
   if (box[2] < box[0] || box[3] < box[1]) {
     // If coordinate values are is invalid
     // (e.g. xmax < xmin or ymax < ymin), return 0.

@@ -52,11 +52,11 @@ static inline T BBoxArea(const T* box, bool normalized) {
   } else {
     const T w = box[2] - box[0];
     const T h = box[3] - box[1];
-    if (normalized) {
-      return w * h;
-    } else {
+    if (pixel_offset) {
       // If coordinate values are not within range [0, 1].
       return (w + 1) * (h + 1);
+    } else {
+      return w * h;
     }
   }
 }

@@ -77,6 +77,7 @@ class DistributeFpnProposalsOpKernel : public framework::OpKernel<T> {
     const int max_level = context.Attr<int>("max_level");
     const int refer_level = context.Attr<int>("refer_level");
     const int refer_scale = context.Attr<int>("refer_scale");
+    const bool pixel_offset = context.Attr<bool>("pixel_offset");
     const int num_level = max_level - min_level + 1;
     // check that the fpn_rois is not empty

@@ -108,7 +109,7 @@ class DistributeFpnProposalsOpKernel : public framework::OpKernel<T> {
       const T* rois_data = fpn_rois_slice.data<T>();
       for (int j = 0; j < fpn_rois_slice.dims()[0]; ++j) {
         // get the target level of current rois
-        T roi_scale = std::sqrt(BBoxArea(rois_data, false));
+        T roi_scale = std::sqrt(BBoxArea(rois_data, pixel_offset));
         int tgt_lvl = std::floor(std::log2(roi_scale / refer_scale + (T)1e-6) +
                                  refer_level);
         tgt_lvl = std::min(max_level, std::max(tgt_lvl, min_level));
......
@@ -87,6 +87,7 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> {
     float nms_thresh = context.Attr<float>("nms_thresh");
     float min_size = context.Attr<float>("min_size");
     float eta = context.Attr<float>("eta");
+    bool pixel_offset = context.Attr<bool>("pixel_offset");

     auto &dev_ctx =
         context.template device_context<platform::CPUDeviceContext>();

@@ -134,10 +135,10 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> {
       bbox_deltas_slice.Resize({h_bbox * w_bbox * c_bbox / 4, 4});
       scores_slice.Resize({h_score * w_score * c_score, 1});

-      std::pair<Tensor, Tensor> tensor_pair =
-          ProposalForOneImage(dev_ctx, im_shape_slice, anchors, variances,
-                              bbox_deltas_slice, scores_slice, pre_nms_top_n,
-                              post_nms_top_n, nms_thresh, min_size, eta);
+      std::pair<Tensor, Tensor> tensor_pair = ProposalForOneImage(
+          dev_ctx, im_shape_slice, anchors, variances, bbox_deltas_slice,
+          scores_slice, pre_nms_top_n, post_nms_top_n, nms_thresh, min_size,
+          eta, pixel_offset);

       Tensor &proposals = tensor_pair.first;
       Tensor &scores = tensor_pair.second;

@@ -168,7 +169,7 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> {
       const Tensor &bbox_deltas_slice,  // [M, 4]
       const Tensor &scores_slice,       // [N, 1]
       int pre_nms_top_n, int post_nms_top_n, float nms_thresh, float min_size,
-      float eta) const {
+      float eta, bool pixel_offset = true) const {
     auto *scores_data = scores_slice.data<T>();

     // Sort index

@@ -203,12 +204,15 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> {
     Tensor proposals;
     proposals.mutable_data<T>({index_t.numel(), 4}, ctx.GetPlace());
-    BoxCoder<T>(ctx, &anchor_sel, &bbox_sel, &var_sel, &proposals);
+    BoxCoder<T>(ctx, &anchor_sel, &bbox_sel, &var_sel, &proposals,
+                pixel_offset);

-    ClipTiledBoxes<T>(ctx, im_shape_slice, proposals, &proposals, false);
+    ClipTiledBoxes<T>(ctx, im_shape_slice, proposals, &proposals, false,
+                      pixel_offset);

     Tensor keep;
-    FilterBoxes<T>(ctx, &proposals, min_size, im_shape_slice, false, &keep);
+    FilterBoxes<T>(ctx, &proposals, min_size, im_shape_slice, false, &keep,
+                   pixel_offset);
     // Handle the case when there is no keep index left
     if (keep.numel() == 0) {
       math::SetConstant<platform::CPUDeviceContext, T> set_zero;

@@ -229,7 +233,8 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> {
       return std::make_pair(bbox_sel, scores_filter);
     }

-    Tensor keep_nms = NMS<T>(ctx, &bbox_sel, &scores_filter, nms_thresh, eta);
+    Tensor keep_nms =
+        NMS<T>(ctx, &bbox_sel, &scores_filter, nms_thresh, eta, pixel_offset);

     if (post_nms_top_n > 0 && post_nms_top_n < keep_nms.numel()) {
       keep_nms.Resize({post_nms_top_n});

@@ -280,6 +285,9 @@ class GenerateProposalsV2OpMaker : public framework::OpProtoAndCheckerMaker {
              "Proposal height and width both need to be greater "
              "than this min_size.");
     AddAttr<float>("eta", "The parameter for adaptive NMS.");
+    AddAttr<bool>("pixel_offset", "(bool, default True),",
+                  "If true, im_shape pixel offset is 1.")
+        .SetDefault(true);
     AddComment(R"DOC(
 This operator is the second version of generate_proposals op to generate
 bounding box proposals for Faster RCNN.

@@ -312,3 +320,8 @@ REGISTER_OPERATOR(
 REGISTER_OP_CPU_KERNEL(generate_proposals_v2,
                        ops::GenerateProposalsV2Kernel<float>,
                        ops::GenerateProposalsV2Kernel<double>);
+REGISTER_OP_VERSION(generate_proposals_v2)
+    .AddCheckpoint(
+        R"ROC(Registe generate_proposals_v2 for adding the attribute of pixel_offset)ROC",
+        paddle::framework::compatible::OpVersionDesc().NewAttr(
+            "pixel_offset", "If true, im_shape pixel offset is 1.", true));
@@ -36,7 +36,7 @@ static std::pair<Tensor, Tensor> ProposalForOneImage(
     const Tensor &bbox_deltas,  // [M, 4]
     const Tensor &scores,       // [N, 1]
     int pre_nms_top_n, int post_nms_top_n, float nms_thresh, float min_size,
-    float eta) {
+    float eta, bool pixel_offset) {
   // 1. pre nms
   Tensor scores_sort, index_sort;
   SortDescending<T>(ctx, scores, &scores_sort, &index_sort);

@@ -54,7 +54,8 @@ static std::pair<Tensor, Tensor> ProposalForOneImage(
     platform::ForRange<platform::CUDADeviceContext> for_range(ctx, pre_nms_num);
     for_range(BoxDecodeAndClipFunctor<T>{
         anchors.data<T>(), bbox_deltas.data<T>(), variances.data<T>(),
-        index_sort.data<int>(), im_shape.data<T>(), proposals.data<T>()});
+        index_sort.data<int>(), im_shape.data<T>(), proposals.data<T>(),
+        pixel_offset});
   }

   // 3. filter

@@ -65,7 +66,7 @@ static std::pair<Tensor, Tensor> ProposalForOneImage(
   auto stream = ctx.stream();
   FilterBBoxes<T, 512><<<1, 512, 0, stream>>>(
       proposals.data<T>(), im_shape.data<T>(), min_size, pre_nms_num,
-      keep_num_t.data<int>(), keep_index.data<int>(), false);
+      keep_num_t.data<int>(), keep_index.data<int>(), false, pixel_offset);
   int keep_num;
   const auto gpu_place = BOOST_GET_CONST(platform::CUDAPlace, ctx.GetPlace());
   memory::Copy(platform::CPUPlace(), &keep_num, gpu_place,

@@ -94,7 +95,8 @@ static std::pair<Tensor, Tensor> ProposalForOneImage(
   // 4. nms
   Tensor keep_nms;
-  NMS<T>(ctx, proposals_filter, keep_index, nms_thresh, &keep_nms);
+  NMS<T>(ctx, proposals_filter, keep_index, nms_thresh, &keep_nms,
+         pixel_offset);
   if (post_nms_top_n > 0 && post_nms_top_n < keep_nms.numel()) {
     keep_nms.Resize({post_nms_top_n});
   }

@@ -129,6 +131,7 @@ class CUDAGenerateProposalsV2Kernel : public framework::OpKernel<T> {
     float nms_thresh = context.Attr<float>("nms_thresh");
     float min_size = context.Attr<float>("min_size");
     float eta = context.Attr<float>("eta");
+    bool pixel_offset = context.Attr<bool>("pixel_offset");
     PADDLE_ENFORCE_GE(eta, 1.,
                       platform::errors::InvalidArgument(
                           "Not support adaptive NMS. The attribute 'eta' "

@@ -184,10 +187,10 @@ class CUDAGenerateProposalsV2Kernel : public framework::OpKernel<T> {
       bbox_deltas_slice.Resize({h_bbox * w_bbox * c_bbox / 4, 4});
       scores_slice.Resize({h_score * w_score * c_score, 1});

-      std::pair<Tensor, Tensor> box_score_pair =
-          ProposalForOneImage<T>(dev_ctx, im_shape_slice, anchors, variances,
-                                 bbox_deltas_slice, scores_slice, pre_nms_top_n,
-                                 post_nms_top_n, nms_thresh, min_size, eta);
+      std::pair<Tensor, Tensor> box_score_pair = ProposalForOneImage<T>(
+          dev_ctx, im_shape_slice, anchors, variances, bbox_deltas_slice,
+          scores_slice, pre_nms_top_n, post_nms_top_n, nms_thresh, min_size,
+          eta, pixel_offset);

       Tensor &proposals = box_score_pair.first;
       Tensor &scores = box_score_pair.second;
......
@@ -130,7 +130,7 @@ static inline framework::Tensor VectorToTensor(
 template <class T>
 framework::Tensor NMS(const platform::DeviceContext& ctx,
                       framework::Tensor* bbox, framework::Tensor* scores,
-                      T nms_threshold, float eta) {
+                      T nms_threshold, float eta, bool pixel_offset = true) {
   int64_t num_boxes = bbox->dims()[0];
   // 4: [xmin ymin xmax ymax]
   int64_t box_size = bbox->dims()[1];

@@ -144,13 +144,15 @@ framework::Tensor NMS(const platform::DeviceContext& ctx,
   int selected_num = 0;
   T adaptive_threshold = nms_threshold;
   const T* bbox_data = bbox->data<T>();
+  bool normalized = pixel_offset ? false : true;
   while (sorted_indices.size() != 0) {
     int idx = sorted_indices.back().second;
     bool flag = true;
     for (int kept_idx : selected_indices) {
       if (flag) {
-        T overlap = JaccardOverlap<T>(bbox_data + idx * box_size,
-                                      bbox_data + kept_idx * box_size, false);
+        T overlap =
+            JaccardOverlap<T>(bbox_data + idx * box_size,
+                              bbox_data + kept_idx * box_size, normalized);
         flag = (overlap <= adaptive_threshold);
       } else {
         break;
......
@@ -175,6 +175,10 @@ class ROIAlignOpMaker : public framework::OpProtoAndCheckerMaker {
                  "If <=0, then grid points are adaptive to roi_width "
                  "and pooled_w, likewise for height")
         .SetDefault(-1);
+    AddAttr<bool>("aligned",
+                  "(bool, default False),"
+                  "If true, pixel shift it by -0.5 for align more perfectly")
+        .SetDefault(false);
     AddComment(R"DOC(
 **RoIAlign Operator**

@@ -242,7 +246,14 @@ REGISTER_OP_VERSION(roi_align)
             "it is not used in object detection models yet."))
     .AddCheckpoint(
         R"ROC(
              Upgrade roi_align add a new input [RoisNum])ROC",
         paddle::framework::compatible::OpVersionDesc().NewInput(
             "RoisNum",
-            "The number of RoIs in each image. RoisNum is dispensable."));
+            "The number of RoIs in each image. RoisNum is dispensable."))
+    .AddCheckpoint(
+        R"ROC(
+              Upgrade roi_align add a new input [aligned])ROC",
+        paddle::framework::compatible::OpVersionDesc().NewAttr(
+            "aligned",
+            "If true, pixel shift it by -0.5 for align more perfectly.",
+            false));
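The new aligned attribute only changes how RoI coordinates are mapped onto the feature map: when it is true the kernels subtract a half-pixel offset and stop clamping the RoI size to at least 1. A small illustrative sketch of that mapping (an assumption-free restatement of the kernel code below, not the kernel itself):

def roi_to_feature_box(roi, spatial_scale, aligned=False):
    # roi is [xmin, ymin, xmax, ymax] in input-image coordinates.
    roi_offset = 0.5 if aligned else 0.0
    xmin = roi[0] * spatial_scale - roi_offset
    ymin = roi[1] * spatial_scale - roi_offset
    xmax = roi[2] * spatial_scale - roi_offset
    ymax = roi[3] * spatial_scale - roi_offset
    roi_w, roi_h = xmax - xmin, ymax - ymin
    if not aligned:
        # legacy behaviour: an RoI never shrinks below one feature-map cell
        roi_w, roi_h = max(roi_w, 1.0), max(roi_h, 1.0)
    return xmin, ymin, roi_w, roi_h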
@@ -105,7 +105,8 @@ __global__ void GPUROIAlignForward(
     const int nthreads, const T* input_data, const T* input_rois,
     const float spatial_scale, const int channels, const int height,
     const int width, const int pooled_height, const int pooled_width,
-    const int sampling_ratio, int* roi_batch_id_data, T* output_data) {
+    const int sampling_ratio, int* roi_batch_id_data, T* output_data,
+    const bool continuous_coordinate) {
   CUDA_KERNEL_LOOP(i, nthreads) {
     int pw = i % pooled_width;
     int ph = (i / pooled_width) % pooled_height;

@@ -115,13 +116,19 @@ __global__ void GPUROIAlignForward(
     const T* offset_input_rois = input_rois + n * kROISize;
     int roi_batch_ind = roi_batch_id_data[n];

-    T roi_xmin = offset_input_rois[0] * spatial_scale;
-    T roi_ymin = offset_input_rois[1] * spatial_scale;
-    T roi_xmax = offset_input_rois[2] * spatial_scale;
-    T roi_ymax = offset_input_rois[3] * spatial_scale;
+    T roi_offset = continuous_coordinate ? static_cast<T>(0.5) : 0;
+    T roi_xmin = offset_input_rois[0] * spatial_scale - roi_offset;
+    T roi_ymin = offset_input_rois[1] * spatial_scale - roi_offset;
+    T roi_xmax = offset_input_rois[2] * spatial_scale - roi_offset;
+    T roi_ymax = offset_input_rois[3] * spatial_scale - roi_offset;

-    T roi_width = max(roi_xmax - roi_xmin, static_cast<T>(1.));
-    T roi_height = max(roi_ymax - roi_ymin, static_cast<T>(1.));
+    T roi_width = roi_xmax - roi_xmin;
+    T roi_height = roi_ymax - roi_ymin;
+    if (!continuous_coordinate) {
+      roi_width = max(roi_width, static_cast<T>(1.));
+      roi_height = max(roi_height, static_cast<T>(1.));
+    }
     T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
     T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);

@@ -153,14 +160,12 @@ __global__ void GPUROIAlignForward(
 }

 template <typename T>
-__global__ void GPUROIAlignBackward(const int nthreads, const T* input_rois,
-                                    const T* out_grad, const int num_rois,
-                                    const float spatial_scale,
-                                    const int channels, const int height,
-                                    const int width, const int pooled_height,
-                                    const int pooled_width,
-                                    const int sampling_ratio,
-                                    int* roi_batch_id_data, T* input_grad) {
+__global__ void GPUROIAlignBackward(
+    const int nthreads, const T* input_rois, const T* out_grad,
+    const int num_rois, const float spatial_scale, const int channels,
+    const int height, const int width, const int pooled_height,
+    const int pooled_width, const int sampling_ratio, int* roi_batch_id_data,
+    T* input_grad, const bool continuous_coordinate) {
   CUDA_KERNEL_LOOP(i, nthreads) {
     int pw = i % pooled_width;
     int ph = (i / pooled_width) % pooled_height;

@@ -169,13 +174,18 @@ __global__ void GPUROIAlignBackward(const int nthreads, const T* input_rois,
     const T* offset_input_rois = input_rois + n * kROISize;
     int roi_batch_ind = roi_batch_id_data[n];

-    T roi_xmin = offset_input_rois[0] * spatial_scale;
-    T roi_ymin = offset_input_rois[1] * spatial_scale;
-    T roi_xmax = offset_input_rois[2] * spatial_scale;
-    T roi_ymax = offset_input_rois[3] * spatial_scale;
+    T roi_offset = continuous_coordinate ? T(0.5) : 0;
+    T roi_xmin = offset_input_rois[0] * spatial_scale - roi_offset;
+    T roi_ymin = offset_input_rois[1] * spatial_scale - roi_offset;
+    T roi_xmax = offset_input_rois[2] * spatial_scale - roi_offset;
+    T roi_ymax = offset_input_rois[3] * spatial_scale - roi_offset;

-    T roi_width = max(roi_xmax - roi_xmin, static_cast<T>(1.));
-    T roi_height = max(roi_ymax - roi_ymin, static_cast<T>(1.));
+    T roi_width = roi_xmax - roi_xmin;
+    T roi_height = roi_ymax - roi_ymin;
+    if (!continuous_coordinate) {
+      roi_width = max(roi_width, static_cast<T>(1.));
+      roi_height = max(roi_height, static_cast<T>(1.));
+    }
     T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
     T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);

@@ -236,6 +246,7 @@ class GPUROIAlignOpKernel : public framework::OpKernel<T> {
     auto pooled_width = ctx.Attr<int>("pooled_width");
     auto spatial_scale = ctx.Attr<float>("spatial_scale");
     auto sampling_ratio = ctx.Attr<int>("sampling_ratio");
+    auto aligned = ctx.Attr<bool>("aligned");

     auto in_dims = in->dims();
     int batch_size = in_dims[0];

@@ -316,7 +327,7 @@ class GPUROIAlignOpKernel : public framework::OpKernel<T> {
     GPUROIAlignForward<T><<<blocks, threads, 0, dev_ctx.stream()>>>(
         output_size, in->data<T>(), rois->data<T>(), spatial_scale, channels,
         height, width, pooled_height, pooled_width, sampling_ratio, roi_id_data,
-        out->mutable_data<T>(ctx.GetPlace()));
+        out->mutable_data<T>(ctx.GetPlace()), aligned);
   }
 };

@@ -334,6 +345,7 @@ class GPUROIAlignGradOpKernel : public framework::OpKernel<T> {
     auto pooled_width = ctx.Attr<int>("pooled_width");
     auto spatial_scale = ctx.Attr<float>("spatial_scale");
     auto sampling_ratio = ctx.Attr<int>("sampling_ratio");
+    auto aligned = ctx.Attr<bool>("aligned");

     int rois_num = rois->dims()[0];
     int channels = in->dims()[1];

@@ -390,8 +402,8 @@ class GPUROIAlignGradOpKernel : public framework::OpKernel<T> {
       GPUROIAlignBackward<T><<<blocks, threads, 0, dev_ctx.stream()>>>(
           output_grad_size, rois->data<T>(), out_grad->data<T>(), rois_num,
           spatial_scale, channels, height, width, pooled_height, pooled_width,
-          sampling_ratio, roi_id_data,
-          in_grad->mutable_data<T>(ctx.GetPlace()));
+          sampling_ratio, roi_id_data, in_grad->mutable_data<T>(ctx.GetPlace()),
+          aligned);
     }
   }
 };
......
@@ -145,6 +145,7 @@ class CPUROIAlignOpKernel : public framework::OpKernel<T> {
     auto pooled_width = ctx.Attr<int>("pooled_width");
     auto spatial_scale = ctx.Attr<float>("spatial_scale");
     auto sampling_ratio = ctx.Attr<int>("sampling_ratio");
+    auto aligned = ctx.Attr<bool>("aligned");

     auto& dev_ctx = ctx.template device_context<DeviceContext>();

@@ -215,15 +216,21 @@ class CPUROIAlignOpKernel : public framework::OpKernel<T> {
     }
     T* output_data = out->mutable_data<T>(ctx.GetPlace());
     const T* rois_data = rois->data<T>();
+    T roi_offset = aligned ? T(0.5) : 0;
     for (int n = 0; n < rois_num; ++n) {
       int roi_batch_id = roi_batch_id_data[n];
-      T roi_xmin = rois_data[0] * spatial_scale;
-      T roi_ymin = rois_data[1] * spatial_scale;
-      T roi_xmax = rois_data[2] * spatial_scale;
-      T roi_ymax = rois_data[3] * spatial_scale;
+      T roi_xmin = rois_data[0] * spatial_scale - roi_offset;
+      T roi_ymin = rois_data[1] * spatial_scale - roi_offset;
+      T roi_xmax = rois_data[2] * spatial_scale - roi_offset;
+      T roi_ymax = rois_data[3] * spatial_scale - roi_offset;

-      T roi_width = std::max(roi_xmax - roi_xmin, static_cast<T>(1.));
-      T roi_height = std::max(roi_ymax - roi_ymin, static_cast<T>(1.));
+      T roi_width = roi_xmax - roi_xmin;
+      T roi_height = roi_ymax - roi_ymin;
+      if (!aligned) {
+        roi_width = std::max(roi_width, static_cast<T>(1.));
+        roi_height = std::max(roi_height, static_cast<T>(1.));
+      }
       T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
       T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);
       const T* batch_data = input_data + roi_batch_id * in_stride[0];

@@ -290,6 +297,7 @@ class CPUROIAlignGradOpKernel : public framework::OpKernel<T> {
     auto spatial_scale = ctx.Attr<float>("spatial_scale");
     auto sampling_ratio = ctx.Attr<int>("sampling_ratio");
     auto in_dims = in->dims();
+    auto aligned = ctx.Attr<bool>("aligned");

     int channels = in_dims[1];
     int height = in_dims[2];

@@ -344,14 +352,21 @@ class CPUROIAlignGradOpKernel : public framework::OpKernel<T> {
     auto roi_stride = framework::stride(rois->dims());
     auto out_stride = framework::stride(out_grad->dims());

+    T roi_offset = aligned ? T(0.5) : 0;
     for (int n = 0; n < rois_num; ++n) {
       int roi_batch_idx = roi_batch_id_data[n];
-      T roi_xmin = rois_data[0] * spatial_scale;
-      T roi_ymin = rois_data[1] * spatial_scale;
-      T roi_xmax = rois_data[2] * spatial_scale;
-      T roi_ymax = rois_data[3] * spatial_scale;
+      T roi_xmin = rois_data[0] * spatial_scale - roi_offset;
+      T roi_ymin = rois_data[1] * spatial_scale - roi_offset;
+      T roi_xmax = rois_data[2] * spatial_scale - roi_offset;
+      T roi_ymax = rois_data[3] * spatial_scale - roi_offset;

-      T roi_width = std::max(roi_xmax - roi_xmin, static_cast<T>(1.));
-      T roi_height = std::max(roi_ymax - roi_ymin, static_cast<T>(1.));
+      T roi_width = roi_xmax - roi_xmin;
+      T roi_height = roi_ymax - roi_ymin;
+      if (!aligned) {
+        roi_width = std::max(roi_width, static_cast<T>(1.));
+        roi_height = std::max(roi_height, static_cast<T>(1.));
+      }
       T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
       T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);
       for (int c = 0; c < channels; ++c) {
......
@@ -31,7 +31,8 @@ class TestDistributeFPNProposalsOp(OpTest):
             'max_level': self.roi_max_level,
             'min_level': self.roi_min_level,
             'refer_scale': self.canonical_scale,
-            'refer_level': self.canonical_level
+            'refer_level': self.canonical_level,
+            'pixel_offset': self.pixel_offset,
         }
         output = [('out%d' % i, self.rois_fpn[i])
                   for i in range(len(self.rois_fpn))]

@@ -47,10 +48,12 @@ class TestDistributeFPNProposalsOp(OpTest):
         self.canonical_scale = 224
         self.canonical_level = 4
         self.images_shape = [512, 512]
+        self.pixel_offset = True

     def boxes_area(self, boxes):
-        w = (boxes[:, 2] - boxes[:, 0] + 1)
-        h = (boxes[:, 3] - boxes[:, 1] + 1)
+        offset = 1 if self.pixel_offset else 0
+        w = (boxes[:, 2] - boxes[:, 0] + offset)
+        h = (boxes[:, 3] - boxes[:, 1] + offset)
         areas = w * h
         assert np.all(areas >= 0), 'Negative areas founds'
         return areas

@@ -59,7 +62,7 @@ class TestDistributeFPNProposalsOp(OpTest):
         s = np.sqrt(self.boxes_area(rois))
         s0 = self.canonical_scale
         lvl0 = self.canonical_level
-        target_lvls = np.floor(lvl0 + np.log2(s / s0 + 1e-6))
+        target_lvls = np.floor(lvl0 + np.log2(s / s0 + 1e-8))
         target_lvls = np.clip(target_lvls, lvl_min, lvl_max)
         return target_lvls

@@ -131,7 +134,8 @@ class TestDistributeFPNProposalsOpWithRoisNum(TestDistributeFPNProposalsOp):
             'max_level': self.roi_max_level,
             'min_level': self.roi_min_level,
             'refer_scale': self.canonical_scale,
-            'refer_level': self.canonical_level
+            'refer_level': self.canonical_level,
+            'pixel_offset': self.pixel_offset,
         }
         output = [('out%d' % i, self.rois_fpn[i])
                   for i in range(len(self.rois_fpn))]

@@ -147,5 +151,16 @@ class TestDistributeFPNProposalsOpWithRoisNum(TestDistributeFPNProposalsOp):
         }


+class TestDistributeFPNProposalsOpNoOffset(
+        TestDistributeFPNProposalsOpWithRoisNum):
+    def init_test_case(self):
+        self.roi_max_level = 5
+        self.roi_min_level = 2
+        self.canonical_scale = 224
+        self.canonical_level = 4
+        self.images_shape = [512, 512]
+        self.pixel_offset = False
+
+
 if __name__ == '__main__':
     unittest.main()
...@@ -21,7 +21,6 @@ import math ...@@ -21,7 +21,6 @@ import math
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from op_test import OpTest from op_test import OpTest
from test_multiclass_nms_op import nms
from test_anchor_generator_op import anchor_generator_in_python from test_anchor_generator_op import anchor_generator_in_python
import copy import copy
...@@ -111,18 +110,19 @@ def proposal_for_one_image(im_info, all_anchors, variances, bbox_deltas, scores, ...@@ -111,18 +110,19 @@ def proposal_for_one_image(im_info, all_anchors, variances, bbox_deltas, scores,
return proposals, scores return proposals, scores
def box_coder(all_anchors, bbox_deltas, variances): def box_coder(all_anchors, bbox_deltas, variances, pixel_offset=True):
""" """
Decode proposals by anchors and bbox_deltas from RPN Decode proposals by anchors and bbox_deltas from RPN
""" """
offset = 1 if pixel_offset else 0
#proposals: xmin, ymin, xmax, ymax #proposals: xmin, ymin, xmax, ymax
proposals = np.zeros_like(bbox_deltas, dtype=np.float32) proposals = np.zeros_like(bbox_deltas, dtype=np.float32)
#anchor_loc: width, height, center_x, center_y #anchor_loc: width, height, center_x, center_y
anchor_loc = np.zeros_like(bbox_deltas, dtype=np.float32) anchor_loc = np.zeros_like(bbox_deltas, dtype=np.float32)
anchor_loc[:, 0] = all_anchors[:, 2] - all_anchors[:, 0] + 1 anchor_loc[:, 0] = all_anchors[:, 2] - all_anchors[:, 0] + offset
anchor_loc[:, 1] = all_anchors[:, 3] - all_anchors[:, 1] + 1 anchor_loc[:, 1] = all_anchors[:, 3] - all_anchors[:, 1] + offset
anchor_loc[:, 2] = all_anchors[:, 0] + 0.5 * anchor_loc[:, 0] anchor_loc[:, 2] = all_anchors[:, 0] + 0.5 * anchor_loc[:, 0]
anchor_loc[:, 3] = all_anchors[:, 1] + 0.5 * anchor_loc[:, 1] anchor_loc[:, 3] = all_anchors[:, 1] + 0.5 * anchor_loc[:, 1]
...@@ -152,51 +152,60 @@ def box_coder(all_anchors, bbox_deltas, variances): ...@@ -152,51 +152,60 @@ def box_coder(all_anchors, bbox_deltas, variances):
pred_bbox[i, 3] = math.exp( pred_bbox[i, 3] = math.exp(
min(bbox_deltas[i, 3], math.log(1000 / 16.0))) * anchor_loc[i, min(bbox_deltas[i, 3], math.log(1000 / 16.0))) * anchor_loc[i,
1] 1]
proposals[:, 0] = pred_bbox[:, 0] - pred_bbox[:, 2] / 2 proposals[:, 0] = pred_bbox[:, 0] - pred_bbox[:, 2] / 2
proposals[:, 1] = pred_bbox[:, 1] - pred_bbox[:, 3] / 2 proposals[:, 1] = pred_bbox[:, 1] - pred_bbox[:, 3] / 2
proposals[:, 2] = pred_bbox[:, 0] + pred_bbox[:, 2] / 2 - 1 proposals[:, 2] = pred_bbox[:, 0] + pred_bbox[:, 2] / 2 - offset
proposals[:, 3] = pred_bbox[:, 1] + pred_bbox[:, 3] / 2 - 1 proposals[:, 3] = pred_bbox[:, 1] + pred_bbox[:, 3] / 2 - offset
return proposals return proposals
def clip_tiled_boxes(boxes, im_shape): def clip_tiled_boxes(boxes, im_shape, pixel_offset=True):
"""Clip boxes to image boundaries. im_shape is [height, width] and boxes """Clip boxes to image boundaries. im_shape is [height, width] and boxes
has shape (N, 4 * num_tiled_boxes).""" has shape (N, 4 * num_tiled_boxes)."""
assert boxes.shape[1] % 4 == 0, \ assert boxes.shape[1] % 4 == 0, \
'boxes.shape[1] is {:d}, but must be divisible by 4.'.format( 'boxes.shape[1] is {:d}, but must be divisible by 4.'.format(
boxes.shape[1] boxes.shape[1]
) )
offset = 1 if pixel_offset else 0
# x1 >= 0 # x1 >= 0
boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) boxes[:, 0::4] = np.maximum(
np.minimum(boxes[:, 0::4], im_shape[1] - offset), 0)
# y1 >= 0 # y1 >= 0
boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) boxes[:, 1::4] = np.maximum(
np.minimum(boxes[:, 1::4], im_shape[0] - offset), 0)
# x2 < im_shape[1] # x2 < im_shape[1]
boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) boxes[:, 2::4] = np.maximum(
np.minimum(boxes[:, 2::4], im_shape[1] - offset), 0)
# y2 < im_shape[0] # y2 < im_shape[0]
boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) boxes[:, 3::4] = np.maximum(
np.minimum(boxes[:, 3::4], im_shape[0] - offset), 0)
return boxes return boxes
def filter_boxes(boxes, min_size, im_info): def filter_boxes(boxes, min_size, im_info, pixel_offset=True):
"""Only keep boxes with both sides >= min_size and center within the image. """Only keep boxes with both sides >= min_size and center within the image.
""" """
# Scale min_size to match image scale # Scale min_size to match image scale
im_scale = im_info[2] im_scale = im_info[2]
min_size = max(min_size, 1.0) min_size = max(min_size, 1.0)
ws = boxes[:, 2] - boxes[:, 0] + 1 offset = 1 if pixel_offset else 0
hs = boxes[:, 3] - boxes[:, 1] + 1 ws = boxes[:, 2] - boxes[:, 0] + offset
ws_orig_scale = (boxes[:, 2] - boxes[:, 0]) / im_scale + 1 hs = boxes[:, 3] - boxes[:, 1] + offset
hs_orig_scale = (boxes[:, 3] - boxes[:, 1]) / im_scale + 1 if pixel_offset:
x_ctr = boxes[:, 0] + ws / 2. ws_orig_scale = (boxes[:, 2] - boxes[:, 0]) / im_scale + 1
y_ctr = boxes[:, 1] + hs / 2. hs_orig_scale = (boxes[:, 3] - boxes[:, 1]) / im_scale + 1
keep = np.where((ws_orig_scale >= min_size) & (hs_orig_scale >= min_size) & x_ctr = boxes[:, 0] + ws / 2.
(x_ctr < im_info[1]) & (y_ctr < im_info[0]))[0] y_ctr = boxes[:, 1] + hs / 2.
keep = np.where((ws_orig_scale >= min_size) & (
hs_orig_scale >= min_size) & (x_ctr < im_info[1]) & (y_ctr <
im_info[0]))[0]
else:
keep = np.where((ws >= min_size) & (hs >= min_size))[0]
return keep return keep
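Similarly for filter_boxes, the +1 convention makes a box spanning coordinates 2..4 three pixels wide, so it survives a min_size of 3 only when pixel_offset is True (made-up box; im_info is height, width, im_scale):

import numpy as np

boxes = np.array([[2., 2., 4., 4.]])
im_info = np.array([60., 40., 1.0])            # height, width, im_scale (made up)

print(filter_boxes(boxes, 3.0, im_info, pixel_offset=True))   # [0] -> kept, side = 3
print(filter_boxes(boxes, 3.0, im_info, pixel_offset=False))  # []  -> side only 2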
def iou(box_a, box_b): def iou(box_a, box_b, pixel_offset=True):
""" """
Apply intersection-over-union overlap between box_a and box_b Apply intersection-over-union overlap between box_a and box_b
""" """
...@@ -209,9 +218,9 @@ def iou(box_a, box_b): ...@@ -209,9 +218,9 @@ def iou(box_a, box_b):
ymin_b = min(box_b[1], box_b[3]) ymin_b = min(box_b[1], box_b[3])
xmax_b = max(box_b[0], box_b[2]) xmax_b = max(box_b[0], box_b[2])
ymax_b = max(box_b[1], box_b[3]) ymax_b = max(box_b[1], box_b[3])
offset = 1 if pixel_offset else 0
area_a = (ymax_a - ymin_a + 1) * (xmax_a - xmin_a + 1) area_a = (ymax_a - ymin_a + offset) * (xmax_a - xmin_a + offset)
area_b = (ymax_b - ymin_b + 1) * (xmax_b - xmin_b + 1) area_b = (ymax_b - ymin_b + offset) * (xmax_b - xmin_b + offset)
if area_a <= 0 and area_b <= 0: if area_a <= 0 and area_b <= 0:
return 0.0 return 0.0
...@@ -220,14 +229,14 @@ def iou(box_a, box_b): ...@@ -220,14 +229,14 @@ def iou(box_a, box_b):
xb = min(xmax_a, xmax_b) xb = min(xmax_a, xmax_b)
yb = min(ymax_a, ymax_b) yb = min(ymax_a, ymax_b)
inter_area = max(xb - xa + 1, 0.0) * max(yb - ya + 1, 0.0) inter_area = max(xb - xa + offset, 0.0) * max(yb - ya + offset, 0.0)
iou_ratio = inter_area / (area_a + area_b - inter_area) iou_ratio = inter_area / (area_a + area_b - inter_area)
return iou_ratio return iou_ratio
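A worked example of the overlap difference using the iou reference above (coordinates made up): two boxes that only share the edge x = 4 have a zero-area intersection without the offset, while under the +1 convention the shared edge counts as a one-pixel-wide strip:

box_a = [0., 0., 4., 4.]
box_b = [4., 0., 8., 4.]

print(iou(box_a, box_b, pixel_offset=True))    # 5 / (25 + 25 - 5) ~= 0.111
print(iou(box_a, box_b, pixel_offset=False))   # 0.0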
def nms(boxes, scores, nms_threshold, eta=1.0): def nms(boxes, scores, nms_threshold, eta=1.0, pixel_offset=True):
"""Apply non-maximum suppression at test time to avoid detecting too many """Apply non-maximum suppression at test time to avoid detecting too many
overlapping bounding boxes for a given object. overlapping bounding boxes for a given object.
Args: Args:
...@@ -252,7 +261,9 @@ def nms(boxes, scores, nms_threshold, eta=1.0): ...@@ -252,7 +261,9 @@ def nms(boxes, scores, nms_threshold, eta=1.0):
for k in range(len(selected_indices)): for k in range(len(selected_indices)):
if keep: if keep:
kept_idx = selected_indices[k] kept_idx = selected_indices[k]
overlap = iou(boxes[idx], boxes[kept_idx]) overlap = iou(boxes[idx],
boxes[kept_idx],
pixel_offset=pixel_offset)
keep = True if overlap <= adaptive_threshold else False keep = True if overlap <= adaptive_threshold else False
else: else:
break break
......
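Assuming the collapsed body of nms above sorts boxes by score and returns the kept indices, a minimal sketch with made-up boxes and scores: the second box nearly duplicates the first and should be suppressed at a 0.5 threshold, while the distant third box is kept:

import numpy as np

boxes = np.array([[0., 0., 10., 10.],
                  [1., 1., 10., 10.],
                  [20., 20., 30., 30.]])
scores = np.array([0.9, 0.8, 0.7])

keep = nms(boxes, scores, nms_threshold=0.5, eta=1.0, pixel_offset=True)
print(keep)   # expected: [0, 2]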
...@@ -21,7 +21,6 @@ import math ...@@ -21,7 +21,6 @@ import math
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from op_test import OpTest from op_test import OpTest
from test_multiclass_nms_op import nms
from test_anchor_generator_op import anchor_generator_in_python from test_anchor_generator_op import anchor_generator_in_python
import copy import copy
from test_generate_proposals_op import clip_tiled_boxes, box_coder, nms from test_generate_proposals_op import clip_tiled_boxes, box_coder, nms
...@@ -29,7 +28,7 @@ from test_generate_proposals_op import clip_tiled_boxes, box_coder, nms ...@@ -29,7 +28,7 @@ from test_generate_proposals_op import clip_tiled_boxes, box_coder, nms
def generate_proposals_v2_in_python(scores, bbox_deltas, im_shape, anchors, def generate_proposals_v2_in_python(scores, bbox_deltas, im_shape, anchors,
variances, pre_nms_topN, post_nms_topN, variances, pre_nms_topN, post_nms_topN,
nms_thresh, min_size, eta): nms_thresh, min_size, eta, pixel_offset):
all_anchors = anchors.reshape(-1, 4) all_anchors = anchors.reshape(-1, 4)
rois = np.empty((0, 5), dtype=np.float32) rois = np.empty((0, 5), dtype=np.float32)
roi_probs = np.empty((0, 1), dtype=np.float32) roi_probs = np.empty((0, 1), dtype=np.float32)
...@@ -42,7 +41,8 @@ def generate_proposals_v2_in_python(scores, bbox_deltas, im_shape, anchors, ...@@ -42,7 +41,8 @@ def generate_proposals_v2_in_python(scores, bbox_deltas, im_shape, anchors,
img_i_boxes, img_i_probs = proposal_for_one_image( img_i_boxes, img_i_probs = proposal_for_one_image(
im_shape[img_idx, :], all_anchors, variances, im_shape[img_idx, :], all_anchors, variances,
bbox_deltas[img_idx, :, :, :], scores[img_idx, :, :, :], bbox_deltas[img_idx, :, :, :], scores[img_idx, :, :, :],
pre_nms_topN, post_nms_topN, nms_thresh, min_size, eta) pre_nms_topN, post_nms_topN, nms_thresh, min_size, eta,
pixel_offset)
rois_num.append(img_i_probs.shape[0]) rois_num.append(img_i_probs.shape[0])
rpn_rois.append(img_i_boxes) rpn_rois.append(img_i_boxes)
rpn_roi_probs.append(img_i_probs) rpn_roi_probs.append(img_i_probs)
...@@ -52,7 +52,7 @@ def generate_proposals_v2_in_python(scores, bbox_deltas, im_shape, anchors, ...@@ -52,7 +52,7 @@ def generate_proposals_v2_in_python(scores, bbox_deltas, im_shape, anchors,
def proposal_for_one_image(im_shape, all_anchors, variances, bbox_deltas, def proposal_for_one_image(im_shape, all_anchors, variances, bbox_deltas,
scores, pre_nms_topN, post_nms_topN, nms_thresh, scores, pre_nms_topN, post_nms_topN, nms_thresh,
min_size, eta): min_size, eta, pixel_offset):
# Transpose and reshape predicted bbox transformations to get them # Transpose and reshape predicted bbox transformations to get them
# into the same order as the anchors: # into the same order as the anchors:
# - bbox deltas will be (4 * A, H, W) format from conv output # - bbox deltas will be (4 * A, H, W) format from conv output
...@@ -83,12 +83,12 @@ def proposal_for_one_image(im_shape, all_anchors, variances, bbox_deltas, ...@@ -83,12 +83,12 @@ def proposal_for_one_image(im_shape, all_anchors, variances, bbox_deltas,
scores = scores[order, :] scores = scores[order, :]
bbox_deltas = bbox_deltas[order, :] bbox_deltas = bbox_deltas[order, :]
all_anchors = all_anchors[order, :] all_anchors = all_anchors[order, :]
proposals = box_coder(all_anchors, bbox_deltas, variances) proposals = box_coder(all_anchors, bbox_deltas, variances, pixel_offset)
# clip proposals to image (may result in proposals with zero area # clip proposals to image (may result in proposals with zero area
# that will be removed in the next step) # that will be removed in the next step)
proposals = clip_tiled_boxes(proposals, im_shape) proposals = clip_tiled_boxes(proposals, im_shape, pixel_offset)
# remove predicted boxes with height or width < min_size # remove predicted boxes with height or width < min_size
keep = filter_boxes(proposals, min_size, im_shape) keep = filter_boxes(proposals, min_size, im_shape, pixel_offset)
if len(keep) == 0: if len(keep) == 0:
proposals = np.zeros((1, 4)).astype('float32') proposals = np.zeros((1, 4)).astype('float32')
scores = np.zeros((1, 1)).astype('float32') scores = np.zeros((1, 1)).astype('float32')
...@@ -103,7 +103,8 @@ def proposal_for_one_image(im_shape, all_anchors, variances, bbox_deltas, ...@@ -103,7 +103,8 @@ def proposal_for_one_image(im_shape, all_anchors, variances, bbox_deltas,
keep = nms(boxes=proposals, keep = nms(boxes=proposals,
scores=scores, scores=scores,
nms_threshold=nms_thresh, nms_threshold=nms_thresh,
eta=eta) eta=eta,
pixel_offset=pixel_offset)
if post_nms_topN > 0 and post_nms_topN < len(keep): if post_nms_topN > 0 and post_nms_topN < len(keep):
keep = keep[:post_nms_topN] keep = keep[:post_nms_topN]
proposals = proposals[keep, :] proposals = proposals[keep, :]
...@@ -112,17 +113,21 @@ def proposal_for_one_image(im_shape, all_anchors, variances, bbox_deltas, ...@@ -112,17 +113,21 @@ def proposal_for_one_image(im_shape, all_anchors, variances, bbox_deltas,
return proposals, scores return proposals, scores
def filter_boxes(boxes, min_size, im_shape): def filter_boxes(boxes, min_size, im_shape, pixel_offset=True):
"""Only keep boxes with both sides >= min_size and center within the image. """Only keep boxes with both sides >= min_size and center within the image.
""" """
# Scale min_size to match image scale # Scale min_size to match image scale
min_size = max(min_size, 1.0) min_size = max(min_size, 1.0)
ws = boxes[:, 2] - boxes[:, 0] + 1 offset = 1 if pixel_offset else 0
hs = boxes[:, 3] - boxes[:, 1] + 1 ws = boxes[:, 2] - boxes[:, 0] + offset
x_ctr = boxes[:, 0] + ws / 2. hs = boxes[:, 3] - boxes[:, 1] + offset
y_ctr = boxes[:, 1] + hs / 2. if pixel_offset:
keep = np.where((ws >= min_size) & (hs >= min_size) & (x_ctr < im_shape[1]) x_ctr = boxes[:, 0] + ws / 2.
& (y_ctr < im_shape[0]))[0] y_ctr = boxes[:, 1] + hs / 2.
keep = np.where((ws >= min_size) & (hs >= min_size) & (x_ctr < im_shape[
1]) & (y_ctr < im_shape[0]))[0]
else:
keep = np.where((ws >= min_size) & (hs >= min_size))[0]
return keep return keep
...@@ -144,7 +149,8 @@ class TestGenerateProposalsV2Op(OpTest): ...@@ -144,7 +149,8 @@ class TestGenerateProposalsV2Op(OpTest):
'post_nms_topN': self.post_nms_topN, 'post_nms_topN': self.post_nms_topN,
'nms_thresh': self.nms_thresh, 'nms_thresh': self.nms_thresh,
'min_size': self.min_size, 'min_size': self.min_size,
'eta': self.eta 'eta': self.eta,
'pixel_offset': self.pixel_offset,
} }
self.outputs = { self.outputs = {
...@@ -165,6 +171,7 @@ class TestGenerateProposalsV2Op(OpTest): ...@@ -165,6 +171,7 @@ class TestGenerateProposalsV2Op(OpTest):
self.nms_thresh = 0.7 self.nms_thresh = 0.7
self.min_size = 3.0 self.min_size = 3.0
self.eta = 1. self.eta = 1.
self.pixel_offset = True
def init_test_input(self): def init_test_input(self):
batch_size = 1 batch_size = 1
...@@ -191,7 +198,7 @@ class TestGenerateProposalsV2Op(OpTest): ...@@ -191,7 +198,7 @@ class TestGenerateProposalsV2Op(OpTest):
self.rpn_rois, self.rpn_roi_probs, self.rois_num = generate_proposals_v2_in_python( self.rpn_rois, self.rpn_roi_probs, self.rois_num = generate_proposals_v2_in_python(
self.scores, self.bbox_deltas, self.im_shape, self.anchors, self.scores, self.bbox_deltas, self.im_shape, self.anchors,
self.variances, self.pre_nms_topN, self.post_nms_topN, self.variances, self.pre_nms_topN, self.post_nms_topN,
self.nms_thresh, self.min_size, self.eta) self.nms_thresh, self.min_size, self.eta, self.pixel_offset)
class TestGenerateProposalsV2OutLodOp(TestGenerateProposalsV2Op): class TestGenerateProposalsV2OutLodOp(TestGenerateProposalsV2Op):
...@@ -231,6 +238,17 @@ class TestGenerateProposalsV2OpNoBoxLeft(TestGenerateProposalsV2Op): ...@@ -231,6 +238,17 @@ class TestGenerateProposalsV2OpNoBoxLeft(TestGenerateProposalsV2Op):
self.nms_thresh = 0.7 self.nms_thresh = 0.7
self.min_size = 1000.0 self.min_size = 1000.0
self.eta = 1. self.eta = 1.
self.pixel_offset = True
class TestGenerateProposalsV2OpNoOffset(TestGenerateProposalsV2Op):
def init_test_params(self):
self.pre_nms_topN = 12000 # train 12000, test 2000
self.post_nms_topN = 5000 # train 6000, test 1000
self.nms_thresh = 0.7
self.min_size = 3.0
self.eta = 1.
self.pixel_offset = False
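A hypothetical additional case (not part of this change) could combine the LoD/RoisNum output path with the new flag by following the same pattern, for example:

class TestGenerateProposalsV2OutLodNoOffsetOp(TestGenerateProposalsV2OutLodOp):
    def init_test_params(self):
        self.pre_nms_topN = 12000
        self.post_nms_topN = 5000
        self.nms_thresh = 0.7
        self.min_size = 3.0
        self.eta = 1.
        self.pixel_offset = False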
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -35,7 +35,8 @@ class TestROIAlignOp(OpTest): ...@@ -35,7 +35,8 @@ class TestROIAlignOp(OpTest):
'spatial_scale': self.spatial_scale, 'spatial_scale': self.spatial_scale,
'pooled_height': self.pooled_height, 'pooled_height': self.pooled_height,
'pooled_width': self.pooled_width, 'pooled_width': self.pooled_width,
'sampling_ratio': self.sampling_ratio 'sampling_ratio': self.sampling_ratio,
'aligned': self.aligned,
} }
self.outputs = {'Out': self.out_data} self.outputs = {'Out': self.out_data}
...@@ -53,6 +54,7 @@ class TestROIAlignOp(OpTest): ...@@ -53,6 +54,7 @@ class TestROIAlignOp(OpTest):
self.pooled_height = 2 self.pooled_height = 2
self.pooled_width = 2 self.pooled_width = 2
self.sampling_ratio = -1 self.sampling_ratio = -1
self.aligned = False
self.x = np.random.random(self.x_dim).astype('float64') self.x = np.random.random(self.x_dim).astype('float64')
...@@ -115,16 +117,21 @@ class TestROIAlignOp(OpTest): ...@@ -115,16 +117,21 @@ class TestROIAlignOp(OpTest):
(self.rois_num, self.channels, self.pooled_height, (self.rois_num, self.channels, self.pooled_height,
self.pooled_width)).astype('float64') self.pooled_width)).astype('float64')
offset = 0.5 if self.aligned else 0.
for i in range(self.rois_num): for i in range(self.rois_num):
roi = self.rois[i] roi = self.rois[i]
roi_batch_id = int(roi[0]) roi_batch_id = int(roi[0])
x_i = self.x[roi_batch_id] x_i = self.x[roi_batch_id]
roi_xmin = roi[1] * self.spatial_scale roi_xmin = roi[1] * self.spatial_scale - offset
roi_ymin = roi[2] * self.spatial_scale roi_ymin = roi[2] * self.spatial_scale - offset
roi_xmax = roi[3] * self.spatial_scale roi_xmax = roi[3] * self.spatial_scale - offset
roi_ymax = roi[4] * self.spatial_scale roi_ymax = roi[4] * self.spatial_scale - offset
roi_width = max(roi_xmax - roi_xmin, 1)
roi_height = max(roi_ymax - roi_ymin, 1) roi_width = roi_xmax - roi_xmin
roi_height = roi_ymax - roi_ymin
if not self.aligned:
roi_width = max(roi_width, 1)
roi_height = max(roi_height, 1)
bin_size_h = float(roi_height) / float(self.pooled_height) bin_size_h = float(roi_height) / float(self.pooled_height)
bin_size_w = float(roi_width) / float(self.pooled_width) bin_size_w = float(roi_width) / float(self.pooled_width)
roi_bin_grid_h = self.sampling_ratio if self.sampling_ratio > 0 else \ roi_bin_grid_h = self.sampling_ratio if self.sampling_ratio > 0 else \
...@@ -192,11 +199,31 @@ class TestROIAlignInLodOp(TestROIAlignOp): ...@@ -192,11 +199,31 @@ class TestROIAlignInLodOp(TestROIAlignOp):
'spatial_scale': self.spatial_scale, 'spatial_scale': self.spatial_scale,
'pooled_height': self.pooled_height, 'pooled_height': self.pooled_height,
'pooled_width': self.pooled_width, 'pooled_width': self.pooled_width,
'sampling_ratio': self.sampling_ratio 'sampling_ratio': self.sampling_ratio,
'aligned': self.aligned
} }
self.outputs = {'Out': self.out_data} self.outputs = {'Out': self.out_data}
class TestROIAlignOpWithAligned(TestROIAlignOp):
def init_test_case(self):
self.batch_size = 3
self.channels = 3
self.height = 8
self.width = 6
# n, c, h, w
self.x_dim = (self.batch_size, self.channels, self.height, self.width)
self.spatial_scale = 1.0 / 2.0
self.pooled_height = 2
self.pooled_width = 2
self.sampling_ratio = -1
self.aligned = True
self.x = np.random.random(self.x_dim).astype('float64')
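For intuition, a small sketch (made-up ROI, spatial_scale = 0.5) of what the aligned flag changes in the expected-output computation above: the scaled coordinates are shifted back by half a pixel, and the width/height are no longer clamped to a minimum of 1:

roi = [0., 4., 4., 8., 8.]                     # batch_id, x1, y1, x2, y2 (made up)
spatial_scale = 0.5

for aligned in (False, True):
    offset = 0.5 if aligned else 0.
    xmin = roi[1] * spatial_scale - offset
    xmax = roi[3] * spatial_scale - offset
    width = xmax - xmin
    if not aligned:
        width = max(width, 1)
    print(aligned, xmin, xmax, width)          # (False, 2.0, 4.0, 2.0) / (True, 1.5, 3.5, 2.0)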
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()