Unverified commit 5b267474 authored by Guanghua Yu, committed by GitHub

add offset parameter in roi_align, generate_proposals, etc. ops (#30864)

* add offset parameter in roi_align op
Parent 75f81233
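The new pixel_offset attribute controls whether box extents keep the legacy "+1 pixel" convention used throughout these ops. A minimal NumPy sketch of the arithmetic the kernels below implement (standalone illustration, not a Paddle API):

import numpy as np

def box_wh(boxes, pixel_offset=True):
    # boxes: (N, 4) array of [xmin, ymin, xmax, ymax]
    offset = 1.0 if pixel_offset else 0.0
    w = boxes[:, 2] - boxes[:, 0] + offset
    h = boxes[:, 3] - boxes[:, 1] + offset
    return w, h

boxes = np.array([[0., 0., 3., 3.]])
print(box_wh(boxes, pixel_offset=True))   # (array([4.]), array([4.])) - legacy pixel convention
print(box_wh(boxes, pixel_offset=False))  # (array([3.]), array([3.])) - continuous coordinates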
......@@ -77,17 +77,20 @@ struct BoxDecodeAndClipFunctor {
const T *var;
const int *index;
const T *im_info;
const bool pixel_offset;
T *proposals;
BoxDecodeAndClipFunctor(const T *anchor, const T *deltas, const T *var,
const int *index, const T *im_info, T *proposals)
const int *index, const T *im_info, T *proposals,
bool pixel_offset = true)
: anchor(anchor),
deltas(deltas),
var(var),
index(index),
im_info(im_info),
proposals(proposals) {}
proposals(proposals),
pixel_offset(pixel_offset) {}
T bbox_clip_default{static_cast<T>(kBBoxClipDefault)};
......@@ -98,8 +101,9 @@ struct BoxDecodeAndClipFunctor {
T axmax = anchor[k + 2];
T aymax = anchor[k + 3];
T w = axmax - axmin + 1.0;
T h = aymax - aymin + 1.0;
T offset = pixel_offset ? static_cast<T>(1.0) : 0;
T w = axmax - axmin + offset;
T h = aymax - aymin + offset;
T cx = axmin + 0.5 * w;
T cy = aymin + 0.5 * h;
......@@ -123,13 +127,13 @@ struct BoxDecodeAndClipFunctor {
T oxmin = d_cx - d_w * 0.5;
T oymin = d_cy - d_h * 0.5;
T oxmax = d_cx + d_w * 0.5 - 1.;
T oymax = d_cy + d_h * 0.5 - 1.;
T oxmax = d_cx + d_w * 0.5 - offset;
T oymax = d_cy + d_h * 0.5 - offset;
proposals[i * 4] = Max(Min(oxmin, im_info[1] - 1.), 0.);
proposals[i * 4 + 1] = Max(Min(oymin, im_info[0] - 1.), 0.);
proposals[i * 4 + 2] = Max(Min(oxmax, im_info[1] - 1.), 0.);
proposals[i * 4 + 3] = Max(Min(oymax, im_info[0] - 1.), 0.);
proposals[i * 4] = Max(Min(oxmin, im_info[1] - offset), 0.);
proposals[i * 4 + 1] = Max(Min(oymin, im_info[0] - offset), 0.);
proposals[i * 4 + 2] = Max(Min(oxmax, im_info[1] - offset), 0.);
proposals[i * 4 + 3] = Max(Min(oymax, im_info[0] - offset), 0.);
}
__device__ __forceinline__ T Min(T a, T b) const { return a > b ? b : a; }
......@@ -141,7 +145,8 @@ template <typename T, int BlockSize>
static __global__ void FilterBBoxes(const T *bboxes, const T *im_info,
const T min_size, const int num,
int *keep_num, int *keep,
bool is_scale = true) {
bool is_scale = true,
bool pixel_offset = true) {
T im_h = im_info[0];
T im_w = im_info[1];
......@@ -157,9 +162,10 @@ static __global__ void FilterBBoxes(const T *bboxes, const T *im_info,
T ymin = bboxes[k + 1];
T xmax = bboxes[k + 2];
T ymax = bboxes[k + 3];
T w = xmax - xmin + 1.0;
T h = ymax - ymin + 1.0;
T offset = pixel_offset ? static_cast<T>(1.0) : 0;
T w = xmax - xmin + offset;
T h = ymax - ymin + offset;
if (pixel_offset) {
T cx = xmin + w / 2.;
T cy = ymin + h / 2.;
......@@ -171,6 +177,11 @@ static __global__ void FilterBBoxes(const T *bboxes, const T *im_info,
if (w >= min_size && h >= min_size && cx <= im_w && cy <= im_h) {
keep_index[threadIdx.x] = i;
}
} else {
if (w >= min_size && h >= min_size) {
keep_index[threadIdx.x] = i;
}
}
__syncthreads();
if (threadIdx.x == 0) {
int size = (num - i) < BlockSize ? num - i : BlockSize;
......@@ -187,19 +198,23 @@ static __global__ void FilterBBoxes(const T *bboxes, const T *im_info,
}
}
static __device__ float IoU(const float *a, const float *b) {
static __device__ float IoU(const float *a, const float *b,
const bool pixel_offset = true) {
float offset = pixel_offset ? static_cast<float>(1.0) : 0;
float left = max(a[0], b[0]), right = min(a[2], b[2]);
float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
float width = max(right - left + offset, 0.f),
height = max(bottom - top + offset, 0.f);
float inter_s = width * height;
float s_a = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
float s_b = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
float s_a = (a[2] - a[0] + offset) * (a[3] - a[1] + offset);
float s_b = (b[2] - b[0] + offset) * (b[3] - b[1] + offset);
return inter_s / (s_a + s_b - inter_s);
}
static __global__ void NMSKernel(const int n_boxes,
const float nms_overlap_thresh,
const float *dev_boxes, uint64_t *dev_mask) {
const float *dev_boxes, uint64_t *dev_mask,
bool pixel_offset = true) {
const int row_start = blockIdx.y;
const int col_start = blockIdx.x;
......@@ -231,7 +246,8 @@ static __global__ void NMSKernel(const int n_boxes,
start = threadIdx.x + 1;
}
for (i = start; i < col_size; i++) {
if (IoU(cur_box, block_boxes + i * 4) > nms_overlap_thresh) {
if (IoU(cur_box, block_boxes + i * 4, pixel_offset) >
nms_overlap_thresh) {
t |= 1ULL << i;
}
}
......@@ -243,7 +259,7 @@ static __global__ void NMSKernel(const int n_boxes,
template <typename T>
static void NMS(const platform::CUDADeviceContext &ctx, const Tensor &proposals,
const Tensor &sorted_indices, const T nms_threshold,
Tensor *keep_out) {
Tensor *keep_out, bool pixel_offset = true) {
int boxes_num = proposals.dims()[0];
const int col_blocks = DIVUP(boxes_num, kThreadsPerBlock);
dim3 blocks(DIVUP(boxes_num, kThreadsPerBlock),
......@@ -255,7 +271,8 @@ static void NMS(const platform::CUDADeviceContext &ctx, const Tensor &proposals,
framework::Vector<uint64_t> mask(boxes_num * col_blocks);
NMSKernel<<<blocks, threads>>>(boxes_num, nms_threshold, boxes,
mask.CUDAMutableData(BOOST_GET_CONST(
platform::CUDAPlace, ctx.GetPlace())));
platform::CUDAPlace, ctx.GetPlace())),
pixel_offset);
std::vector<uint64_t> remv(col_blocks);
memset(&remv[0], 0, sizeof(uint64_t) * col_blocks);
......
......@@ -31,7 +31,7 @@ struct RangeInitFunctor {
};
template <typename T>
inline HOSTDEVICE T RoIArea(const T* box, bool normalized) {
inline HOSTDEVICE T RoIArea(const T* box, bool pixel_offset = true) {
if (box[2] < box[0] || box[3] < box[1]) {
// If coordinate values are invalid
// (e.g. xmax < xmin or ymax < ymin), return 0.
......@@ -39,11 +39,11 @@ inline HOSTDEVICE T RoIArea(const T* box, bool normalized) {
} else {
const T w = box[2] - box[0];
const T h = box[3] - box[1];
if (normalized) {
return w * h;
} else {
if (pixel_offset) {
// If coordinate values are not within range [0, 1].
return (w + 1) * (h + 1);
} else {
return w * h;
}
}
}
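A Python rendering of the same area rule, including the guard for degenerate boxes (a sketch mirroring RoIArea above, not a Paddle API):

def roi_area(box, pixel_offset=True):
    # box: [xmin, ymin, xmax, ymax]
    if box[2] < box[0] or box[3] < box[1]:
        return 0.0  # invalid extents (xmax < xmin or ymax < ymin) contribute no area
    w, h = box[2] - box[0], box[3] - box[1]
    return (w + 1) * (h + 1) if pixel_offset else w * h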
......@@ -157,10 +157,12 @@ template <class T>
void ClipTiledBoxes(const platform::DeviceContext& ctx,
const framework::Tensor& im_info,
const framework::Tensor& input_boxes,
framework::Tensor* out, bool is_scale = true) {
framework::Tensor* out, bool is_scale = true,
bool pixel_offset = true) {
T* out_data = out->mutable_data<T>(ctx.GetPlace());
const T* im_info_data = im_info.data<T>();
const T* input_boxes_data = input_boxes.data<T>();
T offset = pixel_offset ? static_cast<T>(1.0) : 0;
T zero(0);
T im_w =
is_scale ? round(im_info_data[1] / im_info_data[2]) : im_info_data[1];
......@@ -168,13 +170,17 @@ void ClipTiledBoxes(const platform::DeviceContext& ctx,
is_scale ? round(im_info_data[0] / im_info_data[2]) : im_info_data[0];
for (int64_t i = 0; i < input_boxes.numel(); ++i) {
if (i % 4 == 0) {
out_data[i] = std::max(std::min(input_boxes_data[i], im_w - 1), zero);
out_data[i] =
std::max(std::min(input_boxes_data[i], im_w - offset), zero);
} else if (i % 4 == 1) {
out_data[i] = std::max(std::min(input_boxes_data[i], im_h - 1), zero);
out_data[i] =
std::max(std::min(input_boxes_data[i], im_h - offset), zero);
} else if (i % 4 == 2) {
out_data[i] = std::max(std::min(input_boxes_data[i], im_w - 1), zero);
out_data[i] =
std::max(std::min(input_boxes_data[i], im_w - offset), zero);
} else {
out_data[i] = std::max(std::min(input_boxes_data[i], im_h - 1), zero);
out_data[i] =
std::max(std::min(input_boxes_data[i], im_h - offset), zero);
}
}
}
......@@ -184,30 +190,36 @@ template <class T>
void FilterBoxes(const platform::DeviceContext& ctx,
const framework::Tensor* boxes, float min_size,
const framework::Tensor& im_info, bool is_scale,
framework::Tensor* keep) {
framework::Tensor* keep, bool pixel_offset = true) {
const T* im_info_data = im_info.data<T>();
const T* boxes_data = boxes->data<T>();
keep->Resize({boxes->dims()[0]});
min_size = std::max(min_size, 1.0f);
int* keep_data = keep->mutable_data<int>(ctx.GetPlace());
T offset = pixel_offset ? static_cast<T>(1.0) : 0;
int keep_len = 0;
for (int i = 0; i < boxes->dims()[0]; ++i) {
T ws = boxes_data[4 * i + 2] - boxes_data[4 * i] + 1;
T hs = boxes_data[4 * i + 3] - boxes_data[4 * i + 1] + 1;
T ws = boxes_data[4 * i + 2] - boxes_data[4 * i] + offset;
T hs = boxes_data[4 * i + 3] - boxes_data[4 * i + 1] + offset;
if (pixel_offset) {
T x_ctr = boxes_data[4 * i] + ws / 2;
T y_ctr = boxes_data[4 * i + 1] + hs / 2;
if (is_scale) {
ws = (boxes_data[4 * i + 2] - boxes_data[4 * i]) / im_info_data[2] + 1;
hs =
(boxes_data[4 * i + 3] - boxes_data[4 * i + 1]) / im_info_data[2] + 1;
hs = (boxes_data[4 * i + 3] - boxes_data[4 * i + 1]) / im_info_data[2] +
1;
}
if (ws >= min_size && hs >= min_size && x_ctr <= im_info_data[1] &&
y_ctr <= im_info_data[0]) {
keep_data[keep_len++] = i;
}
} else {
if (ws >= min_size && hs >= min_size) {
keep_data[keep_len++] = i;
}
}
}
keep->Resize({keep_len});
}
......@@ -216,8 +228,8 @@ template <class T>
static void BoxCoder(const platform::DeviceContext& ctx,
framework::Tensor* all_anchors,
framework::Tensor* bbox_deltas,
framework::Tensor* variances,
framework::Tensor* proposals) {
framework::Tensor* variances, framework::Tensor* proposals,
const bool pixel_offset = true) {
T* proposals_data = proposals->mutable_data<T>(ctx.GetPlace());
int64_t row = all_anchors->dims()[0];
......@@ -230,9 +242,11 @@ static void BoxCoder(const platform::DeviceContext& ctx,
variances_data = variances->data<T>();
}
T offset = pixel_offset ? static_cast<T>(1.0) : 0;
for (int64_t i = 0; i < row; ++i) {
T anchor_width = anchor_data[i * len + 2] - anchor_data[i * len] + 1.0;
T anchor_height = anchor_data[i * len + 3] - anchor_data[i * len + 1] + 1.0;
T anchor_width = anchor_data[i * len + 2] - anchor_data[i * len] + offset;
T anchor_height =
anchor_data[i * len + 3] - anchor_data[i * len + 1] + offset;
T anchor_center_x = anchor_data[i * len] + 0.5 * anchor_width;
T anchor_center_y = anchor_data[i * len + 1] + 0.5 * anchor_height;
......@@ -270,8 +284,8 @@ static void BoxCoder(const platform::DeviceContext& ctx,
proposals_data[i * len] = bbox_center_x - bbox_width / 2;
proposals_data[i * len + 1] = bbox_center_y - bbox_height / 2;
proposals_data[i * len + 2] = bbox_center_x + bbox_width / 2 - 1;
proposals_data[i * len + 3] = bbox_center_y + bbox_height / 2 - 1;
proposals_data[i * len + 2] = bbox_center_x + bbox_width / 2 - offset;
proposals_data[i * len + 3] = bbox_center_y + bbox_height / 2 - offset;
}
// return proposals;
}
......
......@@ -103,6 +103,9 @@ class DistributeFpnProposalsOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr<int>("refer_scale",
"The referring scale of FPN layer with"
" specified level");
AddAttr<bool>("pixel_offset", "(bool, default True),",
"If true, im_shape pixel offset is 1.")
.SetDefault(true);
AddComment(R"DOC(
This operator distributes all proposals into different FPN levels,
with respect to scale of the proposals, the referring scale and
......@@ -134,4 +137,8 @@ REGISTER_OP_VERSION(distribute_fpn_proposals)
.NewOutput("MultiLevelRoisNum",
"The RoIs' number of each image on multiple "
"levels. The number on each level has the shape of (B),"
"B is the number of images."));
"B is the number of images."))
.AddCheckpoint(
R"ROC(Register distribute_fpn_proposals for adding the attribute of pixel_offset)ROC",
paddle::framework::compatible::OpVersionDesc().NewAttr(
"pixel_offset", "If true, im_shape pixel offset is 1.", true));
......@@ -43,15 +43,15 @@ __global__ void GPUDistFpnProposalsHelper(
const int nthreads, const T* rois, const int lod_size,
const int refer_level, const int refer_scale, const int max_level,
const int min_level, int* roi_batch_id_data, int* sub_lod_list,
int* target_lvls) {
int* target_lvls, bool pixel_offset = true) {
CUDA_KERNEL_LOOP(i, nthreads) {
const T* offset_roi = rois + i * BBoxSize;
int roi_batch_ind = roi_batch_id_data[i];
// get the target level of current rois
T roi_area = RoIArea(offset_roi, false);
T roi_area = RoIArea(offset_roi, pixel_offset);
T roi_scale = sqrt(roi_area);
int tgt_lvl = floor(
log2(roi_scale / static_cast<T>(refer_scale) + (T)1e-6) + refer_level);
log2(roi_scale / static_cast<T>(refer_scale) + (T)1e-8) + refer_level);
tgt_lvl = min(max_level, max(tgt_lvl, min_level));
target_lvls[i] = tgt_lvl;
// compute number of rois in the same batch and same target level
......@@ -73,6 +73,7 @@ class GPUDistributeFpnProposalsOpKernel : public framework::OpKernel<T> {
const int max_level = ctx.Attr<int>("max_level");
const int refer_level = ctx.Attr<int>("refer_level");
const int refer_scale = ctx.Attr<int>("refer_scale");
const bool pixel_offset = ctx.Attr<bool>("pixel_offset");
int num_level = max_level - min_level + 1;
// check that the fpn_rois is not empty
......@@ -126,7 +127,7 @@ class GPUDistributeFpnProposalsOpKernel : public framework::OpKernel<T> {
GPUDistFpnProposalsHelper<T><<<dist_blocks, threads>>>(
roi_num, fpn_rois->data<T>(), lod_size, refer_level, refer_scale,
max_level, min_level, roi_batch_id_list_gpu.data<int>(),
sub_lod_list_data, target_lvls_data);
sub_lod_list_data, target_lvls_data, pixel_offset);
dev_ctx.Wait();
auto place = BOOST_GET_CONST(platform::CUDAPlace, dev_ctx.GetPlace());
......
......@@ -44,7 +44,7 @@ inline std::vector<size_t> GetLodFromRoisNum(const Tensor* rois_num) {
}
template <typename T>
static inline T BBoxArea(const T* box, bool normalized) {
static inline T BBoxArea(const T* box, bool pixel_offset) {
if (box[2] < box[0] || box[3] < box[1]) {
// If coordinate values are invalid
// (e.g. xmax < xmin or ymax < ymin), return 0.
......@@ -52,11 +52,11 @@ static inline T BBoxArea(const T* box, bool normalized) {
} else {
const T w = box[2] - box[0];
const T h = box[3] - box[1];
if (normalized) {
return w * h;
} else {
if (pixel_offset) {
// If coordinate values are not within range [0, 1].
return (w + 1) * (h + 1);
} else {
return w * h;
}
}
}
......@@ -77,6 +77,7 @@ class DistributeFpnProposalsOpKernel : public framework::OpKernel<T> {
const int max_level = context.Attr<int>("max_level");
const int refer_level = context.Attr<int>("refer_level");
const int refer_scale = context.Attr<int>("refer_scale");
const bool pixel_offset = context.Attr<bool>("pixel_offset");
const int num_level = max_level - min_level + 1;
// check that the fpn_rois is not empty
......@@ -108,7 +109,7 @@ class DistributeFpnProposalsOpKernel : public framework::OpKernel<T> {
const T* rois_data = fpn_rois_slice.data<T>();
for (int j = 0; j < fpn_rois_slice.dims()[0]; ++j) {
// get the target level of current rois
T roi_scale = std::sqrt(BBoxArea(rois_data, false));
T roi_scale = std::sqrt(BBoxArea(rois_data, pixel_offset));
int tgt_lvl = std::floor(std::log2(roi_scale / refer_scale + (T)1e-6) +
refer_level);
tgt_lvl = std::min(max_level, std::max(tgt_lvl, min_level));
......
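The target level computed above follows tgt_lvl = floor(log2(sqrt(area) / refer_scale + eps) + refer_level), clipped to [min_level, max_level]. A small NumPy sketch of that mapping (plain arithmetic, not the Paddle op; the kernels use a small eps of 1e-6 or 1e-8):

import numpy as np

def target_level(areas, refer_scale=224, refer_level=4, min_level=2, max_level=5, eps=1e-6):
    # areas: per-RoI areas, computed with or without the +1 pixel offset
    scale = np.sqrt(areas)
    lvl = np.floor(np.log2(scale / refer_scale + eps) + refer_level)
    return np.clip(lvl, min_level, max_level).astype(int)

print(target_level(np.array([224.0 ** 2])))  # [4]: an RoI at the reference scale maps to refer_level
print(target_level(np.array([112.0 ** 2])))  # [3]: halving the scale drops one level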
......@@ -87,6 +87,7 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> {
float nms_thresh = context.Attr<float>("nms_thresh");
float min_size = context.Attr<float>("min_size");
float eta = context.Attr<float>("eta");
bool pixel_offset = context.Attr<bool>("pixel_offset");
auto &dev_ctx =
context.template device_context<platform::CPUDeviceContext>();
......@@ -134,10 +135,10 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> {
bbox_deltas_slice.Resize({h_bbox * w_bbox * c_bbox / 4, 4});
scores_slice.Resize({h_score * w_score * c_score, 1});
std::pair<Tensor, Tensor> tensor_pair =
ProposalForOneImage(dev_ctx, im_shape_slice, anchors, variances,
bbox_deltas_slice, scores_slice, pre_nms_top_n,
post_nms_top_n, nms_thresh, min_size, eta);
std::pair<Tensor, Tensor> tensor_pair = ProposalForOneImage(
dev_ctx, im_shape_slice, anchors, variances, bbox_deltas_slice,
scores_slice, pre_nms_top_n, post_nms_top_n, nms_thresh, min_size,
eta, pixel_offset);
Tensor &proposals = tensor_pair.first;
Tensor &scores = tensor_pair.second;
......@@ -168,7 +169,7 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> {
const Tensor &bbox_deltas_slice, // [M, 4]
const Tensor &scores_slice, // [N, 1]
int pre_nms_top_n, int post_nms_top_n, float nms_thresh, float min_size,
float eta) const {
float eta, bool pixel_offset = true) const {
auto *scores_data = scores_slice.data<T>();
// Sort index
......@@ -203,12 +204,15 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> {
Tensor proposals;
proposals.mutable_data<T>({index_t.numel(), 4}, ctx.GetPlace());
BoxCoder<T>(ctx, &anchor_sel, &bbox_sel, &var_sel, &proposals);
BoxCoder<T>(ctx, &anchor_sel, &bbox_sel, &var_sel, &proposals,
pixel_offset);
ClipTiledBoxes<T>(ctx, im_shape_slice, proposals, &proposals, false);
ClipTiledBoxes<T>(ctx, im_shape_slice, proposals, &proposals, false,
pixel_offset);
Tensor keep;
FilterBoxes<T>(ctx, &proposals, min_size, im_shape_slice, false, &keep);
FilterBoxes<T>(ctx, &proposals, min_size, im_shape_slice, false, &keep,
pixel_offset);
// Handle the case when there is no keep index left
if (keep.numel() == 0) {
math::SetConstant<platform::CPUDeviceContext, T> set_zero;
......@@ -229,7 +233,8 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> {
return std::make_pair(bbox_sel, scores_filter);
}
Tensor keep_nms = NMS<T>(ctx, &bbox_sel, &scores_filter, nms_thresh, eta);
Tensor keep_nms =
NMS<T>(ctx, &bbox_sel, &scores_filter, nms_thresh, eta, pixel_offset);
if (post_nms_top_n > 0 && post_nms_top_n < keep_nms.numel()) {
keep_nms.Resize({post_nms_top_n});
......@@ -280,6 +285,9 @@ class GenerateProposalsV2OpMaker : public framework::OpProtoAndCheckerMaker {
"Proposal height and width both need to be greater "
"than this min_size.");
AddAttr<float>("eta", "The parameter for adaptive NMS.");
AddAttr<bool>("pixel_offset", "(bool, default True),",
"If true, im_shape pixel offset is 1.")
.SetDefault(true);
AddComment(R"DOC(
This operator is the second version of generate_proposals op to generate
bounding box proposals for Faster RCNN.
......@@ -312,3 +320,8 @@ REGISTER_OPERATOR(
REGISTER_OP_CPU_KERNEL(generate_proposals_v2,
ops::GenerateProposalsV2Kernel<float>,
ops::GenerateProposalsV2Kernel<double>);
REGISTER_OP_VERSION(generate_proposals_v2)
.AddCheckpoint(
R"ROC(Registe generate_proposals_v2 for adding the attribute of pixel_offset)ROC",
paddle::framework::compatible::OpVersionDesc().NewAttr(
"pixel_offset", "If true, im_shape pixel offset is 1.", true));
......@@ -36,7 +36,7 @@ static std::pair<Tensor, Tensor> ProposalForOneImage(
const Tensor &bbox_deltas, // [M, 4]
const Tensor &scores, // [N, 1]
int pre_nms_top_n, int post_nms_top_n, float nms_thresh, float min_size,
float eta) {
float eta, bool pixel_offset) {
// 1. pre nms
Tensor scores_sort, index_sort;
SortDescending<T>(ctx, scores, &scores_sort, &index_sort);
......@@ -54,7 +54,8 @@ static std::pair<Tensor, Tensor> ProposalForOneImage(
platform::ForRange<platform::CUDADeviceContext> for_range(ctx, pre_nms_num);
for_range(BoxDecodeAndClipFunctor<T>{
anchors.data<T>(), bbox_deltas.data<T>(), variances.data<T>(),
index_sort.data<int>(), im_shape.data<T>(), proposals.data<T>()});
index_sort.data<int>(), im_shape.data<T>(), proposals.data<T>(),
pixel_offset});
}
// 3. filter
......@@ -65,7 +66,7 @@ static std::pair<Tensor, Tensor> ProposalForOneImage(
auto stream = ctx.stream();
FilterBBoxes<T, 512><<<1, 512, 0, stream>>>(
proposals.data<T>(), im_shape.data<T>(), min_size, pre_nms_num,
keep_num_t.data<int>(), keep_index.data<int>(), false);
keep_num_t.data<int>(), keep_index.data<int>(), false, pixel_offset);
int keep_num;
const auto gpu_place = BOOST_GET_CONST(platform::CUDAPlace, ctx.GetPlace());
memory::Copy(platform::CPUPlace(), &keep_num, gpu_place,
......@@ -94,7 +95,8 @@ static std::pair<Tensor, Tensor> ProposalForOneImage(
// 4. nms
Tensor keep_nms;
NMS<T>(ctx, proposals_filter, keep_index, nms_thresh, &keep_nms);
NMS<T>(ctx, proposals_filter, keep_index, nms_thresh, &keep_nms,
pixel_offset);
if (post_nms_top_n > 0 && post_nms_top_n < keep_nms.numel()) {
keep_nms.Resize({post_nms_top_n});
}
......@@ -129,6 +131,7 @@ class CUDAGenerateProposalsV2Kernel : public framework::OpKernel<T> {
float nms_thresh = context.Attr<float>("nms_thresh");
float min_size = context.Attr<float>("min_size");
float eta = context.Attr<float>("eta");
bool pixel_offset = context.Attr<bool>("pixel_offset");
PADDLE_ENFORCE_GE(eta, 1.,
platform::errors::InvalidArgument(
"Not support adaptive NMS. The attribute 'eta' "
......@@ -184,10 +187,10 @@ class CUDAGenerateProposalsV2Kernel : public framework::OpKernel<T> {
bbox_deltas_slice.Resize({h_bbox * w_bbox * c_bbox / 4, 4});
scores_slice.Resize({h_score * w_score * c_score, 1});
std::pair<Tensor, Tensor> box_score_pair =
ProposalForOneImage<T>(dev_ctx, im_shape_slice, anchors, variances,
bbox_deltas_slice, scores_slice, pre_nms_top_n,
post_nms_top_n, nms_thresh, min_size, eta);
std::pair<Tensor, Tensor> box_score_pair = ProposalForOneImage<T>(
dev_ctx, im_shape_slice, anchors, variances, bbox_deltas_slice,
scores_slice, pre_nms_top_n, post_nms_top_n, nms_thresh, min_size,
eta, pixel_offset);
Tensor &proposals = box_score_pair.first;
Tensor &scores = box_score_pair.second;
......
......@@ -130,7 +130,7 @@ static inline framework::Tensor VectorToTensor(
template <class T>
framework::Tensor NMS(const platform::DeviceContext& ctx,
framework::Tensor* bbox, framework::Tensor* scores,
T nms_threshold, float eta) {
T nms_threshold, float eta, bool pixel_offset = true) {
int64_t num_boxes = bbox->dims()[0];
// 4: [xmin ymin xmax ymax]
int64_t box_size = bbox->dims()[1];
......@@ -144,13 +144,15 @@ framework::Tensor NMS(const platform::DeviceContext& ctx,
int selected_num = 0;
T adaptive_threshold = nms_threshold;
const T* bbox_data = bbox->data<T>();
bool normalized = pixel_offset ? false : true;
while (sorted_indices.size() != 0) {
int idx = sorted_indices.back().second;
bool flag = true;
for (int kept_idx : selected_indices) {
if (flag) {
T overlap = JaccardOverlap<T>(bbox_data + idx * box_size,
bbox_data + kept_idx * box_size, false);
T overlap =
JaccardOverlap<T>(bbox_data + idx * box_size,
bbox_data + kept_idx * box_size, normalized);
flag = (overlap <= adaptive_threshold);
} else {
break;
......
......@@ -175,6 +175,10 @@ class ROIAlignOpMaker : public framework::OpProtoAndCheckerMaker {
"If <=0, then grid points are adaptive to roi_width "
"and pooled_w, likewise for height")
.SetDefault(-1);
AddAttr<bool>("aligned",
"(bool, default False),"
"If true, pixel shift it by -0.5 for align more perfectly")
.SetDefault(false);
AddComment(R"DOC(
**RoIAlign Operator**
......@@ -245,4 +249,11 @@ REGISTER_OP_VERSION(roi_align)
Upgrade roi_align add a new input [RoisNum])ROC",
paddle::framework::compatible::OpVersionDesc().NewInput(
"RoisNum",
"The number of RoIs in each image. RoisNum is dispensable."));
"The number of RoIs in each image. RoisNum is dispensable."))
.AddCheckpoint(
R"ROC(
Upgrade roi_align add a new input [aligned])ROC",
paddle::framework::compatible::OpVersionDesc().NewAttr(
"aligned",
"If true, pixel shift it by -0.5 for align more perfectly.",
false));
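The aligned attribute registered above changes how an RoI is mapped onto the feature map: with aligned=True the scaled coordinates are shifted by -0.5 and the width/height are no longer clamped to a minimum of 1, which is exactly what the kernels below do. A small Python sketch of that mapping (illustration only):

def roi_bin_size(roi, spatial_scale, pooled_h, pooled_w, aligned):
    # roi: [xmin, ymin, xmax, ymax] in input-image coordinates
    offset = 0.5 if aligned else 0.0
    xmin = roi[0] * spatial_scale - offset
    ymin = roi[1] * spatial_scale - offset
    xmax = roi[2] * spatial_scale - offset
    ymax = roi[3] * spatial_scale - offset
    roi_w, roi_h = xmax - xmin, ymax - ymin
    if not aligned:  # legacy behaviour keeps a minimum extent of 1
        roi_w, roi_h = max(roi_w, 1.0), max(roi_h, 1.0)
    return roi_w / pooled_w, roi_h / pooled_h  # per-bin size used for the sampling grid

print(roi_bin_size([1., 1., 2., 2.], spatial_scale=0.5, pooled_h=2, pooled_w=2, aligned=False))  # (0.5, 0.5)
print(roi_bin_size([1., 1., 2., 2.], spatial_scale=0.5, pooled_h=2, pooled_w=2, aligned=True))   # (0.25, 0.25)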
......@@ -105,7 +105,8 @@ __global__ void GPUROIAlignForward(
const int nthreads, const T* input_data, const T* input_rois,
const float spatial_scale, const int channels, const int height,
const int width, const int pooled_height, const int pooled_width,
const int sampling_ratio, int* roi_batch_id_data, T* output_data) {
const int sampling_ratio, int* roi_batch_id_data, T* output_data,
const bool continuous_coordinate) {
CUDA_KERNEL_LOOP(i, nthreads) {
int pw = i % pooled_width;
int ph = (i / pooled_width) % pooled_height;
......@@ -115,13 +116,19 @@ __global__ void GPUROIAlignForward(
const T* offset_input_rois = input_rois + n * kROISize;
int roi_batch_ind = roi_batch_id_data[n];
T roi_xmin = offset_input_rois[0] * spatial_scale;
T roi_ymin = offset_input_rois[1] * spatial_scale;
T roi_xmax = offset_input_rois[2] * spatial_scale;
T roi_ymax = offset_input_rois[3] * spatial_scale;
T roi_offset = continuous_coordinate ? static_cast<T>(0.5) : 0;
T roi_xmin = offset_input_rois[0] * spatial_scale - roi_offset;
T roi_ymin = offset_input_rois[1] * spatial_scale - roi_offset;
T roi_xmax = offset_input_rois[2] * spatial_scale - roi_offset;
T roi_ymax = offset_input_rois[3] * spatial_scale - roi_offset;
T roi_width = max(roi_xmax - roi_xmin, static_cast<T>(1.));
T roi_height = max(roi_ymax - roi_ymin, static_cast<T>(1.));
T roi_width = roi_xmax - roi_xmin;
T roi_height = roi_ymax - roi_ymin;
if (!continuous_coordinate) {
roi_width = max(roi_width, static_cast<T>(1.));
roi_height = max(roi_height, static_cast<T>(1.));
}
T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);
......@@ -153,14 +160,12 @@ __global__ void GPUROIAlignForward(
}
template <typename T>
__global__ void GPUROIAlignBackward(const int nthreads, const T* input_rois,
const T* out_grad, const int num_rois,
const float spatial_scale,
const int channels, const int height,
const int width, const int pooled_height,
const int pooled_width,
const int sampling_ratio,
int* roi_batch_id_data, T* input_grad) {
__global__ void GPUROIAlignBackward(
const int nthreads, const T* input_rois, const T* out_grad,
const int num_rois, const float spatial_scale, const int channels,
const int height, const int width, const int pooled_height,
const int pooled_width, const int sampling_ratio, int* roi_batch_id_data,
T* input_grad, const bool continuous_coordinate) {
CUDA_KERNEL_LOOP(i, nthreads) {
int pw = i % pooled_width;
int ph = (i / pooled_width) % pooled_height;
......@@ -169,13 +174,18 @@ __global__ void GPUROIAlignBackward(const int nthreads, const T* input_rois,
const T* offset_input_rois = input_rois + n * kROISize;
int roi_batch_ind = roi_batch_id_data[n];
T roi_xmin = offset_input_rois[0] * spatial_scale;
T roi_ymin = offset_input_rois[1] * spatial_scale;
T roi_xmax = offset_input_rois[2] * spatial_scale;
T roi_ymax = offset_input_rois[3] * spatial_scale;
T roi_width = max(roi_xmax - roi_xmin, static_cast<T>(1.));
T roi_height = max(roi_ymax - roi_ymin, static_cast<T>(1.));
T roi_offset = continuous_coordinate ? T(0.5) : 0;
T roi_xmin = offset_input_rois[0] * spatial_scale - roi_offset;
T roi_ymin = offset_input_rois[1] * spatial_scale - roi_offset;
T roi_xmax = offset_input_rois[2] * spatial_scale - roi_offset;
T roi_ymax = offset_input_rois[3] * spatial_scale - roi_offset;
T roi_width = roi_xmax - roi_xmin;
T roi_height = roi_ymax - roi_ymin;
if (!continuous_coordinate) {
roi_width = max(roi_width, static_cast<T>(1.));
roi_height = max(roi_height, static_cast<T>(1.));
}
T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);
......@@ -236,6 +246,7 @@ class GPUROIAlignOpKernel : public framework::OpKernel<T> {
auto pooled_width = ctx.Attr<int>("pooled_width");
auto spatial_scale = ctx.Attr<float>("spatial_scale");
auto sampling_ratio = ctx.Attr<int>("sampling_ratio");
auto aligned = ctx.Attr<bool>("aligned");
auto in_dims = in->dims();
int batch_size = in_dims[0];
......@@ -316,7 +327,7 @@ class GPUROIAlignOpKernel : public framework::OpKernel<T> {
GPUROIAlignForward<T><<<blocks, threads, 0, dev_ctx.stream()>>>(
output_size, in->data<T>(), rois->data<T>(), spatial_scale, channels,
height, width, pooled_height, pooled_width, sampling_ratio, roi_id_data,
out->mutable_data<T>(ctx.GetPlace()));
out->mutable_data<T>(ctx.GetPlace()), aligned);
}
};
......@@ -334,6 +345,7 @@ class GPUROIAlignGradOpKernel : public framework::OpKernel<T> {
auto pooled_width = ctx.Attr<int>("pooled_width");
auto spatial_scale = ctx.Attr<float>("spatial_scale");
auto sampling_ratio = ctx.Attr<int>("sampling_ratio");
auto aligned = ctx.Attr<bool>("aligned");
int rois_num = rois->dims()[0];
int channels = in->dims()[1];
......@@ -390,8 +402,8 @@ class GPUROIAlignGradOpKernel : public framework::OpKernel<T> {
GPUROIAlignBackward<T><<<blocks, threads, 0, dev_ctx.stream()>>>(
output_grad_size, rois->data<T>(), out_grad->data<T>(), rois_num,
spatial_scale, channels, height, width, pooled_height, pooled_width,
sampling_ratio, roi_id_data,
in_grad->mutable_data<T>(ctx.GetPlace()));
sampling_ratio, roi_id_data, in_grad->mutable_data<T>(ctx.GetPlace()),
aligned);
}
}
};
......
......@@ -145,6 +145,7 @@ class CPUROIAlignOpKernel : public framework::OpKernel<T> {
auto pooled_width = ctx.Attr<int>("pooled_width");
auto spatial_scale = ctx.Attr<float>("spatial_scale");
auto sampling_ratio = ctx.Attr<int>("sampling_ratio");
auto aligned = ctx.Attr<bool>("aligned");
auto& dev_ctx = ctx.template device_context<DeviceContext>();
......@@ -215,15 +216,21 @@ class CPUROIAlignOpKernel : public framework::OpKernel<T> {
}
T* output_data = out->mutable_data<T>(ctx.GetPlace());
const T* rois_data = rois->data<T>();
T roi_offset = aligned ? T(0.5) : 0;
for (int n = 0; n < rois_num; ++n) {
int roi_batch_id = roi_batch_id_data[n];
T roi_xmin = rois_data[0] * spatial_scale;
T roi_ymin = rois_data[1] * spatial_scale;
T roi_xmax = rois_data[2] * spatial_scale;
T roi_ymax = rois_data[3] * spatial_scale;
T roi_xmin = rois_data[0] * spatial_scale - roi_offset;
T roi_ymin = rois_data[1] * spatial_scale - roi_offset;
T roi_xmax = rois_data[2] * spatial_scale - roi_offset;
T roi_ymax = rois_data[3] * spatial_scale - roi_offset;
T roi_width = roi_xmax - roi_xmin;
T roi_height = roi_ymax - roi_ymin;
if (!aligned) {
roi_width = std::max(roi_width, static_cast<T>(1.));
roi_height = std::max(roi_height, static_cast<T>(1.));
}
T roi_width = std::max(roi_xmax - roi_xmin, static_cast<T>(1.));
T roi_height = std::max(roi_ymax - roi_ymin, static_cast<T>(1.));
T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);
const T* batch_data = input_data + roi_batch_id * in_stride[0];
......@@ -290,6 +297,7 @@ class CPUROIAlignGradOpKernel : public framework::OpKernel<T> {
auto spatial_scale = ctx.Attr<float>("spatial_scale");
auto sampling_ratio = ctx.Attr<int>("sampling_ratio");
auto in_dims = in->dims();
auto aligned = ctx.Attr<bool>("aligned");
int channels = in_dims[1];
int height = in_dims[2];
......@@ -344,14 +352,21 @@ class CPUROIAlignGradOpKernel : public framework::OpKernel<T> {
auto roi_stride = framework::stride(rois->dims());
auto out_stride = framework::stride(out_grad->dims());
T roi_offset = aligned ? T(0.5) : 0;
for (int n = 0; n < rois_num; ++n) {
int roi_batch_idx = roi_batch_id_data[n];
T roi_xmin = rois_data[0] * spatial_scale;
T roi_ymin = rois_data[1] * spatial_scale;
T roi_xmax = rois_data[2] * spatial_scale;
T roi_ymax = rois_data[3] * spatial_scale;
T roi_width = std::max(roi_xmax - roi_xmin, static_cast<T>(1.));
T roi_height = std::max(roi_ymax - roi_ymin, static_cast<T>(1.));
T roi_xmin = rois_data[0] * spatial_scale - roi_offset;
T roi_ymin = rois_data[1] * spatial_scale - roi_offset;
T roi_xmax = rois_data[2] * spatial_scale - roi_offset;
T roi_ymax = rois_data[3] * spatial_scale - roi_offset;
T roi_width = roi_xmax - roi_xmin;
T roi_height = roi_ymax - roi_ymin;
if (!aligned) {
roi_width = std::max(roi_width, static_cast<T>(1.));
roi_height = std::max(roi_height, static_cast<T>(1.));
}
T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);
for (int c = 0; c < channels; ++c) {
......
......@@ -31,7 +31,8 @@ class TestDistributeFPNProposalsOp(OpTest):
'max_level': self.roi_max_level,
'min_level': self.roi_min_level,
'refer_scale': self.canonical_scale,
'refer_level': self.canonical_level
'refer_level': self.canonical_level,
'pixel_offset': self.pixel_offset,
}
output = [('out%d' % i, self.rois_fpn[i])
for i in range(len(self.rois_fpn))]
......@@ -47,10 +48,12 @@ class TestDistributeFPNProposalsOp(OpTest):
self.canonical_scale = 224
self.canonical_level = 4
self.images_shape = [512, 512]
self.pixel_offset = True
def boxes_area(self, boxes):
w = (boxes[:, 2] - boxes[:, 0] + 1)
h = (boxes[:, 3] - boxes[:, 1] + 1)
offset = 1 if self.pixel_offset else 0
w = (boxes[:, 2] - boxes[:, 0] + offset)
h = (boxes[:, 3] - boxes[:, 1] + offset)
areas = w * h
assert np.all(areas >= 0), 'Negative areas found'
return areas
......@@ -59,7 +62,7 @@ class TestDistributeFPNProposalsOp(OpTest):
s = np.sqrt(self.boxes_area(rois))
s0 = self.canonical_scale
lvl0 = self.canonical_level
target_lvls = np.floor(lvl0 + np.log2(s / s0 + 1e-6))
target_lvls = np.floor(lvl0 + np.log2(s / s0 + 1e-8))
target_lvls = np.clip(target_lvls, lvl_min, lvl_max)
return target_lvls
......@@ -131,7 +134,8 @@ class TestDistributeFPNProposalsOpWithRoisNum(TestDistributeFPNProposalsOp):
'max_level': self.roi_max_level,
'min_level': self.roi_min_level,
'refer_scale': self.canonical_scale,
'refer_level': self.canonical_level
'refer_level': self.canonical_level,
'pixel_offset': self.pixel_offset,
}
output = [('out%d' % i, self.rois_fpn[i])
for i in range(len(self.rois_fpn))]
......@@ -147,5 +151,16 @@ class TestDistributeFPNProposalsOpWithRoisNum(TestDistributeFPNProposalsOp):
}
class TestDistributeFPNProposalsOpNoOffset(
TestDistributeFPNProposalsOpWithRoisNum):
def init_test_case(self):
self.roi_max_level = 5
self.roi_min_level = 2
self.canonical_scale = 224
self.canonical_level = 4
self.images_shape = [512, 512]
self.pixel_offset = False
if __name__ == '__main__':
unittest.main()
......@@ -21,7 +21,6 @@ import math
import paddle
import paddle.fluid as fluid
from op_test import OpTest
from test_multiclass_nms_op import nms
from test_anchor_generator_op import anchor_generator_in_python
import copy
......@@ -111,18 +110,19 @@ def proposal_for_one_image(im_info, all_anchors, variances, bbox_deltas, scores,
return proposals, scores
def box_coder(all_anchors, bbox_deltas, variances):
def box_coder(all_anchors, bbox_deltas, variances, pixel_offset=True):
"""
Decode proposals by anchors and bbox_deltas from RPN
"""
offset = 1 if pixel_offset else 0
#proposals: xmin, ymin, xmax, ymax
proposals = np.zeros_like(bbox_deltas, dtype=np.float32)
#anchor_loc: width, height, center_x, center_y
anchor_loc = np.zeros_like(bbox_deltas, dtype=np.float32)
anchor_loc[:, 0] = all_anchors[:, 2] - all_anchors[:, 0] + 1
anchor_loc[:, 1] = all_anchors[:, 3] - all_anchors[:, 1] + 1
anchor_loc[:, 0] = all_anchors[:, 2] - all_anchors[:, 0] + offset
anchor_loc[:, 1] = all_anchors[:, 3] - all_anchors[:, 1] + offset
anchor_loc[:, 2] = all_anchors[:, 0] + 0.5 * anchor_loc[:, 0]
anchor_loc[:, 3] = all_anchors[:, 1] + 0.5 * anchor_loc[:, 1]
......@@ -152,51 +152,60 @@ def box_coder(all_anchors, bbox_deltas, variances):
pred_bbox[i, 3] = math.exp(
min(bbox_deltas[i, 3], math.log(1000 / 16.0))) * anchor_loc[i,
1]
proposals[:, 0] = pred_bbox[:, 0] - pred_bbox[:, 2] / 2
proposals[:, 1] = pred_bbox[:, 1] - pred_bbox[:, 3] / 2
proposals[:, 2] = pred_bbox[:, 0] + pred_bbox[:, 2] / 2 - 1
proposals[:, 3] = pred_bbox[:, 1] + pred_bbox[:, 3] / 2 - 1
proposals[:, 2] = pred_bbox[:, 0] + pred_bbox[:, 2] / 2 - offset
proposals[:, 3] = pred_bbox[:, 1] + pred_bbox[:, 3] / 2 - offset
return proposals
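A quick sanity check of the decode step above: with all-zero deltas and unit variances the decoded proposal reproduces the anchor under either convention, since the offset added to the width cancels against the offset subtracted from xmax/ymax. Usage sketch of the box_coder helper defined here:

import numpy as np

anchors = np.array([[0., 0., 15., 15.]], dtype=np.float32)
deltas = np.zeros((1, 4), dtype=np.float32)
variances = np.ones((1, 4), dtype=np.float32)

print(box_coder(anchors, deltas.copy(), variances, pixel_offset=True))   # [[ 0.  0. 15. 15.]]
print(box_coder(anchors, deltas.copy(), variances, pixel_offset=False))  # [[ 0.  0. 15. 15.]]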
def clip_tiled_boxes(boxes, im_shape):
def clip_tiled_boxes(boxes, im_shape, pixel_offset=True):
"""Clip boxes to image boundaries. im_shape is [height, width] and boxes
has shape (N, 4 * num_tiled_boxes)."""
assert boxes.shape[1] % 4 == 0, \
'boxes.shape[1] is {:d}, but must be divisible by 4.'.format(
boxes.shape[1]
)
offset = 1 if pixel_offset else 0
# x1 >= 0
boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
boxes[:, 0::4] = np.maximum(
np.minimum(boxes[:, 0::4], im_shape[1] - offset), 0)
# y1 >= 0
boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
boxes[:, 1::4] = np.maximum(
np.minimum(boxes[:, 1::4], im_shape[0] - offset), 0)
# x2 < im_shape[1]
boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
boxes[:, 2::4] = np.maximum(
np.minimum(boxes[:, 2::4], im_shape[1] - offset), 0)
# y2 < im_shape[0]
boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
boxes[:, 3::4] = np.maximum(
np.minimum(boxes[:, 3::4], im_shape[0] - offset), 0)
return boxes
def filter_boxes(boxes, min_size, im_info):
def filter_boxes(boxes, min_size, im_info, pixel_offset=True):
"""Only keep boxes with both sides >= min_size and center within the image.
"""
# Scale min_size to match image scale
im_scale = im_info[2]
min_size = max(min_size, 1.0)
ws = boxes[:, 2] - boxes[:, 0] + 1
hs = boxes[:, 3] - boxes[:, 1] + 1
offset = 1 if pixel_offset else 0
ws = boxes[:, 2] - boxes[:, 0] + offset
hs = boxes[:, 3] - boxes[:, 1] + offset
if pixel_offset:
ws_orig_scale = (boxes[:, 2] - boxes[:, 0]) / im_scale + 1
hs_orig_scale = (boxes[:, 3] - boxes[:, 1]) / im_scale + 1
x_ctr = boxes[:, 0] + ws / 2.
y_ctr = boxes[:, 1] + hs / 2.
keep = np.where((ws_orig_scale >= min_size) & (hs_orig_scale >= min_size) &
(x_ctr < im_info[1]) & (y_ctr < im_info[0]))[0]
keep = np.where((ws_orig_scale >= min_size) & (
hs_orig_scale >= min_size) & (x_ctr < im_info[1]) & (y_ctr <
im_info[0]))[0]
else:
keep = np.where((ws >= min_size) & (hs >= min_size))[0]
return keep
def iou(box_a, box_b):
def iou(box_a, box_b, pixel_offset=True):
"""
Compute intersection-over-union overlap between box_a and box_b
"""
......@@ -209,9 +218,9 @@ def iou(box_a, box_b):
ymin_b = min(box_b[1], box_b[3])
xmax_b = max(box_b[0], box_b[2])
ymax_b = max(box_b[1], box_b[3])
area_a = (ymax_a - ymin_a + 1) * (xmax_a - xmin_a + 1)
area_b = (ymax_b - ymin_b + 1) * (xmax_b - xmin_b + 1)
offset = 1 if pixel_offset else 0
area_a = (ymax_a - ymin_a + offset) * (xmax_a - xmin_a + offset)
area_b = (ymax_b - ymin_b + offset) * (xmax_b - xmin_b + offset)
if area_a <= 0 and area_b <= 0:
return 0.0
......@@ -220,14 +229,14 @@ def iou(box_a, box_b):
xb = min(xmax_a, xmax_b)
yb = min(ymax_a, ymax_b)
inter_area = max(xb - xa + 1, 0.0) * max(yb - ya + 1, 0.0)
inter_area = max(xb - xa + offset, 0.0) * max(yb - ya + offset, 0.0)
iou_ratio = inter_area / (area_a + area_b - inter_area)
return iou_ratio
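A worked example of how the offset changes the overlap computed above: two 3x3-pixel boxes that share a single corner pixel overlap under the legacy convention but are disjoint in continuous coordinates.

box_a = [0., 0., 2., 2.]
box_b = [2., 2., 4., 4.]
print(iou(box_a, box_b, pixel_offset=True))   # 1/17 ≈ 0.0588: the shared pixel at (2, 2) counts as intersection
print(iou(box_a, box_b, pixel_offset=False))  # 0.0: zero-area intersection, each box has area 4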
def nms(boxes, scores, nms_threshold, eta=1.0):
def nms(boxes, scores, nms_threshold, eta=1.0, pixel_offset=True):
"""Apply non-maximum suppression at test time to avoid detecting too many
overlapping bounding boxes for a given object.
Args:
......@@ -252,7 +261,9 @@ def nms(boxes, scores, nms_threshold, eta=1.0):
for k in range(len(selected_indices)):
if keep:
kept_idx = selected_indices[k]
overlap = iou(boxes[idx], boxes[kept_idx])
overlap = iou(boxes[idx],
boxes[kept_idx],
pixel_offset=pixel_offset)
keep = True if overlap <= adaptive_threshold else False
else:
break
......
......@@ -21,7 +21,6 @@ import math
import paddle
import paddle.fluid as fluid
from op_test import OpTest
from test_multiclass_nms_op import nms
from test_anchor_generator_op import anchor_generator_in_python
import copy
from test_generate_proposals_op import clip_tiled_boxes, box_coder, nms
......@@ -29,7 +28,7 @@ from test_generate_proposals_op import clip_tiled_boxes, box_coder, nms
def generate_proposals_v2_in_python(scores, bbox_deltas, im_shape, anchors,
variances, pre_nms_topN, post_nms_topN,
nms_thresh, min_size, eta):
nms_thresh, min_size, eta, pixel_offset):
all_anchors = anchors.reshape(-1, 4)
rois = np.empty((0, 5), dtype=np.float32)
roi_probs = np.empty((0, 1), dtype=np.float32)
......@@ -42,7 +41,8 @@ def generate_proposals_v2_in_python(scores, bbox_deltas, im_shape, anchors,
img_i_boxes, img_i_probs = proposal_for_one_image(
im_shape[img_idx, :], all_anchors, variances,
bbox_deltas[img_idx, :, :, :], scores[img_idx, :, :, :],
pre_nms_topN, post_nms_topN, nms_thresh, min_size, eta)
pre_nms_topN, post_nms_topN, nms_thresh, min_size, eta,
pixel_offset)
rois_num.append(img_i_probs.shape[0])
rpn_rois.append(img_i_boxes)
rpn_roi_probs.append(img_i_probs)
......@@ -52,7 +52,7 @@ def generate_proposals_v2_in_python(scores, bbox_deltas, im_shape, anchors,
def proposal_for_one_image(im_shape, all_anchors, variances, bbox_deltas,
scores, pre_nms_topN, post_nms_topN, nms_thresh,
min_size, eta):
min_size, eta, pixel_offset):
# Transpose and reshape predicted bbox transformations to get them
# into the same order as the anchors:
# - bbox deltas will be (4 * A, H, W) format from conv output
......@@ -83,12 +83,12 @@ def proposal_for_one_image(im_shape, all_anchors, variances, bbox_deltas,
scores = scores[order, :]
bbox_deltas = bbox_deltas[order, :]
all_anchors = all_anchors[order, :]
proposals = box_coder(all_anchors, bbox_deltas, variances)
proposals = box_coder(all_anchors, bbox_deltas, variances, pixel_offset)
# clip proposals to image (may result in proposals with zero area
# that will be removed in the next step)
proposals = clip_tiled_boxes(proposals, im_shape)
proposals = clip_tiled_boxes(proposals, im_shape, pixel_offset)
# remove predicted boxes with height or width < min_size
keep = filter_boxes(proposals, min_size, im_shape)
keep = filter_boxes(proposals, min_size, im_shape, pixel_offset)
if len(keep) == 0:
proposals = np.zeros((1, 4)).astype('float32')
scores = np.zeros((1, 1)).astype('float32')
......@@ -103,7 +103,8 @@ def proposal_for_one_image(im_shape, all_anchors, variances, bbox_deltas,
keep = nms(boxes=proposals,
scores=scores,
nms_threshold=nms_thresh,
eta=eta)
eta=eta,
pixel_offset=pixel_offset)
if post_nms_topN > 0 and post_nms_topN < len(keep):
keep = keep[:post_nms_topN]
proposals = proposals[keep, :]
......@@ -112,17 +113,21 @@ def proposal_for_one_image(im_shape, all_anchors, variances, bbox_deltas,
return proposals, scores
def filter_boxes(boxes, min_size, im_shape):
def filter_boxes(boxes, min_size, im_shape, pixel_offset=True):
"""Only keep boxes with both sides >= min_size and center within the image.
"""
# Scale min_size to match image scale
min_size = max(min_size, 1.0)
ws = boxes[:, 2] - boxes[:, 0] + 1
hs = boxes[:, 3] - boxes[:, 1] + 1
offset = 1 if pixel_offset else 0
ws = boxes[:, 2] - boxes[:, 0] + offset
hs = boxes[:, 3] - boxes[:, 1] + offset
if pixel_offset:
x_ctr = boxes[:, 0] + ws / 2.
y_ctr = boxes[:, 1] + hs / 2.
keep = np.where((ws >= min_size) & (hs >= min_size) & (x_ctr < im_shape[1])
& (y_ctr < im_shape[0]))[0]
keep = np.where((ws >= min_size) & (hs >= min_size) & (x_ctr < im_shape[
1]) & (y_ctr < im_shape[0]))[0]
else:
keep = np.where((ws >= min_size) & (hs >= min_size))[0]
return keep
......@@ -144,7 +149,8 @@ class TestGenerateProposalsV2Op(OpTest):
'post_nms_topN': self.post_nms_topN,
'nms_thresh': self.nms_thresh,
'min_size': self.min_size,
'eta': self.eta
'eta': self.eta,
'pixel_offset': self.pixel_offset,
}
self.outputs = {
......@@ -165,6 +171,7 @@ class TestGenerateProposalsV2Op(OpTest):
self.nms_thresh = 0.7
self.min_size = 3.0
self.eta = 1.
self.pixel_offset = True
def init_test_input(self):
batch_size = 1
......@@ -191,7 +198,7 @@ class TestGenerateProposalsV2Op(OpTest):
self.rpn_rois, self.rpn_roi_probs, self.rois_num = generate_proposals_v2_in_python(
self.scores, self.bbox_deltas, self.im_shape, self.anchors,
self.variances, self.pre_nms_topN, self.post_nms_topN,
self.nms_thresh, self.min_size, self.eta)
self.nms_thresh, self.min_size, self.eta, self.pixel_offset)
class TestGenerateProposalsV2OutLodOp(TestGenerateProposalsV2Op):
......@@ -231,6 +238,17 @@ class TestGenerateProposalsV2OpNoBoxLeft(TestGenerateProposalsV2Op):
self.nms_thresh = 0.7
self.min_size = 1000.0
self.eta = 1.
self.pixel_offset = True
class TestGenerateProposalsV2OpNoOffset(TestGenerateProposalsV2Op):
def init_test_params(self):
self.pre_nms_topN = 12000 # train 12000, test 2000
self.post_nms_topN = 5000 # train 6000, test 1000
self.nms_thresh = 0.7
self.min_size = 3.0
self.eta = 1.
self.pixel_offset = False
if __name__ == '__main__':
......
......@@ -35,7 +35,8 @@ class TestROIAlignOp(OpTest):
'spatial_scale': self.spatial_scale,
'pooled_height': self.pooled_height,
'pooled_width': self.pooled_width,
'sampling_ratio': self.sampling_ratio
'sampling_ratio': self.sampling_ratio,
'aligned': self.aligned,
}
self.outputs = {'Out': self.out_data}
......@@ -53,6 +54,7 @@ class TestROIAlignOp(OpTest):
self.pooled_height = 2
self.pooled_width = 2
self.sampling_ratio = -1
self.aligned = False
self.x = np.random.random(self.x_dim).astype('float64')
......@@ -115,16 +117,21 @@ class TestROIAlignOp(OpTest):
(self.rois_num, self.channels, self.pooled_height,
self.pooled_width)).astype('float64')
offset = 0.5 if self.aligned else 0.
for i in range(self.rois_num):
roi = self.rois[i]
roi_batch_id = int(roi[0])
x_i = self.x[roi_batch_id]
roi_xmin = roi[1] * self.spatial_scale
roi_ymin = roi[2] * self.spatial_scale
roi_xmax = roi[3] * self.spatial_scale
roi_ymax = roi[4] * self.spatial_scale
roi_width = max(roi_xmax - roi_xmin, 1)
roi_height = max(roi_ymax - roi_ymin, 1)
roi_xmin = roi[1] * self.spatial_scale - offset
roi_ymin = roi[2] * self.spatial_scale - offset
roi_xmax = roi[3] * self.spatial_scale - offset
roi_ymax = roi[4] * self.spatial_scale - offset
roi_width = roi_xmax - roi_xmin
roi_height = roi_ymax - roi_ymin
if not self.aligned:
roi_width = max(roi_width, 1)
roi_height = max(roi_height, 1)
bin_size_h = float(roi_height) / float(self.pooled_height)
bin_size_w = float(roi_width) / float(self.pooled_width)
roi_bin_grid_h = self.sampling_ratio if self.sampling_ratio > 0 else \
......@@ -192,11 +199,31 @@ class TestROIAlignInLodOp(TestROIAlignOp):
'spatial_scale': self.spatial_scale,
'pooled_height': self.pooled_height,
'pooled_width': self.pooled_width,
'sampling_ratio': self.sampling_ratio
'sampling_ratio': self.sampling_ratio,
'aligned': self.aligned
}
self.outputs = {'Out': self.out_data}
class TestROIAlignOpWithAligned(TestROIAlignOp):
def init_test_case(self):
self.batch_size = 3
self.channels = 3
self.height = 8
self.width = 6
# n, c, h, w
self.x_dim = (self.batch_size, self.channels, self.height, self.width)
self.spatial_scale = 1.0 / 2.0
self.pooled_height = 2
self.pooled_width = 2
self.sampling_ratio = -1
self.aligned = True
self.x = np.random.random(self.x_dim).astype('float64')
if __name__ == '__main__':
unittest.main()