未验证 提交 566c80ff 编写于 作者: Z zhiboniu 提交者: GitHub

Phi generate_proposals_v2 (#44436)

* phi_generate_proposals_v2

* remove old kernels

* optest add eager_check

* del lod

* update

* update

* update test_detection with_lod

* update nms_util

* remove old nms_util.h
上级 e9994f2e
......@@ -93,7 +93,7 @@ if(WITH_GPU OR WITH_ROCM)
detection_library(generate_proposals_op SRCS generate_proposals_op.cc
generate_proposals_op.cu DEPS ${TMPDEPS})
detection_library(generate_proposals_v2_op SRCS generate_proposals_v2_op.cc
generate_proposals_v2_op.cu DEPS ${TMPDEPS})
DEPS ${TMPDEPS})
detection_library(
distribute_fpn_proposals_op SRCS distribute_fpn_proposals_op.cc
distribute_fpn_proposals_op.cu DEPS ${TMPDEPS})
......
......@@ -20,7 +20,7 @@ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/operators/detection/bbox_util.h"
#include "paddle/fluid/operators/detection/nms_util.h"
#include "paddle/phi/kernels/funcs/detection/nms_util.h"
#include "paddle/phi/kernels/funcs/gather.h"
#include "paddle/phi/kernels/funcs/math_function.h"
......@@ -251,7 +251,8 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
return std::make_pair(bbox_sel, scores_filter);
}
Tensor keep_nms = NMS<T>(ctx, &bbox_sel, &scores_filter, nms_thresh, eta);
Tensor keep_nms =
phi::funcs::NMS<T>(ctx, &bbox_sel, &scores_filter, nms_thresh, eta);
if (post_nms_top_n > 0 && post_nms_top_n < keep_nms.numel()) {
keep_nms.Resize({post_nms_top_n});
......
......@@ -17,10 +17,12 @@ limitations under the License. */
#include <string>
#include <vector>
#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/operators/detection/bbox_util.h"
#include "paddle/fluid/operators/detection/nms_util.h"
#include "paddle/phi/infermeta/multiary.h"
#include "paddle/phi/kernels/funcs/detection/nms_util.h"
#include "paddle/phi/kernels/funcs/gather.h"
#include "paddle/phi/kernels/funcs/math_function.h"
......@@ -34,36 +36,6 @@ class GenerateProposalsV2Op : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
// Shape inference for generate_proposals_v2: checks that all five inputs are
// present and declares the dimensions (and, outside of runtime, the LoD
// level) of the two outputs RpnRois and RpnRoiProbs.
void InferShape(framework::InferShapeContext *ctx) const override {
// Each input is mandatory; a missing one is reported as a NotFound error.
PADDLE_ENFORCE_EQ(
ctx->HasInput("Scores"),
true,
platform::errors::NotFound("Input(Scores) shouldn't be null."));
PADDLE_ENFORCE_EQ(
ctx->HasInput("BboxDeltas"),
true,
platform::errors::NotFound("Input(BboxDeltas) shouldn't be null."));
PADDLE_ENFORCE_EQ(
ctx->HasInput("ImShape"),
true,
platform::errors::NotFound("Input(ImShape) shouldn't be null."));
PADDLE_ENFORCE_EQ(
ctx->HasInput("Anchors"),
true,
platform::errors::NotFound("Input(Anchors) shouldn't be null."));
PADDLE_ENFORCE_EQ(
ctx->HasInput("Variances"),
true,
platform::errors::NotFound("Input(Variances) shouldn't be null."));
// The leading dimension is left as -1; RpnRois rows hold 4 values and
// RpnRoiProbs rows hold 1 value.
ctx->SetOutputDim("RpnRois", {-1, 4});
ctx->SetOutputDim("RpnRoiProbs", {-1, 1});
// Outside of runtime, both outputs get the LoD level of Scores, but never
// less than 1.
if (!ctx->IsRuntime()) {
ctx->SetLoDLevel("RpnRois", std::max(ctx->GetLoDLevel("Scores"), 1));
ctx->SetLoDLevel("RpnRoiProbs", std::max(ctx->GetLoDLevel("Scores"), 1));
}
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext &ctx) const override {
......@@ -73,206 +45,6 @@ class GenerateProposalsV2Op : public framework::OperatorWithKernel {
}
};
// CPU kernel of the generate_proposals_v2 operator. Each image of the batch is
// processed independently by ProposalForOneImage and the per-image results are
// concatenated into the RpnRois / RpnRoiProbs outputs.
template <typename T>
class GenerateProposalsV2Kernel : public framework::OpKernel<T> {
 public:
  // Reads inputs and attributes from `context`, generates proposals image by
  // image, then writes the packed boxes/scores, their LoD offsets and, when
  // the RpnRoisNum output exists, the number of proposals per image.
  void Compute(const framework::ExecutionContext &context) const override {
    auto *scores = context.Input<Tensor>("Scores");
    auto *bbox_deltas = context.Input<Tensor>("BboxDeltas");
    auto *im_shape = context.Input<Tensor>("ImShape");
    // Held by value: the Resize calls further down act on these local objects.
    auto anchors = GET_DATA_SAFELY(context.Input<Tensor>("Anchors"),
                                   "Input",
                                   "Anchors",
                                   "GenerateProposals");
    auto variances = GET_DATA_SAFELY(context.Input<Tensor>("Variances"),
                                     "Input",
                                     "Variances",
                                     "GenerateProposals");

    auto *rpn_rois = context.Output<LoDTensor>("RpnRois");
    auto *rpn_roi_probs = context.Output<LoDTensor>("RpnRoiProbs");

    const int pre_nms_top_n = context.Attr<int>("pre_nms_topN");
    const int post_nms_top_n = context.Attr<int>("post_nms_topN");
    const float nms_thresh = context.Attr<float>("nms_thresh");
    const float min_size = context.Attr<float>("min_size");
    const float eta = context.Attr<float>("eta");
    const bool pixel_offset = context.Attr<bool>("pixel_offset");

    auto &dev_ctx = context.template device_context<phi::CPUContext>();

    // Scores is indexed as [batch, channel, height, width].
    auto &score_dims = scores->dims();
    const int64_t batch = score_dims[0];
    const int64_t c_score = score_dims[1];
    const int64_t h_score = score_dims[2];
    const int64_t w_score = score_dims[3];

    auto &delta_dims = bbox_deltas->dims();
    const int64_t c_bbox = delta_dims[1];
    const int64_t h_bbox = delta_dims[2];
    const int64_t w_bbox = delta_dims[3];

    // Allocate the outputs for the largest possible result; they are shrunk
    // to the real proposal count at the end.
    rpn_rois->mutable_data<T>({bbox_deltas->numel() / 4, 4},
                              context.GetPlace());
    rpn_roi_probs->mutable_data<T>({scores->numel(), 1}, context.GetPlace());

    // Move the channel axis last: axes {0, 2, 3, 1}.
    Tensor bbox_deltas_swap, scores_swap;
    bbox_deltas_swap.mutable_data<T>({batch, h_bbox, w_bbox, c_bbox},
                                     dev_ctx.GetPlace());
    scores_swap.mutable_data<T>({batch, h_score, w_score, c_score},
                                dev_ctx.GetPlace());

    phi::funcs::Transpose<phi::CPUContext, T, 4> transpose;
    std::vector<int> perm = {0, 2, 3, 1};
    transpose(dev_ctx, *bbox_deltas, &bbox_deltas_swap, perm);
    transpose(dev_ctx, *scores, &scores_swap, perm);

    // Level-0 LoD offsets: one entry per image boundary, starting at 0.
    framework::LoD lod;
    lod.resize(1);
    auto &image_offsets = lod[0];
    image_offsets.push_back(0);

    anchors.Resize({anchors.numel() / 4, 4});
    variances.Resize({variances.numel() / 4, 4});

    std::vector<int> tmp_num;
    int64_t num_proposals = 0;
    for (int64_t img = 0; img < batch; ++img) {
      Tensor im_shape_slice = im_shape->Slice(img, img + 1);
      Tensor bbox_deltas_slice = bbox_deltas_swap.Slice(img, img + 1);
      Tensor scores_slice = scores_swap.Slice(img, img + 1);

      bbox_deltas_slice.Resize({h_bbox * w_bbox * c_bbox / 4, 4});
      scores_slice.Resize({h_score * w_score * c_score, 1});

      std::pair<Tensor, Tensor> image_result =
          ProposalForOneImage(dev_ctx,
                              im_shape_slice,
                              anchors,
                              variances,
                              bbox_deltas_slice,
                              scores_slice,
                              pre_nms_top_n,
                              post_nms_top_n,
                              nms_thresh,
                              min_size,
                              eta,
                              pixel_offset);
      Tensor &image_rois = image_result.first;
      Tensor &image_probs = image_result.second;

      // Offsets are counted in elements: 4 per box, 1 per score.
      AppendProposals(rpn_rois, 4 * num_proposals, image_rois);
      AppendProposals(rpn_roi_probs, num_proposals, image_probs);

      num_proposals += image_rois.dims()[0];
      image_offsets.push_back(num_proposals);
      tmp_num.push_back(image_rois.dims()[0]);
    }

    if (context.HasOutput("RpnRoisNum")) {
      auto *rpn_rois_num = context.Output<Tensor>("RpnRoisNum");
      rpn_rois_num->mutable_data<int>({batch}, context.GetPlace());
      int *num_data = rpn_rois_num->data<int>();
      // tmp_num holds exactly one count per image.
      std::copy(tmp_num.begin(), tmp_num.end(), num_data);
      rpn_rois_num->Resize({batch});
    }

    rpn_rois->set_lod(lod);
    rpn_roi_probs->set_lod(lod);
    rpn_rois->Resize({num_proposals, 4});
    rpn_roi_probs->Resize({num_proposals, 1});
  }

  // Generates the proposals of a single image.
  //
  // Steps: pick the candidates with the highest scores (all of them when
  // pre_nms_top_n <= 0 or not smaller than the candidate count), decode them
  // against the anchors, clip and filter them, run NMS and finally keep at
  // most post_nms_top_n results (when post_nms_top_n > 0).
  //
  // Returns the pair (boxes [K, 4], scores [K, 1]). When filtering removes
  // every box, a single all-zero box with a zero score is returned instead.
  std::pair<Tensor, Tensor> ProposalForOneImage(
      const phi::CPUContext &ctx,
      const Tensor &im_shape_slice,
      const Tensor &anchors,
      const Tensor &variances,
      const Tensor &bbox_deltas_slice,  // [M, 4]
      const Tensor &scores_slice,       // [N, 1]
      int pre_nms_top_n,
      int post_nms_top_n,
      float nms_thresh,
      float min_size,
      float eta,
      bool pixel_offset = true) const {
    auto *scores_data = scores_slice.data<T>();
    const int64_t total = scores_slice.numel();

    // Candidate indices 0..total-1, ordered by descending score below.
    Tensor index_t;
    index_t.Resize({total});
    int *index = index_t.mutable_data<int>(ctx.GetPlace());
    for (int k = 0; k < total; ++k) {
      index[k] = k;
    }
    auto by_score_desc = [scores_data](const int64_t &lhs,
                                       const int64_t &rhs) {
      return scores_data[lhs] > scores_data[rhs];
    };

    const bool take_all = pre_nms_top_n <= 0 || pre_nms_top_n >= total;
    if (!take_all) {
      // Only the first pre_nms_top_n entries are needed, so a partition is
      // enough; the index tensor is then truncated to that prefix.
      std::nth_element(
          index, index + pre_nms_top_n, index + total, by_score_desc);
      index_t.Resize({pre_nms_top_n});
    } else {
      std::sort(index, index + total, by_score_desc);
    }

    Tensor scores_sel, bbox_sel, anchor_sel, var_sel;
    scores_sel.mutable_data<T>({index_t.numel(), 1}, ctx.GetPlace());
    bbox_sel.mutable_data<T>({index_t.numel(), 4}, ctx.GetPlace());
    anchor_sel.mutable_data<T>({index_t.numel(), 4}, ctx.GetPlace());
    var_sel.mutable_data<T>({index_t.numel(), 4}, ctx.GetPlace());

    phi::funcs::CPUGather<T>(ctx, scores_slice, index_t, &scores_sel);
    phi::funcs::CPUGather<T>(ctx, bbox_deltas_slice, index_t, &bbox_sel);
    phi::funcs::CPUGather<T>(ctx, anchors, index_t, &anchor_sel);
    phi::funcs::CPUGather<T>(ctx, variances, index_t, &var_sel);

    // Decode the selected deltas into boxes, then clip them in place.
    Tensor proposals;
    proposals.mutable_data<T>({index_t.numel(), 4}, ctx.GetPlace());
    BoxCoder<T>(
        ctx, &anchor_sel, &bbox_sel, &var_sel, &proposals, pixel_offset);
    ClipTiledBoxes<T>(
        ctx, im_shape_slice, proposals, &proposals, false, pixel_offset);

    Tensor keep;
    FilterBoxes<T>(
        ctx, &proposals, min_size, im_shape_slice, false, &keep, pixel_offset);

    // Nothing survived the filter: hand back one zeroed box and score.
    if (keep.numel() == 0) {
      phi::funcs::SetConstant<phi::CPUContext, T> fill;
      bbox_sel.mutable_data<T>({1, 4}, ctx.GetPlace());
      fill(ctx, &bbox_sel, static_cast<T>(0));
      Tensor scores_filter;
      scores_filter.mutable_data<T>({1, 1}, ctx.GetPlace());
      fill(ctx, &scores_filter, static_cast<T>(0));
      return std::make_pair(bbox_sel, scores_filter);
    }

    // bbox_sel is reused to hold the filtered boxes.
    Tensor scores_filter;
    bbox_sel.mutable_data<T>({keep.numel(), 4}, ctx.GetPlace());
    scores_filter.mutable_data<T>({keep.numel(), 1}, ctx.GetPlace());
    phi::funcs::CPUGather<T>(ctx, proposals, keep, &bbox_sel);
    phi::funcs::CPUGather<T>(ctx, scores_sel, keep, &scores_filter);

    // A non-positive threshold disables NMS.
    if (nms_thresh <= 0) {
      return std::make_pair(bbox_sel, scores_filter);
    }

    Tensor keep_nms =
        NMS<T>(ctx, &bbox_sel, &scores_filter, nms_thresh, eta, pixel_offset);
    const bool truncate =
        post_nms_top_n > 0 && post_nms_top_n < keep_nms.numel();
    if (truncate) {
      keep_nms.Resize({post_nms_top_n});
    }

    // proposals / scores_sel are reused as the final output buffers.
    proposals.mutable_data<T>({keep_nms.numel(), 4}, ctx.GetPlace());
    scores_sel.mutable_data<T>({keep_nms.numel(), 1}, ctx.GetPlace());
    phi::funcs::CPUGather<T>(ctx, bbox_sel, keep_nms, &proposals);
    phi::funcs::CPUGather<T>(ctx, scores_filter, keep_nms, &scores_sel);
    return std::make_pair(proposals, scores_sel);
  }
};
class GenerateProposalsV2OpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
......@@ -336,16 +108,19 @@ to before and will not effect the result.
} // namespace operators
} // namespace paddle
DECLARE_INFER_SHAPE_FUNCTOR(generate_proposals_v2,
GenerateProposalsV2InferShapeFunctor,
PD_INFER_META(phi::GenerateProposalsV2InferMeta));
namespace ops = paddle::operators;
REGISTER_OPERATOR(
generate_proposals_v2,
ops::GenerateProposalsV2Op,
ops::GenerateProposalsV2OpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL(generate_proposals_v2,
ops::GenerateProposalsV2Kernel<float>,
ops::GenerateProposalsV2Kernel<double>);
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>,
GenerateProposalsV2InferShapeFunctor);
REGISTER_OP_VERSION(generate_proposals_v2)
.AddCheckpoint(
R"ROC(Registe generate_proposals_v2 for adding the attribute of pixel_offset)ROC",
......
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <paddle/fluid/memory/allocation/allocator.h>
#include <stdio.h>
#include <string>
#include <vector>
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/memory/memory.h"
#include "paddle/fluid/operators/detection/bbox_util.cu.h"
#include "paddle/phi/kernels/funcs/gather.cu.h"
#include "paddle/phi/kernels/funcs/math_function.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;
namespace {
// Generates the proposals of one image on the GPU.
//
// Pipeline: sort the scores in descending order and keep the first
// pre_nms_num of them, decode and clip the corresponding boxes, filter the
// boxes, run NMS, and keep at most post_nms_top_n results when
// post_nms_top_n > 0.
//
// Returns the pair (boxes [K, 4], scores [K, 1]). When the filter keeps no
// box, a single all-zero box with a zero score is returned.
// Note: the `eta` parameter is accepted but not used inside this function.
template <typename T>
static std::pair<Tensor, Tensor> ProposalForOneImage(
const phi::GPUContext &ctx,
const Tensor &im_shape,
const Tensor &anchors,
const Tensor &variances,
const Tensor &bbox_deltas, // [M, 4]
const Tensor &scores, // [N, 1]
int pre_nms_top_n,
int post_nms_top_n,
float nms_thresh,
float min_size,
float eta,
bool pixel_offset) {
// 1. pre nms
Tensor scores_sort, index_sort;
SortDescending<T>(ctx, scores, &scores_sort, &index_sort);
int num = scores.numel();
// A non-positive or too large pre_nms_top_n means "keep every candidate".
int pre_nms_num = (pre_nms_top_n <= 0 || pre_nms_top_n > num) ? scores.numel()
: pre_nms_top_n;
// Truncate the sorted scores/indices to the first pre_nms_num entries.
scores_sort.Resize({pre_nms_num, 1});
index_sort.Resize({pre_nms_num, 1});
// 2. box decode and clipping
Tensor proposals;
proposals.mutable_data<T>({pre_nms_num, 4}, ctx.GetPlace());
{
// The functor is applied over the range [0, pre_nms_num).
platform::ForRange<phi::GPUContext> for_range(ctx, pre_nms_num);
for_range(BoxDecodeAndClipFunctor<T>{anchors.data<T>(),
bbox_deltas.data<T>(),
variances.data<T>(),
index_sort.data<int>(),
im_shape.data<T>(),
proposals.data<T>(),
pixel_offset});
}
// 3. filter
Tensor keep_index, keep_num_t;
keep_index.mutable_data<int>({pre_nms_num}, ctx.GetPlace());
keep_num_t.mutable_data<int>({1}, ctx.GetPlace());
// The minimum box size is never allowed to drop below 1.
min_size = std::max(min_size, 1.0f);
auto stream = ctx.stream();
// Launched as a single block of 512 threads on the context's stream.
FilterBBoxes<T, 512><<<1, 512, 0, stream>>>(proposals.data<T>(),
im_shape.data<T>(),
min_size,
pre_nms_num,
keep_num_t.data<int>(),
keep_index.data<int>(),
false,
pixel_offset);
// Bring the number of kept boxes back to the host; the Wait() below makes
// sure keep_num is valid before it is read.
int keep_num;
const auto gpu_place = ctx.GetPlace();
memory::Copy(platform::CPUPlace(),
&keep_num,
gpu_place,
keep_num_t.data<int>(),
sizeof(int),
ctx.stream());
ctx.Wait();
keep_index.Resize({keep_num});
Tensor scores_filter, proposals_filter;
// Handle the case when there is no keep index left
if (keep_num == 0) {
phi::funcs::SetConstant<phi::GPUContext, T> set_zero;
proposals_filter.mutable_data<T>({1, 4}, ctx.GetPlace());
scores_filter.mutable_data<T>({1, 1}, ctx.GetPlace());
set_zero(ctx, &proposals_filter, static_cast<T>(0));
set_zero(ctx, &scores_filter, static_cast<T>(0));
return std::make_pair(proposals_filter, scores_filter);
}
// Gather the boxes and scores selected by the filter.
proposals_filter.mutable_data<T>({keep_num, 4}, ctx.GetPlace());
scores_filter.mutable_data<T>({keep_num, 1}, ctx.GetPlace());
phi::funcs::GPUGather<T>(ctx, proposals, keep_index, &proposals_filter);
phi::funcs::GPUGather<T>(ctx, scores_sort, keep_index, &scores_filter);
// A non-positive threshold disables NMS.
if (nms_thresh <= 0) {
return std::make_pair(proposals_filter, scores_filter);
}
// 4. nms
Tensor keep_nms;
NMS<T>(
ctx, proposals_filter, keep_index, nms_thresh, &keep_nms, pixel_offset);
// Keep only the first post_nms_top_n NMS survivors when a limit is set.
if (post_nms_top_n > 0 && post_nms_top_n < keep_nms.numel()) {
keep_nms.Resize({post_nms_top_n});
}
Tensor scores_nms, proposals_nms;
proposals_nms.mutable_data<T>({keep_nms.numel(), 4}, ctx.GetPlace());
scores_nms.mutable_data<T>({keep_nms.numel(), 1}, ctx.GetPlace());
phi::funcs::GPUGather<T>(ctx, proposals_filter, keep_nms, &proposals_nms);
phi::funcs::GPUGather<T>(ctx, scores_filter, keep_nms, &scores_nms);
return std::make_pair(proposals_nms, scores_nms);
}
} // namespace
// CUDA kernel of the generate_proposals_v2 operator. Each image of the batch
// is handled by ProposalForOneImage and the per-image boxes/scores are copied
// one after another into the RpnRois / RpnRoiProbs outputs.
template <typename DeviceContext, typename T>
class CUDAGenerateProposalsV2Kernel : public framework::OpKernel<T> {
public:
// Reads inputs and attributes from `context`, generates the proposals and
// writes the packed results, their LoD offsets and, when the RpnRoisNum
// output exists, the number of proposals per image.
void Compute(const framework::ExecutionContext &context) const override {
auto *scores = context.Input<Tensor>("Scores");
auto *bbox_deltas = context.Input<Tensor>("BboxDeltas");
auto *im_shape = context.Input<Tensor>("ImShape");
// Held by value: the Resize calls further down act on these local objects.
auto anchors = GET_DATA_SAFELY(context.Input<Tensor>("Anchors"),
"Input",
"Anchors",
"GenerateProposals");
auto variances = GET_DATA_SAFELY(context.Input<Tensor>("Variances"),
"Input",
"Variances",
"GenerateProposals");
auto *rpn_rois = context.Output<LoDTensor>("RpnRois");
auto *rpn_roi_probs = context.Output<LoDTensor>("RpnRoiProbs");
int pre_nms_top_n = context.Attr<int>("pre_nms_topN");
int post_nms_top_n = context.Attr<int>("post_nms_topN");
float nms_thresh = context.Attr<float>("nms_thresh");
float min_size = context.Attr<float>("min_size");
float eta = context.Attr<float>("eta");
bool pixel_offset = context.Attr<bool>("pixel_offset");
// Adaptive NMS is not implemented here, so eta below 1 is rejected.
PADDLE_ENFORCE_GE(eta,
1.,
platform::errors::InvalidArgument(
"Not support adaptive NMS. The attribute 'eta' "
"should not less than 1. But received eta=[%d]",
eta));
auto &dev_ctx = context.template device_context<DeviceContext>();
// Scores and BboxDeltas are indexed as [batch, channel, height, width].
auto scores_dim = scores->dims();
int64_t num = scores_dim[0];
int64_t c_score = scores_dim[1];
int64_t h_score = scores_dim[2];
int64_t w_score = scores_dim[3];
auto bbox_dim = bbox_deltas->dims();
int64_t c_bbox = bbox_dim[1];
int64_t h_bbox = bbox_dim[2];
int64_t w_bbox = bbox_dim[3];
// Move the channel axis last: axes {0, 2, 3, 1}.
Tensor bbox_deltas_swap, scores_swap;
bbox_deltas_swap.mutable_data<T>({num, h_bbox, w_bbox, c_bbox},
dev_ctx.GetPlace());
scores_swap.mutable_data<T>({num, h_score, w_score, c_score},
dev_ctx.GetPlace());
phi::funcs::Transpose<DeviceContext, T, 4> trans;
std::vector<int> axis = {0, 2, 3, 1};
trans(dev_ctx, *bbox_deltas, &bbox_deltas_swap, axis);
trans(dev_ctx, *scores, &scores_swap, axis);
anchors.Resize({anchors.numel() / 4, 4});
variances.Resize({variances.numel() / 4, 4});
// Allocate the outputs for the largest possible result; they are shrunk to
// the real proposal count at the end.
rpn_rois->mutable_data<T>({bbox_deltas->numel() / 4, 4},
context.GetPlace());
rpn_roi_probs->mutable_data<T>({scores->numel(), 1}, context.GetPlace());
T *rpn_rois_data = rpn_rois->data<T>();
T *rpn_roi_probs_data = rpn_roi_probs->data<T>();
auto place = dev_ctx.GetPlace();
auto cpu_place = platform::CPUPlace();
int64_t num_proposals = 0;
// LoD offsets (starting at 0) and per-image proposal counts.
std::vector<size_t> offset(1, 0);
std::vector<int> tmp_num;
for (int64_t i = 0; i < num; ++i) {
Tensor im_shape_slice = im_shape->Slice(i, i + 1);
Tensor bbox_deltas_slice = bbox_deltas_swap.Slice(i, i + 1);
Tensor scores_slice = scores_swap.Slice(i, i + 1);
bbox_deltas_slice.Resize({h_bbox * w_bbox * c_bbox / 4, 4});
scores_slice.Resize({h_score * w_score * c_score, 1});
std::pair<Tensor, Tensor> box_score_pair =
ProposalForOneImage<T>(dev_ctx,
im_shape_slice,
anchors,
variances,
bbox_deltas_slice,
scores_slice,
pre_nms_top_n,
post_nms_top_n,
nms_thresh,
min_size,
eta,
pixel_offset);
Tensor &proposals = box_score_pair.first;
Tensor &scores = box_score_pair.second;
// Append this image's boxes (4 values each) after those already written.
memory::Copy(place,
rpn_rois_data + num_proposals * 4,
place,
proposals.data<T>(),
sizeof(T) * proposals.numel(),
dev_ctx.stream());
// Append this image's scores (1 value each).
memory::Copy(place,
rpn_roi_probs_data + num_proposals,
place,
scores.data<T>(),
sizeof(T) * scores.numel(),
dev_ctx.stream());
// Wait for both copies before box_score_pair goes out of scope.
dev_ctx.Wait();
num_proposals += proposals.dims()[0];
offset.emplace_back(num_proposals);
tmp_num.push_back(proposals.dims()[0]);
}
if (context.HasOutput("RpnRoisNum")) {
auto *rpn_rois_num = context.Output<Tensor>("RpnRoisNum");
rpn_rois_num->mutable_data<int>({num}, context.GetPlace());
int *num_data = rpn_rois_num->data<int>();
// Copy the per-image counts from the host vector to the output tensor.
memory::Copy(place,
num_data,
cpu_place,
&tmp_num[0],
sizeof(int) * num,
dev_ctx.stream());
rpn_rois_num->Resize({num});
}
// Both outputs share the same single-level LoD built from `offset`.
framework::LoD lod;
lod.emplace_back(offset);
rpn_rois->set_lod(lod);
rpn_roi_probs->set_lod(lod);
rpn_rois->Resize({num_proposals, 4});
rpn_roi_probs->Resize({num_proposals, 1});
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
// Register the CUDA kernel of generate_proposals_v2; only the float
// instantiation is registered here.
REGISTER_OP_CUDA_KERNEL(
generate_proposals_v2,
ops::CUDAGenerateProposalsV2Kernel<phi::GPUContext, float>);
......@@ -14,7 +14,7 @@ limitations under the License. */
#include <glog/logging.h>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/detection/nms_util.h"
#include "paddle/phi/kernels/funcs/detection/nms_util.h"
namespace paddle {
namespace operators {
......@@ -118,15 +118,15 @@ void GetMaxScoreIndexWithLocalityAware(
if (index > -1) {
T overlap = T(0.);
if (box_size == 4) {
overlap = JaccardOverlap<T>(
overlap = phi::funcs::JaccardOverlap<T>(
bbox_data + i * box_size, bbox_data + index * box_size, normalized);
}
// 8: [x1 y1 x2 y2 x3 y3 x4 y4] or 16, 24, 32
if (box_size == 8 || box_size == 16 || box_size == 24 || box_size == 32) {
overlap = PolyIoU<T>(bbox_data + i * box_size,
bbox_data + index * box_size,
box_size,
normalized);
overlap = phi::funcs::PolyIoU<T>(bbox_data + i * box_size,
bbox_data + index * box_size,
box_size,
normalized);
}
if (overlap > nms_threshold) {
......@@ -156,7 +156,7 @@ void GetMaxScoreIndexWithLocalityAware(
// Sort the score pair according to the scores in descending order
std::stable_sort(sorted_indices->begin(),
sorted_indices->end(),
SortScorePairDescend<int>);
phi::funcs::SortScorePairDescend<int>);
// Keep top_k scores if needed.
if (top_k > -1 && top_k < static_cast<int>(sorted_indices->size())) {
sorted_indices->resize(top_k);
......@@ -207,17 +207,18 @@ class LocalityAwareNMSKernel : public framework::OpKernel<T> {
T overlap = T(0.);
// 4: [xmin ymin xmax ymax]
if (box_size == 4) {
overlap = JaccardOverlap<T>(bbox_data + idx * box_size,
bbox_data + kept_idx * box_size,
normalized);
overlap =
phi::funcs::JaccardOverlap<T>(bbox_data + idx * box_size,
bbox_data + kept_idx * box_size,
normalized);
}
// 8: [x1 y1 x2 y2 x3 y3 x4 y4] or 16, 24, 32
if (box_size == 8 || box_size == 16 || box_size == 24 ||
box_size == 32) {
overlap = PolyIoU<T>(bbox_data + idx * box_size,
bbox_data + kept_idx * box_size,
box_size,
normalized);
overlap = phi::funcs::PolyIoU<T>(bbox_data + idx * box_size,
bbox_data + kept_idx * box_size,
box_size,
normalized);
}
keep = overlap <= adaptive_threshold;
} else {
......@@ -290,7 +291,7 @@ class LocalityAwareNMSKernel : public framework::OpKernel<T> {
// Keep top k results per image.
std::stable_sort(score_index_pairs.begin(),
score_index_pairs.end(),
SortScorePairDescend<std::pair<int, int>>);
phi::funcs::SortScorePairDescend<std::pair<int, int>>);
score_index_pairs.resize(keep_top_k);
// Store the new indices.
......
......@@ -14,8 +14,8 @@ limitations under the License. */
#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/operators/detection/nms_util.h"
#include "paddle/phi/infermeta/binary.h"
#include "paddle/phi/kernels/funcs/detection/nms_util.h"
namespace paddle {
namespace operators {
......
......@@ -15,8 +15,8 @@ limitations under the License. */
#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/detection/nms_util.h"
#include "paddle/phi/infermeta/ternary.h"
#include "paddle/phi/kernels/funcs/detection/nms_util.h"
namespace paddle {
namespace operators {
......@@ -166,7 +166,8 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
std::vector<T> scores_data(num_boxes);
std::copy_n(scores.data<T>(), num_boxes, scores_data.begin());
std::vector<std::pair<T, int>> sorted_indices;
GetMaxScoreIndex(scores_data, score_threshold, top_k, &sorted_indices);
phi::funcs::GetMaxScoreIndex(
scores_data, score_threshold, top_k, &sorted_indices);
selected_indices->clear();
T adaptive_threshold = nms_threshold;
......@@ -181,17 +182,18 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
T overlap = T(0.);
// 4: [xmin ymin xmax ymax]
if (box_size == 4) {
overlap = JaccardOverlap<T>(bbox_data + idx * box_size,
bbox_data + kept_idx * box_size,
normalized);
overlap =
phi::funcs::JaccardOverlap<T>(bbox_data + idx * box_size,
bbox_data + kept_idx * box_size,
normalized);
}
// 8: [x1 y1 x2 y2 x3 y3 x4 y4] or 16, 24, 32
if (box_size == 8 || box_size == 16 || box_size == 24 ||
box_size == 32) {
overlap = PolyIoU<T>(bbox_data + idx * box_size,
bbox_data + kept_idx * box_size,
box_size,
normalized);
overlap = phi::funcs::PolyIoU<T>(bbox_data + idx * box_size,
bbox_data + kept_idx * box_size,
box_size,
normalized);
}
keep = overlap <= adaptive_threshold;
} else {
......@@ -276,7 +278,7 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
// Keep top k results per image.
std::stable_sort(score_index_pairs.begin(),
score_index_pairs.end(),
SortScorePairDescend<std::pair<int, int>>);
phi::funcs::SortScorePairDescend<std::pair<int, int>>);
score_index_pairs.resize(keep_top_k);
// Store the new indices.
......
......@@ -1040,6 +1040,14 @@
func : gelu
backward : gelu_grad
# generate_proposals_v2: takes five tensors plus the NMS/filter attributes and
# yields three tensors (rpn_rois, rpn_roi_probs, rpn_rois_num). Output metadata
# comes from GenerateProposalsV2InferMeta and the computation is dispatched to
# the kernel named generate_proposals_v2. pixel_offset defaults to true.
- api : generate_proposals_v2
args : (Tensor scores, Tensor bbox_deltas, Tensor im_shape, Tensor anchors, Tensor variances, int pre_nms_top_n, int post_nms_top_n, float nms_thresh, float min_size, float eta, bool pixel_offset=true)
output : Tensor(rpn_rois), Tensor(rpn_roi_probs), Tensor(rpn_rois_num)
infer_meta :
func : GenerateProposalsV2InferMeta
kernel :
func : generate_proposals_v2
- api : graph_send_recv
args : (Tensor x, Tensor src_index, Tensor dst_index, str pool_type = "SUM", int64_t out_size = 0)
output : Tensor(out), Tensor(dst_count)
......
......@@ -1090,6 +1090,24 @@ void EditDistanceInferMeta(const MetaTensor& hyps,
sequencenum->set_dtype(DataType::FLOAT32);
}
// Meta inference for generate_proposals_v2.
//
// Only the dimensions of the two box outputs are set: rpn_rois becomes
// [-1, 4] and rpn_roi_probs becomes [-1, 1]. The inputs, the attributes and
// rpn_rois_num are not touched by this function.
void GenerateProposalsV2InferMeta(const MetaTensor& scores,
                                  const MetaTensor& bbox_deltas,
                                  const MetaTensor& im_shape,
                                  const MetaTensor& anchors,
                                  const MetaTensor& variances,
                                  int pre_nms_top_n,
                                  int post_nms_top_n,
                                  float nms_thresh,
                                  float min_size,
                                  float eta,
                                  bool pixel_offset,
                                  MetaTensor* rpn_rois,
                                  MetaTensor* rpn_roi_probs,
                                  MetaTensor* rpn_rois_num) {
  // The leading dimension is left as -1.
  const auto rois_dims = phi::make_ddim({-1, 4});
  rpn_rois->set_dims(rois_dims);

  const auto probs_dims = phi::make_ddim({-1, 1});
  rpn_roi_probs->set_dims(probs_dims);
}
void HierarchicalSigmoidInferMeta(const MetaTensor& x,
const MetaTensor& w,
const MetaTensor& label,
......
......@@ -221,6 +221,21 @@ void EditDistanceInferMeta(const MetaTensor& hyps,
MetaTensor* sequencenum,
MetaTensor* out);
// Meta inference for the generate_proposals_v2 op. Takes the metadata of the
// five input tensors and the op attributes, and fills in the metadata of the
// outputs passed by pointer (rpn_rois, rpn_roi_probs, rpn_rois_num).
void GenerateProposalsV2InferMeta(const MetaTensor& scores,
const MetaTensor& bbox_deltas,
const MetaTensor& im_shape,
const MetaTensor& anchors,
const MetaTensor& variances,
int pre_nms_top_n,
int post_nms_top_n,
float nms_thresh,
float min_size,
float eta,
bool pixel_offset,
MetaTensor* rpn_rois,
MetaTensor* rpn_roi_probs,
MetaTensor* rpn_rois_num);
void HierarchicalSigmoidInferMeta(const MetaTensor& x,
const MetaTensor& w,
const MetaTensor& label,
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/generate_proposals_v2_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/detection/nms_util.h"
#include "paddle/phi/kernels/funcs/gather.h"
namespace phi {
// Upper bound, ln(1000 / 16), applied by BoxCoder to the width/height deltas
// before they are passed to std::exp.
static const double kBBoxClipDefault = std::log(1000.0 / 16.0);
// Copies every element of `src` into `dst`, starting `offset` elements past
// the beginning of dst's buffer. The element size is taken from src's dtype;
// `dst` must already have enough allocated room, since nothing is resized.
static void AppendProposals(DenseTensor* dst,
                            int64_t offset,
                            const DenseTensor& src) {
  auto* dst_base = dst->data();
  auto* src_base = src.data();
  size_t elem_bytes = SizeOf(src.dtype());

  // Convert the element offset into a byte offset.
  offset = offset * elem_bytes;
  void* write_pos = reinterpret_cast<void*>(
      reinterpret_cast<uintptr_t>(dst_base) + offset);

  std::memcpy(write_pos, src_base, src.numel() * elem_bytes);
}
// Clamps every box coordinate of `input_boxes` into the image and writes the
// result to `out` (which may be the same tensor as `input_boxes`).
//
// Values at positions 0 and 2 of each group of four are limited to
// [0, im_w - offset], values at positions 1 and 3 to [0, im_h - offset],
// where offset is 1 when `pixel_offset` is set and 0 otherwise.
// im_info is read as {height, width, scale}; when `is_scale` is true the
// image size is round(size / scale), otherwise the scale entry is not read.
template <class T>
void ClipTiledBoxes(const phi::CPUContext& ctx,
                    const DenseTensor& im_info,
                    const DenseTensor& input_boxes,
                    DenseTensor* out,
                    bool is_scale = true,
                    bool pixel_offset = true) {
  T* out_data = ctx.template Alloc<T>(out);
  const T* im_info_data = im_info.data<T>();
  const T* input_boxes_data = input_boxes.data<T>();

  T offset = pixel_offset ? static_cast<T>(1.0) : 0;
  T zero(0);

  T im_w;
  T im_h;
  if (is_scale) {
    im_w = round(im_info_data[1] / im_info_data[2]);
    im_h = round(im_info_data[0] / im_info_data[2]);
  } else {
    im_w = im_info_data[1];
    im_h = im_info_data[0];
  }

  // Largest admissible value along each axis.
  const T x_limit = im_w - offset;
  const T y_limit = im_h - offset;

  const int64_t value_count = input_boxes.numel();
  for (int64_t k = 0; k < value_count; ++k) {
    // Even positions within a box are x values, odd positions are y values.
    const T limit = (k % 2 == 0) ? x_limit : y_limit;
    out_data[k] = std::max(std::min(input_boxes_data[k], limit), zero);
  }
}
// Selects the boxes that are large enough and stores their row indices in
// `keep`, which is resized to the number of selected boxes.
//
// A box is kept when both its width and height are at least
// max(min_size, 1). With `pixel_offset` enabled, sizes include a +1 offset
// and the box centre must additionally satisfy x <= im_info[1] and
// y <= im_info[0]; if `is_scale` is also set, the size test uses the extent
// divided by im_info[2] plus 1.
template <class T>
void FilterBoxes(const phi::CPUContext& ctx,
                 const DenseTensor* boxes,
                 float min_size,
                 const DenseTensor& im_info,
                 bool is_scale,
                 DenseTensor* keep,
                 bool pixel_offset = true) {
  const T* im_info_data = im_info.data<T>();
  const T* boxes_data = boxes->data<T>();

  // Start with room for every box; shrunk to the kept count at the end.
  keep->Resize(phi::make_ddim({boxes->dims()[0]}));
  min_size = std::max(min_size, 1.0f);
  int* keep_data = ctx.template Alloc<int>(keep);

  T offset = pixel_offset ? static_cast<T>(1.0) : 0;
  int keep_len = 0;
  for (int row = 0; row < boxes->dims()[0]; ++row) {
    const T* box = boxes_data + 4 * row;  // {x1, y1, x2, y2}
    T ws = box[2] - box[0] + offset;
    T hs = box[3] - box[1] + offset;

    bool accepted = false;
    if (!pixel_offset) {
      accepted = ws >= min_size && hs >= min_size;
    } else {
      // The centre is computed from the unscaled size.
      T x_ctr = box[0] + ws / 2;
      T y_ctr = box[1] + hs / 2;
      if (is_scale) {
        ws = (box[2] - box[0]) / im_info_data[2] + 1;
        hs = (box[3] - box[1]) / im_info_data[2] + 1;
      }
      accepted = ws >= min_size && hs >= min_size &&
                 x_ctr <= im_info_data[1] && y_ctr <= im_info_data[0];
    }

    if (accepted) {
      keep_data[keep_len++] = row;
    }
  }
  keep->Resize(phi::make_ddim({keep_len}));
}
// Decodes box deltas against their anchors and writes the resulting boxes
// (x1, y1, x2, y2) into `proposals`.
//
// For every anchor row the centre is shifted by delta * anchor size and the
// size is scaled by exp(delta), with the exponent capped at
// kBBoxClipDefault. When `variances` is non-null each delta is first
// multiplied by the matching variance. `pixel_offset` adds 1 to the anchor
// size and subtracts 1 from the decoded x2 / y2.
template <class T>
static void BoxCoder(const phi::CPUContext& ctx,
                     DenseTensor* all_anchors,
                     DenseTensor* bbox_deltas,
                     DenseTensor* variances,
                     DenseTensor* proposals,
                     const bool pixel_offset = true) {
  T* proposals_data = ctx.template Alloc<T>(proposals);

  int64_t row = all_anchors->dims()[0];
  int64_t len = all_anchors->dims()[1];

  auto* bbox_deltas_data = bbox_deltas->data<T>();
  auto* anchor_data = all_anchors->data<T>();
  const T* variances_data = nullptr;
  if (variances) {
    variances_data = variances->data<T>();
  }

  T offset = pixel_offset ? static_cast<T>(1.0) : 0;
  for (int64_t r = 0; r < row; ++r) {
    const int64_t base = r * len;
    const T* anchor = anchor_data + base;
    const T* delta = bbox_deltas_data + base;
    T* decoded = proposals_data + base;

    T anchor_width = anchor[2] - anchor[0] + offset;
    T anchor_height = anchor[3] - anchor[1] + offset;
    T anchor_center_x = anchor[0] + 0.5 * anchor_width;
    T anchor_center_y = anchor[1] + 0.5 * anchor_height;

    // Deltas, scaled by the variances when they are provided.
    T dx = delta[0];
    T dy = delta[1];
    T dw = delta[2];
    T dh = delta[3];
    if (variances) {
      const T* var = variances_data + base;
      dx = var[0] * delta[0];
      dy = var[1] * delta[1];
      dw = var[2] * delta[2];
      dh = var[3] * delta[3];
    }

    T bbox_center_x = dx * anchor_width + anchor_center_x;
    T bbox_center_y = dy * anchor_height + anchor_center_y;
    // Cap the exponent so exp() cannot blow up on large deltas.
    T bbox_width = std::exp(std::min<T>(dw, kBBoxClipDefault)) * anchor_width;
    T bbox_height =
        std::exp(std::min<T>(dh, kBBoxClipDefault)) * anchor_height;

    decoded[0] = bbox_center_x - bbox_width / 2;
    decoded[1] = bbox_center_y - bbox_height / 2;
    decoded[2] = bbox_center_x + bbox_width / 2 - offset;
    decoded[3] = bbox_center_y + bbox_height / 2 - offset;
  }
}
/**
 * Produces the proposals of a single image on CPU.
 *
 * Pipeline, in execution order:
 *   1. Build an index over all scores and rank it by descending score. When
 *      `pre_nms_top_n` is positive and smaller than the number of scores only
 *      the best `pre_nms_top_n` indices are kept (selected with
 *      std::nth_element, so they are not fully ordered); otherwise every
 *      index is kept, fully sorted.
 *   2. Gather the matching scores, deltas, anchors and variances and decode
 *      them with BoxCoder.
 *   3. Clip the decoded boxes (ClipTiledBoxes) and drop the ones rejected by
 *      FilterBoxes for the given `min_size`.
 *   4. If nothing survives, return one all-zero box with a zero score.
 *   5. If `nms_thresh` is not positive, return the filtered boxes as they
 *      are; otherwise run NMS and keep at most `post_nms_top_n` of its
 *      results (no limit when `post_nms_top_n` is not positive).
 *
 * @return pair of (boxes with shape [K, 4], scores with shape [K, 1]).
 */
template <typename T>
std::pair<DenseTensor, DenseTensor> ProposalForOneImage(
    const phi::CPUContext& ctx,
    const DenseTensor& im_shape_slice,
    const DenseTensor& anchors,
    const DenseTensor& variances,
    const DenseTensor& bbox_deltas_slice,  // [M, 4]
    const DenseTensor& scores_slice,       // [N, 1]
    int pre_nms_top_n,
    int post_nms_top_n,
    float nms_thresh,
    float min_size,
    float eta,
    bool pixel_offset = true) {
  const T* score_ptr = scores_slice.data<T>();
  const int64_t candidate_count = scores_slice.numel();

  // Resizes `t` to [rows, cols] and allocates its storage as T.
  const auto alloc_rows = [&ctx](DenseTensor* t, int64_t rows, int64_t cols) {
    t->Resize(phi::make_ddim({rows, cols}));
    ctx.template Alloc<T>(t);
  };

  // Identity permutation that is reordered by score below.
  DenseTensor order_t;
  order_t.Resize(phi::make_ddim({candidate_count}));
  int* order = ctx.template Alloc<int>(&order_t);
  for (int pos = 0; pos < candidate_count; ++pos) {
    order[pos] = pos;
  }

  auto higher_score_first = [score_ptr](const int64_t& lhs,
                                        const int64_t& rhs) {
    return score_ptr[lhs] > score_ptr[rhs];
  };
  const bool take_top_k =
      pre_nms_top_n > 0 && pre_nms_top_n < candidate_count;
  if (take_top_k) {
    // Partition only: the first pre_nms_top_n entries are the best ones.
    std::nth_element(order,
                     order + pre_nms_top_n,
                     order + candidate_count,
                     higher_score_first);
    order_t.Resize(phi::make_ddim({pre_nms_top_n}));
  } else {
    std::sort(order, order + candidate_count, higher_score_first);
  }

  // Pull out the rows that belong to the selected candidates.
  const int64_t selected = order_t.numel();
  DenseTensor scores_sel, bbox_sel, anchor_sel, var_sel;
  alloc_rows(&scores_sel, selected, 1);
  alloc_rows(&bbox_sel, selected, 4);
  alloc_rows(&anchor_sel, selected, 4);
  alloc_rows(&var_sel, selected, 4);
  phi::funcs::CPUGather<T>(ctx, scores_slice, order_t, &scores_sel);
  phi::funcs::CPUGather<T>(ctx, bbox_deltas_slice, order_t, &bbox_sel);
  phi::funcs::CPUGather<T>(ctx, anchors, order_t, &anchor_sel);
  phi::funcs::CPUGather<T>(ctx, variances, order_t, &var_sel);

  // Decode, clip in place, then collect the indices of acceptable boxes.
  DenseTensor proposals;
  alloc_rows(&proposals, selected, 4);
  BoxCoder<T>(ctx, &anchor_sel, &bbox_sel, &var_sel, &proposals, pixel_offset);
  ClipTiledBoxes<T>(
      ctx, im_shape_slice, proposals, &proposals, false, pixel_offset);
  DenseTensor keep;
  FilterBoxes<T>(
      ctx, &proposals, min_size, im_shape_slice, false, &keep, pixel_offset);

  // Nothing passed the filter: hand back a single zeroed box and score.
  if (keep.numel() == 0) {
    phi::funcs::SetConstant<phi::CPUContext, T> fill_zero;
    alloc_rows(&bbox_sel, 1, 4);
    fill_zero(ctx, &bbox_sel, static_cast<T>(0));
    DenseTensor zero_score;
    alloc_rows(&zero_score, 1, 1);
    fill_zero(ctx, &zero_score, static_cast<T>(0));
    return std::make_pair(bbox_sel, zero_score);
  }

  // bbox_sel is reused here to hold the filtered boxes.
  DenseTensor scores_filter;
  alloc_rows(&bbox_sel, keep.numel(), 4);
  alloc_rows(&scores_filter, keep.numel(), 1);
  phi::funcs::CPUGather<T>(ctx, proposals, keep, &bbox_sel);
  phi::funcs::CPUGather<T>(ctx, scores_sel, keep, &scores_filter);

  const bool skip_nms = nms_thresh <= 0;
  if (skip_nms) {
    return std::make_pair(bbox_sel, scores_filter);
  }

  DenseTensor keep_nms = phi::funcs::NMS<T>(
      ctx, &bbox_sel, &scores_filter, nms_thresh, eta, pixel_offset);
  if (post_nms_top_n > 0 && post_nms_top_n < keep_nms.numel()) {
    // Shrinking the index tensor truncates the NMS result.
    keep_nms.Resize(phi::make_ddim({post_nms_top_n}));
  }

  // proposals / scores_sel are reused as the final output buffers.
  const int64_t final_count = keep_nms.numel();
  alloc_rows(&proposals, final_count, 4);
  alloc_rows(&scores_sel, final_count, 1);
  phi::funcs::CPUGather<T>(ctx, bbox_sel, keep_nms, &proposals);
  phi::funcs::CPUGather<T>(ctx, scores_filter, keep_nms, &scores_sel);
  return std::make_pair(proposals, scores_sel);
}
/**
 * CPU implementation of the generate_proposals_v2 kernel.
 *
 * `scores` and `bbox_deltas` are read as 4-D tensors whose axes are permuted
 * with {0, 2, 3, 1} before use. Each slice along axis 0 is then processed
 * independently by ProposalForOneImage and the per-image results are appended
 * to the outputs back to back.
 *
 * @param ctx            Device context used for every allocation.
 * @param scores         4-D score tensor; axis 0 is the image index.
 * @param bbox_deltas    4-D box-delta tensor; axis 0 is the image index.
 * @param im_shape       Per-image shape information, sliced along axis 0.
 * @param anchors        Anchor boxes, viewed as [numel / 4, 4].
 * @param variances      Anchor variances, viewed as [numel / 4, 4].
 * @param pre_nms_top_n  Forwarded to ProposalForOneImage.
 * @param post_nms_top_n Forwarded to ProposalForOneImage.
 * @param nms_thresh     Forwarded to ProposalForOneImage.
 * @param min_size       Forwarded to ProposalForOneImage.
 * @param eta            Forwarded to ProposalForOneImage.
 * @param pixel_offset   Forwarded to ProposalForOneImage.
 * @param rpn_rois       Output, finally shaped [num_proposals, 4].
 * @param rpn_roi_probs  Output, finally shaped [num_proposals, 1].
 * @param rpn_rois_num   Optional output (may be nullptr) receiving the number
 *                       of proposals of every image as int, shape [num].
 */
template <typename T, typename Context>
void GenerateProposalsV2Kernel(const Context& ctx,
                               const DenseTensor& scores,
                               const DenseTensor& bbox_deltas,
                               const DenseTensor& im_shape,
                               const DenseTensor& anchors,
                               const DenseTensor& variances,
                               int pre_nms_top_n,
                               int post_nms_top_n,
                               float nms_thresh,
                               float min_size,
                               float eta,
                               bool pixel_offset,
                               DenseTensor* rpn_rois,
                               DenseTensor* rpn_roi_probs,
                               DenseTensor* rpn_rois_num) {
  const auto& scores_dim = scores.dims();
  const int64_t num = scores_dim[0];
  const int64_t c_score = scores_dim[1];
  const int64_t h_score = scores_dim[2];
  const int64_t w_score = scores_dim[3];

  const auto& bbox_dim = bbox_deltas.dims();
  const int64_t c_bbox = bbox_dim[1];
  const int64_t h_bbox = bbox_dim[2];
  const int64_t w_bbox = bbox_dim[3];

  // Allocate for the largest possible result; both outputs are shrunk to the
  // real proposal count at the end.
  rpn_rois->Resize(phi::make_ddim({bbox_deltas.numel() / 4, 4}));
  ctx.template Alloc<T>(rpn_rois);
  rpn_roi_probs->Resize(phi::make_ddim({scores.numel(), 1}));
  ctx.template Alloc<T>(rpn_roi_probs);

  // Move axis 1 to the end so each image slice can be viewed as rows.
  DenseTensor bbox_deltas_swap, scores_swap;
  bbox_deltas_swap.Resize(phi::make_ddim({num, h_bbox, w_bbox, c_bbox}));
  ctx.template Alloc<T>(&bbox_deltas_swap);
  scores_swap.Resize(phi::make_ddim({num, h_score, w_score, c_score}));
  ctx.template Alloc<T>(&scores_swap);

  phi::funcs::Transpose<phi::CPUContext, T, 4> trans;
  std::vector<int> axis = {0, 2, 3, 1};
  trans(ctx, bbox_deltas, &bbox_deltas_swap, axis);
  trans(ctx, scores, &scores_swap, axis);

  // Flattened [K, 4] views of the anchors and variances.
  DenseTensor tmp_anchors = anchors;
  DenseTensor tmp_variances = variances;
  tmp_anchors.Resize(phi::make_ddim({tmp_anchors.numel() / 4, 4}));
  tmp_variances.Resize(phi::make_ddim({tmp_variances.numel() / 4, 4}));

  std::vector<int> tmp_num;  // proposals produced per image
  int64_t num_proposals = 0;  // running total, also the append offset
  for (int64_t i = 0; i < num; ++i) {
    DenseTensor im_shape_slice = im_shape.Slice(i, i + 1);
    DenseTensor bbox_deltas_slice = bbox_deltas_swap.Slice(i, i + 1);
    DenseTensor scores_slice = scores_swap.Slice(i, i + 1);

    bbox_deltas_slice.Resize(
        phi::make_ddim({h_bbox * w_bbox * c_bbox / 4, 4}));
    scores_slice.Resize(phi::make_ddim({h_score * w_score * c_score, 1}));

    std::pair<DenseTensor, DenseTensor> tensor_pair =
        ProposalForOneImage<T>(ctx,
                               im_shape_slice,
                               tmp_anchors,
                               tmp_variances,
                               bbox_deltas_slice,
                               scores_slice,
                               pre_nms_top_n,
                               post_nms_top_n,
                               nms_thresh,
                               min_size,
                               eta,
                               pixel_offset);
    DenseTensor& proposals = tensor_pair.first;
    DenseTensor& nscores = tensor_pair.second;

    // Offsets are element counts: 4 values per box, 1 value per score.
    AppendProposals(rpn_rois, 4 * num_proposals, proposals);
    AppendProposals(rpn_roi_probs, num_proposals, nscores);

    const int64_t kept = proposals.dims()[0];
    num_proposals += kept;
    tmp_num.push_back(static_cast<int>(kept));
  }

  if (rpn_rois_num != nullptr) {
    rpn_rois_num->Resize(phi::make_ddim({num}));
    ctx.template Alloc<int>(rpn_rois_num);
    int* num_data = rpn_rois_num->data<int>();
    for (int64_t i = 0; i < num; ++i) {
      num_data[i] = tmp_num[i];
    }
  }

  // Trim the outputs to the number of proposals actually written.
  rpn_rois->Resize(phi::make_ddim({num_proposals, 4}));
  rpn_roi_probs->Resize(phi::make_ddim({num_proposals, 1}));
}
} // namespace phi
// Registers phi::GenerateProposalsV2Kernel under the kernel name
// `generate_proposals_v2` for the CPU backend with ALL_LAYOUT, listing float
// and double as the element types. The trailing block of the macro is left
// empty here.
PD_REGISTER_KERNEL(generate_proposals_v2,
CPU,
ALL_LAYOUT,
phi::GenerateProposalsV2Kernel,
float,
double) {}
......@@ -18,9 +18,11 @@ limitations under the License. */
#include <vector>
#include "paddle/fluid/operators/detection/poly_util.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/device_context.h"
namespace paddle {
namespace operators {
namespace phi {
namespace funcs {
template <class T>
bool SortScorePairDescend(const std::pair<float, T>& pair1,
......@@ -94,9 +96,10 @@ T PolyIoU(const T* box1,
const T* box2,
const size_t box_size,
const bool normalized) {
T bbox1_area = PolyArea<T>(box1, box_size, normalized);
T bbox2_area = PolyArea<T>(box2, box_size, normalized);
T inter_area = PolyOverlapArea<T>(box1, box2, box_size, normalized);
T bbox1_area = paddle::operators::PolyArea<T>(box1, box_size, normalized);
T bbox2_area = paddle::operators::PolyArea<T>(box2, box_size, normalized);
T inter_area =
paddle::operators::PolyOverlapArea<T>(box1, box2, box_size, normalized);
if (bbox1_area == 0 || bbox2_area == 0 || inter_area == 0) {
// If coordinate values are invalid
// if area size <= 0, return 0.
......@@ -124,11 +127,12 @@ static inline std::vector<std::pair<T, int>> GetSortedScoreIndex(
}
template <typename T>
static inline framework::Tensor VectorToTensor(
const std::vector<T>& selected_indices, int selected_num) {
framework::Tensor keep_nms;
static inline DenseTensor VectorToTensor(const DeviceContext& ctx,
const std::vector<T>& selected_indices,
int selected_num) {
DenseTensor keep_nms;
keep_nms.Resize({selected_num});
auto* keep_data = keep_nms.mutable_data<T>(platform::CPUPlace());
auto* keep_data = ctx.template Alloc<T>(&keep_nms);
for (int i = 0; i < selected_num; ++i) {
keep_data[i] = selected_indices[i];
}
......@@ -136,12 +140,12 @@ static inline framework::Tensor VectorToTensor(
}
template <class T>
framework::Tensor NMS(const platform::DeviceContext& ctx,
framework::Tensor* bbox,
framework::Tensor* scores,
T nms_threshold,
float eta,
bool pixel_offset = true) {
DenseTensor NMS(const DeviceContext& ctx,
DenseTensor* bbox,
DenseTensor* scores,
T nms_threshold,
float eta,
bool pixel_offset = true) {
int64_t num_boxes = bbox->dims()[0];
// 4: [xmin ymin xmax ymax]
int64_t box_size = bbox->dims()[1];
......@@ -178,8 +182,8 @@ framework::Tensor NMS(const platform::DeviceContext& ctx,
adaptive_threshold *= eta;
}
}
return VectorToTensor(selected_indices, selected_num);
return VectorToTensor(ctx, selected_indices, selected_num);
}
} // namespace operators
} // namespace paddle
} // namespace funcs
} // namespace phi
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
namespace phi {
/**
 * Declaration of the generate_proposals_v2 kernel entry point.
 *
 * @tparam T        Element type of the floating-point tensors.
 * @tparam Context  Device context type the kernel runs on.
 *
 * @param ctx            Device context.
 * @param scores         Score tensor.
 * @param bbox_deltas    Box-delta tensor.
 * @param im_shape       Per-image shape information.
 * @param anchors        Anchor boxes.
 * @param variances      Anchor variances.
 * @param pre_nms_top_n  Candidate limit applied before NMS.
 * @param post_nms_top_n Candidate limit applied after NMS.
 * @param nms_thresh     NMS threshold.
 * @param min_size       Minimum box size used when filtering.
 * @param eta            Forwarded to the NMS routine.
 * @param pixel_offset   Forwarded to box decoding, clipping, filtering, NMS.
 * @param rpn_rois       Output proposals.
 * @param rpn_roi_probs  Output proposal scores.
 * @param rpn_rois_num   Output per-image proposal counts.
 *
 * NOTE(review): the CPU implementation treats `scores` and `bbox_deltas` as
 * 4-D tensors, shapes the outputs as [num_proposals, 4] and
 * [num_proposals, 1], and accepts a null `rpn_rois_num` — confirm that other
 * backends follow the same contract.
 */
template <typename T, typename Context>
void GenerateProposalsV2Kernel(const Context& ctx,
const DenseTensor& scores,
const DenseTensor& bbox_deltas,
const DenseTensor& im_shape,
const DenseTensor& anchors,
const DenseTensor& variances,
int pre_nms_top_n,
int post_nms_top_n,
float nms_thresh,
float min_size,
float eta,
bool pixel_offset,
DenseTensor* rpn_rois,
DenseTensor* rpn_roi_probs,
DenseTensor* rpn_rois_num);
} // namespace phi
此差异已折叠。
......@@ -596,7 +596,7 @@ class TestGenerateProposals(LayerTest):
'var': variances_np
},
fetch_list=[rois, roi_probs, rois_num],
with_lod=True)
with_lod=False)
with self.dynamic_graph():
scores_dy = base.to_variable(scores_np)
......
......@@ -26,6 +26,36 @@ import copy
from test_generate_proposals_op import clip_tiled_boxes, box_coder, nms
def python_generate_proposals_v2(
    scores,
    bbox_deltas,
    img_size,
    anchors,
    variances,
    pre_nms_top_n=6000,
    post_nms_top_n=1000,
    nms_thresh=0.5,
    min_size=0.1,
    eta=1.0,
    pixel_offset=False,
    return_rois_num=True,
):
    """Call ``paddle.vision.ops.generate_proposals`` and drop its third result.

    Every argument is forwarded unchanged: the five tensors positionally and
    the remaining options by keyword.

    Returns:
        tuple: ``(rpn_rois, rpn_roi_probs)``; the per-image proposal counts
        returned by the underlying API are discarded.
    """
    options = dict(
        pre_nms_top_n=pre_nms_top_n,
        post_nms_top_n=post_nms_top_n,
        nms_thresh=nms_thresh,
        min_size=min_size,
        eta=eta,
        pixel_offset=pixel_offset,
        return_rois_num=return_rois_num,
    )
    # Exactly three values are expected back; the last one is not used.
    rois, roi_probs, _rois_num = paddle.vision.ops.generate_proposals(
        scores, bbox_deltas, img_size, anchors, variances, **options)
    return rois, roi_probs
def generate_proposals_v2_in_python(scores, bbox_deltas, im_shape, anchors,
variances, pre_nms_topN, post_nms_topN,
nms_thresh, min_size, eta, pixel_offset):
......@@ -155,15 +185,16 @@ class TestGenerateProposalsV2Op(OpTest):
}
self.outputs = {
'RpnRois': (self.rpn_rois[0], [self.rois_num]),
'RpnRoiProbs': (self.rpn_roi_probs[0], [self.rois_num]),
'RpnRois': self.rpn_rois[0],
'RpnRoiProbs': self.rpn_roi_probs[0],
}
def test_check_output(self):
self.check_output()
self.check_output(check_eager=False)
def setUp(self):
self.op_type = "generate_proposals_v2"
self.python_api = python_generate_proposals_v2
self.set_data()
def init_test_params(self):
......@@ -202,150 +233,117 @@ class TestGenerateProposalsV2Op(OpTest):
self.nms_thresh, self.min_size, self.eta, self.pixel_offset)
class TestGenerateProposalsV2OutLodOp(TestGenerateProposalsV2Op):
    """Variant of the base op test that sets ``return_rois_num`` and also
    expects the ``RpnRoisNum`` output."""

    def set_data(self):
        """Prepare ``inputs``, ``attrs`` and ``outputs`` for the op test."""
        # The three init hooks fill the attributes read below.
        self.init_test_params()
        self.init_test_input()
        self.init_test_output()

        self.inputs = dict(
            Scores=self.scores,
            BboxDeltas=self.bbox_deltas,
            ImShape=self.im_shape.astype(np.float32),
            Anchors=self.anchors,
            Variances=self.variances,
        )
        self.attrs = dict(
            pre_nms_topN=self.pre_nms_topN,
            post_nms_topN=self.post_nms_topN,
            nms_thresh=self.nms_thresh,
            min_size=self.min_size,
            eta=self.eta,
            return_rois_num=True,
        )

        expected_rois = self.rpn_rois[0]
        expected_probs = self.rpn_roi_probs[0]
        # RoIs and scores are given as (data, [rois_num]) pairs; each pair
        # gets its own list object.
        self.outputs = dict(
            RpnRois=(expected_rois, [self.rois_num]),
            RpnRoiProbs=(expected_probs, [self.rois_num]),
            RpnRoisNum=np.asarray(self.rois_num, dtype=np.int32),
        )
class TestGenerateProposalsV2OpNoBoxLeft(TestGenerateProposalsV2Op):
    """Base op test rerun with ``min_size`` raised to 1000.0.

    NOTE(review): judging by the class name this is meant to leave no box
    after size filtering — confirm against the test inputs.
    """

    def init_test_params(self):
        # Top-N limits: before NMS train 12000 / test 2000,
        # after NMS train 6000 / test 1000.
        self.pre_nms_topN, self.post_nms_topN = 12000, 5000
        self.nms_thresh, self.eta = 0.7, 1.
        self.min_size = 1000.0
        self.pixel_offset = True
class TestGenerateProposalsV2OpNoOffset(TestGenerateProposalsV2Op):
    """Base op test rerun with ``pixel_offset`` disabled."""

    def init_test_params(self):
        # Top-N limits: before NMS train 12000 / test 2000,
        # after NMS train 6000 / test 1000.
        self.pre_nms_topN, self.post_nms_topN = 12000, 5000
        self.nms_thresh, self.eta = 0.7, 1.
        self.min_size = 3.0
        self.pixel_offset = False
class testGenerateProposalsAPI(unittest.TestCase):
    """Checks ``paddle.vision.ops.generate_proposals`` against the values
    produced by ``generate_proposals_v2_in_python``, once in dynamic mode and
    once in static mode."""

    def setUp(self):
        """Create seeded random inputs and the expected results."""
        np.random.seed(678)
        self.scores_np = np.random.rand(2, 3, 4, 4).astype('float32')
        self.bbox_deltas_np = np.random.rand(2, 12, 4, 4).astype('float32')
        self.img_size_np = np.array([[8, 8], [6, 6]]).astype('float32')
        anchor_values = np.arange(4 * 4 * 3 * 4)
        self.anchors_np = np.reshape(anchor_values,
                                     [4, 4, 3, 4]).astype('float32')
        self.variances_np = np.ones((4, 4, 3, 4)).astype('float32')

        ref_rois, ref_probs, ref_nums = generate_proposals_v2_in_python(
            self.scores_np,
            self.bbox_deltas_np,
            self.img_size_np,
            self.anchors_np,
            self.variances_np,
            pre_nms_topN=10,
            post_nms_topN=5,
            nms_thresh=0.5,
            min_size=0.1,
            eta=1.0,
            pixel_offset=False)
        # Axis 1 of the stacked reference RoIs / scores is squeezed away.
        self.roi_expected = np.array(ref_rois).squeeze(1)
        self.roi_probs_expected = np.array(ref_probs).squeeze(1)
        self.rois_num_expected = np.array(ref_nums)

    def test_dynamic(self):
        # Dynamic mode: feed tensors directly and compare all three outputs.
        paddle.disable_static()
        arrays = (self.scores_np, self.bbox_deltas_np, self.img_size_np,
                  self.anchors_np, self.variances_np)
        tensors = [paddle.to_tensor(array) for array in arrays]
        rois, roi_probs, rois_num = paddle.vision.ops.generate_proposals(
            *tensors, pre_nms_top_n=10, post_nms_top_n=5, return_rois_num=True)

        checks = (
            (self.roi_expected, rois),
            (self.roi_probs_expected, roi_probs),
            (self.rois_num_expected, rois_num),
        )
        for expected, actual in checks:
            self.assertTrue(np.allclose(expected, actual.numpy()))

    def test_static(self):
        # Static mode: declare placeholders, run the default main program and
        # compare the fetched results.
        paddle.enable_static()
        input_specs = (
            ('scores', [2, 3, 4, 4]),
            ('bbox_deltas', [2, 12, 4, 4]),
            ('img_size', [2, 2]),
            ('anchors', [4, 4, 3, 4]),
            ('variances', [4, 4, 3, 4]),
        )
        placeholders = [
            paddle.static.data(name=name, shape=shape, dtype='float32')
            for name, shape in input_specs
        ]
        rois, roi_probs, rois_num = paddle.vision.ops.generate_proposals(
            *placeholders,
            pre_nms_top_n=10,
            post_nms_top_n=5,
            return_rois_num=True)

        exe = paddle.static.Executor()
        fetched = exe.run(
            paddle.static.default_main_program(),
            feed={
                'scores': self.scores_np,
                'bbox_deltas': self.bbox_deltas_np,
                'img_size': self.img_size_np,
                'anchors': self.anchors_np,
                'variances': self.variances_np,
            },
            fetch_list=[rois.name, roi_probs.name, rois_num.name],
            return_numpy=False)
        rois, roi_probs, rois_num = fetched

        checks = (
            (self.roi_expected, rois),
            (self.roi_probs_expected, roi_probs),
            (self.rois_num_expected, rois_num),
        )
        for expected, actual in checks:
            self.assertTrue(np.allclose(expected, np.array(actual)))
# class TestGenerateProposalsV2OpNoBoxLeft(TestGenerateProposalsV2Op):
# def init_test_params(self):
# self.pre_nms_topN = 12000 # train 12000, test 2000
# self.post_nms_topN = 5000 # train 6000, test 1000
# self.nms_thresh = 0.7
# self.min_size = 1000.0
# self.eta = 1.
# self.pixel_offset = True
# class TestGenerateProposalsV2OpNoOffset(TestGenerateProposalsV2Op):
# def init_test_params(self):
# self.pre_nms_topN = 12000 # train 12000, test 2000
# self.post_nms_topN = 5000 # train 6000, test 1000
# self.nms_thresh = 0.7
# self.min_size = 3.0
# self.eta = 1.
# self.pixel_offset = False
# class testGenerateProposalsAPI(unittest.TestCase):
# def setUp(self):
# np.random.seed(678)
# self.scores_np = np.random.rand(2, 3, 4, 4).astype('float32')
# self.bbox_deltas_np = np.random.rand(2, 12, 4, 4).astype('float32')
# self.img_size_np = np.array([[8, 8], [6, 6]]).astype('float32')
# self.anchors_np = np.reshape(np.arange(4 * 4 * 3 * 4),
# [4, 4, 3, 4]).astype('float32')
# self.variances_np = np.ones((4, 4, 3, 4)).astype('float32')
# self.roi_expected, self.roi_probs_expected, self.rois_num_expected = generate_proposals_v2_in_python(
# self.scores_np,
# self.bbox_deltas_np,
# self.img_size_np,
# self.anchors_np,
# self.variances_np,
# pre_nms_topN=10,
# post_nms_topN=5,
# nms_thresh=0.5,
# min_size=0.1,
# eta=1.0,
# pixel_offset=False)
# self.roi_expected = np.array(self.roi_expected).squeeze(1)
# self.roi_probs_expected = np.array(self.roi_probs_expected).squeeze(1)
# self.rois_num_expected = np.array(self.rois_num_expected)
# def test_dynamic(self):
# paddle.disable_static()
# scores = paddle.to_tensor(self.scores_np)
# bbox_deltas = paddle.to_tensor(self.bbox_deltas_np)
# img_size = paddle.to_tensor(self.img_size_np)
# anchors = paddle.to_tensor(self.anchors_np)
# variances = paddle.to_tensor(self.variances_np)
# rois, roi_probs, rois_num = paddle.vision.ops.generate_proposals(
# scores,
# bbox_deltas,
# img_size,
# anchors,
# variances,
# pre_nms_top_n=10,
# post_nms_top_n=5,
# return_rois_num=True)
# self.assertTrue(np.allclose(self.roi_expected, rois.numpy()))
# self.assertTrue(np.allclose(self.roi_probs_expected, roi_probs.numpy()))
# self.assertTrue(np.allclose(self.rois_num_expected, rois_num.numpy()))
# def test_static(self):
# paddle.enable_static()
# scores = paddle.static.data(name='scores',
# shape=[2, 3, 4, 4],
# dtype='float32')
# bbox_deltas = paddle.static.data(name='bbox_deltas',
# shape=[2, 12, 4, 4],
# dtype='float32')
# img_size = paddle.static.data(name='img_size',
# shape=[2, 2],
# dtype='float32')
# anchors = paddle.static.data(name='anchors',
# shape=[4, 4, 3, 4],
# dtype='float32')
# variances = paddle.static.data(name='variances',
# shape=[4, 4, 3, 4],
# dtype='float32')
# rois, roi_probs, rois_num = paddle.vision.ops.generate_proposals(
# scores,
# bbox_deltas,
# img_size,
# anchors,
# variances,
# pre_nms_top_n=10,
# post_nms_top_n=5,
# return_rois_num=True)
# exe = paddle.static.Executor()
# rois, roi_probs, rois_num = exe.run(
# paddle.static.default_main_program(),
# feed={
# 'scores': self.scores_np,
# 'bbox_deltas': self.bbox_deltas_np,
# 'img_size': self.img_size_np,
# 'anchors': self.anchors_np,
# 'variances': self.variances_np,
# },
# fetch_list=[rois.name, roi_probs.name, rois_num.name],
# return_numpy=False)
# self.assertTrue(np.allclose(self.roi_expected, np.array(rois)))
# self.assertTrue(
# np.allclose(self.roi_probs_expected, np.array(roi_probs)))
# self.assertTrue(np.allclose(self.rois_num_expected, np.array(rois_num)))
if __name__ == '__main__':
paddle.enable_static()
......
......@@ -1740,7 +1740,15 @@ def generate_proposals(scores,
print(rois, roi_probs, roi_nums)
"""
if _non_static_mode():
if in_dygraph_mode():
assert return_rois_num, "return_rois_num should be True in dygraph mode."
attrs = (pre_nms_top_n, post_nms_top_n, nms_thresh, min_size, eta,
pixel_offset)
rpn_rois, rpn_roi_probs, rpn_rois_num = _C_ops.final_state_generate_proposals_v2(
scores, bbox_deltas, img_size, anchors, variances, *attrs)
return rpn_rois, rpn_roi_probs, rpn_rois_num
elif _non_static_mode():
assert return_rois_num, "return_rois_num should be True in dygraph mode."
attrs = ('pre_nms_topN', pre_nms_top_n, 'post_nms_topN', post_nms_top_n,
'nms_thresh', nms_thresh, 'min_size', min_size, 'eta', eta,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册