提交 912a4f25 编写于 作者: D dangqingqing

Add multi-class non-maximum suppression operator.

上级 114d0c15
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/op_registry.h"
#include "paddle/operators/math/math_function.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;
constexpr int64_t kOutputDim = 6;
constexpr int64_t kBBoxSize = 4;
class MulticlassNMSOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Bboxes"),
"Input(Bboxes) of MulticlassNMS should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Scores"),
"Input(Scores) of MulticlassNMS should not be null.");
auto box_dims = ctx->GetInputDim("Bboxes");
auto score_dims = ctx->GetInputDim("Scores");
PADDLE_ENFORCE_EQ(box_dims.size(), 3,
"The rank of Input(Bboxes) must be 3.");
PADDLE_ENFORCE_EQ(score_dims.size(), 3,
"The rank of Input(Scores) must be 3.");
PADDLE_ENFORCE_EQ(box_dims[0], score_dims[0]);
PADDLE_ENFORCE_EQ(box_dims[2], 4);
PADDLE_ENFORCE_EQ(box_dims[1], score_dims[2]);
// Here the box_dims[0] is not the real dimension of output.
// It will be rewritten in the computing kernel.
ctx->SetOutputDim("Out", {box_dims[0], 6});
}
};
template <class T>
bool SortScorePairDescend(const std::pair<float, T>& pair1,
const std::pair<float, T>& pair2) {
return pair1.first > pair2.first;
}
template <class T>
static inline void GetMaxScoreIndex(
const std::vector<T>& scores, const T threshold, int top_k,
std::vector<std::pair<T, int>>* sorted_indices) {
for (size_t i = 0; i < scores.size(); ++i) {
if (scores[i] > threshold) {
sorted_indices->push_back(std::make_pair(scores[i], i));
}
}
// Sort the score pair according to the scores in descending order
std::stable_sort(sorted_indices->begin(), sorted_indices->end(),
SortScorePairDescend<int>);
// Keep top_k scores if needed.
if (top_k > -1 && top_k < sorted_indices->size()) {
sorted_indices->resize(top_k);
}
}
template <class T>
T BBoxArea(const T* box, const bool normalized) {
if (box[2] < box[0] || box[3] < box[1]) {
// If bbox is invalid (e.g. xmax < xmin or ymax < ymin), return 0.
return T(0.);
} else {
const T w = box[2] - box[0];
const T h = box[3] - box[1];
if (normalized) {
return w * h;
} else {
// If bbox is not within range [0, 1].
return (w + 1) * (h + 1);
}
}
}
template <class T>
static inline T JaccardOverlap(const T* box1, const T* box2,
const bool normalized) {
if (box2[0] > box1[2] || box2[2] < box1[0] || box2[1] > box1[3] ||
box2[3] < box1[1]) {
return static_cast<T>(0.);
} else {
const T inter_xmin = std::max(box1[0], box2[0]);
const T inter_ymin = std::max(box1[1], box2[1]);
const T inter_xmax = std::min(box1[2], box2[2]);
const T inter_ymax = std::min(box1[3], box2[3]);
const T inter_w = inter_xmax - inter_xmin;
const T inter_h = inter_ymax - inter_ymin;
const T inter_area = inter_w * inter_h;
const T bbox1_area = BBoxArea<T>(box1, normalized);
const T bbox2_area = BBoxArea<T>(box2, normalized);
return inter_area / (bbox1_area + bbox2_area - inter_area);
}
}
template <typename T>
class MulticlassNMSKernel : public framework::OpKernel<T> {
public:
void NMSFast(const Tensor& bbox, const Tensor& scores,
const T score_threshold, const T nms_threshold, const T eta,
const int64_t top_k, std::vector<int>* selected_indices) const {
// The total boxes for each instance.
int64_t num_boxes = bbox.dims()[0];
// 4: [xmin ymin xmax ymax]
int64_t box_size = bbox.dims()[1];
std::vector<T> scores_data(num_boxes);
std::copy_n(scores.data<T>(), num_boxes, scores_data.begin());
std::vector<std::pair<T, int>> sorted_indices;
GetMaxScoreIndex(scores_data, score_threshold, top_k, &sorted_indices);
selected_indices->clear();
T adaptive_threshold = nms_threshold;
const T* bbox_data = bbox.data<T>();
while (sorted_indices.size() != 0) {
const int idx = sorted_indices.front().second;
bool keep = true;
for (int k = 0; k < selected_indices->size(); ++k) {
if (keep) {
const int kept_idx = (*selected_indices)[k];
T overlap = JaccardOverlap<T>(bbox_data + idx * box_size,
bbox_data + kept_idx * box_size, true);
keep = overlap <= adaptive_threshold;
} else {
break;
}
}
if (keep) {
selected_indices->push_back(idx);
}
sorted_indices.erase(sorted_indices.begin());
if (keep && eta < 1 && adaptive_threshold > 0.5) {
adaptive_threshold *= eta;
}
}
}
void MulticlassNMS(const framework::ExecutionContext& ctx,
const Tensor& scores, const Tensor& bboxes,
std::map<int, std::vector<int>>* indices,
int* num_nmsed_out) const {
int64_t background_label = ctx.Attr<int64_t>("background_label");
int64_t nms_top_k = ctx.Attr<int64_t>("nms_top_k");
int64_t keep_top_k = ctx.Attr<int64_t>("keep_top_k");
T nms_threshold = static_cast<T>(ctx.Attr<float>("nms_threshold"));
T nms_eta = static_cast<T>(ctx.Attr<float>("nms_eta"));
T score_threshold = static_cast<T>(ctx.Attr<float>("confidence_threshold"));
int64_t class_num = scores.dims()[0];
int64_t predict_dim = scores.dims()[1];
int num_det = 0;
for (int64_t c = 0; c < class_num; ++c) {
if (c == background_label) continue;
Tensor score = scores.Slice(c, c + 1);
NMSFast(bboxes, score, score_threshold, nms_threshold, nms_eta, nms_top_k,
&((*indices)[c]));
num_det += indices[c].size();
}
*num_nmsed_out = num_det;
const T* scores_data = scores.data<T>();
if (keep_top_k > -1 && num_det > keep_top_k) {
std::vector<std::pair<float, std::pair<int, int>>> score_index_pairs;
for (const auto& it : *indices) {
int label = it.first;
const T* sdata = scores_data + label * predict_dim;
const std::vector<int>& label_indices = it.second;
for (int j = 0; j < label_indices.size(); ++j) {
int idx = label_indices[j];
PADDLE_ENFORCE_LT(idx, predict_dim);
score_index_pairs.push_back(
std::make_pair(sdata[idx], std::make_pair(label, idx)));
}
}
// Keep top k results per image.
std::sort(score_index_pairs.begin(), score_index_pairs.end(),
SortScorePairDescend<std::pair<int, int>>);
score_index_pairs.resize(keep_top_k);
// Store the new indices.
std::map<int, std::vector<int>> new_indices;
for (int j = 0; j < score_index_pairs.size(); ++j) {
int label = score_index_pairs[j].second.first;
int idx = score_index_pairs[j].second.second;
new_indices[label].push_back(idx);
}
new_indices.swap(*indices);
*num_nmsed_out = keep_top_k;
}
}
void MulticlassOutput(const Tensor& scores, const Tensor& bboxes,
std::map<int, std::vector<int>>& selected_indices,
Tensor* outs) const {
int predict_dim = scores.dims()[1];
auto* scores_data = scores.data<T>();
auto* bboxes_data = bboxes.data<T>();
auto* odata = outs->data<T>();
int count = 0;
for (const auto& it : selected_indices) {
int label = it.first;
const T* sdata = scores_data + label * predict_dim;
std::vector<int> indices = it.second;
for (int j = 0; j < indices.size(); ++j) {
int idx = indices[j];
const T* bdata = bboxes_data + idx * kBBoxSize;
odata[count * kOutputDim] = label; // label
odata[count * kOutputDim + 1] = sdata[idx]; // score
odata[count * kOutputDim + 2] = bdata[0]; // xmin
odata[count * kOutputDim + 3] = bdata[1]; // ymin
odata[count * kOutputDim + 4] = bdata[2]; // xmax
odata[count * kOutputDim + 5] = bdata[3]; // ymax
}
count++;
}
}
void Compute(const framework::ExecutionContext& ctx) const override {
auto* boxes = ctx.Input<Tensor>("Bboxes");
auto* scores = ctx.Input<Tensor>("Scores");
auto* outs = ctx.Output<LoDTensor>("Out");
auto box_dims = boxes->dims();
auto score_dims = scores->dims();
int64_t batch_size = box_dims[0];
int64_t class_num = score_dims[1];
int64_t predict_dim = score_dims[2];
std::vector<std::map<int, std::vector<int>>> all_indices;
std::vector<size_t> batch_starts = {0};
for (int64_t i = 0; i < batch_size; ++i) {
Tensor ins_score = scores->Slice(i, i + 1);
ins_score.Resize({class_num, predict_dim});
std::map<int, std::vector<int>> indices;
int num_nmsed_out = 0;
MulticlassNMS(ctx, ins_score, *boxes, &indices, &num_nmsed_out);
all_indices.push_back(indices);
batch_starts.push_back(batch_starts.back() + num_nmsed_out);
}
int num_kept = batch_starts.back();
if (num_kept == 0) {
outs->Resize({0, 0});
} else {
outs->mutable_data<T>({num_kept, kOutputDim}, ctx.GetPlace());
for (int64_t i = 0; i < batch_size; ++i) {
Tensor ins_score = scores->Slice(i, i + 1);
ins_score.Resize({class_num, predict_dim});
int64_t s = batch_starts[i];
int64_t e = batch_starts[i + 1];
if (e > s) {
Tensor out = outs->Slice(s, e);
MulticlassOutput(ins_score, *boxes, all_indices[i], &out);
}
}
}
framework::LoD lod;
lod.emplace_back(batch_starts);
outs->set_lod(lod);
}
};
class MulticlassNMSOpMaker : public framework::OpProtoAndCheckerMaker {
public:
MulticlassNMSOpMaker(OpProto* proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Bboxes",
"(Tensor) A 2-D Tensor with shape [M, 4] represents the location "
"predictions with M bboxes. 4 is the number of "
"each location coordinates.");
AddOutput("Scores",
"(Tensor) A 3-D Tensor with shape [N, C, M] represents the "
"confidence predictions. N is the batch size, C is the class "
"number, M is number of predictions for each class, which is "
"the same with Bboxes.");
AddAttr<int64_t>(
"background_label",
"(int64_t, defalut: 0) "
"The index of background label, the background label will be ignored.")
.SetDefault(0);
AddAttr<float>("nms_threshold",
"(float, defalut: 0.3) "
"The threshold to be used in nms.")
.SetDefault(0.3);
AddAttr<int64_t>("nms_top_k",
"(int64_t) "
" .");
AddAttr<float>("nms_eta",
"(float) "
"The parameter for adaptive nms.")
.SetDefault(1.0);
AddAttr<int64_t>("keep_top_k",
"(int64_t) "
".");
AddAttr<float>("confidence_threshold",
"(float) "
".");
AddOutput("Out",
"(LoDTensor) A 2-D LoDTensor with shape [No, 6] represents the "
"detections. Each row has 6 values: "
"[label, confidence, xmin, ymin, xmax, ymax], No is the total "
"number of detections in this mini-batch. For each instance, "
"the offsets in first dimension are called LoD, the number of "
"offset is N + 1, if LoD[i + 1] - LoD[i] == 0, means there is "
"no detected bbox.");
AddComment(R"DOC(
This operators is to do multi-class non maximum suppression (nms) on a batched
of boxes and scores.
This op greedily selects a subset of detection bounding boxes, pruning
away boxes that have high IOU (intersection over union) overlap (> thresh)
with already selected boxes. It operates independently for each class for
which scores are provided (via the scores field of the input box_list),
pruning boxes with score less than a provided threshold prior to
applying NMS.
)DOC");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(multiclass_nms, ops::MulticlassNMSOp,
ops::MulticlassNMSOpMaker,
paddle::framework::EmptyGradOpMaker);
REGISTER_OP_CPU_KERNEL(multiclass_nms, ops::MulticlassNMSKernel<float>,
ops::MulticlassNMSKernel<double>);
......@@ -62,7 +62,7 @@ def batch_bipartite_match(distance, lod):
return match_indices, match_dist
class TestBipartiteMatchOpForWithLoD(OpTest):
class TestBipartiteMatchOpWithLoD(OpTest):
def setUp(self):
self.op_type = 'bipartite_match'
lod = [[0, 5, 11, 23]]
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import unittest
import numpy as np
import copy
from op_test import OpTest
def iou(box_a, box_b):
"""Apply intersection-over-union overlap between box_a and box_b
"""
xmin_a = min(box_a[0], box_a[2])
ymin_a = min(box_a[1], box_a[3])
xmax_a = max(box_a[0], box_a[2])
ymax_a = max(box_a[1], box_a[3])
xmin_b = min(box_b[0], box_b[2])
ymin_b = min(box_b[1], box_b[3])
xmax_b = max(box_b[0], box_b[2])
ymax_b = max(box_b[1], box_b[3])
area_a = (ymax_a - ymin_a) * (xmax_a - xmin_a)
area_b = (ymax_b - ymin_b) * (xmax_b - xmin_b)
if area_a <= 0 and area_b <= 0:
return 0.0
xa = max(xmin_a, xmin_b)
ya = max(ymin_a, ymin_b)
xb = min(xmax_a, xmax_b)
yb = min(ymax_a, ymax_b)
inter_area = max(xb - xa, 0.0) * max(yb - ya, 0.0)
box_a_area = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
box_b_area = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
iou_ratio = inter_area / (area_a + area_b - inter_area)
return iou_ratio
def nms(boxes, scores, score_threshold, nms_threshold, top_k=200, eta=1.0):
"""Apply non-maximum suppression at test time to avoid detecting too many
overlapping bounding boxes for a given object.
Args:
boxes: (tensor) The location preds for the img, Shape: [num_priors,4].
scores: (tensor) The class predscores for the img, Shape:[num_priors].
overlap: (float) The overlap thresh for suppressing unnecessary boxes.
top_k: (int) The Maximum number of box preds to consider.
Return:
The indices of the kept boxes with respect to num_priors.
"""
all_scores = copy.deepcopy(scores)
all_scores = all_scores.flatten()
selected_indices = np.argwhere(all_scores > score_threshold)
selected_indices = selected_indices.flatten()
all_scores = all_scores[selected_indices]
sorted_indices = np.argsort(-all_scores, axis=0)
sorted_scores = all_scores[sorted_indices]
if top_k < -1 and top_k < sorted_indices.shape[0]:
sorted_indices = sorted_indices[:top_k]
sorted_scores = sorted_scores[:top_k]
selected_indices = []
adaptive_threshold = nms_threshold
for i in range(sorted_scores.shape[0]):
idx = sorted_indices[i]
keep = True
for k in range(len(selected_indices)):
if keep:
kept_idx = selected_indices[k]
overlap = iou(boxes[idx], boxes[kept_idx])
keep = overlap <= adaptive_threshold
else:
break
if keep:
selected_indices.append(idx)
if keep and eta < 1 and adaptive_threshold > 0.5:
adaptive_threshold *= eta
return selected_indices
def multiclass_nms(boxes, scores, background, score_threshold, nms_threshold,
nms_top_k, keep_top_k):
class_num = scores.shape[0]
priorbox_num = scores.shape[1]
selected_indices = []
num_det = 0
for c in range(class_num):
if c == background: continue
indices = nms(boxes, scores[c], score_threshold, nms_threshold,
nms_top_k)
selected_indices.append((c, indices))
num_det += len(indices)
if keep_top_k > -1 and num_det > keep_top_k:
score_index = []
for c, indices in selected_indices:
for idx in indices:
score_index.append((scores[c][idx], c, idx))
sorted_score_index = sorted(
score_index, key=lambda tup: tup[0], reverse=True)
sorted_score_index = sorted_score_index[:keep_top_k]
selected_indices = []
for s, c, idx in sorted_score_index:
selected_indices.append((c, idx))
return selected_indices
def batched_multiclass_nms(boxes, scores, background, score_threshold,
nms_threshold, nms_top_k, keep_top_k):
batch_size = scores.shape[0]
det_outs = []
lod = [0]
for n in range(batch_size):
nmsed_outs = multiclass_nms(boxes, scores[n], background,
score_threshold, nms_threshold, nms_top_k,
keep_top_k)
lod.append(lod[-1] + len(nmsed_outs))
if len(nmsed_outs) == 0: continue
for c, indices in nmsed_outs:
for idx in indices:
xmin, ymin, xmax, ymax = boxes[idx][:]
det_outs.append(
(c, scores[n][c][idx], c, xmin, ymin, xmax, ymax))
return det_outs, lod
class TestMulticlassNMSOp(OpTest):
def setUp(self):
self.op_type = 'multiclass_nms'
N = 7
M = 1230
C = 21
BOX_SIZE = 4
background = 0
nms_threshold = 0.3
nms_top_k = 400
keep_top_k = 200
score_threshold = 0.01
scores = np.random.random((N, C, M)).astype('float32')
boxes = np.random.random((M, BOX_SIZE)).astype('float32')
boxes[:, 0:2] = boxes[:, 0:2] * 0.5
boxes[:, 2:4] = boxes[:, 0:2] * 0.5 + 0.5
nmsed_outs, lod = batched_multiclass_nms(boxes, scores, background,
score_threshold, nms_threshold,
nms_top_k, keep_top_k)
self.inputs = {'Bboxes': boxes, 'Scores': scores}
self.outputs = {'Out': (nmsed_outs, [lod])}
def test_check_output(self):
self.check_output()
class TestIOU(unittest.TestCase):
def test_iou(self):
box1 = np.array([4.0, 3.0, 7.0, 5.0]).astype('float32')
box2 = np.array([3.0, 4.0, 6.0, 8.0]).astype('float32')
expt_output = np.array([2.0 / 16.0]).astype('float32')
calc_output = np.array([iou(box1, box2)]).astype('float32')
self.assertTrue(np.allclose(calc_output, expt_output))
if __name__ == '__main__':
unittest.main()
# N = 7
# M = 8
# C = 5
# BOX_SIZE = 4
# background = 0
# nms_threshold = 0.3
# nms_top_k = 400
# keep_top_k = 200
# score_threshold = 0.5
# scores = np.random.random((N, C, M)).astype('float32')
# boxes = np.random.random((M, BOX_SIZE)).astype('float32')
# boxes[:, 0 : 2] = boxes[:, 0 : 2] * 0.5
# boxes[:, 2 : 4] = boxes[:, 0 : 2] * 0.5 + 0.5
# print nmsed_outs, lod
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册