diff --git a/paddle/fluid/operators/detection/CMakeLists.txt b/paddle/fluid/operators/detection/CMakeLists.txt index f1c504d6e4bd065e4221b1207a117ff0f6732459..ffa6953a3d56aabb6db9cd98efe547b706fa1910 100644 --- a/paddle/fluid/operators/detection/CMakeLists.txt +++ b/paddle/fluid/operators/detection/CMakeLists.txt @@ -21,6 +21,7 @@ detection_library(iou_similarity_op SRCS iou_similarity_op.cc iou_similarity_op.cu) detection_library(mine_hard_examples_op SRCS mine_hard_examples_op.cc) detection_library(multiclass_nms_op SRCS multiclass_nms_op.cc poly_util.cc gpc.cc) +detection_library(locality_aware_nms_op SRCS locality_aware_nms_op.cc poly_util.cc gpc.cc) detection_library(prior_box_op SRCS prior_box_op.cc prior_box_op.cu) detection_library(density_prior_box_op SRCS density_prior_box_op.cc density_prior_box_op.cu) detection_library(anchor_generator_op SRCS anchor_generator_op.cc diff --git a/paddle/fluid/operators/detection/locality_aware_nms_op.cc b/paddle/fluid/operators/detection/locality_aware_nms_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..ee0708312ddbbe1b4390e3a96e533c2aa053e60a --- /dev/null +++ b/paddle/fluid/operators/detection/locality_aware_nms_op.cc @@ -0,0 +1,460 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/detection/nms_util.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +using LoDTensor = framework::LoDTensor; + +class LocalityAwareNMSOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE_EQ(ctx->HasInput("BBoxes"), true, + "Input(BBoxes) of MultiClassNMS should not be null."); + PADDLE_ENFORCE_EQ(ctx->HasInput("Scores"), true, + "Input(Scores) of MultiClassNMS should not be null."); + PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true, + "Output(Out) of MultiClassNMS should not be null."); + + auto box_dims = ctx->GetInputDim("BBoxes"); + auto score_dims = ctx->GetInputDim("Scores"); + auto score_size = score_dims.size(); + + if (ctx->IsRuntime()) { + PADDLE_ENFORCE_EQ(score_size, 3, "The rank of Input(Scores) must be 3"); + PADDLE_ENFORCE_EQ(box_dims.size(), 3, + "The rank of Input(BBoxes) must be 3"); + + PADDLE_ENFORCE_EQ(box_dims[2] == 4 || box_dims[2] == 8 || + box_dims[2] == 16 || box_dims[2] == 24 || + box_dims[2] == 32, + true, + "The last dimension of Input(BBoxes) must be 4 or 8, " + "represents the layout of coordinate " + "[xmin, ymin, xmax, ymax] or " + "4 points: [x1, y1, x2, y2, x3, y3, x4, y4] or " + "8 points: [xi, yi] i= 1,2,...,8 or " + "12 points: [xi, yi] i= 1,2,...,12 or " + "16 points: [xi, yi] i= 1,2,...,16"); + PADDLE_ENFORCE_EQ(box_dims[1], score_dims[2], + "The 2nd dimension of Input(BBoxes) must be equal to " + "last dimension of Input(Scores), which represents the " + "predicted bboxes."); + } + // Here the box_dims[0] is not the real dimension of output. + // It will be rewritten in the computing kernel. 
+ ctx->SetOutputDim("Out", {box_dims[1], box_dims[2] + 2}); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType( + OperatorWithKernel::IndicateVarDataType(ctx, "Scores"), + platform::CPUPlace()); + } +}; + +template +void PolyWeightedMerge(const T* box1, T* box2, const T score1, const T score2, + const size_t box_size) { + for (size_t i = 0; i < box_size; ++i) { + box2[i] = (box1[i] * score1 + box2[i] * score2) / (score1 + score2); + } +} + +template +void GetMaxScoreIndexWithLocalityAware( + T* scores, T* bbox_data, int64_t box_size, const T threshold, int top_k, + int64_t num_boxes, std::vector>* sorted_indices, + const T nms_threshold, const bool normalized) { + std::vector skip(num_boxes, true); + int index = -1; + for (int64_t i = 0; i < num_boxes; ++i) { + if (index > -1) { + T overlap = T(0.); + if (box_size == 4) { + overlap = JaccardOverlap(bbox_data + i * box_size, + bbox_data + index * box_size, normalized); + } + // 8: [x1 y1 x2 y2 x3 y3 x4 y4] or 16, 24, 32 + if (box_size == 8 || box_size == 16 || box_size == 24 || box_size == 32) { + overlap = + PolyIoU(bbox_data + i * box_size, bbox_data + index * box_size, + box_size, normalized); + } + + if (overlap > nms_threshold) { + PolyWeightedMerge(bbox_data + i * box_size, + bbox_data + index * box_size, scores[i], + scores[index], box_size); + scores[index] += scores[i]; + } else { + skip[index] = false; + index = i; + } + } else { + index = i; + } + } + + if (index > -1) { + skip[index] = false; + } + for (int64_t i = 0; i < num_boxes; ++i) { + if (scores[i] > threshold && skip[i] == false) { + sorted_indices->push_back(std::make_pair(scores[i], i)); + } + } + // Sort the score pair according to the scores in descending order + std::stable_sort(sorted_indices->begin(), sorted_indices->end(), + SortScorePairDescend); + // Keep top_k scores if needed. 
+ if (top_k > -1 && top_k < static_cast(sorted_indices->size())) { + sorted_indices->resize(top_k); + } +} + +template +class LocalityAwareNMSKernel : public framework::OpKernel { + public: + void LocalityAwareNMSFast(Tensor* bbox, Tensor* scores, + const T score_threshold, const T nms_threshold, + const T eta, const int64_t top_k, + std::vector* selected_indices, + const bool normalized) const { + // The total boxes for each instance. + int64_t num_boxes = bbox->dims()[0]; + // 4: [xmin ymin xmax ymax] + // 8: [x1 y1 x2 y2 x3 y3 x4 y4] + // 16, 24, or 32: [x1 y1 x2 y2 ... xn yn], n = 8, 12 or 16 + int64_t box_size = bbox->dims()[1]; + + std::vector> sorted_indices; + T adaptive_threshold = nms_threshold; + T* bbox_data = bbox->data(); + T* scores_data = scores->data(); + + GetMaxScoreIndexWithLocalityAware( + scores_data, bbox_data, box_size, score_threshold, top_k, num_boxes, + &sorted_indices, nms_threshold, normalized); + + selected_indices->clear(); + + while (sorted_indices.size() != 0) { + const int idx = sorted_indices.front().second; + bool keep = true; + for (size_t k = 0; k < selected_indices->size(); ++k) { + if (keep) { + const int kept_idx = (*selected_indices)[k]; + T overlap = T(0.); + // 4: [xmin ymin xmax ymax] + if (box_size == 4) { + overlap = + JaccardOverlap(bbox_data + idx * box_size, + bbox_data + kept_idx * box_size, normalized); + } + // 8: [x1 y1 x2 y2 x3 y3 x4 y4] or 16, 24, 32 + if (box_size == 8 || box_size == 16 || box_size == 24 || + box_size == 32) { + overlap = PolyIoU(bbox_data + idx * box_size, + bbox_data + kept_idx * box_size, box_size, + normalized); + } + keep = overlap <= adaptive_threshold; + } else { + break; + } + } + if (keep) { + selected_indices->push_back(idx); + } + sorted_indices.erase(sorted_indices.begin()); + if (keep && eta < 1 && adaptive_threshold > 0.5) { + adaptive_threshold *= eta; + } + } + // delete bbox_data; + } + + void LocalityAwareNMS(const framework::ExecutionContext& ctx, Tensor* scores, + Tensor* 
bboxes, const int scores_size, + std::map>* indices, + int* num_nmsed_out) const { + int64_t background_label = ctx.Attr("background_label"); + int64_t nms_top_k = ctx.Attr("nms_top_k"); + int64_t keep_top_k = ctx.Attr("keep_top_k"); + bool normalized = ctx.Attr("normalized"); + T nms_threshold = static_cast(ctx.Attr("nms_threshold")); + T nms_eta = static_cast(ctx.Attr("nms_eta")); + T score_threshold = static_cast(ctx.Attr("score_threshold")); + + int num_det = 0; + + int64_t class_num = scores->dims()[0]; + Tensor bbox_slice, score_slice; + for (int64_t c = 0; c < class_num; ++c) { + if (c == background_label) continue; + + score_slice = scores->Slice(c, c + 1); + bbox_slice = *bboxes; + + LocalityAwareNMSFast(&bbox_slice, &score_slice, score_threshold, + nms_threshold, nms_eta, nms_top_k, &((*indices)[c]), + normalized); + num_det += (*indices)[c].size(); + } + + *num_nmsed_out = num_det; + const T* scores_data = scores->data(); + if (keep_top_k > -1 && num_det > keep_top_k) { + const T* sdata; + std::vector>> score_index_pairs; + for (const auto& it : *indices) { + int label = it.first; + + sdata = scores_data + label * scores->dims()[1]; + + const std::vector& label_indices = it.second; + for (size_t j = 0; j < label_indices.size(); ++j) { + int idx = label_indices[j]; + score_index_pairs.push_back( + std::make_pair(sdata[idx], std::make_pair(label, idx))); + } + } + // Keep top k results per image. + std::stable_sort(score_index_pairs.begin(), score_index_pairs.end(), + SortScorePairDescend>); + score_index_pairs.resize(keep_top_k); + + // Store the new indices. 
+ std::map> new_indices; + for (size_t j = 0; j < score_index_pairs.size(); ++j) { + int label = score_index_pairs[j].second.first; + int idx = score_index_pairs[j].second.second; + new_indices[label].push_back(idx); + } + + new_indices.swap(*indices); + *num_nmsed_out = keep_top_k; + } + } + + void LocalityAwareNMSOutput( + const platform::DeviceContext& ctx, const Tensor& scores, + const Tensor& bboxes, + const std::map>& selected_indices, + const int scores_size, Tensor* outs, int* oindices = nullptr, + const int offset = 0) const { + int64_t predict_dim = scores.dims()[1]; + int64_t box_size = bboxes.dims()[1]; + if (scores_size == 2) { + box_size = bboxes.dims()[2]; + } + int64_t out_dim = box_size + 2; + auto* scores_data = scores.data(); + auto* bboxes_data = bboxes.data(); + auto* odata = outs->data(); + const T* sdata; + Tensor bbox; + bbox.Resize({scores.dims()[0], box_size}); + int count = 0; + for (const auto& it : selected_indices) { + int label = it.first; + const std::vector& indices = it.second; + sdata = scores_data + label * predict_dim; + for (size_t j = 0; j < indices.size(); ++j) { + int idx = indices[j]; + + odata[count * out_dim] = label; // label + const T* bdata; + bdata = bboxes_data + idx * box_size; + odata[count * out_dim + 1] = sdata[idx]; // score + if (oindices != nullptr) { + oindices[count] = offset + idx; + } + + // xmin, ymin, xmax, ymax or multi-points coordinates + std::memcpy(odata + count * out_dim + 2, bdata, box_size * sizeof(T)); + count++; + } + } + } + + void Compute(const framework::ExecutionContext& ctx) const override { + auto* boxes_input = ctx.Input("BBoxes"); + auto* scores_input = ctx.Input("Scores"); + auto* outs = ctx.Output("Out"); + auto score_dims = scores_input->dims(); + auto score_size = score_dims.size(); + auto& dev_ctx = ctx.template device_context(); + + LoDTensor scores; + LoDTensor boxes; + TensorCopySync(*scores_input, platform::CPUPlace(), &scores); + TensorCopySync(*boxes_input, 
platform::CPUPlace(), &boxes); + std::vector>> all_indices; + std::vector batch_starts = {0}; + int64_t batch_size = score_dims[0]; + int64_t box_dim = boxes.dims()[2]; + int64_t out_dim = box_dim + 2; + int num_nmsed_out = 0; + Tensor boxes_slice, scores_slice; + int n = batch_size; + for (int i = 0; i < n; ++i) { + scores_slice = scores.Slice(i, i + 1); + scores_slice.Resize({score_dims[1], score_dims[2]}); + boxes_slice = boxes.Slice(i, i + 1); + boxes_slice.Resize({score_dims[2], box_dim}); + + std::map> indices; + LocalityAwareNMS(ctx, &scores_slice, &boxes_slice, score_size, &indices, + &num_nmsed_out); + all_indices.push_back(indices); + batch_starts.push_back(batch_starts.back() + num_nmsed_out); + } + + int num_kept = batch_starts.back(); + if (num_kept == 0) { + T* od = outs->mutable_data({1, 1}, ctx.GetPlace()); + od[0] = -1; + batch_starts = {0, 1}; + } else { + outs->mutable_data({num_kept, out_dim}, ctx.GetPlace()); + int offset = 0; + int* oindices = nullptr; + for (int i = 0; i < n; ++i) { + scores_slice = scores.Slice(i, i + 1); + boxes_slice = boxes.Slice(i, i + 1); + scores_slice.Resize({score_dims[1], score_dims[2]}); + boxes_slice.Resize({score_dims[2], box_dim}); + + int64_t s = batch_starts[i]; + int64_t e = batch_starts[i + 1]; + if (e > s) { + Tensor out = outs->Slice(s, e); + LocalityAwareNMSOutput(dev_ctx, scores_slice, boxes_slice, + all_indices[i], score_dims.size(), &out, + oindices, offset); + } + } + } + + framework::LoD lod; + lod.emplace_back(batch_starts); + outs->set_lod(lod); + } +}; + +class LocalityAwareNMSOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("BBoxes", + "Two types of bboxes are supported:" + "1. (Tensor) A 3-D Tensor with shape " + "[N, M, 4 or 8 16 24 32] represents the " + "predicted locations of M bounding bboxes, N is the batch size. 
" + "Each bounding box has four coordinate values and the layout is " + "[xmin, ymin, xmax, ymax], when box size equals to 4."); + AddInput("Scores", + "Two types of scores are supported:" + "1. (Tensor) A 3-D Tensor with shape [N, C, M] represents the " + "predicted confidence predictions. N is the batch size, C is the " + "class number, M is number of bounding boxes. For each category " + "there are total M scores which corresponding M bounding boxes. " + " Please note, M is equal to the 2nd dimension of BBoxes. "); + AddAttr( + "background_label", + "(int, default: -1) " + "The index of background label, the background label will be ignored. " + "If set to -1, then all categories will be considered.") + .SetDefault(-1); + AddAttr("score_threshold", + "(float) " + "Threshold to filter out bounding boxes with low " + "confidence score. If not provided, consider all boxes."); + AddAttr("nms_top_k", + "(int64_t) " + "Maximum number of detections to be kept according to the " + "confidences aftern the filtering detections based on " + "score_threshold"); + AddAttr("nms_threshold", + "(float, default: 0.3) " + "The threshold to be used in NMS.") + .SetDefault(0.3); + AddAttr("nms_eta", + "(float) " + "The parameter for adaptive NMS.") + .SetDefault(1.0); + AddAttr("keep_top_k", + "(int64_t) " + "Number of total bboxes to be kept per image after NMS " + "step. -1 means keeping all bboxes after NMS step."); + AddAttr("normalized", + "(bool, default true) " + "Whether detections are normalized.") + .SetDefault(true); + AddOutput("Out", + "(LoDTensor) A 2-D LoDTensor with shape [No, 6] represents the " + "detections. Each row has 6 values: " + "[label, confidence, xmin, ymin, xmax, ymax] or " + "(LoDTensor) A 2-D LoDTensor with shape [No, 10] represents the " + "detections. Each row has 10 values: " + "[label, confidence, x1, y1, x2, y2, x3, y3, x4, y4]. No is the " + "total number of detections in this mini-batch." 
+ "For each instance, " + "the offsets in first dimension are called LoD, the number of " + "offset is N + 1, if LoD[i + 1] - LoD[i] == 0, means there is " + "no detected bbox."); + AddComment(R"DOC( +This operator is to do locality-aware non maximum suppression (NMS) on a batched +of boxes and scores. +Firstly, this operator merge box and score according their IOU(intersection over union). +In the NMS step, this operator greedily selects a subset of detection bounding +boxes that have high scores larger than score_threshold, if providing this +threshold, then selects the largest nms_top_k confidences scores if nms_top_k +is larger than -1. Then this operator pruns away boxes that have high IOU +(intersection over union) overlap with already selected boxes by adaptive +threshold NMS based on parameters of nms_threshold and nms_eta. +Aftern NMS step, at most keep_top_k number of total bboxes are to be kept +per image if keep_top_k is larger than -1. +This operator support multi-class and batched inputs. It applying NMS +independently for each class. The outputs is a 2-D LoDTenosr, for each +image, the offsets in first dimension of LoDTensor are called LoD, the number +of offset is N + 1, where N is the batch size. If LoD[i + 1] - LoD[i] == 0, +means there is no detected bbox for this image. + +Please get more information from the following papers: +https://arxiv.org/abs/1704.03155. 
+)DOC"); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR( + locality_aware_nms, ops::LocalityAwareNMSOp, ops::LocalityAwareNMSOpMaker, + paddle::framework::EmptyGradOpMaker, + paddle::framework::EmptyGradOpMaker); +REGISTER_OP_CPU_KERNEL(locality_aware_nms, ops::LocalityAwareNMSKernel, + ops::LocalityAwareNMSKernel); diff --git a/paddle/fluid/operators/detection/multiclass_nms_op.cc b/paddle/fluid/operators/detection/multiclass_nms_op.cc index eb9a3c704833abdf5327815b5f15d13bbb8a2a8f..62d6bb3ac15809919157f228ae058c68dd5355f2 100644 --- a/paddle/fluid/operators/detection/multiclass_nms_op.cc +++ b/paddle/fluid/operators/detection/multiclass_nms_op.cc @@ -13,7 +13,7 @@ limitations under the License. */ #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/detection/poly_util.h" +#include "paddle/fluid/operators/detection/nms_util.h" namespace paddle { namespace operators { @@ -85,84 +85,6 @@ class MultiClassNMSOp : public framework::OperatorWithKernel { } }; -template -bool SortScorePairDescend(const std::pair& pair1, - const std::pair& pair2) { - return pair1.first > pair2.first; -} - -template -static inline void GetMaxScoreIndex( - const std::vector& scores, const T threshold, int top_k, - std::vector>* sorted_indices) { - for (size_t i = 0; i < scores.size(); ++i) { - if (scores[i] > threshold) { - sorted_indices->push_back(std::make_pair(scores[i], i)); - } - } - // Sort the score pair according to the scores in descending order - std::stable_sort(sorted_indices->begin(), sorted_indices->end(), - SortScorePairDescend); - // Keep top_k scores if needed. - if (top_k > -1 && top_k < static_cast(sorted_indices->size())) { - sorted_indices->resize(top_k); - } -} - -template -static inline T BBoxArea(const T* box, const bool normalized) { - if (box[2] < box[0] || box[3] < box[1]) { - // If coordinate values are is invalid - // (e.g. 
xmax < xmin or ymax < ymin), return 0. - return static_cast(0.); - } else { - const T w = box[2] - box[0]; - const T h = box[3] - box[1]; - if (normalized) { - return w * h; - } else { - // If coordinate values are not within range [0, 1]. - return (w + 1) * (h + 1); - } - } -} - -template -static inline T JaccardOverlap(const T* box1, const T* box2, - const bool normalized) { - if (box2[0] > box1[2] || box2[2] < box1[0] || box2[1] > box1[3] || - box2[3] < box1[1]) { - return static_cast(0.); - } else { - const T inter_xmin = std::max(box1[0], box2[0]); - const T inter_ymin = std::max(box1[1], box2[1]); - const T inter_xmax = std::min(box1[2], box2[2]); - const T inter_ymax = std::min(box1[3], box2[3]); - T norm = normalized ? static_cast(0.) : static_cast(1.); - T inter_w = inter_xmax - inter_xmin + norm; - T inter_h = inter_ymax - inter_ymin + norm; - const T inter_area = inter_w * inter_h; - const T bbox1_area = BBoxArea(box1, normalized); - const T bbox2_area = BBoxArea(box2, normalized); - return inter_area / (bbox1_area + bbox2_area - inter_area); - } -} - -template -T PolyIoU(const T* box1, const T* box2, const size_t box_size, - const bool normalized) { - T bbox1_area = PolyArea(box1, box_size, normalized); - T bbox2_area = PolyArea(box2, box_size, normalized); - T inter_area = PolyOverlapArea(box1, box2, box_size, normalized); - if (bbox1_area == 0 || bbox2_area == 0 || inter_area == 0) { - // If coordinate values are invalid - // if area size <= 0, return 0. 
- return T(0.); - } else { - return inter_area / (bbox1_area + bbox2_area - inter_area); - } -} - template void SliceOneClass(const platform::DeviceContext& ctx, const framework::Tensor& items, const int class_id, diff --git a/paddle/fluid/operators/detection/nms_util.h b/paddle/fluid/operators/detection/nms_util.h new file mode 100644 index 0000000000000000000000000000000000000000..067bfce51949c7526ebe87bb51722327691db555 --- /dev/null +++ b/paddle/fluid/operators/detection/nms_util.h @@ -0,0 +1,103 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include +#include +#include +#include "paddle/fluid/operators/detection/poly_util.h" + +namespace paddle { +namespace operators { + +template +bool SortScorePairDescend(const std::pair& pair1, + const std::pair& pair2) { + return pair1.first > pair2.first; +} + +template +static inline void GetMaxScoreIndex( + const std::vector& scores, const T threshold, int top_k, + std::vector>* sorted_indices) { + for (size_t i = 0; i < scores.size(); ++i) { + if (scores[i] > threshold) { + sorted_indices->push_back(std::make_pair(scores[i], i)); + } + } + // Sort the score pair according to the scores in descending order + std::stable_sort(sorted_indices->begin(), sorted_indices->end(), + SortScorePairDescend); + // Keep top_k scores if needed. 
+ if (top_k > -1 && top_k < static_cast(sorted_indices->size())) { + sorted_indices->resize(top_k); + } +} + +template +static inline T BBoxArea(const T* box, const bool normalized) { + if (box[2] < box[0] || box[3] < box[1]) { + // If coordinate values are is invalid + // (e.g. xmax < xmin or ymax < ymin), return 0. + return static_cast(0.); + } else { + const T w = box[2] - box[0]; + const T h = box[3] - box[1]; + if (normalized) { + return w * h; + } else { + // If coordinate values are not within range [0, 1]. + return (w + 1) * (h + 1); + } + } +} + +template +static inline T JaccardOverlap(const T* box1, const T* box2, + const bool normalized) { + if (box2[0] > box1[2] || box2[2] < box1[0] || box2[1] > box1[3] || + box2[3] < box1[1]) { + return static_cast(0.); + } else { + const T inter_xmin = std::max(box1[0], box2[0]); + const T inter_ymin = std::max(box1[1], box2[1]); + const T inter_xmax = std::min(box1[2], box2[2]); + const T inter_ymax = std::min(box1[3], box2[3]); + T norm = normalized ? static_cast(0.) : static_cast(1.); + T inter_w = inter_xmax - inter_xmin + norm; + T inter_h = inter_ymax - inter_ymin + norm; + const T inter_area = inter_w * inter_h; + const T bbox1_area = BBoxArea(box1, normalized); + const T bbox2_area = BBoxArea(box2, normalized); + return inter_area / (bbox1_area + bbox2_area - inter_area); + } +} + +template +T PolyIoU(const T* box1, const T* box2, const size_t box_size, + const bool normalized) { + T bbox1_area = PolyArea(box1, box_size, normalized); + T bbox2_area = PolyArea(box2, box_size, normalized); + T inter_area = PolyOverlapArea(box1, box2, box_size, normalized); + if (bbox1_area == 0 || bbox2_area == 0 || inter_area == 0) { + // If coordinate values are invalid + // if area size <= 0, return 0. 
+ return T(0.); + } else { + return inter_area / (bbox1_area + bbox2_area - inter_area); + } +} + +} // namespace operators +} // namespace paddle diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index e84510292f26a2732a70695b08948fcdf2c22cc7..fd691348c65228af290e4b44dd444400ab246175 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -53,6 +53,7 @@ __all__ = [ 'yolo_box', 'box_clip', 'multiclass_nms', + 'locality_aware_nms', 'retinanet_detection_output', 'distribute_fpn_proposals', 'box_decoder_and_assign', @@ -3147,6 +3148,124 @@ def multiclass_nms(bboxes, return output +def locality_aware_nms(bboxes, + scores, + score_threshold, + nms_top_k, + keep_top_k, + nms_threshold=0.3, + normalized=True, + nms_eta=1., + background_label=-1, + name=None): + """ + **Locality Aware NMS** + + `Locality Aware NMS <https://arxiv.org/abs/1704.03155>`_ performs locality-aware non maximum + suppression (LANMS) on boxes and scores. + + First, this operator merges boxes and scores according to their IOU + (intersection over union). In the NMS step, this operator greedily selects a + subset of detection bounding boxes whose scores are larger than score_threshold, + if this threshold is provided, then selects the largest nms_top_k confidence scores + if nms_top_k is larger than -1. Then this operator prunes away boxes that have high + IOU overlap with already selected boxes by adaptive threshold NMS based on parameters + of nms_threshold and nms_eta. + + After the NMS step, at most keep_top_k number of total bboxes are to be kept + per image if keep_top_k is larger than -1. + + Args: + bboxes (Variable): A 3-D Tensor with shape [N, M, 4 or 8 16 24 32] + represents the predicted locations of M bounding + boxes, N is the batch size. Each bounding box + has four coordinate values and the layout is + [xmin, ymin, xmax, ymax], when box size equals 4. + The data type is float32 or float64. 
+ scores (Variable): A 3-D Tensor with shape [N, C, M] represents the + predicted confidence predictions. N is the batch + size, C is the class number, M is number of bounding + boxes. Only 1 class is supported for now. For each category + there are M scores in total, corresponding to the M bounding + boxes. Please note, M is equal to the 2nd dimension of + BBoxes. The data type is float32 or float64. + background_label (int): The index of background label, the background + label will be ignored. If set to -1, then all + categories will be considered. Default: -1 + score_threshold (float): Threshold to filter out bounding boxes with + low confidence score. If not provided, + consider all boxes. + nms_top_k (int): Maximum number of detections to be kept according to + the confidences after filtering detections based + on score_threshold. + nms_threshold (float): The threshold to be used in NMS. Default: 0.3 + nms_eta (float): The parameter for adaptive NMS; when less than 1, the NMS threshold is multiplied by it after each kept box while the threshold is above 0.5. Default: 1.0 + keep_top_k (int): Number of total bboxes to be kept per image after NMS + step. -1 means keeping all bboxes after NMS step. + normalized (bool): Whether detections are normalized. Default: True + name(str): Name of the locality aware nms op, please refer to :ref:`api_guide_Name` . + Default: None. + + Returns: + Variable: A 2-D LoDTensor with shape [No, 6] represents the detections. + Each row has 6 values: [label, confidence, xmin, ymin, xmax, ymax] + or A 2-D LoDTensor with shape [No, 10] represents the detections. + Each row has 10 values: + [label, confidence, x1, y1, x2, y2, x3, y3, x4, y4]. No is the + total number of detections. If there are no detected boxes for all + images, lod will be set to {1} and Out only contains one value + which is -1. + (After version 1.3, when no boxes detected, the lod is changed + from {0} to {1}). The data type is float32 or float64. + + + Examples: + .. 
code-block:: python + + + import paddle.fluid as fluid + boxes = fluid.data(name='bboxes', shape=[None, 81, 8], + dtype='float32') + scores = fluid.data(name='scores', shape=[None, 1, 81], + dtype='float32') + out = fluid.layers.locality_aware_nms(bboxes=boxes, + scores=scores, + score_threshold=0.5, + nms_top_k=400, + nms_threshold=0.3, + keep_top_k=200, + normalized=False) + """ + shape = scores.shape + assert len(shape) == 3, "dim size of scores must be 3" + assert shape[ + 1] == 1, "locality_aware_nms only support one class, Tensor score shape must be [N, 1, M]" + + helper = LayerHelper('locality_aware_nms', **locals()) + + output = helper.create_variable_for_type_inference(dtype=bboxes.dtype) + out = {'Out': output} + + helper.append_op( + type="locality_aware_nms", + inputs={'BBoxes': bboxes, + 'Scores': scores}, + attrs={ + 'background_label': background_label, + 'score_threshold': score_threshold, + 'nms_top_k': nms_top_k, + 'nms_threshold': nms_threshold, + 'nms_eta': nms_eta, + 'keep_top_k': keep_top_k, + 'nms_eta': nms_eta, + 'normalized': normalized + }, + outputs={'Out': output}) + output.stop_gradient = True + + return output + + def distribute_fpn_proposals(fpn_rois, min_level, max_level, diff --git a/python/paddle/fluid/tests/unittests/test_locality_aware_nms_op.py b/python/paddle/fluid/tests/unittests/test_locality_aware_nms_op.py new file mode 100644 index 0000000000000000000000000000000000000000..1c8526f4df05be9779ce17bdec184b4b8c6dd1bd --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_locality_aware_nms_op.py @@ -0,0 +1,323 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. 
+#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +from __future__ import print_function +import unittest +import numpy as np +import copy +from op_test import OpTest +from test_multiclass_nms_op import iou +import paddle.fluid as fluid + + +def weight_merge(box1, box2, score1, score2): + for i in range(len(box1)): + box2[i] = (box1[i] * score1 + box2[i] * score2) / (score1 + score2) + + +def nms(boxes, + scores, + score_threshold, + nms_threshold, + top_k=200, + normalized=True, + eta=1.0): + """Apply non-maximum suppression at test time to avoid detecting too many + overlapping bounding boxes for a given object. + Args: + boxes: (tensor) The location preds for the img, Shape: [num_priors,4]. + scores: (tensor) The class predscores for the img, Shape:[num_priors]. + score_threshold: (float) The confidence thresh for filtering low + confidence boxes. + nms_threshold: (float) The overlap thresh for suppressing unnecessary + boxes. + top_k: (int) The maximum number of box preds to consider. + eta: (float) The parameter for adaptive NMS. + Return: + The indices of the kept boxes with respect to num_priors. + """ + index = -1 + for i in range(boxes.shape[0]): + if index > -1 and iou(boxes[i], boxes[index], + normalized) > nms_threshold: + weight_merge(boxes[i], boxes[index], scores[i], scores[index]) + scores[index] += scores[i] + scores[i] = score_threshold - 1. 
+ else: + index = i + + all_scores = copy.deepcopy(scores) + all_scores = all_scores.flatten() + + selected_indices = np.argwhere(all_scores > score_threshold) + selected_indices = selected_indices.flatten() + all_scores = all_scores[selected_indices] + + sorted_indices = np.argsort(-all_scores, axis=0, kind='mergesort') + sorted_scores = all_scores[sorted_indices] + sorted_indices = selected_indices[sorted_indices] + + if top_k > -1 and top_k < sorted_indices.shape[0]: + sorted_indices = sorted_indices[:top_k] + sorted_scores = sorted_scores[:top_k] + + selected_indices = [] + adaptive_threshold = nms_threshold + for i in range(sorted_scores.shape[0]): + idx = sorted_indices[i] + keep = True + for k in range(len(selected_indices)): + if keep: + kept_idx = selected_indices[k] + overlap = iou(boxes[idx], boxes[kept_idx], normalized) + keep = True if overlap <= adaptive_threshold else False + else: + break + if keep: + selected_indices.append(idx) + if keep and eta < 1 and adaptive_threshold > 0.5: + adaptive_threshold *= eta + return selected_indices + + +def multiclass_nms(boxes, scores, background, score_threshold, nms_threshold, + nms_top_k, keep_top_k, normalized, shared): + if shared: + class_num = scores.shape[0] + priorbox_num = scores.shape[1] + else: + box_num = scores.shape[0] + class_num = scores.shape[1] + + selected_indices = {} + num_det = 0 + for c in range(class_num): + if c == background: continue + if shared: + indices = nms(boxes, scores[c], score_threshold, nms_threshold, + nms_top_k, normalized) + else: + indices = nms(boxes[:, c, :], scores[:, c], score_threshold, + nms_threshold, nms_top_k, normalized) + selected_indices[c] = indices + num_det += len(indices) + + if keep_top_k > -1 and num_det > keep_top_k: + score_index = [] + for c, indices in selected_indices.items(): + for idx in indices: + if shared: + score_index.append((scores[c][idx], c, idx)) + else: + score_index.append((scores[idx][c], c, idx)) + + sorted_score_index = sorted( + 
score_index, key=lambda tup: tup[0], reverse=True) + sorted_score_index = sorted_score_index[:keep_top_k] + selected_indices = {} + + for _, c, _ in sorted_score_index: + selected_indices[c] = [] + for s, c, idx in sorted_score_index: + selected_indices[c].append(idx) + if not shared: + for labels in selected_indices: + selected_indices[labels].sort() + num_det = keep_top_k + + return selected_indices, num_det + + +def batched_multiclass_nms(boxes, + scores, + background, + score_threshold, + nms_threshold, + nms_top_k, + keep_top_k, + normalized=True): + batch_size = scores.shape[0] + num_boxes = scores.shape[2] + det_outs = [] + + lod = [] + for n in range(batch_size): + nmsed_outs, nmsed_num = multiclass_nms( + boxes[n], + scores[n], + background, + score_threshold, + nms_threshold, + nms_top_k, + keep_top_k, + normalized, + shared=True) + lod.append(nmsed_num) + + if nmsed_num == 0: + continue + tmp_det_out = [] + for c, indices in nmsed_outs.items(): + for idx in indices: + xmin, ymin, xmax, ymax = boxes[n][idx][:] + tmp_det_out.append([ + c, scores[n][c][idx], xmin, ymin, xmax, ymax, + idx + n * num_boxes + ]) + sorted_det_out = sorted( + tmp_det_out, key=lambda tup: tup[0], reverse=False) + det_outs.extend(sorted_det_out) + return det_outs, lod + + +class TestLocalAwareNMSOp(OpTest): + def set_argument(self): + self.score_threshold = 0.01 + + def setUp(self): + self.set_argument() + N = 10 + M = 1200 + C = 1 + BOX_SIZE = 4 + background = -1 + nms_threshold = 0.3 + nms_top_k = 400 + keep_top_k = 10 + score_threshold = self.score_threshold + + scores = np.random.random((N * M, C)).astype('float32') + + def softmax(x): + shiftx = x - np.max(x).clip(-64.) 
+ exps = np.exp(shiftx) + return exps / np.sum(exps) + + scores = np.apply_along_axis(softmax, 1, scores) + scores = np.reshape(scores, (N, M, C)) + scores = np.transpose(scores, (0, 2, 1)) + + boxes = np.random.random((N, M, BOX_SIZE)).astype('float32') + boxes[:, :, 0:2] = boxes[:, :, 0:2] * 0.5 + boxes[:, :, 2:4] = boxes[:, :, 2:4] * 0.5 + 0.5 + + boxes_copy = copy.deepcopy(boxes) + scores_copy = copy.deepcopy(scores) + det_outs, lod = batched_multiclass_nms( + boxes_copy, scores_copy, background, score_threshold, nms_threshold, + nms_top_k, keep_top_k) + + lod = [1] if not det_outs else lod + det_outs = [[-1, 0]] if not det_outs else det_outs + det_outs = np.array(det_outs) + nmsed_outs = det_outs[:, :-1].astype('float32') + + self.op_type = 'locality_aware_nms' + self.inputs = {'BBoxes': boxes, 'Scores': scores} + self.outputs = {'Out': (nmsed_outs, [lod])} + self.attrs = { + 'background_label': background, + 'nms_threshold': nms_threshold, + 'nms_top_k': nms_top_k, + 'keep_top_k': keep_top_k, + 'score_threshold': score_threshold, + 'nms_eta': 1.0, + 'normalized': True, + } + + def test_check_output(self): + self.check_output() + + +class TestLocalAwareNMSOpNoBoxes(TestLocalAwareNMSOp): + def set_argument(self): + self.score_threshold = 2.0 + + +class TestLocalAwareNMSOp4Points(OpTest): + def set_argument(self): + self.score_threshold = 0.01 + + def setUp(self): + self.set_argument() + N = 2 + M = 2 + C = 1 + BOX_SIZE = 8 + nms_top_k = 400 + keep_top_k = 200 + nms_threshold = 0.3 + score_threshold = self.score_threshold + + scores = np.array([[[0.76319082, 0.73770091]], + [[0.68513154, 0.45952697]]]) + boxes = np.array([[[ + 0.42078365, 0.58117018, 2.92776169, 3.28557757, 4.24344318, + 0.92196165, 2.72370856, -1.66141214 + ], [ + 0.13856006, 1.86871034, 2.81287224, 3.61381734, 4.5505249, + 0.51766346, 2.75630304, -1.91459389 + ]], [[ + 1.57533883, 1.3217477, 3.07904942, 3.89512545, 4.78680923, + 1.96914586, 3.539482, -1.59739244 + ], [ + 0.55084125, 
1.71596215, 2.52476074, 3.18940435, 5.09035159, + 0.91959482, 3.71442385, -0.57299128 + ]]]) + + det_outs = np.array([[ + 0., 1.5008917, 0.28206837, 1.2140071, 2.8712926, 3.4469104, + 4.3943763, 0.7232457, 2.7397292, -1.7858533 + ], [ + 0., 1.1446586, 1.1640508, 1.4800063, 2.856528, 3.6118112, 4.908667, + 1.5478, 3.609713, -1.1861432 + ]]) + lod = [1, 1] + nmsed_outs = det_outs.astype('float32') + + self.op_type = 'locality_aware_nms' + self.inputs = { + 'BBoxes': boxes.astype('float32'), + 'Scores': scores.astype('float32') + } + self.outputs = {'Out': (nmsed_outs, [lod])} + self.attrs = { + 'score_threshold': score_threshold, + 'nms_threshold': nms_threshold, + 'nms_top_k': nms_top_k, + 'keep_top_k': keep_top_k, + 'background_label': -1, + 'normalized': False + } + + def test_check_output(self): + self.check_output() + + +class TestLocalityAwareNMSAPI(OpTest): + def test_api(self): + boxes = fluid.data(name='bboxes', shape=[None, 81, 8], dtype='float32') + scores = fluid.data(name='scores', shape=[None, 1, 81], dtype='float32') + fluid.layers.locality_aware_nms( + bboxes=boxes, + scores=scores, + score_threshold=0.5, + nms_top_k=400, + nms_threshold=0.3, + keep_top_k=200, + normalized=False) + + +if __name__ == '__main__': + unittest.main()