diff --git a/paddle/fluid/operators/detection/CMakeLists.txt b/paddle/fluid/operators/detection/CMakeLists.txt index 75a26ac2bdd077260954f0947ea419115aacea00..537063640e4ef6e49f7b991482f0f3122ecef02f 100644 --- a/paddle/fluid/operators/detection/CMakeLists.txt +++ b/paddle/fluid/operators/detection/CMakeLists.txt @@ -32,6 +32,7 @@ detection_library(rpn_target_assign_op SRCS rpn_target_assign_op.cc) detection_library(generate_proposal_labels_op SRCS generate_proposal_labels_op.cc) detection_library(multiclass_nms_op SRCS multiclass_nms_op.cc DEPS gpc) detection_library(locality_aware_nms_op SRCS locality_aware_nms_op.cc DEPS gpc) +detection_library(matrix_nms_op SRCS matrix_nms_op.cc DEPS gpc) detection_library(box_clip_op SRCS box_clip_op.cc box_clip_op.cu) detection_library(yolov3_loss_op SRCS yolov3_loss_op.cc) detection_library(yolo_box_op SRCS yolo_box_op.cc yolo_box_op.cu) diff --git a/paddle/fluid/operators/detection/matrix_nms_op.cc b/paddle/fluid/operators/detection/matrix_nms_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..f7d45bc85bf6b14f07e9bfda3615e0b2d51a09f1 --- /dev/null +++ b/paddle/fluid/operators/detection/matrix_nms_op.cc @@ -0,0 +1,389 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +limitations under the License. */ + +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/detection/nms_util.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +using LoDTensor = framework::LoDTensor; + +class MatrixNMSOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + OP_INOUT_CHECK(ctx->HasInput("BBoxes"), "Input", "BBoxes", "MatrixNMS"); + OP_INOUT_CHECK(ctx->HasInput("Scores"), "Input", "Scores", "MatrixNMS"); + OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "MatrixNMS"); + auto box_dims = ctx->GetInputDim("BBoxes"); + auto score_dims = ctx->GetInputDim("Scores"); + auto score_size = score_dims.size(); + + if (ctx->IsRuntime()) { + PADDLE_ENFORCE_EQ(score_size == 3, true, + platform::errors::InvalidArgument( + "The rank of Input(Scores) must be 3. " + "But received rank = %d.", + score_size)); + PADDLE_ENFORCE_EQ(box_dims.size(), 3, + platform::errors::InvalidArgument( + "The rank of Input(BBoxes) must be 3." + "But received rank = %d.", + box_dims.size())); + PADDLE_ENFORCE_EQ(box_dims[2] == 4, true, + platform::errors::InvalidArgument( + "The last dimension of Input (BBoxes) must be 4, " + "represents the layout of coordinate " + "[xmin, ymin, xmax, ymax].")); + PADDLE_ENFORCE_EQ( + box_dims[1], score_dims[2], + platform::errors::InvalidArgument( + "The 2nd dimension of Input(BBoxes) must be equal to " + "last dimension of Input(Scores), which represents the " + "predicted bboxes." + "But received box_dims[1](%s) != socre_dims[2](%s)", + box_dims[1], score_dims[2])); + } + ctx->SetOutputDim("Out", {box_dims[1], box_dims[2] + 2}); + ctx->SetOutputDim("Index", {box_dims[1], 1}); + if (!ctx->IsRuntime()) { + ctx->SetLoDLevel("Out", std::max(ctx->GetLoDLevel("BBoxes"), 1)); + ctx->SetLoDLevel("Index", std::max(ctx->GetLoDLevel("BBoxes"), 1)); + } + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType( + OperatorWithKernel::IndicateVarDataType(ctx, "Scores"), + platform::CPUPlace()); + } +}; + +template +struct decay_score; + +template +struct decay_score { + T operator()(T iou, T max_iou, T sigma) { + return std::exp((max_iou * max_iou - iou * iou) * sigma); + } +}; + +template +struct decay_score { + T operator()(T iou, T max_iou, T sigma) { + return (1. - iou) / (1. - max_iou); + } +}; + +template +void NMSMatrix(const Tensor& bbox, const Tensor& scores, + const T score_threshold, const T post_threshold, + const float sigma, const int64_t top_k, const bool normalized, + std::vector* selected_indices, + std::vector* decayed_scores) { + int64_t num_boxes = bbox.dims()[0]; + int64_t box_size = bbox.dims()[1]; + + auto score_ptr = scores.data(); + auto bbox_ptr = bbox.data(); + + std::vector perm(num_boxes); + std::iota(perm.begin(), perm.end(), 0); + auto end = std::remove_if(perm.begin(), perm.end(), + [&score_ptr, score_threshold](int32_t idx) { + return score_ptr[idx] <= score_threshold; + }); + + auto sort_fn = [&score_ptr](int32_t lhs, int32_t rhs) { + return score_ptr[lhs] > score_ptr[rhs]; + }; + + int64_t num_pre = std::distance(perm.begin(), end); + if (num_pre <= 0) { + return; + } + if (top_k > -1 && num_pre > top_k) { + num_pre = top_k; + } + std::partial_sort(perm.begin(), perm.begin() + num_pre, end, sort_fn); + + std::vector iou_matrix((num_pre * (num_pre - 1)) >> 1); + std::vector iou_max(num_pre); + + iou_max[0] = 0.; + for (int64_t i = 1; i < num_pre; i++) { + T max_iou = 0.; + auto idx_a = perm[i]; + for (int64_t j = 0; j < i; j++) { + auto idx_b = perm[j]; + auto iou = JaccardOverlap(bbox_ptr + idx_a * box_size, + bbox_ptr + idx_b * box_size, normalized); + max_iou = std::max(max_iou, iou); + iou_matrix[i * (i - 1) / 2 + j] = iou; + } + iou_max[i] = max_iou; + } + + if (score_ptr[perm[0]] > post_threshold) { + selected_indices->push_back(perm[0]); + decayed_scores->push_back(score_ptr[perm[0]]); + } + + decay_score decay_fn; + for (int64_t i = 1; i < num_pre; i++) { + T min_decay = 1.; + for (int64_t j = 0; j < i; j++) { + auto max_iou = iou_max[j]; + auto iou = iou_matrix[i * (i - 1) / 2 + j]; + auto decay = decay_fn(iou, max_iou, sigma); + min_decay = std::min(min_decay, decay); + } + auto ds = min_decay * score_ptr[perm[i]]; + if (ds <= post_threshold) continue; + selected_indices->push_back(perm[i]); + decayed_scores->push_back(ds); + } +} + +template +class MatrixNMSKernel : public framework::OpKernel { + public: + size_t MultiClassMatrixNMS(const Tensor& scores, const Tensor& bboxes, + std::vector* out, std::vector* indices, + int start, int64_t background_label, + int64_t nms_top_k, int64_t keep_top_k, + bool normalized, T score_threshold, + T post_threshold, bool use_gaussian, + float gaussian_sigma) const { + std::vector all_indices; + std::vector all_scores; + std::vector all_classes; + all_indices.reserve(scores.numel()); + all_scores.reserve(scores.numel()); + all_classes.reserve(scores.numel()); + + size_t num_det = 0; + auto class_num = scores.dims()[0]; + Tensor score_slice; + for (int64_t c = 0; c < class_num; ++c) { + if (c == background_label) continue; + score_slice = scores.Slice(c, c + 1); + if (use_gaussian) { + NMSMatrix(bboxes, score_slice, score_threshold, post_threshold, + gaussian_sigma, nms_top_k, normalized, &all_indices, + &all_scores); + } else { + NMSMatrix(bboxes, score_slice, score_threshold, + post_threshold, gaussian_sigma, nms_top_k, + normalized, &all_indices, &all_scores); + } + for (size_t i = 0; i < all_indices.size() - num_det; i++) { + all_classes.push_back(static_cast(c)); + } + num_det = all_indices.size(); + } + + if (num_det <= 0) { + return num_det; + } + + if (keep_top_k > -1) { + auto k = static_cast(keep_top_k); + if (num_det > k) num_det = k; + } + + std::vector perm(all_indices.size()); + std::iota(perm.begin(), perm.end(), 0); + + std::partial_sort(perm.begin(), perm.begin() + num_det, perm.end(), + [&all_scores](int lhs, int rhs) { + return all_scores[lhs] > all_scores[rhs]; + }); + + for (size_t i = 0; i < num_det; i++) { + auto p = perm[i]; + auto idx = all_indices[p]; + auto cls = all_classes[p]; + auto score = all_scores[p]; + auto bbox = bboxes.data() + idx * bboxes.dims()[1]; + (*indices).push_back(start + idx); + (*out).push_back(cls); + (*out).push_back(score); + for (int j = 0; j < bboxes.dims()[1]; j++) { + (*out).push_back(bbox[j]); + } + } + + return num_det; + } + + void Compute(const framework::ExecutionContext& ctx) const override { + auto* boxes = ctx.Input("BBoxes"); + auto* scores = ctx.Input("Scores"); + auto* outs = ctx.Output("Out"); + auto* index = ctx.Output("Index"); + + auto background_label = ctx.Attr("background_label"); + auto nms_top_k = ctx.Attr("nms_top_k"); + auto keep_top_k = ctx.Attr("keep_top_k"); + auto normalized = ctx.Attr("normalized"); + auto score_threshold = ctx.Attr("score_threshold"); + auto post_threshold = ctx.Attr("post_threshold"); + auto use_gaussian = ctx.Attr("use_gaussian"); + auto gaussian_sigma = ctx.Attr("gaussian_sigma"); + + auto score_dims = scores->dims(); + auto batch_size = score_dims[0]; + auto num_boxes = score_dims[2]; + auto box_dim = boxes->dims()[2]; + auto out_dim = box_dim + 2; + + Tensor boxes_slice, scores_slice; + size_t num_out = 0; + std::vector offsets = {0}; + std::vector detections; + std::vector indices; + detections.reserve(out_dim * num_boxes * batch_size); + indices.reserve(num_boxes * batch_size); + for (int i = 0; i < batch_size; ++i) { + scores_slice = scores->Slice(i, i + 1); + scores_slice.Resize({score_dims[1], score_dims[2]}); + boxes_slice = boxes->Slice(i, i + 1); + boxes_slice.Resize({score_dims[2], box_dim}); + int start = i * score_dims[2]; + num_out = MultiClassMatrixNMS( + scores_slice, boxes_slice, &detections, &indices, start, + background_label, nms_top_k, keep_top_k, normalized, score_threshold, + post_threshold, use_gaussian, gaussian_sigma); + offsets.push_back(offsets.back() + num_out); + } + + int64_t num_kept = offsets.back(); + if (num_kept == 0) { + outs->mutable_data({0, out_dim}, ctx.GetPlace()); + index->mutable_data({0, 1}, ctx.GetPlace()); + } else { + outs->mutable_data({num_kept, out_dim}, ctx.GetPlace()); + index->mutable_data({num_kept, 1}, ctx.GetPlace()); + std::copy(detections.begin(), detections.end(), outs->data()); + std::copy(indices.begin(), indices.end(), index->data()); + } + + framework::LoD lod; + lod.emplace_back(offsets); + outs->set_lod(lod); + index->set_lod(lod); + } +}; + +class MatrixNMSOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("BBoxes", + "(Tensor) A 3-D Tensor with shape " + "[N, M, 4] represents the predicted locations of M bounding boxes" + ", N is the batch size. " + "Each bounding box has four coordinate values and the layout is " + "[xmin, ymin, xmax, ymax], when box size equals to 4."); + AddInput("Scores", + "(Tensor) A 3-D Tensor with shape [N, C, M] represents the " + "predicted confidence predictions. N is the batch size, C is the " + "class number, M is number of bounding boxes. For each category " + "there are total M scores which corresponding M bounding boxes. " + " Please note, M is equal to the 2nd dimension of BBoxes. "); + AddAttr( + "background_label", + "(int, default: 0) " + "The index of background label, the background label will be ignored. " + "If set to -1, then all categories will be considered.") + .SetDefault(0); + AddAttr("score_threshold", + "(float) " + "Threshold to filter out bounding boxes with low " + "confidence score."); + AddAttr("post_threshold", + "(float, default 0.) " + "Threshold to filter out bounding boxes with low " + "confidence score AFTER decaying.") + .SetDefault(0.); + AddAttr("nms_top_k", + "(int64_t) " + "Maximum number of detections to be kept according to the " + "confidences after the filtering detections based on " + "score_threshold"); + AddAttr("keep_top_k", + "(int64_t) " + "Number of total bboxes to be kept per image after NMS " + "step. -1 means keeping all bboxes after NMS step."); + AddAttr("normalized", + "(bool, default true) " + "Whether detections are normalized.") + .SetDefault(true); + AddAttr("use_gaussian", + "(bool, default false) " + "Whether to use Gaussian as decreasing function.") + .SetDefault(false); + AddAttr("gaussian_sigma", + "(float) " + "Sigma for Gaussian decreasing function, only takes effect ", + "when 'use_gaussian' is enabled.") + .SetDefault(2.); + AddOutput("Out", + "(LoDTensor) A 2-D LoDTensor with shape [No, 6] represents the " + "detections. Each row has 6 values: " + "[label, confidence, xmin, ymin, xmax, ymax]. " + "the offsets in first dimension are called LoD, the number of " + "offset is N + 1, if LoD[i + 1] - LoD[i] == 0, means there is " + "no detected bbox."); + AddOutput("Index", + "(LoDTensor) A 2-D LoDTensor with shape [No, 1] represents the " + "index of selected bbox. The index is the absolute index cross " + "batches."); + AddComment(R"DOC( +This operator does multi-class matrix non maximum suppression (NMS) on batched +boxes and scores. +In the NMS step, this operator greedily selects a subset of detection bounding +boxes that have high scores larger than score_threshold, if providing this +threshold, then selects the largest nms_top_k confidences scores if nms_top_k +is larger than -1. Then this operator decays boxes score according to the +Matrix NMS scheme. +Aftern NMS step, at most keep_top_k number of total bboxes are to be kept +per image if keep_top_k is larger than -1. +This operator support multi-class and batched inputs. It applying NMS +independently for each class. The outputs is a 2-D LoDTenosr, for each +image, the offsets in first dimension of LoDTensor are called LoD, the number +of offset is N + 1, where N is the batch size. If LoD[i + 1] - LoD[i] == 0, +means there is no detected bbox for this image. + +For more information on Matrix NMS, please refer to: +https://arxiv.org/abs/2003.10152 +)DOC"); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR( + matrix_nms, ops::MatrixNMSOp, ops::MatrixNMSOpMaker, + paddle::framework::EmptyGradOpMaker, + paddle::framework::EmptyGradOpMaker); +REGISTER_OP_CPU_KERNEL(matrix_nms, ops::MatrixNMSKernel, + ops::MatrixNMSKernel); diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index 7fb26993b952063f95c08b600ce98d94742baf92..ae92ede4eab47eebc4a8e5827014f04e7b6f2e62 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -57,6 +57,7 @@ __all__ = [ 'box_clip', 'multiclass_nms', 'locality_aware_nms', + 'matrix_nms', 'retinanet_detection_output', 'distribute_fpn_proposals', 'box_decoder_and_assign', @@ -3387,6 +3388,133 @@ def locality_aware_nms(bboxes, return output +def matrix_nms(bboxes, + scores, + score_threshold, + post_threshold, + nms_top_k, + keep_top_k, + use_gaussian=False, + gaussian_sigma=2., + background_label=0, + normalized=True, + return_index=False, + name=None): + """ + **Matrix NMS** + + This operator does matrix non maximum suppression (NMS). + + First selects a subset of candidate bounding boxes that have higher scores + than score_threshold (if provided), then the top k candidate is selected if + nms_top_k is larger than -1. Score of the remaining candidate are then + decayed according to the Matrix NMS scheme. + Aftern NMS step, at most keep_top_k number of total bboxes are to be kept + per image if keep_top_k is larger than -1. + + Args: + bboxes (Variable): A 3-D Tensor with shape [N, M, 4] represents the + predicted locations of M bounding bboxes, + N is the batch size. Each bounding box has four + coordinate values and the layout is + [xmin, ymin, xmax, ymax], when box size equals to 4. + The data type is float32 or float64. + scores (Variable): A 3-D Tensor with shape [N, C, M] + represents the predicted confidence predictions. + N is the batch size, C is the class number, M is + number of bounding boxes. For each category there + are total M scores which corresponding M bounding + boxes. Please note, M is equal to the 2nd dimension + of BBoxes. The data type is float32 or float64. + score_threshold (float): Threshold to filter out bounding boxes with + low confidence score. + post_threshold (float): Threshold to filter out bounding boxes with + low confidence score AFTER decaying. + nms_top_k (int): Maximum number of detections to be kept according to + the confidences after the filtering detections based + on score_threshold. + keep_top_k (int): Number of total bboxes to be kept per image after NMS + step. -1 means keeping all bboxes after NMS step. + use_gaussian (bool): Use Gaussian as the decay function. Default: False + gaussian_sigma (float): Sigma for Gaussian decay function. Default: 2.0 + background_label (int): The index of background label, the background + label will be ignored. If set to -1, then all + categories will be considered. Default: 0 + normalized (bool): Whether detections are normalized. Default: True + return_index(bool): Whether return selected index. Default: False + name(str): Name of the matrix nms op. Default: None. + + Returns: + A tuple with two Variables: (Out, Index) if return_index is True, + otherwise, one Variable(Out) is returned. + + Out (Variable): A 2-D LoDTensor with shape [No, 6] containing the + detection results. + Each row has 6 values: [label, confidence, xmin, ymin, xmax, ymax] + (After version 1.3, when no boxes detected, the lod is changed + from {0} to {1}) + + Index (Variable): A 2-D LoDTensor with shape [No, 1] containing the + selected indices, which are absolute values cross batches. + + Examples: + .. code-block:: python + + + import paddle.fluid as fluid + boxes = fluid.data(name='bboxes', shape=[None,81, 4], + dtype='float32', lod_level=1) + scores = fluid.data(name='scores', shape=[None,81], + dtype='float32', lod_level=1) + out = fluid.layers.matrix_nms(bboxes=boxes, + scores=scores, + background_label=0, + score_threshold=0.5, + post_threshold=0.1, + nms_top_k=400, + keep_top_k=200, + normalized=False) + """ + check_variable_and_dtype(bboxes, 'BBoxes', ['float32', 'float64'], + 'matrix_nms') + check_variable_and_dtype(scores, 'Scores', ['float32', 'float64'], + 'matrix_nms') + check_type(score_threshold, 'score_threshold', float, 'matrix_nms') + check_type(post_threshold, 'post_threshold', float, 'matrix_nms') + check_type(nms_top_k, 'nums_top_k', int, 'matrix_nms') + check_type(keep_top_k, 'keep_top_k', int, 'matrix_nms') + check_type(normalized, 'normalized', bool, 'matrix_nms') + check_type(use_gaussian, 'use_gaussian', bool, 'matrix_nms') + check_type(gaussian_sigma, 'gaussian_sigma', float, 'matrix_nms') + check_type(background_label, 'background_label', int, 'matrix_nms') + + helper = LayerHelper('matrix_nms', **locals()) + output = helper.create_variable_for_type_inference(dtype=bboxes.dtype) + index = helper.create_variable_for_type_inference(dtype='int') + helper.append_op( + type="matrix_nms", + inputs={'BBoxes': bboxes, + 'Scores': scores}, + attrs={ + 'background_label': background_label, + 'score_threshold': score_threshold, + 'post_threshold': post_threshold, + 'nms_top_k': nms_top_k, + 'gaussian_sigma': gaussian_sigma, + 'use_gaussian': use_gaussian, + 'keep_top_k': keep_top_k, + 'normalized': normalized + }, + outputs={'Out': output, + 'Index': index}) + output.stop_gradient = True + + if return_index: + return output, index + else: + return output + + def distribute_fpn_proposals(fpn_rois, min_level, max_level, diff --git a/python/paddle/fluid/tests/unittests/test_matrix_nms_op.py b/python/paddle/fluid/tests/unittests/test_matrix_nms_op.py new file mode 100644 index 0000000000000000000000000000000000000000..cf756ae8384486ea30ebc2fd25079484061ed380 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_matrix_nms_op.py @@ -0,0 +1,307 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +import unittest +import numpy as np +import copy +from op_test import OpTest +import paddle.fluid as fluid +from paddle.fluid import Program, program_guard + + +def softmax(x): + # clip to shiftx, otherwise, when calc loss with + # log(exp(shiftx)), may get log(0)=INF + shiftx = (x - np.max(x)).clip(-64.) + exps = np.exp(shiftx) + return exps / np.sum(exps) + + +def iou_matrix(a, b, norm=True): + tl_i = np.maximum(a[:, np.newaxis, :2], b[:, :2]) + br_i = np.minimum(a[:, np.newaxis, 2:], b[:, 2:]) + + pad = not norm and 1 or 0 + + area_i = np.prod(br_i - tl_i + pad, axis=2) * (tl_i < br_i).all(axis=2) + area_a = np.prod(a[:, 2:] - a[:, :2] + pad, axis=1) + area_b = np.prod(b[:, 2:] - b[:, :2] + pad, axis=1) + area_o = (area_a[:, np.newaxis] + area_b - area_i) + return area_i / (area_o + 1e-10) + + +def matrix_nms(boxes, + scores, + score_threshold, + post_threshold=0., + nms_top_k=400, + normalized=True, + use_gaussian=False, + gaussian_sigma=2.): + all_scores = copy.deepcopy(scores) + all_scores = all_scores.flatten() + selected_indices = np.where(all_scores > score_threshold)[0] + all_scores = all_scores[selected_indices] + + sorted_indices = np.argsort(-all_scores, axis=0, kind='mergesort') + sorted_scores = all_scores[sorted_indices] + sorted_indices = selected_indices[sorted_indices] + if nms_top_k > -1 and nms_top_k < sorted_indices.shape[0]: + sorted_indices = sorted_indices[:nms_top_k] + sorted_scores = sorted_scores[:nms_top_k] + + selected_boxes = boxes[sorted_indices, :] + ious = iou_matrix(selected_boxes, selected_boxes) + ious = np.triu(ious, k=1) + iou_cmax = ious.max(0) + N = iou_cmax.shape[0] + iou_cmax = np.repeat(iou_cmax[:, np.newaxis], N, axis=1) + + if use_gaussian: + decay = np.exp((iou_cmax**2 - ious**2) * gaussian_sigma) + else: + decay = (1 - ious) / (1 - iou_cmax) + decay = decay.min(0) + decayed_scores = sorted_scores * decay + + if post_threshold > 0.: + inds = np.where(decayed_scores > post_threshold)[0] + selected_boxes = selected_boxes[inds, :] + decayed_scores = decayed_scores[inds] + sorted_indices = sorted_indices[inds] + + return decayed_scores, selected_boxes, sorted_indices + + +def multiclass_nms(boxes, scores, background, score_threshold, post_threshold, + nms_top_k, keep_top_k, normalized, use_gaussian, + gaussian_sigma): + all_boxes = [] + all_cls = [] + all_scores = [] + all_indices = [] + for c in range(scores.shape[0]): + if c == background: + continue + decayed_scores, selected_boxes, indices = matrix_nms( + boxes, scores[c], score_threshold, post_threshold, nms_top_k, + normalized, use_gaussian, gaussian_sigma) + all_cls.append(np.full(len(decayed_scores), c, decayed_scores.dtype)) + all_boxes.append(selected_boxes) + all_scores.append(decayed_scores) + all_indices.append(indices) + + all_cls = np.concatenate(all_cls) + all_boxes = np.concatenate(all_boxes) + all_scores = np.concatenate(all_scores) + all_indices = np.concatenate(all_indices) + all_pred = np.concatenate( + (all_cls[:, np.newaxis], all_scores[:, np.newaxis], all_boxes), axis=1) + + num_det = len(all_pred) + if num_det == 0: + return all_pred, np.array([], dtype=np.float32) + + inds = np.argsort(-all_scores, axis=0, kind='mergesort') + all_pred = all_pred[inds, :] + all_indices = all_indices[inds] + + if keep_top_k > -1 and num_det > keep_top_k: + num_det = keep_top_k + all_pred = all_pred[:keep_top_k, :] + all_indices = all_indices[:keep_top_k] + + return all_pred, all_indices + + +def batched_multiclass_nms(boxes, + scores, + background, + score_threshold, + post_threshold, + nms_top_k, + keep_top_k, + normalized=True, + use_gaussian=False, + gaussian_sigma=2.): + batch_size = scores.shape[0] + det_outs = [] + index_outs = [] + lod = [] + for n in range(batch_size): + nmsed_outs, indices = multiclass_nms( + boxes[n], scores[n], background, score_threshold, post_threshold, + nms_top_k, keep_top_k, normalized, use_gaussian, gaussian_sigma) + nmsed_num = len(nmsed_outs) + lod.append(nmsed_num) + if nmsed_num == 0: + continue + indices += n * scores.shape[2] + det_outs.append(nmsed_outs) + index_outs.append(indices) + if det_outs: + det_outs = np.concatenate(det_outs) + index_outs = np.concatenate(index_outs) + return det_outs, index_outs, lod + + +class TestMatrixNMSOp(OpTest): + def set_argument(self): + self.post_threshold = 0. + self.use_gaussian = False + + def setUp(self): + self.set_argument() + N = 7 + M = 1200 + C = 21 + BOX_SIZE = 4 + background = 0 + nms_top_k = 400 + keep_top_k = 200 + score_threshold = 0.01 + post_threshold = self.post_threshold + use_gaussian = False + if hasattr(self, 'use_gaussian'): + use_gaussian = self.use_gaussian + gaussian_sigma = 2. + + scores = np.random.random((N * M, C)).astype('float32') + + scores = np.apply_along_axis(softmax, 1, scores) + scores = np.reshape(scores, (N, M, C)) + scores = np.transpose(scores, (0, 2, 1)) + + boxes = np.random.random((N, M, BOX_SIZE)).astype('float32') + boxes[:, :, 0:2] = boxes[:, :, 0:2] * 0.5 + boxes[:, :, 2:4] = boxes[:, :, 2:4] * 0.5 + 0.5 + + det_outs, index_outs, lod = batched_multiclass_nms( + boxes, scores, background, score_threshold, post_threshold, + nms_top_k, keep_top_k, True, use_gaussian, gaussian_sigma) + + empty = len(det_outs) == 0 + det_outs = np.array([], dtype=np.float32) if empty else det_outs + index_outs = np.array([], dtype=np.float32) if empty else index_outs + nmsed_outs = det_outs.astype('float32') + + self.op_type = 'matrix_nms' + self.inputs = {'BBoxes': boxes, 'Scores': scores} + self.outputs = { + 'Out': (nmsed_outs, [lod]), + 'Index': (index_outs[:, None], [lod]) + } + self.attrs = { + 'background_label': 0, + 'nms_top_k': nms_top_k, + 'keep_top_k': keep_top_k, + 'score_threshold': score_threshold, + 'post_threshold': post_threshold, + 'use_gaussian': use_gaussian, + 'gaussian_sigma': gaussian_sigma, + 'normalized': True, + } + + def test_check_output(self): + self.check_output() + + +class TestMatrixNMSOpNoOutput(TestMatrixNMSOp): + def set_argument(self): + self.post_threshold = 2.0 + + +class TestMatrixNMSOpGaussian(TestMatrixNMSOp): + def set_argument(self): + self.post_threshold = 0. + self.use_gaussian = True + + +class TestMatrixNMSError(unittest.TestCase): + def test_errors(self): + with program_guard(Program(), Program()): + M = 1200 + N = 7 + C = 21 + BOX_SIZE = 4 + nms_top_k = 400 + keep_top_k = 200 + score_threshold = 0.01 + post_threshold = 0. + + boxes_np = np.random.random((M, C, BOX_SIZE)).astype('float32') + scores = np.random.random((N * M, C)).astype('float32') + scores = np.apply_along_axis(softmax, 1, scores) + scores = np.reshape(scores, (N, M, C)) + scores_np = np.transpose(scores, (0, 2, 1)) + + boxes_data = fluid.data( + name='bboxes', shape=[M, C, BOX_SIZE], dtype='float32') + scores_data = fluid.data( + name='scores', shape=[N, C, M], dtype='float32') + + def test_bboxes_Variable(): + # the bboxes type must be Variable + fluid.layers.matrix_nms( + bboxes=boxes_np, + scores=scores_data, + nms_top_k=nms_top_k, + keep_top_k=keep_top_k, + score_threshold=score_threshold, + post_threshold=post_threshold) + + def test_scores_Variable(): + # the scores type must be Variable + fluid.layers.matrix_nms( + bboxes=boxes_data, + scores=scores_np, + nms_top_k=nms_top_k, + keep_top_k=keep_top_k, + score_threshold=score_threshold, + post_threshold=post_threshold) + + def test_empty(): + # when all score are lower than threshold + try: + fluid.layers.matrix_nms( + bboxes=boxes_data, + scores=scores_data, + nms_top_k=nms_top_k, + keep_top_k=keep_top_k, + score_threshold=10., + post_threshold=post_threshold) + except Exception as e: + self.fail(e) + + def test_coverage(): + # cover correct workflow + try: + fluid.layers.matrix_nms( + bboxes=boxes_data, + scores=scores_data, + nms_top_k=nms_top_k, + keep_top_k=keep_top_k, + score_threshold=score_threshold, + post_threshold=post_threshold) + except Exception as e: + self.fail(e) + + self.assertRaises(TypeError, test_bboxes_Variable) + self.assertRaises(TypeError, test_scores_Variable) + test_coverage() + + +if __name__ == '__main__': + unittest.main()