// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include #include #include #include "lite/api/paddle_use_kernels.h" #include "lite/api/paddle_use_ops.h" #include "lite/core/arena/framework.h" #include "lite/tests/utils/fill_data.h" namespace paddle { namespace lite { template static T BBoxArea(const T* box, const bool normalized) { if (box[2] < box[0] || box[3] < box[1]) { // If coordinate values are is invalid // (e.g. xmax < xmin or ymax < ymin), return 0. return static_cast(0.); } else { const T w = box[2] - box[0]; const T h = box[3] - box[1]; if (normalized) { return w * h; } else { // If coordinate values are not within range [0, 1]. return (w + 1) * (h + 1); } } } template static T JaccardOverlap(const T* box1, const T* box2, const bool normalized) { if (box2[0] > box1[2] || box2[2] < box1[0] || box2[1] > box1[3] || box2[3] < box1[1]) { return static_cast(0.); } else { const T inter_xmin = std::max(box1[0], box2[0]); const T inter_ymin = std::max(box1[1], box2[1]); const T inter_xmax = std::min(box1[2], box2[2]); const T inter_ymax = std::min(box1[3], box2[3]); T norm = normalized ? static_cast(0.) : static_cast(1.); T inter_w = inter_xmax - inter_xmin + norm; T inter_h = inter_ymax - inter_ymin + norm; const T inter_area = inter_w * inter_h; const T bbox1_area = BBoxArea(box1, normalized); const T bbox2_area = BBoxArea(box2, normalized); return inter_area / (bbox1_area + bbox2_area - inter_area); } } template T PolyIoU(const T* box1, const T* box2, const size_t box_size, const bool normalized) { LOG(FATAL) << "PolyIoU not implement."; return *box1; } template struct decay_score; template struct decay_score { T operator()(T iou, T max_iou, T sigma) { return std::exp((max_iou * max_iou - iou * iou) * sigma); } }; template struct decay_score { T operator()(T iou, T max_iou, T sigma) { return (1. - iou) / (1. - max_iou); } }; template void NMSMatrix(const Tensor& bbox, const Tensor& scores, const T score_threshold, const T post_threshold, const float sigma, const int64_t top_k, const bool normalized, std::vector* selected_indices, std::vector* decayed_scores) { int64_t num_boxes = bbox.dims()[0]; int64_t box_size = bbox.dims()[1]; auto score_ptr = scores.data(); auto bbox_ptr = bbox.data(); std::vector perm(num_boxes); std::iota(perm.begin(), perm.end(), 0); auto end = std::remove_if( perm.begin(), perm.end(), [&score_ptr, score_threshold](int32_t idx) { return score_ptr[idx] <= score_threshold; }); auto sort_fn = [&score_ptr](int32_t lhs, int32_t rhs) { return score_ptr[lhs] > score_ptr[rhs]; }; int64_t num_pre = std::distance(perm.begin(), end); if (num_pre <= 0) { return; } if (top_k > -1 && num_pre > top_k) { num_pre = top_k; } std::partial_sort(perm.begin(), perm.begin() + num_pre, end, sort_fn); std::vector iou_matrix((num_pre * (num_pre - 1)) >> 1); std::vector iou_max(num_pre); iou_max[0] = 0.; for (int64_t i = 1; i < num_pre; i++) { T max_iou = 0.; auto idx_a = perm[i]; for (int64_t j = 0; j < i; j++) { auto idx_b = perm[j]; auto iou = JaccardOverlap( bbox_ptr + idx_a * box_size, bbox_ptr + idx_b * box_size, normalized); max_iou = std::max(max_iou, iou); iou_matrix[i * (i - 1) / 2 + j] = iou; } iou_max[i] = max_iou; } if (score_ptr[perm[0]] > post_threshold) { selected_indices->push_back(perm[0]); decayed_scores->push_back(score_ptr[perm[0]]); } decay_score decay_fn; for (int64_t i = 1; i < num_pre; i++) { T min_decay = 1.; for (int64_t j = 0; j < i; j++) { auto max_iou = iou_max[j]; auto iou = iou_matrix[i * (i - 1) / 2 + j]; auto decay = decay_fn(iou, max_iou, sigma); min_decay = std::min(min_decay, decay); } auto ds = min_decay * score_ptr[perm[i]]; if (ds <= post_threshold) continue; selected_indices->push_back(perm[i]); decayed_scores->push_back(ds); } } template size_t MultiClassMatrixNMS(const Tensor& scores, const Tensor& bboxes, std::vector* out, std::vector* indices, int start, int64_t background_label, int64_t nms_top_k, int64_t keep_top_k, bool normalized, T score_threshold, T post_threshold, bool use_gaussian, float gaussian_sigma) { std::vector all_indices; std::vector all_scores; std::vector all_classes; all_indices.reserve(scores.numel()); all_scores.reserve(scores.numel()); all_classes.reserve(scores.numel()); size_t num_det = 0; auto class_num = scores.dims()[0]; Tensor score_slice; for (int64_t c = 0; c < class_num; ++c) { if (c == background_label) continue; score_slice = scores.Slice(c, c + 1); if (use_gaussian) { NMSMatrix(bboxes, score_slice, score_threshold, post_threshold, gaussian_sigma, nms_top_k, normalized, &all_indices, &all_scores); } else { NMSMatrix(bboxes, score_slice, score_threshold, post_threshold, gaussian_sigma, nms_top_k, normalized, &all_indices, &all_scores); } for (size_t i = 0; i < all_indices.size() - num_det; i++) { all_classes.push_back(static_cast(c)); } num_det = all_indices.size(); } if (num_det <= 0) { return num_det; } if (keep_top_k > -1) { auto k = static_cast(keep_top_k); if (num_det > k) num_det = k; } std::vector perm(all_indices.size()); std::iota(perm.begin(), perm.end(), 0); std::partial_sort(perm.begin(), perm.begin() + num_det, perm.end(), [&all_scores](int lhs, int rhs) { return all_scores[lhs] > all_scores[rhs]; }); for (size_t i = 0; i < num_det; i++) { auto p = perm[i]; auto idx = all_indices[p]; auto cls = all_classes[p]; auto score = all_scores[p]; auto bbox = bboxes.data() + idx * bboxes.dims()[1]; (*indices).push_back(start + idx); (*out).push_back(cls); (*out).push_back(score); for (int j = 0; j < bboxes.dims()[1]; j++) { (*out).push_back(bbox[j]); } } return num_det; } class MatrixNmsComputeTester : public arena::TestCase { protected: // common attributes for this op. std::string type_ = "matrix_nms"; std::string bboxes_ = "bboxes"; std::string scores_ = "scores"; std::string out_ = "out"; std::string index_ = "index"; DDim bboxes_dims_{}; DDim scores_dims_{}; int background_label_{0}; float score_threshold_{0.01f}; float post_threshold_{0.0f}; int nms_top_k_{1}; int keep_top_k_{2}; bool normalized_{false}; bool use_gaussian_{true}; float gaussian_sigma_{2.0f}; public: MatrixNmsComputeTester(const Place& place, const std::string& alias, DDim bboxes_dims, DDim scores_dims, int background_label = 0, float score_threshold = 0.01f, float post_threshold = 0.0f, int nms_top_k = 1, int keep_top_k = 2, bool normalized = false, bool use_gaussian = true, float gaussian_sigma = 2.0f) : TestCase(place, alias), bboxes_dims_(bboxes_dims), scores_dims_(scores_dims), background_label_(background_label), score_threshold_(score_threshold), post_threshold_(post_threshold), nms_top_k_(nms_top_k), keep_top_k_(keep_top_k), normalized_(normalized), use_gaussian_(use_gaussian), gaussian_sigma_(gaussian_sigma) {} void RunBaseline(Scope* scope) override { auto* boxes = scope->FindTensor(bboxes_); auto* scores = scope->FindTensor(scores_); auto* outs = scope->NewTensor(out_); auto* index = scope->NewTensor(index_); CHECK(outs); outs->set_precision(PRECISION(kFloat)); CHECK(index); index->set_precision(PRECISION(kInt32)); auto score_dims = scores->dims(); auto batch_size = score_dims[0]; auto num_boxes = score_dims[2]; auto box_dim = boxes->dims()[2]; auto out_dim = box_dim + 2; Tensor boxes_slice, scores_slice; int64_t num_out = 0; std::vector offsets = {0}; std::vector detections; std::vector indices; detections.reserve(out_dim * num_boxes * batch_size); indices.reserve(num_boxes * batch_size); for (int i = 0; i < batch_size; ++i) { scores_slice = scores->Slice(i, i + 1); scores_slice.Resize({score_dims[1], score_dims[2]}); boxes_slice = boxes->Slice(i, i + 1); boxes_slice.Resize({score_dims[2], box_dim}); int start = i * score_dims[2]; num_out = MultiClassMatrixNMS(scores_slice, boxes_slice, &detections, &indices, start, background_label_, nms_top_k_, keep_top_k_, normalized_, score_threshold_, post_threshold_, use_gaussian_, gaussian_sigma_); offsets.push_back(offsets.back() + num_out); } int64_t num_kept = offsets.back(); if (num_kept == 0) { outs->Resize({0, out_dim}); index->Resize({0, 1}); } else { outs->Resize({num_kept, out_dim}); index->Resize({num_kept, 1}); std::copy( detections.begin(), detections.end(), outs->mutable_data()); std::copy(indices.begin(), indices.end(), index->mutable_data()); } LoD lod; lod.emplace_back(std::vector(offsets.begin(), offsets.end())); outs->set_lod(lod); index->set_lod(lod); } void PrepareOpDesc(cpp::OpDesc* op_desc) { op_desc->SetType(type_); op_desc->SetInput("BBoxes", {bboxes_}); op_desc->SetInput("Scores", {scores_}); op_desc->SetOutput("Out", {out_}); op_desc->SetOutput("Index", {index_}); op_desc->SetAttr("background_label", background_label_); op_desc->SetAttr("score_threshold", score_threshold_); op_desc->SetAttr("post_threshold", post_threshold_); op_desc->SetAttr("nms_top_k", nms_top_k_); op_desc->SetAttr("keep_top_k", keep_top_k_); op_desc->SetAttr("normalized", normalized_); op_desc->SetAttr("use_gaussian", use_gaussian_); op_desc->SetAttr("gaussian_sigma", gaussian_sigma_); } void PrepareData() override { std::vector bboxes(bboxes_dims_.production()); for (int i = 0; i < bboxes_dims_.production(); ++i) { bboxes[i] = i * 1. / bboxes_dims_.production(); } SetCommonTensor(bboxes_, bboxes_dims_, bboxes.data()); std::vector scores(scores_dims_.production()); for (int i = 0; i < scores_dims_.production(); ++i) { scores[i] = i * 1. / scores_dims_.production(); } SetCommonTensor(scores_, scores_dims_, scores.data()); } }; void TestMatrixNms(Place place, float abs_error) { int N = 3; int M = 2500; for (int class_num : {2, 4, 10}) { std::vector bbox_shape{N, M, 4}; std::vector score_shape{N, class_num, M}; std::unique_ptr tester(new MatrixNmsComputeTester( place, "def", DDim(bbox_shape), DDim(score_shape))); arena::Arena arena(std::move(tester), place, abs_error); arena.TestPrecision(); } } TEST(matrix_nms, precision) { float abs_error = 2e-5; Place place; #if defined(LITE_WITH_ARM) place = TARGET(kHost); #else return; #endif TestMatrixNms(place, abs_error); } } // namespace lite } // namespace paddle