multiclass_nms_op.cc 24.7 KB
Newer Older
1
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

J
jerrywgz 已提交
14
#include <glog/logging.h>
15

Y
Yi Wang 已提交
16
#include "paddle/fluid/framework/op_registry.h"
17
#include "paddle/fluid/operators/detection/nms_util.h"
18 19 20 21 22 23 24

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;

25 26 27 28 29 30 31 32 33 34
// Converts per-entry RoI counts into cumulative LoD-style offsets.
//
// `rois_num` is read as a flat array of `int` counts. The returned vector has
// numel() + 1 entries: it starts with 0 and entry i + 1 is the sum of the
// first i + 1 counts.
inline std::vector<size_t> GetNmsLodFromRoisNum(const Tensor* rois_num) {
  const int* rois_num_data = rois_num->data<int>();
  // numel() is 64-bit; keep the loop index the same width.
  const int64_t num_entries = rois_num->numel();
  std::vector<size_t> rois_lod;
  rois_lod.reserve(static_cast<size_t>(num_entries) + 1);
  rois_lod.push_back(static_cast<size_t>(0));
  for (int64_t i = 0; i < num_entries; ++i) {
    rois_lod.push_back(rois_lod.back() + static_cast<size_t>(rois_num_data[i]));
  }
  return rois_lod;
}

D
dangqingqing 已提交
35
class MultiClassNMSOp : public framework::OperatorWithKernel {
36 37 38 39
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
X
xiaoting 已提交
40 41 42
    OP_INOUT_CHECK(ctx->HasInput("BBoxes"), "Input", "BBoxes", "MultiClassNMS");
    OP_INOUT_CHECK(ctx->HasInput("Scores"), "Input", "Scores", "MultiClassNMS");
    OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "MultiClassNMS");
D
dangqingqing 已提交
43
    auto box_dims = ctx->GetInputDim("BBoxes");
44
    auto score_dims = ctx->GetInputDim("Scores");
J
jerrywgz 已提交
45
    auto score_size = score_dims.size();
46

47
    if (ctx->IsRuntime()) {
48 49 50 51 52
      PADDLE_ENFORCE_EQ(score_size == 2 || score_size == 3, true,
                        platform::errors::InvalidArgument(
                            "The rank of Input(Scores) must be 2 or 3"
                            ". But received rank = %d",
                            score_size));
53
      PADDLE_ENFORCE_EQ(box_dims.size(), 3,
X
xiaoting 已提交
54 55
                        platform::errors::InvalidArgument(
                            "The rank of Input(BBoxes) must be 3"
56
                            ". But received rank = %d",
X
xiaoting 已提交
57
                            box_dims.size()));
J
jerrywgz 已提交
58
      if (score_size == 3) {
59 60 61 62 63 64 65 66 67 68 69 70 71
        PADDLE_ENFORCE_EQ(box_dims[2] == 4 || box_dims[2] == 8 ||
                              box_dims[2] == 16 || box_dims[2] == 24 ||
                              box_dims[2] == 32,
                          true,
                          platform::errors::InvalidArgument(
                              "The last dimension of Input"
                              "(BBoxes) must be 4 or 8, "
                              "represents the layout of coordinate "
                              "[xmin, ymin, xmax, ymax] or "
                              "4 points: [x1, y1, x2, y2, x3, y3, x4, y4] or "
                              "8 points: [xi, yi] i= 1,2,...,8 or "
                              "12 points: [xi, yi] i= 1,2,...,12 or "
                              "16 points: [xi, yi] i= 1,2,...,16"));
J
jerrywgz 已提交
72 73
        PADDLE_ENFORCE_EQ(
            box_dims[1], score_dims[2],
X
xiaoting 已提交
74 75 76 77 78 79
            platform::errors::InvalidArgument(
                "The 2nd dimension of Input(BBoxes) must be equal to "
                "last dimension of Input(Scores), which represents the "
                "predicted bboxes."
                "But received box_dims[1](%s) != socre_dims[2](%s)",
                box_dims[1], score_dims[2]));
J
jerrywgz 已提交
80
      } else {
X
xiaoting 已提交
81 82
        PADDLE_ENFORCE_EQ(box_dims[2], 4,
                          platform::errors::InvalidArgument(
83 84
                              "The last dimension of Input"
                              "(BBoxes) must be 4. But received dimension = %d",
X
xiaoting 已提交
85
                              box_dims[2]));
86 87 88 89 90 91 92
        PADDLE_ENFORCE_EQ(
            box_dims[1], score_dims[1],
            platform::errors::InvalidArgument(
                "The 2nd dimension of Input"
                "(BBoxes) must be equal to the 2nd dimension of Input(Scores). "
                "But received box dimension = %d, score dimension = %d",
                box_dims[1], score_dims[1]));
J
jerrywgz 已提交
93
      }
94
    }
95 96
    // Here the box_dims[0] is not the real dimension of output.
    // It will be rewritten in the computing kernel.
J
jerrywgz 已提交
97
    if (score_size == 3) {
98
      ctx->SetOutputDim("Out", {-1, box_dims[2] + 2});
J
jerrywgz 已提交
99 100 101
    } else {
      ctx->SetOutputDim("Out", {-1, box_dims[2] + 2});
    }
102 103 104
    if (!ctx->IsRuntime()) {
      ctx->SetLoDLevel("Out", std::max(ctx->GetLoDLevel("BBoxes"), 1));
    }
105
  }
D
dangqingqing 已提交
106 107 108 109 110

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    return framework::OpKernelType(
111
        OperatorWithKernel::IndicateVarDataType(ctx, "Scores"),
112
        platform::CPUPlace());
D
dangqingqing 已提交
113
  }
114 115
};

116 117 118 119 120 121 122 123 124
// Copies the entries belonging to one class out of `items` into
// `one_class_item`.
//
// `items` is indexed as [num_item, class_num] (rank 2) or
// [num_item, class_num, item_size] (rank 3). For every item i the element
// (or item_size-long row) at class `class_id` is copied to position i of
// `one_class_item`, whose buffer is obtained via mutable_data on ctx's place.
// The caller is expected to have resized `one_class_item` beforehand.
template <class T>
void SliceOneClass(const platform::DeviceContext& ctx,
                   const framework::Tensor& items, const int class_id,
                   framework::Tensor* one_class_item) {
  T* item_data = one_class_item->mutable_data<T>(ctx.GetPlace());
  const T* items_data = items.data<T>();
  // All index arithmetic is done in 64 bits so that large tensors cannot
  // overflow the element offsets.
  const int64_t num_item = items.dims()[0];
  const int64_t class_num = items.dims()[1];
  if (items.dims().size() == 3) {
    const int64_t item_size = items.dims()[2];
    for (int64_t i = 0; i < num_item; ++i) {
      std::memcpy(item_data + i * item_size,
                  items_data + (i * class_num + class_id) * item_size,
                  sizeof(T) * item_size);
    }
  } else {
    for (int64_t i = 0; i < num_item; ++i) {
      item_data[i] = items_data[i * class_num + class_id];
    }
  }
}

138
template <typename T>
D
dangqingqing 已提交
139
class MultiClassNMSKernel : public framework::OpKernel<T> {
140 141 142
 public:
  void NMSFast(const Tensor& bbox, const Tensor& scores,
               const T score_threshold, const T nms_threshold, const T eta,
J
jerrywgz 已提交
143 144
               const int64_t top_k, std::vector<int>* selected_indices,
               const bool normalized) const {
145 146 147
    // The total boxes for each instance.
    int64_t num_boxes = bbox.dims()[0];
    // 4: [xmin ymin xmax ymax]
Y
Yipeng 已提交
148 149
    // 8: [x1 y1 x2 y2 x3 y3 x4 y4]
    // 16, 24, or 32: [x1 y1 x2 y2 ...  xn yn], n = 8, 12 or 16
150 151 152 153 154 155 156 157 158 159 160 161 162 163
    int64_t box_size = bbox.dims()[1];

    std::vector<T> scores_data(num_boxes);
    std::copy_n(scores.data<T>(), num_boxes, scores_data.begin());
    std::vector<std::pair<T, int>> sorted_indices;
    GetMaxScoreIndex(scores_data, score_threshold, top_k, &sorted_indices);

    selected_indices->clear();
    T adaptive_threshold = nms_threshold;
    const T* bbox_data = bbox.data<T>();

    while (sorted_indices.size() != 0) {
      const int idx = sorted_indices.front().second;
      bool keep = true;
164
      for (size_t k = 0; k < selected_indices->size(); ++k) {
165 166
        if (keep) {
          const int kept_idx = (*selected_indices)[k];
Y
Yipeng 已提交
167 168 169
          T overlap = T(0.);
          // 4: [xmin ymin xmax ymax]
          if (box_size == 4) {
J
jerrywgz 已提交
170 171 172
            overlap =
                JaccardOverlap<T>(bbox_data + idx * box_size,
                                  bbox_data + kept_idx * box_size, normalized);
Y
Yipeng 已提交
173 174 175 176
          }
          // 8: [x1 y1 x2 y2 x3 y3 x4 y4] or 16, 24, 32
          if (box_size == 8 || box_size == 16 || box_size == 24 ||
              box_size == 32) {
J
jerrywgz 已提交
177 178 179
            overlap = PolyIoU<T>(bbox_data + idx * box_size,
                                 bbox_data + kept_idx * box_size, box_size,
                                 normalized);
Y
Yipeng 已提交
180
          }
181 182 183 184 185 186 187 188 189 190 191 192 193 194 195
          keep = overlap <= adaptive_threshold;
        } else {
          break;
        }
      }
      if (keep) {
        selected_indices->push_back(idx);
      }
      sorted_indices.erase(sorted_indices.begin());
      if (keep && eta < 1 && adaptive_threshold > 0.5) {
        adaptive_threshold *= eta;
      }
    }
  }

D
dangqingqing 已提交
196
  void MultiClassNMS(const framework::ExecutionContext& ctx,
197
                     const Tensor& scores, const Tensor& bboxes,
J
jerrywgz 已提交
198
                     const int scores_size,
199 200
                     std::map<int, std::vector<int>>* indices,
                     int* num_nmsed_out) const {
D
dangqingqing 已提交
201 202 203
    int64_t background_label = ctx.Attr<int>("background_label");
    int64_t nms_top_k = ctx.Attr<int>("nms_top_k");
    int64_t keep_top_k = ctx.Attr<int>("keep_top_k");
J
jerrywgz 已提交
204
    bool normalized = ctx.Attr<bool>("normalized");
205 206
    T nms_threshold = static_cast<T>(ctx.Attr<float>("nms_threshold"));
    T nms_eta = static_cast<T>(ctx.Attr<float>("nms_eta"));
D
dangqingqing 已提交
207
    T score_threshold = static_cast<T>(ctx.Attr<float>("score_threshold"));
J
jerrywgz 已提交
208
    auto& dev_ctx = ctx.template device_context<platform::CPUDeviceContext>();
209 210

    int num_det = 0;
211 212 213 214 215 216 217 218 219 220 221 222 223

    int64_t class_num = scores_size == 3 ? scores.dims()[0] : scores.dims()[1];
    Tensor bbox_slice, score_slice;
    for (int64_t c = 0; c < class_num; ++c) {
      if (c == background_label) continue;
      if (scores_size == 3) {
        score_slice = scores.Slice(c, c + 1);
        bbox_slice = bboxes;
      } else {
        score_slice.Resize({scores.dims()[0], 1});
        bbox_slice.Resize({scores.dims()[0], 4});
        SliceOneClass<T>(dev_ctx, scores, c, &score_slice);
        SliceOneClass<T>(dev_ctx, bboxes, c, &bbox_slice);
J
jerrywgz 已提交
224
      }
225 226 227
      NMSFast(bbox_slice, score_slice, score_threshold, nms_threshold, nms_eta,
              nms_top_k, &((*indices)[c]), normalized);
      if (scores_size == 2) {
J
jerrywgz 已提交
228 229
        std::stable_sort((*indices)[c].begin(), (*indices)[c].end());
      }
230
      num_det += (*indices)[c].size();
231 232
    }

233
    *num_nmsed_out = num_det;
234 235
    const T* scores_data = scores.data<T>();
    if (keep_top_k > -1 && num_det > keep_top_k) {
J
jerrywgz 已提交
236
      const T* sdata;
237
      std::vector<std::pair<float, std::pair<int, int>>> score_index_pairs;
238
      for (const auto& it : *indices) {
239
        int label = it.first;
J
jerrywgz 已提交
240
        if (scores_size == 3) {
241
          sdata = scores_data + label * scores.dims()[1];
J
jerrywgz 已提交
242
        } else {
243 244 245
          score_slice.Resize({scores.dims()[0], 1});
          SliceOneClass<T>(dev_ctx, scores, label, &score_slice);
          sdata = score_slice.data<T>();
J
jerrywgz 已提交
246
        }
247
        const std::vector<int>& label_indices = it.second;
248
        for (size_t j = 0; j < label_indices.size(); ++j) {
249 250 251 252 253 254
          int idx = label_indices[j];
          score_index_pairs.push_back(
              std::make_pair(sdata[idx], std::make_pair(label, idx)));
        }
      }
      // Keep top k results per image.
255 256
      std::stable_sort(score_index_pairs.begin(), score_index_pairs.end(),
                       SortScorePairDescend<std::pair<int, int>>);
257 258 259 260
      score_index_pairs.resize(keep_top_k);

      // Store the new indices.
      std::map<int, std::vector<int>> new_indices;
261
      for (size_t j = 0; j < score_index_pairs.size(); ++j) {
262 263 264 265
        int label = score_index_pairs[j].second.first;
        int idx = score_index_pairs[j].second.second;
        new_indices[label].push_back(idx);
      }
J
jerrywgz 已提交
266 267 268 269 270 271 272
      if (scores_size == 2) {
        for (const auto& it : new_indices) {
          int label = it.first;
          std::stable_sort(new_indices[label].begin(),
                           new_indices[label].end());
        }
      }
273 274
      new_indices.swap(*indices);
      *num_nmsed_out = keep_top_k;
275 276 277
    }
  }

J
jerrywgz 已提交
278 279
  void MultiClassOutput(const platform::DeviceContext& ctx,
                        const Tensor& scores, const Tensor& bboxes,
280
                        const std::map<int, std::vector<int>>& selected_indices,
281 282
                        const int scores_size, Tensor* outs,
                        int* oindices = nullptr, const int offset = 0) const {
J
jerrywgz 已提交
283
    int64_t class_num = scores.dims()[1];
Y
Yipeng 已提交
284 285
    int64_t predict_dim = scores.dims()[1];
    int64_t box_size = bboxes.dims()[1];
J
jerrywgz 已提交
286 287 288 289
    if (scores_size == 2) {
      box_size = bboxes.dims()[2];
    }
    int64_t out_dim = box_size + 2;
290 291 292
    auto* scores_data = scores.data<T>();
    auto* bboxes_data = bboxes.data<T>();
    auto* odata = outs->data<T>();
J
jerrywgz 已提交
293 294 295
    const T* sdata;
    Tensor bbox;
    bbox.Resize({scores.dims()[0], box_size});
296 297 298
    int count = 0;
    for (const auto& it : selected_indices) {
      int label = it.first;
D
dangqingqing 已提交
299
      const std::vector<int>& indices = it.second;
J
jerrywgz 已提交
300 301 302 303 304
      if (scores_size == 2) {
        SliceOneClass<T>(ctx, bboxes, label, &bbox);
      } else {
        sdata = scores_data + label * predict_dim;
      }
305

306
      for (size_t j = 0; j < indices.size(); ++j) {
307
        int idx = indices[j];
J
jerrywgz 已提交
308 309 310 311 312
        odata[count * out_dim] = label;  // label
        const T* bdata;
        if (scores_size == 3) {
          bdata = bboxes_data + idx * box_size;
          odata[count * out_dim + 1] = sdata[idx];  // score
313 314 315
          if (oindices != nullptr) {
            oindices[count] = offset + idx;
          }
J
jerrywgz 已提交
316 317 318
        } else {
          bdata = bbox.data<T>() + idx * box_size;
          odata[count * out_dim + 1] = *(scores_data + idx * class_num + label);
319 320 321
          if (oindices != nullptr) {
            oindices[count] = offset + idx * class_num + label;
          }
J
jerrywgz 已提交
322
        }
Y
Yipeng 已提交
323 324
        // xmin, ymin, xmax, ymax or multi-points coordinates
        std::memcpy(odata + count * out_dim + 2, bdata, box_size * sizeof(T));
D
dangqingqing 已提交
325
        count++;
326 327 328 329 330
      }
    }
  }

  void Compute(const framework::ExecutionContext& ctx) const override {
J
jerrywgz 已提交
331 332
    auto* boxes = ctx.Input<LoDTensor>("BBoxes");
    auto* scores = ctx.Input<LoDTensor>("Scores");
333
    auto* outs = ctx.Output<LoDTensor>("Out");
334 335
    bool return_index = ctx.HasOutput("Index") ? true : false;
    auto index = ctx.Output<LoDTensor>("Index");
336 337
    bool has_roisnum = ctx.HasInput("RoisNum") ? true : false;
    auto rois_num = ctx.Input<Tensor>("RoisNum");
338
    auto score_dims = scores->dims();
339
    auto score_size = score_dims.size();
J
jerrywgz 已提交
340
    auto& dev_ctx = ctx.template device_context<platform::CPUDeviceContext>();
341 342 343

    std::vector<std::map<int, std::vector<int>>> all_indices;
    std::vector<size_t> batch_starts = {0};
J
jerrywgz 已提交
344 345 346 347
    int64_t batch_size = score_dims[0];
    int64_t box_dim = boxes->dims()[2];
    int64_t out_dim = box_dim + 2;
    int num_nmsed_out = 0;
348
    Tensor boxes_slice, scores_slice;
349 350 351 352 353 354
    int n = 0;
    if (has_roisnum) {
      n = score_size == 3 ? batch_size : rois_num->numel();
    } else {
      n = score_size == 3 ? batch_size : boxes->lod().back().size() - 1;
    }
355
    for (int i = 0; i < n; ++i) {
356
      std::map<int, std::vector<int>> indices;
357 358 359 360 361 362
      if (score_size == 3) {
        scores_slice = scores->Slice(i, i + 1);
        scores_slice.Resize({score_dims[1], score_dims[2]});
        boxes_slice = boxes->Slice(i, i + 1);
        boxes_slice.Resize({score_dims[2], box_dim});
      } else {
363 364 365 366 367 368
        std::vector<size_t> boxes_lod;
        if (has_roisnum) {
          boxes_lod = GetNmsLodFromRoisNum(rois_num);
        } else {
          boxes_lod = boxes->lod().back();
        }
369 370 371 372 373
        if (boxes_lod[i] == boxes_lod[i + 1]) {
          all_indices.push_back(indices);
          batch_starts.push_back(batch_starts.back());
          continue;
        }
374 375
        scores_slice = scores->Slice(boxes_lod[i], boxes_lod[i + 1]);
        boxes_slice = boxes->Slice(boxes_lod[i], boxes_lod[i + 1]);
J
jerrywgz 已提交
376
      }
377 378 379 380
      MultiClassNMS(ctx, scores_slice, boxes_slice, score_size, &indices,
                    &num_nmsed_out);
      all_indices.push_back(indices);
      batch_starts.push_back(batch_starts.back() + num_nmsed_out);
J
jerrywgz 已提交
381 382 383 384
    }

    int num_kept = batch_starts.back();
    if (num_kept == 0) {
385 386 387 388 389 390 391 392
      if (return_index) {
        outs->mutable_data<T>({0, out_dim}, ctx.GetPlace());
        index->mutable_data<int>({0, 1}, ctx.GetPlace());
      } else {
        T* od = outs->mutable_data<T>({1, 1}, ctx.GetPlace());
        od[0] = -1;
        batch_starts = {0, 1};
      }
J
jerrywgz 已提交
393 394
    } else {
      outs->mutable_data<T>({num_kept, out_dim}, ctx.GetPlace());
395 396
      int offset = 0;
      int* oindices = nullptr;
397 398 399 400 401 402
      for (int i = 0; i < n; ++i) {
        if (score_size == 3) {
          scores_slice = scores->Slice(i, i + 1);
          boxes_slice = boxes->Slice(i, i + 1);
          scores_slice.Resize({score_dims[1], score_dims[2]});
          boxes_slice.Resize({score_dims[2], box_dim});
403 404 405
          if (return_index) {
            offset = i * score_dims[2];
          }
406
        } else {
407 408 409 410 411 412
          std::vector<size_t> boxes_lod;
          if (has_roisnum) {
            boxes_lod = GetNmsLodFromRoisNum(rois_num);
          } else {
            boxes_lod = boxes->lod().back();
          }
413
          if (boxes_lod[i] == boxes_lod[i + 1]) continue;
414 415
          scores_slice = scores->Slice(boxes_lod[i], boxes_lod[i + 1]);
          boxes_slice = boxes->Slice(boxes_lod[i], boxes_lod[i + 1]);
416 417 418
          if (return_index) {
            offset = boxes_lod[i] * score_dims[1];
          }
J
jerrywgz 已提交
419
        }
420

421 422 423 424
        int64_t s = batch_starts[i];
        int64_t e = batch_starts[i + 1];
        if (e > s) {
          Tensor out = outs->Slice(s, e);
425 426 427 428 429
          if (return_index) {
            int* output_idx =
                index->mutable_data<int>({num_kept, 1}, ctx.GetPlace());
            oindices = output_idx + s;
          }
430
          MultiClassOutput(dev_ctx, scores_slice, boxes_slice, all_indices[i],
431
                           score_dims.size(), &out, oindices, offset);
432 433 434
        }
      }
    }
435 436 437 438 439 440 441 442 443
    if (ctx.HasOutput("NmsRoisNum")) {
      auto* nms_rois_num = ctx.Output<Tensor>("NmsRoisNum");
      nms_rois_num->mutable_data<int>({n}, ctx.GetPlace());
      int* num_data = nms_rois_num->data<int>();
      for (int i = 1; i <= n; i++) {
        num_data[i - 1] = batch_starts[i] - batch_starts[i - 1];
      }
      nms_rois_num->Resize({n});
    }
444 445 446

    framework::LoD lod;
    lod.emplace_back(batch_starts);
447 448 449
    if (return_index) {
      index->set_lod(lod);
    }
450 451 452 453
    outs->set_lod(lod);
  }
};

D
dangqingqing 已提交
454
class MultiClassNMSOpMaker : public framework::OpProtoAndCheckerMaker {
455
 public:
Y
Yu Yang 已提交
456
  void Make() override {
D
dangqingqing 已提交
457
    AddInput("BBoxes",
J
jerrywgz 已提交
458 459
             "Two types of bboxes are supported:"
             "1. (Tensor) A 3-D Tensor with shape "
Y
Yipeng 已提交
460
             "[N, M, 4 or 8 16 24 32] represents the "
461 462
             "predicted locations of M bounding bboxes, N is the batch size. "
             "Each bounding box has four coordinate values and the layout is "
J
jerrywgz 已提交
463
             "[xmin, ymin, xmax, ymax], when box size equals to 4."
464 465
             "2. (LoDTensor) A 3-D Tensor with shape [M, C, 4]"
             "M is the number of bounding boxes, C is the class number");
D
dangqingqing 已提交
466
    AddInput("Scores",
J
jerrywgz 已提交
467 468
             "Two types of scores are supported:"
             "1. (Tensor) A 3-D Tensor with shape [N, C, M] represents the "
D
dangqingqing 已提交
469 470 471
             "predicted confidence predictions. N is the batch size, C is the "
             "class number, M is number of bounding boxes. For each category "
             "there are total M scores which corresponding M bounding boxes. "
472 473 474 475
             " Please note, M is equal to the 2nd dimension of BBoxes. "
             "2. (LoDTensor) A 2-D LoDTensor with shape [M, C]. "
             "M is the number of bbox, C is the class number. In this case, "
             "Input BBoxes should be the second case with shape [M, C, 4].");
D
dangqingqing 已提交
476
    AddAttr<int>(
477
        "background_label",
翟飞跃 已提交
478
        "(int, default: 0) "
D
dangqingqing 已提交
479 480
        "The index of background label, the background label will be ignored. "
        "If set to -1, then all categories will be considered.")
481
        .SetDefault(0);
D
dangqingqing 已提交
482 483
    AddAttr<float>("score_threshold",
                   "(float) "
D
dangqingqing 已提交
484 485
                   "Threshold to filter out bounding boxes with low "
                   "confidence score. If not provided, consider all boxes.");
D
dangqingqing 已提交
486 487 488
    AddAttr<int>("nms_top_k",
                 "(int64_t) "
                 "Maximum number of detections to be kept according to the "
T
tianshuo78520a 已提交
489
                 "confidences after the filtering detections based on "
D
dangqingqing 已提交
490
                 "score_threshold");
491
    AddAttr<float>("nms_threshold",
翟飞跃 已提交
492
                   "(float, default: 0.3) "
D
dangqingqing 已提交
493
                   "The threshold to be used in NMS.")
494 495 496
        .SetDefault(0.3);
    AddAttr<float>("nms_eta",
                   "(float) "
D
dangqingqing 已提交
497
                   "The parameter for adaptive NMS.")
498
        .SetDefault(1.0);
D
dangqingqing 已提交
499 500 501 502
    AddAttr<int>("keep_top_k",
                 "(int64_t) "
                 "Number of total bboxes to be kept per image after NMS "
                 "step. -1 means keeping all bboxes after NMS step.");
J
jerrywgz 已提交
503
    AddAttr<bool>("normalized",
J
jerrywgz 已提交
504
                  "(bool, default true) "
J
jerrywgz 已提交
505 506
                  "Whether detections are normalized.")
        .SetDefault(true);
507 508 509
    AddOutput("Out",
              "(LoDTensor) A 2-D LoDTensor with shape [No, 6] represents the "
              "detections. Each row has 6 values: "
Y
Yipeng 已提交
510 511 512 513 514 515
              "[label, confidence, xmin, ymin, xmax, ymax] or "
              "(LoDTensor) A 2-D LoDTensor with shape [No, 10] represents the "
              "detections. Each row has 10 values: "
              "[label, confidence, x1, y1, x2, y2, x3, y3, x4, y4]. No is the "
              "total number of detections in this mini-batch."
              "For each instance, "
516 517 518 519
              "the offsets in first dimension are called LoD, the number of "
              "offset is N + 1, if LoD[i + 1] - LoD[i] == 0, means there is "
              "no detected bbox.");
    AddComment(R"DOC(
D
dangqingqing 已提交
520
This operator is to do multi-class non maximum suppression (NMS) on a batched
521
of boxes and scores.
D
dangqingqing 已提交
522 523 524 525 526 527
In the NMS step, this operator greedily selects a subset of detection bounding
boxes that have high scores larger than score_threshold, if providing this
threshold, then selects the largest nms_top_k confidences scores if nms_top_k
is larger than -1. Then this operator pruns away boxes that have high IOU
(intersection over union) overlap with already selected boxes by adaptive
threshold NMS based on parameters of nms_threshold and nms_eta.
528
Aftern NMS step, at most keep_top_k number of total bboxes are to be kept
D
dangqingqing 已提交
529 530
per image if keep_top_k is larger than -1.
This operator support multi-class and batched inputs. It applying NMS
531 532 533
independently for each class. The outputs is a 2-D LoDTenosr, for each
image, the offsets in first dimension of LoDTensor are called LoD, the number
of offset is N + 1, where N is the batch size. If LoD[i + 1] - LoD[i] == 0,
534
means there is no detected bbox for this image.
535 536 537 538
)DOC");
  }
};

539 540 541 542 543 544 545 546 547 548 549 550 551 552
// Operator definition for multiclass_nms2: same checks as MultiClassNMSOp,
// plus the shape of the additional Index output.
class MultiClassNMS2Op : public MultiClassNMSOp {
 public:
  MultiClassNMS2Op(const std::string& type,
                   const framework::VariableNameMap& inputs,
                   const framework::VariableNameMap& outputs,
                   const framework::AttributeMap& attrs)
      : MultiClassNMSOp(type, inputs, outputs, attrs) {}

  // Runs the base-class shape inference, then declares Index as [-1, 1].
  // The shape does not depend on the rank of Scores.
  void InferShape(framework::InferShapeContext* ctx) const override {
    MultiClassNMSOp::InferShape(ctx);

    ctx->SetOutputDim("Index", {-1, 1});
    if (!ctx->IsRuntime()) {
      ctx->SetLoDLevel("Index", std::max(ctx->GetLoDLevel("BBoxes"), 1));
    }
  }
};

// Proto maker for multiclass_nms2: everything declared by
// MultiClassNMSOpMaker plus the Index output.
class MultiClassNMS2OpMaker : public MultiClassNMSOpMaker {
 public:
  void Make() override {
    // Declare the inputs, attributes and Out output of the base operator.
    MultiClassNMSOpMaker::Make();
    // Index is additionally marked as an intermediate output.
    AddOutput("Index",
              "(LoDTensor) A 2-D LoDTensor with shape [No, 1] represents the "
              "index of selected bbox. The index is the absolute index cross "
              "batches.")
        .AsIntermediate();
  }
};

575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602
// Operator definition for multiclass_nms3: extends MultiClassNMS2Op with the
// shape of the NmsRoisNum output.
class MultiClassNMS3Op : public MultiClassNMS2Op {
 public:
  MultiClassNMS3Op(const std::string& type,
                   const framework::VariableNameMap& inputs,
                   const framework::VariableNameMap& outputs,
                   const framework::AttributeMap& attrs)
      : MultiClassNMS2Op(type, inputs, outputs, attrs) {}

  // Runs the MultiClassNMS2Op shape inference (Out and Index), then declares
  // NmsRoisNum as a 1-D output whose length is unknown (-1) here.
  void InferShape(framework::InferShapeContext* ctx) const override {
    MultiClassNMS2Op::InferShape(ctx);

    ctx->SetOutputDim("NmsRoisNum", {-1});
  }
};

// Proto maker for multiclass_nms3: everything declared by
// MultiClassNMS2OpMaker plus the dispensable RoisNum input and NmsRoisNum
// output.
class MultiClassNMS3OpMaker : public MultiClassNMS2OpMaker {
 public:
  void Make() override {
    // Declare everything the multiclass_nms2 maker declares first.
    MultiClassNMS2OpMaker::Make();
    // Both additions are marked dispensable.
    AddInput("RoisNum",
             "(Tensor) The number of RoIs in shape (B),"
             "B is the number of images")
        .AsDispensable();
    AddOutput("NmsRoisNum", "(Tensor), The number of NMS RoIs in each image")
        .AsDispensable();
  }
};

603 604 605 606
}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
H
hong 已提交
607 608 609 610
// multiclass_nms: registered with empty grad-op makers and CPU kernels for
// float and double.
REGISTER_OPERATOR(
    multiclass_nms, ops::MultiClassNMSOp, ops::MultiClassNMSOpMaker,
    paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
    paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL(multiclass_nms, ops::MultiClassNMSKernel<float>,
                       ops::MultiClassNMSKernel<double>);

// multiclass_nms2: same kernel, operator additionally declares Index.
REGISTER_OPERATOR(
    multiclass_nms2, ops::MultiClassNMS2Op, ops::MultiClassNMS2OpMaker,
    paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
    paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL(multiclass_nms2, ops::MultiClassNMSKernel<float>,
                       ops::MultiClassNMSKernel<double>);

// multiclass_nms3: same kernel, operator additionally declares RoisNum and
// NmsRoisNum.
REGISTER_OPERATOR(
    multiclass_nms3, ops::MultiClassNMS3Op, ops::MultiClassNMS3OpMaker,
    paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
    paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL(multiclass_nms3, ops::MultiClassNMSKernel<float>,
                       ops::MultiClassNMSKernel<double>);