diff --git a/paddle/fluid/operators/detection/multiclass_nms_op.cc b/paddle/fluid/operators/detection/multiclass_nms_op.cc index 68b4ab20150bb2563384d500623c8728ba333c1c..7f0bb2a97ce275106ac3e2f75a53aa02df29fff4 100644 --- a/paddle/fluid/operators/detection/multiclass_nms_op.cc +++ b/paddle/fluid/operators/detection/multiclass_nms_op.cc @@ -13,8 +13,10 @@ limitations under the License. */ #include +#include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/detection/nms_util.h" +#include "paddle/phi/infermeta/ternary.h" namespace paddle { namespace operators { @@ -609,12 +611,6 @@ class MultiClassNMS3Op : public MultiClassNMS2Op { const framework::VariableNameMap& outputs, const framework::AttributeMap& attrs) : MultiClassNMS2Op(type, inputs, outputs, attrs) {} - - void InferShape(framework::InferShapeContext* ctx) const override { - MultiClassNMS2Op::InferShape(ctx); - - ctx->SetOutputDim("NmsRoisNum", {-1}); - } }; class MultiClassNMS3OpMaker : public MultiClassNMS2OpMaker { @@ -633,6 +629,10 @@ class MultiClassNMS3OpMaker : public MultiClassNMS2OpMaker { } // namespace operators } // namespace paddle +DECLARE_INFER_SHAPE_FUNCTOR(multiclass_nms3, + MultiClassNMSShapeFunctor, + PD_INFER_META(phi::MultiClassNMSInferMeta)); + namespace ops = paddle::operators; REGISTER_OPERATOR( multiclass_nms, @@ -658,7 +658,5 @@ REGISTER_OPERATOR( ops::MultiClassNMS3Op, ops::MultiClassNMS3OpMaker, paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); -REGISTER_OP_CPU_KERNEL(multiclass_nms3, - ops::MultiClassNMSKernel, - ops::MultiClassNMSKernel); + paddle::framework::EmptyGradOpMaker, + MultiClassNMSShapeFunctor); diff --git a/paddle/phi/api/yaml/legacy_api.yaml b/paddle/phi/api/yaml/legacy_api.yaml index 6cbdf7424b7ca2c1f1de03234caf0ae16eba71f0..0e01074f0a5aff0e58fd8d2e98c67451dc945a41 100644 --- a/paddle/phi/api/yaml/legacy_api.yaml +++ b/paddle/phi/api/yaml/legacy_api.yaml @@ -1615,6 +1615,15 @@ func : multi_dot backward : multi_dot_grad +- api : multiclass_nms3 + args : (Tensor bboxes, Tensor scores, Tensor rois_num, float score_threshold, int nms_top_k, int keep_top_k, float nms_threshold=0.3, bool normalized=true, float nms_eta=1.0, int background_label=0) + output : Tensor(out), Tensor(index), Tensor(nms_rois_num) + infer_meta : + func : MultiClassNMSInferMeta + kernel : + func : multiclass_nms3 + optional : rois_num + # multinomial - api : multinomial args : (Tensor x, int num_samples, bool replacement) diff --git a/paddle/phi/infermeta/ternary.cc b/paddle/phi/infermeta/ternary.cc index 7dc799d989577d4d54ec779ef92d2cdf9fed96d0..3ee42b86d6e3e433280d38f00f35efc541d46c0b 100644 --- a/paddle/phi/infermeta/ternary.cc +++ b/paddle/phi/infermeta/ternary.cc @@ -743,6 +743,99 @@ void LinspaceInferMeta(const MetaTensor& start, LinspaceRawInferMeta(start, stop, number, out); } +void MultiClassNMSInferMeta(const MetaTensor& bboxes, + const MetaTensor& scores, + const MetaTensor& rois_num, + float score_threshold, + int nms_top_k, + int keep_top_k, + float nms_threshold, + bool normalized, + float nms_eta, + int background_label, + MetaTensor* out, + MetaTensor* index, + MetaTensor* nms_rois_num, + MetaConfig config) { + auto box_dims = bboxes.dims(); + auto score_dims = scores.dims(); + auto score_size = score_dims.size(); + + if (config.is_runtime) { + PADDLE_ENFORCE_EQ( + score_size == 2 || score_size == 3, + true, + errors::InvalidArgument("The rank of Input(Scores) must be 2 or 3" + ". But received rank = %d", + score_size)); + PADDLE_ENFORCE_EQ( + box_dims.size(), + 3, + errors::InvalidArgument("The rank of Input(BBoxes) must be 3" + ". But received rank = %d", + box_dims.size())); + if (score_size == 3) { + PADDLE_ENFORCE_EQ(box_dims[2] == 4 || box_dims[2] == 8 || + box_dims[2] == 16 || box_dims[2] == 24 || + box_dims[2] == 32, + true, + errors::InvalidArgument( + "The last dimension of Input" + "(BBoxes) must be 4 or 8, " + "represents the layout of coordinate " + "[xmin, ymin, xmax, ymax] or " + "4 points: [x1, y1, x2, y2, x3, y3, x4, y4] or " + "8 points: [xi, yi] i= 1,2,...,8 or " + "12 points: [xi, yi] i= 1,2,...,12 or " + "16 points: [xi, yi] i= 1,2,...,16")); + PADDLE_ENFORCE_EQ( + box_dims[1], + score_dims[2], + errors::InvalidArgument( + "The 2nd dimension of Input(BBoxes) must be equal to " + "last dimension of Input(Scores), which represents the " + "predicted bboxes." + "But received box_dims[1](%s) != socre_dims[2](%s)", + box_dims[1], + score_dims[2])); + } else { + PADDLE_ENFORCE_EQ(box_dims[2], + 4, + errors::InvalidArgument( + "The last dimension of Input" + "(BBoxes) must be 4. But received dimension = %d", + box_dims[2])); + PADDLE_ENFORCE_EQ( + box_dims[1], + score_dims[1], + errors::InvalidArgument( + "The 2nd dimension of Input" + "(BBoxes) must be equal to the 2nd dimension of Input(Scores). " + "But received box dimension = %d, score dimension = %d", + box_dims[1], + score_dims[1])); + } + } + PADDLE_ENFORCE_NE(out, + nullptr, + errors::InvalidArgument( + "The out in MultiClassNMSInferMeta can't be nullptr.")); + PADDLE_ENFORCE_NE( + index, + nullptr, + errors::InvalidArgument( + "The index in MultiClassNMSInferMeta can't be nullptr.")); + // Here the box_dims[0] is not the real dimension of output. + // It will be rewritten in the computing kernel. + + out->set_dims(phi::make_ddim({-1, box_dims[2] + 2})); + out->set_dtype(bboxes.dtype()); + index->set_dims(phi::make_ddim({-1, box_dims[2] + 2})); + index->set_dtype(DataType::INT32); + nms_rois_num->set_dims(phi::make_ddim({-1})); + nms_rois_num->set_dtype(DataType::INT32); +} + void NllLossRawInferMeta(const MetaTensor& input, const MetaTensor& label, const MetaTensor& weight, diff --git a/paddle/phi/infermeta/ternary.h b/paddle/phi/infermeta/ternary.h index 6cf9b169d6236c3819e2bb5cd5884b00c5bc0838..55a63b1c957c40257024117942c21382df676e22 100644 --- a/paddle/phi/infermeta/ternary.h +++ b/paddle/phi/infermeta/ternary.h @@ -123,6 +123,21 @@ void LinspaceInferMeta(const MetaTensor& start, DataType dtype, MetaTensor* out); +void MultiClassNMSInferMeta(const MetaTensor& bboxes, + const MetaTensor& scores, + const MetaTensor& rois_num, + float score_threshold, + int nms_top_k, + int keep_top_k, + float nms_threshold, + bool normalized, + float nms_eta, + int background_label, + MetaTensor* out, + MetaTensor* index, + MetaTensor* nms_rois_num, + MetaConfig config = MetaConfig()); + void NllLossRawInferMeta(const MetaTensor& input, const MetaTensor& label, const MetaTensor& weight, diff --git a/paddle/phi/kernels/CMakeLists.txt b/paddle/phi/kernels/CMakeLists.txt index 98ea91ce5a19f0d97ef8223e268bd716dea4455e..98982b8230ae7e73d415d79f067ed97e811ab51d 100644 --- a/paddle/phi/kernels/CMakeLists.txt +++ b/paddle/phi/kernels/CMakeLists.txt @@ -80,7 +80,8 @@ set(COMMON_KERNEL_DEPS lod_utils custom_kernel string_infermeta - utf8proc) + utf8proc + gpc) copy_if_different(${kernel_declare_file} ${kernel_declare_file_final}) diff --git a/paddle/phi/kernels/cpu/multiclass_nms3_kernel.cc b/paddle/phi/kernels/cpu/multiclass_nms3_kernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..e285e2aec5dc8d213306bb4780b24579700b136f --- /dev/null +++ b/paddle/phi/kernels/cpu/multiclass_nms3_kernel.cc @@ -0,0 +1,627 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/kernels/multiclass_nms3_kernel.h" + +#include "paddle/fluid/operators/detection/gpc.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/core/tensor_utils.h" + +namespace phi { + +using gpc::gpc_free_polygon; +using gpc::gpc_polygon_clip; + +template +class Point_ { + public: + // default constructor + Point_() {} + Point_(T _x, T _y) {} + Point_(const Point_& pt) {} + + Point_& operator=(const Point_& pt); + // conversion to another data type + // template operator Point_<_T>() const; + // conversion to the old-style C structures + // operator Vec() const; + + // checks whether the point is inside the specified rectangle + // bool inside(const Rect_& r) const; + T x; //!< x coordinate of the point + T y; //!< y coordinate of the point +}; + +template +void Array2PointVec(const T* box, + const size_t box_size, + std::vector>* vec) { + size_t pts_num = box_size / 2; + (*vec).resize(pts_num); + for (size_t i = 0; i < pts_num; i++) { + (*vec).at(i).x = box[2 * i]; + (*vec).at(i).y = box[2 * i + 1]; + } +} + +template +void Array2Poly(const T* box, const size_t box_size, gpc::gpc_polygon* poly) { + size_t pts_num = box_size / 2; + (*poly).num_contours = 1; + (*poly).hole = reinterpret_cast(malloc(sizeof(int))); + (*poly).hole[0] = 0; + (*poly).contour = (gpc::gpc_vertex_list*)malloc(sizeof(gpc::gpc_vertex_list)); + (*poly).contour->num_vertices = pts_num; + (*poly).contour->vertex = + (gpc::gpc_vertex*)malloc(sizeof(gpc::gpc_vertex) * pts_num); + for (size_t i = 0; i < pts_num; ++i) { + (*poly).contour->vertex[i].x = box[2 * i]; + (*poly).contour->vertex[i].y = box[2 * i + 1]; + } +} + +template +void PointVec2Poly(const std::vector>& vec, gpc::gpc_polygon* poly) { + int pts_num = vec.size(); + (*poly).num_contours = 1; + (*poly).hole = reinterpret_cast(malloc(sizeof(int))); + (*poly).hole[0] = 0; + (*poly).contour = (gpc::gpc_vertex_list*)malloc(sizeof(gpc::gpc_vertex_list)); + (*poly).contour->num_vertices = pts_num; + (*poly).contour->vertex = + (gpc::gpc_vertex*)malloc(sizeof(gpc::gpc_vertex) * pts_num); + for (size_t i = 0; i < pts_num; ++i) { + (*poly).contour->vertex[i].x = vec[i].x; + (*poly).contour->vertex[i].y = vec[i].y; + } +} + +template +void Poly2PointVec(const gpc::gpc_vertex_list& contour, + std::vector>* vec) { + int pts_num = contour.num_vertices; + (*vec).resize(pts_num); + for (int i = 0; i < pts_num; i++) { + (*vec).at(i).x = contour.vertex[i].x; + (*vec).at(i).y = contour.vertex[i].y; + } +} + +template +T GetContourArea(const std::vector>& vec) { + size_t pts_num = vec.size(); + if (pts_num < 3) return T(0.); + T area = T(0.); + for (size_t i = 0; i < pts_num; ++i) { + area += vec[i].x * vec[(i + 1) % pts_num].y - + vec[i].y * vec[(i + 1) % pts_num].x; + } + return std::fabs(area / 2.0); +} + +template +T PolyArea(const T* box, const size_t box_size, const bool normalized) { + // If coordinate values are is invalid + // if area size <= 0, return 0. + std::vector> vec; + Array2PointVec(box, box_size, &vec); + return GetContourArea(vec); +} + +template +T PolyOverlapArea(const T* box1, + const T* box2, + const size_t box_size, + const bool normalized) { + gpc::gpc_polygon poly1; + gpc::gpc_polygon poly2; + Array2Poly(box1, box_size, &poly1); + Array2Poly(box2, box_size, &poly2); + gpc::gpc_polygon respoly; + gpc::gpc_op op = gpc::GPC_INT; + gpc::gpc_polygon_clip(op, &poly2, &poly1, &respoly); + + T inter_area = T(0.); + int contour_num = respoly.num_contours; + for (int i = 0; i < contour_num; ++i) { + std::vector> resvec; + Poly2PointVec(respoly.contour[i], &resvec); + // inter_area += std::fabs(cv::contourArea(resvec)) + 0.5f * + // (cv::arcLength(resvec, true)); + inter_area += GetContourArea(resvec); + } + + gpc::gpc_free_polygon(&poly1); + gpc::gpc_free_polygon(&poly2); + gpc::gpc_free_polygon(&respoly); + return inter_area; +} + +template +bool SortScorePairDescend(const std::pair& pair1, + const std::pair& pair2) { + return pair1.first > pair2.first; +} + +template +static inline void GetMaxScoreIndex( + const std::vector& scores, + const T threshold, + int top_k, + std::vector>* sorted_indices) { + for (size_t i = 0; i < scores.size(); ++i) { + if (scores[i] > threshold) { + sorted_indices->push_back(std::make_pair(scores[i], i)); + } + } + // Sort the score pair according to the scores in descending order + std::stable_sort(sorted_indices->begin(), + sorted_indices->end(), + SortScorePairDescend); + // Keep top_k scores if needed. + if (top_k > -1 && top_k < static_cast(sorted_indices->size())) { + sorted_indices->resize(top_k); + } +} + +template +static inline T BBoxArea(const T* box, const bool normalized) { + if (box[2] < box[0] || box[3] < box[1]) { + // If coordinate values are is invalid + // (e.g. xmax < xmin or ymax < ymin), return 0. + return static_cast(0.); + } else { + const T w = box[2] - box[0]; + const T h = box[3] - box[1]; + if (normalized) { + return w * h; + } else { + // If coordinate values are not within range [0, 1]. + return (w + 1) * (h + 1); + } + } +} + +template +static inline T JaccardOverlap(const T* box1, + const T* box2, + const bool normalized) { + if (box2[0] > box1[2] || box2[2] < box1[0] || box2[1] > box1[3] || + box2[3] < box1[1]) { + return static_cast(0.); + } else { + const T inter_xmin = std::max(box1[0], box2[0]); + const T inter_ymin = std::max(box1[1], box2[1]); + const T inter_xmax = std::min(box1[2], box2[2]); + const T inter_ymax = std::min(box1[3], box2[3]); + T norm = normalized ? static_cast(0.) : static_cast(1.); + T inter_w = inter_xmax - inter_xmin + norm; + T inter_h = inter_ymax - inter_ymin + norm; + const T inter_area = inter_w * inter_h; + const T bbox1_area = BBoxArea(box1, normalized); + const T bbox2_area = BBoxArea(box2, normalized); + return inter_area / (bbox1_area + bbox2_area - inter_area); + } +} + +template +T PolyIoU(const T* box1, + const T* box2, + const size_t box_size, + const bool normalized) { + T bbox1_area = PolyArea(box1, box_size, normalized); + T bbox2_area = PolyArea(box2, box_size, normalized); + T inter_area = PolyOverlapArea(box1, box2, box_size, normalized); + if (bbox1_area == 0 || bbox2_area == 0 || inter_area == 0) { + // If coordinate values are invalid + // if area size <= 0, return 0. + return T(0.); + } else { + return inter_area / (bbox1_area + bbox2_area - inter_area); + } +} + +inline std::vector GetNmsLodFromRoisNum(const DenseTensor* rois_num) { + std::vector rois_lod; + auto* rois_num_data = rois_num->data(); + rois_lod.push_back(static_cast(0)); + for (int i = 0; i < rois_num->numel(); ++i) { + rois_lod.push_back(rois_lod.back() + static_cast(rois_num_data[i])); + } + return rois_lod; +} + +template +void SliceOneClass(const Context& ctx, + const DenseTensor& items, + const int class_id, + DenseTensor* one_class_item) { + // T* item_data = one_class_item->mutable_data(ctx.GetPlace()); + T* item_data = ctx.template Alloc(one_class_item); + const T* items_data = items.data(); + const int64_t num_item = items.dims()[0]; + const int class_num = items.dims()[1]; + if (items.dims().size() == 3) { + int item_size = items.dims()[2]; + for (int i = 0; i < num_item; ++i) { + std::memcpy(item_data + i * item_size, + items_data + i * class_num * item_size + class_id * item_size, + sizeof(T) * item_size); + } + } else { + for (int i = 0; i < num_item; ++i) { + item_data[i] = items_data[i * class_num + class_id]; + } + } +} + +template +void NMSFast(const DenseTensor& bbox, + const DenseTensor& scores, + const T score_threshold, + const T nms_threshold, + const T eta, + const int64_t top_k, + std::vector* selected_indices, + const bool normalized) { + // The total boxes for each instance. + int64_t num_boxes = bbox.dims()[0]; + // 4: [xmin ymin xmax ymax] + // 8: [x1 y1 x2 y2 x3 y3 x4 y4] + // 16, 24, or 32: [x1 y1 x2 y2 ... xn yn], n = 8, 12 or 16 + int64_t box_size = bbox.dims()[1]; + + std::vector scores_data(num_boxes); + std::copy_n(scores.data(), num_boxes, scores_data.begin()); + std::vector> sorted_indices; + GetMaxScoreIndex(scores_data, score_threshold, top_k, &sorted_indices); + + selected_indices->clear(); + T adaptive_threshold = nms_threshold; + const T* bbox_data = bbox.data(); + + while (sorted_indices.size() != 0) { + const int idx = sorted_indices.front().second; + bool keep = true; + for (size_t k = 0; k < selected_indices->size(); ++k) { + if (keep) { + const int kept_idx = (*selected_indices)[k]; + T overlap = T(0.); + // 4: [xmin ymin xmax ymax] + if (box_size == 4) { + overlap = JaccardOverlap(bbox_data + idx * box_size, + bbox_data + kept_idx * box_size, + normalized); + } + // 8: [x1 y1 x2 y2 x3 y3 x4 y4] or 16, 24, 32 + if (box_size == 8 || box_size == 16 || box_size == 24 || + box_size == 32) { + overlap = PolyIoU(bbox_data + idx * box_size, + bbox_data + kept_idx * box_size, + box_size, + normalized); + } + keep = overlap <= adaptive_threshold; + } else { + break; + } + } + if (keep) { + selected_indices->push_back(idx); + } + sorted_indices.erase(sorted_indices.begin()); + if (keep && eta < 1 && adaptive_threshold > 0.5) { + adaptive_threshold *= eta; + } + } +} + +template +void MultiClassNMS(const Context& ctx, + const DenseTensor& scores, + const DenseTensor& bboxes, + const int scores_size, + float scorethreshold, + int nms_top_k, + int keep_top_k, + float nmsthreshold, + bool normalized, + float nmseta, + int background_label, + std::map>* indices, + int* num_nmsed_out) { + T nms_threshold = static_cast(nmsthreshold); + T nms_eta = static_cast(nmseta); + T score_threshold = static_cast(scorethreshold); + + int num_det = 0; + + int64_t class_num = scores_size == 3 ? scores.dims()[0] : scores.dims()[1]; + DenseTensor bbox_slice, score_slice; + for (int64_t c = 0; c < class_num; ++c) { + if (c == background_label) continue; + if (scores_size == 3) { + score_slice = scores.Slice(c, c + 1); + bbox_slice = bboxes; + } else { + score_slice.Resize({scores.dims()[0], 1}); + bbox_slice.Resize({scores.dims()[0], 4}); + SliceOneClass(ctx, scores, c, &score_slice); + SliceOneClass(ctx, bboxes, c, &bbox_slice); + } + NMSFast(bbox_slice, + score_slice, + score_threshold, + nms_threshold, + nms_eta, + nms_top_k, + &((*indices)[c]), + normalized); + if (scores_size == 2) { + std::stable_sort((*indices)[c].begin(), (*indices)[c].end()); + } + num_det += (*indices)[c].size(); + } + + *num_nmsed_out = num_det; + const T* scores_data = scores.data(); + if (keep_top_k > -1 && num_det > keep_top_k) { + const T* sdata; + std::vector>> score_index_pairs; + for (const auto& it : *indices) { + int label = it.first; + if (scores_size == 3) { + sdata = scores_data + label * scores.dims()[1]; + } else { + score_slice.Resize({scores.dims()[0], 1}); + SliceOneClass(ctx, scores, label, &score_slice); + sdata = score_slice.data(); + } + const std::vector& label_indices = it.second; + for (size_t j = 0; j < label_indices.size(); ++j) { + int idx = label_indices[j]; + score_index_pairs.push_back( + std::make_pair(sdata[idx], std::make_pair(label, idx))); + } + } + // Keep top k results per image. + std::stable_sort(score_index_pairs.begin(), + score_index_pairs.end(), + SortScorePairDescend>); + score_index_pairs.resize(keep_top_k); + + // Store the new indices. + std::map> new_indices; + for (size_t j = 0; j < score_index_pairs.size(); ++j) { + int label = score_index_pairs[j].second.first; + int idx = score_index_pairs[j].second.second; + new_indices[label].push_back(idx); + } + if (scores_size == 2) { + for (const auto& it : new_indices) { + int label = it.first; + std::stable_sort(new_indices[label].begin(), new_indices[label].end()); + } + } + new_indices.swap(*indices); + *num_nmsed_out = keep_top_k; + } +} + +template +void MultiClassOutput(const Context& ctx, + const DenseTensor& scores, + const DenseTensor& bboxes, + const std::map>& selected_indices, + const int scores_size, + DenseTensor* out, + int* oindices = nullptr, + const int offset = 0) { + int64_t class_num = scores.dims()[1]; + int64_t predict_dim = scores.dims()[1]; + int64_t box_size = bboxes.dims()[1]; + if (scores_size == 2) { + box_size = bboxes.dims()[2]; + } + int64_t out_dim = box_size + 2; + auto* scores_data = scores.data(); + auto* bboxes_data = bboxes.data(); + auto* odata = out->data(); + const T* sdata; + DenseTensor bbox; + bbox.Resize({scores.dims()[0], box_size}); + int count = 0; + for (const auto& it : selected_indices) { + int label = it.first; + const std::vector& indices = it.second; + if (scores_size == 2) { + SliceOneClass(ctx, bboxes, label, &bbox); + } else { + sdata = scores_data + label * predict_dim; + } + + for (size_t j = 0; j < indices.size(); ++j) { + int idx = indices[j]; + odata[count * out_dim] = label; // label + const T* bdata; + if (scores_size == 3) { + bdata = bboxes_data + idx * box_size; + odata[count * out_dim + 1] = sdata[idx]; // score + if (oindices != nullptr) { + oindices[count] = offset + idx; + } + } else { + bdata = bbox.data() + idx * box_size; + odata[count * out_dim + 1] = *(scores_data + idx * class_num + label); + if (oindices != nullptr) { + oindices[count] = offset + idx * class_num + label; + } + } + // xmin, ymin, xmax, ymax or multi-points coordinates + std::memcpy(odata + count * out_dim + 2, bdata, box_size * sizeof(T)); + count++; + } + } +} + +template +void MultiClassNMSKernel(const Context& ctx, + const DenseTensor& bboxes, + const DenseTensor& scores, + const paddle::optional& rois_num, + float score_threshold, + int nms_top_k, + int keep_top_k, + float nms_threshold, + bool normalized, + float nms_eta, + int background_label, + DenseTensor* out, + DenseTensor* index, + DenseTensor* nms_rois_num) { + bool return_index = index != nullptr; + bool has_roisnum = rois_num.get_ptr() != nullptr; + auto score_dims = scores.dims(); + auto score_size = score_dims.size(); + + std::vector>> all_indices; + std::vector batch_starts = {0}; + int64_t batch_size = score_dims[0]; + int64_t box_dim = bboxes.dims()[2]; + int64_t out_dim = box_dim + 2; + int num_nmsed_out = 0; + DenseTensor boxes_slice, scores_slice; + int n = 0; + if (has_roisnum) { + n = score_size == 3 ? batch_size : rois_num.get_ptr()->numel(); + } else { + n = score_size == 3 ? batch_size : bboxes.lod().back().size() - 1; + } + for (int i = 0; i < n; ++i) { + std::map> indices; + if (score_size == 3) { + scores_slice = scores.Slice(i, i + 1); + scores_slice.Resize({score_dims[1], score_dims[2]}); + boxes_slice = bboxes.Slice(i, i + 1); + boxes_slice.Resize({score_dims[2], box_dim}); + } else { + std::vector boxes_lod; + if (has_roisnum) { + boxes_lod = GetNmsLodFromRoisNum(rois_num.get_ptr()); + } else { + boxes_lod = bboxes.lod().back(); + } + if (boxes_lod[i] == boxes_lod[i + 1]) { + all_indices.push_back(indices); + batch_starts.push_back(batch_starts.back()); + continue; + } + scores_slice = scores.Slice(boxes_lod[i], boxes_lod[i + 1]); + boxes_slice = bboxes.Slice(boxes_lod[i], boxes_lod[i + 1]); + } + MultiClassNMS(ctx, + scores_slice, + boxes_slice, + score_size, + score_threshold, + nms_top_k, + keep_top_k, + nms_threshold, + normalized, + nms_eta, + background_label, + &indices, + &num_nmsed_out); + all_indices.push_back(indices); + batch_starts.push_back(batch_starts.back() + num_nmsed_out); + } + + int num_kept = batch_starts.back(); + if (num_kept == 0) { + if (return_index) { + out->Resize({0, out_dim}); + ctx.template Alloc(out); + index->Resize({0, 1}); + ctx.template Alloc(index); + } else { + out->Resize({1, 1}); + T* od = ctx.template Alloc(out); + od[0] = -1; + batch_starts = {0, 1}; + } + } else { + out->Resize({num_kept, out_dim}); + ctx.template Alloc(out); + int offset = 0; + int* oindices = nullptr; + for (int i = 0; i < n; ++i) { + if (score_size == 3) { + scores_slice = scores.Slice(i, i + 1); + boxes_slice = bboxes.Slice(i, i + 1); + scores_slice.Resize({score_dims[1], score_dims[2]}); + boxes_slice.Resize({score_dims[2], box_dim}); + if (return_index) { + offset = i * score_dims[2]; + } + } else { + std::vector boxes_lod; + if (has_roisnum) { + boxes_lod = GetNmsLodFromRoisNum(rois_num.get_ptr()); + } else { + boxes_lod = bboxes.lod().back(); + } + if (boxes_lod[i] == boxes_lod[i + 1]) continue; + scores_slice = scores.Slice(boxes_lod[i], boxes_lod[i + 1]); + boxes_slice = bboxes.Slice(boxes_lod[i], boxes_lod[i + 1]); + if (return_index) { + offset = boxes_lod[i] * score_dims[1]; + } + } + + int64_t s = batch_starts[i]; + int64_t e = batch_starts[i + 1]; + if (e > s) { + DenseTensor nout = out->Slice(s, e); + if (return_index) { + index->Resize({num_kept, 1}); + int* output_idx = ctx.template Alloc(index); + oindices = output_idx + s; + } + MultiClassOutput(ctx, + scores_slice, + boxes_slice, + all_indices[i], + score_dims.size(), + &nout, + oindices, + offset); + } + } + } + if (nms_rois_num != nullptr) { + nms_rois_num->Resize({n}); + ctx.template Alloc(nms_rois_num); + int* num_data = nms_rois_num->data(); + for (int i = 1; i <= n; i++) { + num_data[i - 1] = batch_starts[i] - batch_starts[i - 1]; + } + nms_rois_num->Resize({n}); + } +} + +} // namespace phi + +PD_REGISTER_KERNEL( + multiclass_nms3, CPU, ALL_LAYOUT, phi::MultiClassNMSKernel, float, double) { +} diff --git a/paddle/phi/kernels/multiclass_nms3_kernel.h b/paddle/phi/kernels/multiclass_nms3_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..2d1dd383930888fd2712bdf91f4d039bcf425f1a --- /dev/null +++ b/paddle/phi/kernels/multiclass_nms3_kernel.h @@ -0,0 +1,37 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/phi/core/dense_tensor.h" + +namespace phi { + +template +void MultiClassNMSKernel(const Context& ctx, + const DenseTensor& bboxes, + const DenseTensor& scores, + const paddle::optional& rois_num, + float score_threshold, + int nms_top_k, + int keep_top_k, + float nms_threshold, + bool normalized, + float nms_eta, + int background_label, + DenseTensor* out, + DenseTensor* index, + DenseTensor* nms_rois_num); + +} // namespace phi diff --git a/paddle/phi/ops/compat/multiclass_nms3_sig.cc b/paddle/phi/ops/compat/multiclass_nms3_sig.cc new file mode 100644 index 0000000000000000000000000000000000000000..c35434071c80b0d37865fd750c7acfee40bf6eb1 --- /dev/null +++ b/paddle/phi/ops/compat/multiclass_nms3_sig.cc @@ -0,0 +1,36 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/core/compat/op_utils.h" + +namespace phi { + +KernelSignature MultiClassNMS3OpArgumentMapping( + const ArgumentMappingContext& ctx) { + return KernelSignature("multiclass_nms3", + {"BBoxes", "Scores", "RoisNum"}, + {"score_threshold", + "nms_top_k", + "keep_top_k", + "nms_threshold", + "normalized", + "nms_eta", + "background_label"}, + {"Out", "Index", "NmsRoisNum"}); +} + +} // namespace phi + +PD_REGISTER_ARG_MAPPING_FN(multiclass_nms3, + phi::MultiClassNMS3OpArgumentMapping); diff --git a/python/paddle/fluid/tests/unittests/op_test.py b/python/paddle/fluid/tests/unittests/op_test.py index b0274431d453afdd090943abadb17f1c8123e27e..160b4e2e6857df522a128790e744e63cb9f0041c 100644 --- a/python/paddle/fluid/tests/unittests/op_test.py +++ b/python/paddle/fluid/tests/unittests/op_test.py @@ -1457,6 +1457,7 @@ class OpTest(unittest.TestCase): # see details: https://stackoverflow.com/questions/38331703/why-does-numpys-broadcasting-sometimes-allow-comparing-arrays-of-different-leng if expect_np.size == 0: self.op_test.assertTrue(actual_np.size == 0) # }}} + # print("actual_np, expect_np", actual_np, expect_np) self._compare_numpy(name, actual_np, expect_np) if isinstance(expect, tuple): self._compare_list(name, actual, expect) diff --git a/python/paddle/fluid/tests/unittests/test_multiclass_nms_op.py b/python/paddle/fluid/tests/unittests/test_multiclass_nms_op.py index 67650158bef169bdca090491b954f8aa25b19090..a53c277ad02cdc9b0c1ddc8f50169e9d1ba4bfb8 100644 --- a/python/paddle/fluid/tests/unittests/test_multiclass_nms_op.py +++ b/python/paddle/fluid/tests/unittests/test_multiclass_nms_op.py @@ -19,7 +19,81 @@ import copy from op_test import OpTest import paddle import paddle.fluid as fluid -from paddle.fluid import Program, program_guard +from paddle.fluid import Program, program_guard, in_dygraph_mode, _non_static_mode +from paddle.fluid.layer_helper import LayerHelper +from paddle import _C_ops + + +def multiclass_nms3(bboxes, + scores, + rois_num=None, + score_threshold=0.3, + nms_top_k=1000, + keep_top_k=100, + nms_threshold=0.3, + normalized=True, + nms_eta=1., + background_label=-1, + return_index=True, + return_rois_num=True, + name=None): + + helper = LayerHelper('multiclass_nms3', **locals()) + + if in_dygraph_mode(): + attrs = (score_threshold, nms_top_k, keep_top_k, nms_threshold, + normalized, nms_eta, background_label) + output, index, nms_rois_num = _C_ops.final_state_multiclass_nms3( + bboxes, scores, rois_num, *attrs) + if not return_index: + index = None + return output, index, nms_rois_num + elif _non_static_mode(): + attrs = ('background_label', background_label, 'score_threshold', + score_threshold, 'nms_top_k', nms_top_k, 'nms_threshold', + nms_threshold, 'keep_top_k', keep_top_k, 'nms_eta', nms_eta, + 'normalized', normalized) + output, index, nms_rois_num = _C_ops.multiclass_nms3( + bboxes, scores, rois_num, *attrs) + if not return_index: + index = None + return output, index, nms_rois_num + + else: + output = helper.create_variable_for_type_inference(dtype=bboxes.dtype) + index = helper.create_variable_for_type_inference(dtype='int32') + + inputs = {'BBoxes': bboxes, 'Scores': scores} + outputs = {'Out': output, 'Index': index} + + if rois_num is not None: + inputs['RoisNum'] = rois_num + + if return_rois_num: + nms_rois_num = helper.create_variable_for_type_inference( + dtype='int32') + outputs['NmsRoisNum'] = nms_rois_num + + helper.append_op(type="multiclass_nms3", + inputs=inputs, + attrs={ + 'background_label': background_label, + 'score_threshold': score_threshold, + 'nms_top_k': nms_top_k, + 'nms_threshold': nms_threshold, + 'keep_top_k': keep_top_k, + 'nms_eta': nms_eta, + 'normalized': normalized + }, + outputs=outputs) + output.stop_gradient = True + index.stop_gradient = True + if not return_index: + index = None + if not return_rois_num: + nms_rois_num = None + + return output, nms_rois_num, index def softmax(x): @@ -541,8 +615,9 @@ class TestMulticlassNMS2LoDInput(TestMulticlassNMSLoDInput): 'normalized': normalized, } - def test_check_output(self): - self.check_output() + +def test_check_output(self): + self.check_output() class TestMulticlassNMS2LoDNoOutput(TestMulticlassNMS2LoDInput): @@ -590,6 +665,7 @@ class TestMulticlassNMSError(unittest.TestCase): class TestMulticlassNMS3Op(TestMulticlassNMS2Op): def setUp(self): + self.python_api = multiclass_nms3 self.set_argument() N = 7 M = 1200 @@ -623,8 +699,8 @@ class TestMulticlassNMS3Op(TestMulticlassNMS2Op): self.op_type = 'multiclass_nms3' self.inputs = {'BBoxes': boxes, 'Scores': scores} self.outputs = { - 'Out': (nmsed_outs, [lod]), - 'Index': (index_outs, [lod]), + 'Out': nmsed_outs, + 'Index': index_outs, 'NmsRoisNum': np.array(lod).astype('int32') } self.attrs = { @@ -638,7 +714,7 @@ class TestMulticlassNMS3Op(TestMulticlassNMS2Op): } def test_check_output(self): - self.check_output() + self.check_output(check_eager=True) class TestMulticlassNMS3OpNoOutput(TestMulticlassNMS3Op): @@ -649,71 +725,6 @@ class TestMulticlassNMS3OpNoOutput(TestMulticlassNMS3Op): self.score_threshold = 2.0 -class TestMulticlassNMS3LoDInput(TestMulticlassNMS2LoDInput): - - def setUp(self): - self.set_argument() - M = 1200 - C = 21 - BOX_SIZE = 4 - box_lod = [[1200]] - background = 0 - nms_threshold = 0.3 - nms_top_k = 400 - keep_top_k = 200 - score_threshold = self.score_threshold - normalized = False - - scores = np.random.random((M, C)).astype('float32') - - scores = np.apply_along_axis(softmax, 1, scores) - - boxes = np.random.random((M, C, BOX_SIZE)).astype('float32') - boxes[:, :, 0] = boxes[:, :, 0] * 10 - boxes[:, :, 1] = boxes[:, :, 1] * 10 - boxes[:, :, 2] = boxes[:, :, 2] * 10 + 10 - boxes[:, :, 3] = boxes[:, :, 3] * 10 + 10 - - det_outs, lod = lod_multiclass_nms(boxes, scores, background, - score_threshold, nms_threshold, - nms_top_k, keep_top_k, box_lod, - normalized) - - det_outs = np.array(det_outs) - nmsed_outs = det_outs[:, :-1].astype('float32') if len( - det_outs) else det_outs - self.op_type = 'multiclass_nms3' - self.inputs = { - 'BBoxes': (boxes, box_lod), - 'Scores': (scores, box_lod), - 'RoisNum': np.array(box_lod).astype('int32') - } - self.outputs = { - 'Out': (nmsed_outs, [lod]), - 'NmsRoisNum': np.array(lod).astype('int32') - } - self.attrs = { - 'background_label': 0, - 'nms_threshold': nms_threshold, - 'nms_top_k': nms_top_k, - 'keep_top_k': keep_top_k, - 'score_threshold': score_threshold, - 'nms_eta': 1.0, - 'normalized': normalized, - } - - def test_check_output(self): - self.check_output() - - -class TestMulticlassNMS3LoDNoOutput(TestMulticlassNMS3LoDInput): - - def set_argument(self): - # Here set 2.0 to test the case there is no outputs. - # In practical use, 0.0 < score_threshold < 1.0 - self.score_threshold = 2.0 - - if __name__ == '__main__': paddle.enable_static() unittest.main()