From f3a09de4912b102e78f1a3917acd6050bed780ad Mon Sep 17 00:00:00 2001 From: cambriconhsq <106155938+cambriconhsq@users.noreply.github.com> Date: Fri, 17 Jun 2022 17:51:05 +0800 Subject: [PATCH] [MLU] add mlu kernel for iou_similarity (#43503) --- .../fluid/operators/detection/CMakeLists.txt | 3 + .../detection/iou_similarity_op_mlu.cc | 227 ++++++++++++++++++ paddle/fluid/operators/mlu/mlu_baseop.cc | 14 ++ paddle/fluid/operators/mlu/mlu_baseop.h | 6 + .../mlu/test_iou_similarity_op_mlu.py | 131 ++++++++++ 5 files changed, 381 insertions(+) create mode 100644 paddle/fluid/operators/detection/iou_similarity_op_mlu.cc create mode 100644 python/paddle/fluid/tests/unittests/mlu/test_iou_similarity_op_mlu.py diff --git a/paddle/fluid/operators/detection/CMakeLists.txt b/paddle/fluid/operators/detection/CMakeLists.txt index 6e5ea3e8aa7..c05c39e88d7 100644 --- a/paddle/fluid/operators/detection/CMakeLists.txt +++ b/paddle/fluid/operators/detection/CMakeLists.txt @@ -45,6 +45,9 @@ if(WITH_XPU) detection_library(prior_box_op SRCS prior_box_op.cc prior_box_op_xpu.cc) detection_library(generate_proposals_v2_op SRCS generate_proposals_v2_op.cc generate_proposals_v2_op_xpu.cc) +elseif(WITH_MLU) + detection_library(iou_similarity_op SRCS iou_similarity_op.cc + iou_similarity_op_mlu.cc) elseif(WITH_ASCEND_CL) detection_library(iou_similarity_op SRCS iou_similarity_op.cc iou_similarity_op_npu.cc) diff --git a/paddle/fluid/operators/detection/iou_similarity_op_mlu.cc b/paddle/fluid/operators/detection/iou_similarity_op_mlu.cc new file mode 100644 index 00000000000..2d86a264b11 --- /dev/null +++ b/paddle/fluid/operators/detection/iou_similarity_op_mlu.cc @@ -0,0 +1,227 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/operators/detection/iou_similarity_op.h"
+#include "paddle/fluid/operators/mlu/mlu_baseop.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+
+template <typename T>
+struct IouFunction {  // CNNL element-wise helpers; outputs are pre-allocated
+ public:
+  explicit IouFunction(const framework::ExecutionContext& ctx) : ctx(ctx) {
+    place = ctx.GetPlace();
+  }
+  void Transpose(const Tensor* x, Tensor* y, const std::vector<int>& axis) {
+    // y must be allocated by the caller (no reshape/alloc is done here).
+    TransposeFromMLUTensor<T>(ctx, axis, x, y, false /*need_reshape_or_alloc*/);
+  }
+  void Add(const Tensor* x, const Tensor* y, Tensor* z) {
+    // z = x + y; z (the output) must be allocated before the call.
+    MLUCnnlTensorDesc x_desc(*x);
+    MLUCnnlTensorDesc y_desc(*y);
+    MLUCnnlTensorDesc z_desc(*z);
+
+    MLUCnnlOpTensorDesc add_op_desc(CNNL_OP_TENSOR_ADD, ToCnnlDataType<T>(),
+                                    CNNL_NOT_PROPAGATE_NAN);
+    MLUCnnl::OpTensor(ctx, add_op_desc.get(), x_desc.get(), GetBasePtr(x),
+                      y_desc.get(), GetBasePtr(y), z_desc.get(), GetBasePtr(z),
+                      ToCnnlDataType<T>());
+  }
+
+  void Sub(const Tensor* x, const Tensor* y, Tensor* z) {
+    // z = x - y; z (the output) must be allocated before the call.
+    MLUCnnlTensorDesc x_desc(*x);
+    MLUCnnlTensorDesc y_desc(*y);
+    MLUCnnlTensorDesc z_desc(*z);
+
+    MLUCnnlOpTensorDesc sub_op_desc(CNNL_OP_TENSOR_SUB, ToCnnlDataType<T>(),
+                                    CNNL_NOT_PROPAGATE_NAN);
+    MLUCnnl::OpTensor(ctx, sub_op_desc.get(), x_desc.get(), GetBasePtr(x),
+                      y_desc.get(), GetBasePtr(y), z_desc.get(), GetBasePtr(z),
+                      ToCnnlDataType<T>());
+  }
+  void Mul(const Tensor* x, const Tensor* y, Tensor* z) {
+    // z = x * y; z (the output) must be allocated before the call.
+    MLUCnnlTensorDesc x_desc(*x);
+    MLUCnnlTensorDesc y_desc(*y);
+    MLUCnnlTensorDesc z_desc(*z);
+
+    MLUCnnlOpTensorDesc mul_op_desc(CNNL_OP_TENSOR_MUL, ToCnnlDataType<T>(),
+                                    CNNL_NOT_PROPAGATE_NAN);
+    MLUCnnl::OpTensor(ctx, mul_op_desc.get(), x_desc.get(), GetBasePtr(x),
+                      y_desc.get(), GetBasePtr(y), z_desc.get(), GetBasePtr(z),
+                      ToCnnlDataType<T>());
+  }
+  void DivNoNan(const Tensor* x, const Tensor* y, Tensor* z) {
+    // z = x / y via CNNL DivNoNan; z must be allocated before the call.
+    MLUCnnlTensorDesc x_desc(*x);
+    MLUCnnlTensorDesc y_desc(*y);
+    MLUCnnlTensorDesc z_desc(*z);
+
+    cnnlComputationPreference_t prefer = CNNL_COMPUTATION_FAST;
+
+    MLUCnnl::DivNoNan(ctx, prefer, x_desc.get(), GetBasePtr(x), y_desc.get(),
+                      GetBasePtr(y), z_desc.get(), GetBasePtr(z));
+  }
+  void Adds(const Tensor* x, float scalar, Tensor* y) {
+    // y = x + scalar (Transform, alpha = 1); y must be allocated first.
+    MLUCnnlTensorDesc x_desc(*x);
+    MLUCnnlTensorDesc y_desc(*y);
+    float alpha = 1.0;
+    float beta = scalar;
+    MLUCnnl::Transform(ctx, &alpha, &beta, x_desc.get(), GetBasePtr(x),
+                       y_desc.get(), GetBasePtr(y));
+  }
+  void Maximum(const Tensor* x, const Tensor* y, Tensor* z) {
+    // z = max(x, y) element-wise; z must be allocated before the call.
+    MLUCnnlTensorDesc x_desc(*x);
+    MLUCnnlTensorDesc y_desc(*y);
+    MLUCnnlTensorDesc z_desc(*z);
+
+    MLUCnnl::Maximum(ctx, x_desc.get(), GetBasePtr(x), y_desc.get(),
+                     GetBasePtr(y), z_desc.get(), GetBasePtr(z));
+  }
+  void Minimum(const Tensor* x, const Tensor* y, Tensor* z) {
+    // z = min(x, y) element-wise; z must be allocated before the call.
+    MLUCnnlTensorDesc x_desc(*x);
+    MLUCnnlTensorDesc y_desc(*y);
+    MLUCnnlTensorDesc z_desc(*z);
+
+    MLUCnnl::Minimum(ctx, x_desc.get(), GetBasePtr(x), y_desc.get(),
+                     GetBasePtr(y), z_desc.get(), GetBasePtr(z));
+  }
+
+ private:
+  platform::Place place;
+  const framework::ExecutionContext& ctx;
+};
+
+template <typename T>  // Pairwise IoU of X [N, 4] and Y [M, 4] -> Out [N, M]
+class IouSimilarityMLUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* x = ctx.Input<framework::LoDTensor>("X");
+    auto* y = ctx.Input<framework::Tensor>("Y");
+    bool normalized = ctx.Attr<bool>("box_normalized");
+    auto* out = ctx.Output<framework::LoDTensor>("Out");
+
+    auto _type = x->dtype();
+    auto place = ctx.GetPlace();
+
+    IouFunction<T> F(ctx);
+
+    auto N = x->dims()[0];
+    auto M = y->dims()[0];
+
+    out->mutable_data<T>({N, M}, place);
+    Tensor xt(_type);
+    Tensor yt(_type);
+    xt.mutable_data<T>({4, N}, place);
+    yt.mutable_data<T>({4, M}, place);
+    std::vector<int> vec_trans = {1, 0};  // perm: [K, 4] -> [4, K]
+    F.Transpose(x, &xt, vec_trans);
+    F.Transpose(y, &yt, vec_trans);
+    Tensor xmin1 = xt.Slice(0, 1);
+    Tensor ymin1 = xt.Slice(1, 2);
+    Tensor xmax1 = xt.Slice(2, 3);
+    Tensor ymax1 = xt.Slice(3, 4);
+    Tensor xmin2 = yt.Slice(0, 1);
+    Tensor ymin2 = yt.Slice(1, 2);
+    Tensor xmax2 = yt.Slice(2, 3);
+    Tensor ymax2 = yt.Slice(3, 4);
+    xmin1.Resize({N, 1});
+    ymin1.Resize({N, 1});
+    xmax1.Resize({N, 1});
+    ymax1.Resize({N, 1});
+    xmin2.Resize({1, M});
+    ymin2.Resize({1, M});
+    xmax2.Resize({1, M});
+    ymax2.Resize({1, M});
+
+    Tensor w1(_type);
+    Tensor h1(_type);
+    Tensor w2(_type);
+    Tensor h2(_type);
+    Tensor area1(_type);
+    Tensor area2(_type);
+    w1.mutable_data<T>({N, 1}, place);
+    h1.mutable_data<T>({N, 1}, place);
+    w2.mutable_data<T>({1, M}, place);
+    h2.mutable_data<T>({1, M}, place);
+    area1.mutable_data<T>({N, 1}, place);
+    area2.mutable_data<T>({1, M}, place);
+    F.Sub(&xmax1, &xmin1, &w1);
+    F.Sub(&ymax1, &ymin1, &h1);
+    F.Sub(&xmax2, &xmin2, &w2);
+    F.Sub(&ymax2, &ymin2, &h2);
+    if (!normalized) {  // un-normalized boxes: width/height are max - min + 1
+      F.Adds(&w1, 1.0f, &w1);
+      F.Adds(&h1, 1.0f, &h1);
+      F.Adds(&w2, 1.0f, &w2);
+      F.Adds(&h2, 1.0f, &h2);
+    }
+    F.Mul(&w1, &h1, &area1);
+    F.Mul(&w2, &h2, &area2);
+
+    Tensor inter_xmax(_type);
+    Tensor inter_ymax(_type);
+    Tensor inter_xmin(_type);
+    Tensor inter_ymin(_type);
+    inter_xmax.mutable_data<T>({N, M}, place);
+    inter_ymax.mutable_data<T>({N, M}, place);
+    inter_xmin.mutable_data<T>({N, M}, place);
+    inter_ymin.mutable_data<T>({N, M}, place);
+    F.Minimum(&xmax1, &xmax2, &inter_xmax);
+    F.Minimum(&ymax1, &ymax2, &inter_ymax);
+    F.Maximum(&xmin1, &xmin2, &inter_xmin);
+    F.Maximum(&ymin1, &ymin2, &inter_ymin);
+
+    Tensor inter_w(_type);
+    Tensor inter_h(_type);
+    inter_w.mutable_data<T>({N, M}, place);
+    inter_h.mutable_data<T>({N, M}, place);
+    F.Sub(&inter_xmax, &inter_xmin, &inter_w);
+    F.Sub(&inter_ymax, &inter_ymin, &inter_h);
+
+    if (!normalized) {
+      F.Adds(&inter_w, 1.0f, &inter_w);
+      F.Adds(&inter_h, 1.0f, &inter_h);
+    }
+    Tensor zeros(_type);  // one-element 0 used to clamp at zero
+    zeros.mutable_data<T>({1}, place);
+    FillMLUTensorWithHostValue(ctx, static_cast<T>(0), &zeros);
+    F.Maximum(&inter_w, &zeros, &inter_w);
+    F.Maximum(&inter_h, &zeros, &inter_h);
+
+    F.Mul(&inter_w, &inter_h, out);  // out temporarily holds the intersection
+    Tensor union_area(_type);
+    union_area.mutable_data<T>({N, M}, place);
+    F.Add(&area1, &area2, &union_area);
+    F.Sub(&union_area, out, &union_area);  // union = area1 + area2 - inter
+    F.DivNoNan(out, &union_area, out);  // IoU = inter / union
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+namespace plat = paddle::platform;
+
+REGISTER_OP_MLU_KERNEL(iou_similarity, ops::IouSimilarityMLUKernel<float>,
+                       ops::IouSimilarityMLUKernel<plat::float16>);
diff --git a/paddle/fluid/operators/mlu/mlu_baseop.cc b/paddle/fluid/operators/mlu/mlu_baseop.cc
index 8414a7921de..dd1ac814938 100644
--- a/paddle/fluid/operators/mlu/mlu_baseop.cc
+++ b/paddle/fluid/operators/mlu/mlu_baseop.cc
@@ -2857,6 +2857,20 @@ MLUCnnlTrigonDesc::~MLUCnnlTrigonDesc() {
       nullptr /*max_norm*/, nullptr /*norm_type*/, output_desc, output));
 }
 
+/* static */ void MLUCnnl::Transform(const ExecutionContext& ctx,
+                                     const void* alpha, const void* beta,
+                                     const cnnlTensorDescriptor_t input_desc,
+                                     const void* input,
+                                     const cnnlTensorDescriptor_t output_desc,
+                                     void* output) {
+  cnnlHandle_t handle = GetHandleFromCTX(ctx);
+
+  const cnnlPointerMode_t pointer_mode = CNNL_POINTER_MODE_HOST;
+  PADDLE_ENFORCE_MLU_SUCCESS(cnnlTransform_v2(handle, pointer_mode, alpha,
+                                              input_desc, input, beta,
+                                              output_desc, output));
+}
+
 /* static */ void MLUCnnl::EmbeddingBackward(
     const ExecutionContext& ctx, int padding_idx, bool scale_grad_by_freq,
     const cnnlTensorDescriptor_t indices_desc, const void* indices,
diff --git a/paddle/fluid/operators/mlu/mlu_baseop.h b/paddle/fluid/operators/mlu/mlu_baseop.h
index 6c5f716625c..636618bf2d9 100644
--- a/paddle/fluid/operators/mlu/mlu_baseop.h
+++ b/paddle/fluid/operators/mlu/mlu_baseop.h
@@ -1289,6 +1289,12 @@ class MLUCnnl {
       const cnnlTensorDescriptor_t indices_desc, const int* indices,
       const cnnlTensorDescriptor_t output_desc, void* output);
 
+  static void Transform(const ExecutionContext& ctx, const void* alpha,
+                        const void* beta,
+                        const cnnlTensorDescriptor_t input_desc,
+                        const void* input,
+                        const cnnlTensorDescriptor_t output_desc, void* output);
+
   static void EmbeddingBackward(
       const ExecutionContext& ctx, int padding_idx, bool scale_grad_by_freq,
       const cnnlTensorDescriptor_t indices_desc, const void* indices,
diff --git a/python/paddle/fluid/tests/unittests/mlu/test_iou_similarity_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_iou_similarity_op_mlu.py
new file mode 100644
index 00000000000..4120cb8fbcc
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/mlu/test_iou_similarity_op_mlu.py
@@ -0,0 +1,131 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+import numpy.random as random
+import sys
+
+sys.path.append("..")
+import math
+import paddle
+from op_test import OpTest
+
+paddle.enable_static()
+
+np.random.seed(2022)
+
+
+class TestMluIouSimilarityOp(OpTest):
+    """Checks the MLU iou_similarity kernel against a pure-Python reference."""
+    def setUp(self):
+        self.op_type = "iou_similarity"
+        self.set_mlu()
+        self.init_dtype()
+        self.set_init_config()
+        self.set_attrs()
+        self.set_inputs()
+        self.set_outputs()
+
+    def set_mlu(self):
+        self.__class__.use_mlu = True
+        self.place = paddle.MLUPlace(0)
+
+    def init_dtype(self):
+        self.dtype = np.float32
+
+    def set_init_config(self):
+        self.N = 2
+        self.M = 3
+        self.box_normalized = False
+        self.use_lod = False
+
+    def set_inputs(self):
+        self.boxes1 = random.rand(self.N, 4).astype(self.dtype)
+        self.boxes2 = random.rand(self.M, 4).astype(self.dtype)
+        if self.use_lod:
+            self.boxes1_lod = [[1 for _ in range(self.N)]]
+            self.inputs = {
+                'X': (self.boxes1, self.boxes1_lod),
+                'Y': self.boxes2
+            }
+        else:
+            self.inputs = {'X': self.boxes1, 'Y': self.boxes2}
+
+    def set_attrs(self):
+        self.attrs = {"box_normalized": self.box_normalized}
+
+    def set_outputs(self):
+        self.output = random.rand(self.N, self.M).astype(self.dtype)
+        self._compute_iou()
+        self.outputs = {'Out': self.output}
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place)
+
+    def _compute_iou(self):  # fills self.output[row, col] with the expected IoU
+        for row in range(self.boxes1.shape[0]):
+            for col in range(self.boxes2.shape[0]):
+                xmin1, ymin1, xmax1, ymax1 = self.boxes1[row]
+                xmin2, ymin2, xmax2, ymax2 = self.boxes2[col]
+                if not self.box_normalized:
+                    area1 = (ymax1 - ymin1 + 1) * (xmax1 - xmin1 + 1)
+                    area2 = (ymax2 - ymin2 + 1) * (xmax2 - xmin2 + 1)
+                else:
+                    area1 = (ymax1 - ymin1) * (xmax1 - xmin1)
+                    area2 = (ymax2 - ymin2) * (xmax2 - xmin2)
+
+                inter_xmax = min(xmax1, xmax2)
+                inter_ymax = min(ymax1, ymax2)
+                inter_xmin = max(xmin1, xmin2)
+                inter_ymin = max(ymin1, ymin2)
+                inter_height = inter_ymax - inter_ymin
+                inter_width = inter_xmax - inter_xmin
+                if not self.box_normalized:
+                    inter_height += 1
+                    inter_width += 1
+                inter_height = max(inter_height, 0)
+                inter_width = max(inter_width, 0)
+                inter_area = inter_width * inter_height
+                union_area = area1 + area2 - inter_area
+                sim_score = inter_area / union_area
+                self.output[row, col] = sim_score
+
+
+class TestMluIouSimilarityOpWithLoD(TestMluIouSimilarityOp):
+    """box_normalized=True, with X passed as an (array, LoD) pair."""
+    def set_init_config(self):
+        super(TestMluIouSimilarityOpWithLoD, self).set_init_config()
+        self.box_normalized = True
+        self.use_lod = True
+
+
+class TestMluIouSimilarityOpWithBoxNormalized(TestMluIouSimilarityOp):
+    """box_normalized=True; currently configured identically to the LoD case."""
+    def set_init_config(self):
+        super(TestMluIouSimilarityOpWithBoxNormalized, self).set_init_config()
+        self.box_normalized = True
+        self.use_lod = True  # NOTE(review): duplicates WithLoD; intended?
+
+
+class TestMluIouSimilarityOpFp16(TestMluIouSimilarityOp):
+    """Same checks as the base case, run with float16 inputs."""
+    def init_dtype(self):
+        self.dtype = np.float16
+
+
+if __name__ == '__main__':
+    unittest.main()
-- 
GitLab