From 999242e35f450e2904df22a56ca8954f1811dbf8 Mon Sep 17 00:00:00 2001 From: zhulei <563755780@qq.com> Date: Tue, 19 Oct 2021 19:32:30 +0800 Subject: [PATCH] [NPU] Add iou_similarity op (#36412) * [NPU] Add iou_similarity op * [NPU] Add iou_similarity op * [NPU] Add iou_similarity op --- .../fluid/operators/detection/CMakeLists.txt | 2 + .../detection/iou_similarity_op_npu.cc | 192 ++++++++++++++++++ .../npu/test_iou_similarity_op_npu.py | 126 ++++++++++++ 3 files changed, 320 insertions(+) create mode 100644 paddle/fluid/operators/detection/iou_similarity_op_npu.cc create mode 100644 python/paddle/fluid/tests/unittests/npu/test_iou_similarity_op_npu.py diff --git a/paddle/fluid/operators/detection/CMakeLists.txt b/paddle/fluid/operators/detection/CMakeLists.txt index 871240aa15..506ae56a12 100644 --- a/paddle/fluid/operators/detection/CMakeLists.txt +++ b/paddle/fluid/operators/detection/CMakeLists.txt @@ -64,6 +64,8 @@ endif() if(WITH_XPU) detection_library(iou_similarity_op SRCS iou_similarity_op.cc iou_similarity_op_xpu.cc) +elseif(WITH_ASCEND_CL) + detection_library(iou_similarity_op SRCS iou_similarity_op.cc iou_similarity_op_npu.cc) else() detection_library(iou_similarity_op SRCS iou_similarity_op.cc iou_similarity_op.cu) endif() diff --git a/paddle/fluid/operators/detection/iou_similarity_op_npu.cc b/paddle/fluid/operators/detection/iou_similarity_op_npu.cc new file mode 100644 index 0000000000..9a91d4bd8f --- /dev/null +++ b/paddle/fluid/operators/detection/iou_similarity_op_npu.cc @@ -0,0 +1,192 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/detection/iou_similarity_op.h" +#include "paddle/fluid/operators/npu_op_runner.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +template +struct IouFunction { + public: + explicit IouFunction(const framework::ExecutionContext& ctx) : ctx(ctx) { + place = ctx.GetPlace(); + stream = ctx.template device_context() + .stream(); + } + void Transpose(const Tensor* x, Tensor* y, const std::vector& axis) { + // y should be init first + const auto& runner = + NpuOpRunner("TransposeD", {*x}, {*y}, {{"perm", axis}}); + runner.Run(stream); + } + void Add(const Tensor* x, const Tensor* y, Tensor* z) { + // y should be init first + const auto& runner = NpuOpRunner("AddV2", {*x, *y}, {*z}, {}); + runner.Run(stream); + } + void Sub(const Tensor* x, const Tensor* y, Tensor* z) { + // y should be init first + const auto& runner = NpuOpRunner("Sub", {*x, *y}, {*z}, {}); + runner.Run(stream); + } + void Mul(const Tensor* x, const Tensor* y, Tensor* z) { + // y should be init first + const auto& runner = NpuOpRunner("Mul", {*x, *y}, {*z}, {}); + runner.Run(stream); + } + void DivNoNan(const Tensor* x, const Tensor* y, Tensor* z) { + // y should be init first + const auto& runner = NpuOpRunner("DivNoNan", {*x, *y}, {*z}, {}); + runner.Run(stream); + } + void Adds(const Tensor* x, float scalar, Tensor* y) { + // y should be init first + const auto& runner = NpuOpRunner("Adds", {*x}, {*y}, {{"value", scalar}}); + runner.Run(stream); + } + void Maximum(const Tensor* x, const Tensor* y, Tensor* z) { + // z should be init first + const auto& runner = NpuOpRunner("Maximum", {*x, *y}, {*z}, {}); + runner.Run(stream); + } + void Minimum(const Tensor* x, const Tensor* y, Tensor* z) { + // z should be init first + const auto& runner = NpuOpRunner("Minimum", {*x, *y}, {*z}, {}); + runner.Run(stream); + } + + private: + platform::Place place; + aclrtStream stream; + const framework::ExecutionContext& ctx; +}; + +template +class IouSimilarityNPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + bool normalized = ctx.Attr("box_normalized"); + auto* out = ctx.Output("Out"); + + auto _type = x->type(); + auto place = ctx.GetPlace(); + + IouFunction F(ctx); + + auto N = x->dims()[0]; + auto M = y->dims()[0]; + + out->mutable_data({N, M}, place); + Tensor xt(_type); + Tensor yt(_type); + xt.mutable_data({4, N}, place); + yt.mutable_data({4, M}, place); + std::vector vec_trans = {1, 0}; + F.Transpose(x, &xt, vec_trans); + F.Transpose(y, &yt, vec_trans); + Tensor xmin1 = xt.Slice(0, 1); + Tensor ymin1 = xt.Slice(1, 2); + Tensor xmax1 = xt.Slice(2, 3); + Tensor ymax1 = xt.Slice(3, 4); + Tensor xmin2 = yt.Slice(0, 1); + Tensor ymin2 = yt.Slice(1, 2); + Tensor xmax2 = yt.Slice(2, 3); + Tensor ymax2 = yt.Slice(3, 4); + xmin1.Resize({N, 1}); + ymin1.Resize({N, 1}); + xmax1.Resize({N, 1}); + ymax1.Resize({N, 1}); + xmin2.Resize({1, M}); + ymin2.Resize({1, M}); + xmax2.Resize({1, M}); + ymax2.Resize({1, M}); + + Tensor w1(_type); + Tensor h1(_type); + Tensor w2(_type); + Tensor h2(_type); + Tensor area1(_type); + Tensor area2(_type); + w1.mutable_data({N, 1}, place); + h1.mutable_data({N, 1}, place); + w2.mutable_data({1, M}, place); + h2.mutable_data({1, M}, place); + area1.mutable_data({N, 1}, place); + area2.mutable_data({1, M}, place); + F.Sub(&xmax1, &xmin1, &w1); + F.Sub(&ymax1, &ymin1, &h1); + F.Sub(&xmax2, &xmin2, &w2); + F.Sub(&ymax2, &ymin2, &h2); + if (!normalized) { + F.Adds(&w1, 1.0f, &w1); + F.Adds(&h1, 1.0f, &h1); + F.Adds(&w2, 1.0f, &w2); + F.Adds(&h2, 1.0f, &h2); + } + F.Mul(&w1, &h1, &area1); + F.Mul(&w2, &h2, &area2); + + Tensor inter_xmax(_type); + Tensor inter_ymax(_type); + Tensor inter_xmin(_type); + Tensor inter_ymin(_type); + inter_xmax.mutable_data({N, M}, place); + inter_ymax.mutable_data({N, M}, place); + inter_xmin.mutable_data({N, M}, place); + inter_ymin.mutable_data({N, M}, place); + F.Minimum(&xmax1, &xmax2, &inter_xmax); + F.Minimum(&ymax1, &ymax2, &inter_ymax); + F.Maximum(&xmin1, &xmin2, &inter_xmin); + F.Maximum(&ymin1, &ymin2, &inter_ymin); + + Tensor inter_w(_type); + Tensor inter_h(_type); + inter_w.mutable_data({N, M}, place); + inter_h.mutable_data({N, M}, place); + F.Sub(&inter_xmax, &inter_xmin, &inter_w); + F.Sub(&inter_ymax, &inter_ymin, &inter_h); + + if (!normalized) { + F.Adds(&inter_w, 1.0f, &inter_w); + F.Adds(&inter_h, 1.0f, &inter_h); + } + Tensor zeros(_type); + zeros.mutable_data({1}, place); + FillNpuTensorWithConstant(&zeros, static_cast(0)); + F.Maximum(&inter_w, &zeros, &inter_w); + F.Maximum(&inter_h, &zeros, &inter_h); + + F.Mul(&inter_w, &inter_h, out); + Tensor union_area(_type); + union_area.mutable_data({N, M}, place); + F.Add(&area1, &area2, &union_area); + F.Sub(&union_area, out, &union_area); + F.DivNoNan(out, &union_area, out); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +namespace plat = paddle::platform; + +REGISTER_OP_NPU_KERNEL(iou_similarity, ops::IouSimilarityNPUKernel, + ops::IouSimilarityNPUKernel); diff --git a/python/paddle/fluid/tests/unittests/npu/test_iou_similarity_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_iou_similarity_op_npu.py new file mode 100644 index 0000000000..22042ce492 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/npu/test_iou_similarity_op_npu.py @@ -0,0 +1,126 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +import numpy.random as random +import sys +sys.path.append("..") +import math +import paddle +from op_test import OpTest + +paddle.enable_static() + +np.random.seed(2021) + + +class TestNpuIouSimilarityOp(OpTest): + def setUp(self): + self.op_type = "iou_similarity" + self.set_npu() + self.init_dtype() + self.set_init_config() + self.set_attrs() + self.set_inputs() + self.set_outputs() + + def set_npu(self): + self.__class__.use_npu = True + self.place = paddle.NPUPlace(0) + + def init_dtype(self): + self.dtype = np.float32 + + def set_init_config(self): + self.N = 2 + self.M = 3 + self.box_normalized = False + self.use_lod = False + + def set_inputs(self): + self.boxes1 = random.rand(self.N, 4).astype(self.dtype) + self.boxes2 = random.rand(self.M, 4).astype(self.dtype) + if self.use_lod: + self.boxes1_lod = [[1 for _ in range(self.N)]] + self.inputs = { + 'X': (self.boxes1, self.boxes1_lod), + 'Y': self.boxes2 + } + else: + self.inputs = {'X': self.boxes1, 'Y': self.boxes2} + + def set_attrs(self): + self.attrs = {"box_normalized": self.box_normalized} + + def set_outputs(self): + self.output = random.rand(self.N, self.M).astype(self.dtype) + self._compute_iou() + self.outputs = {'Out': self.output} + + def test_check_output(self): + self.check_output_with_place(self.place) + + def _compute_iou(self, ): + for row in range(self.boxes1.shape[0]): + for col in range(self.boxes2.shape[0]): + xmin1, ymin1, xmax1, ymax1 = self.boxes1[row] + xmin2, ymin2, xmax2, ymax2 = self.boxes2[col] + if not self.box_normalized: + area1 = (ymax1 - ymin1 + 1) * (xmax1 - xmin1 + 1) + area2 = (ymax2 - ymin2 + 1) * (xmax2 - xmin2 + 1) + else: + area1 = (ymax1 - ymin1) * (xmax1 - xmin1) + area2 = (ymax2 - ymin2) * (xmax2 - xmin2) + + inter_xmax = min(xmax1, xmax2) + inter_ymax = min(ymax1, ymax2) + inter_xmin = max(xmin1, xmin2) + inter_ymin = max(ymin1, ymin2) + inter_height = inter_ymax - inter_ymin + inter_width = inter_xmax - inter_xmin + if not self.box_normalized: + inter_height += 1 + inter_width += 1 + inter_height = max(inter_height, 0) + inter_width = max(inter_width, 0) + inter_area = inter_width * inter_height + union_area = area1 + area2 - inter_area + sim_score = inter_area / union_area + self.output[row, col] = sim_score + + +class TestNpuIouSimilarityOpWithLoD(TestNpuIouSimilarityOp): + def set_init_config(self): + super(TestNpuIouSimilarityOpWithLoD, self).set_init_config() + self.box_normalized = True + self.use_lod = True + + +class TestNpuIouSimilarityOpWithBoxNormalized(TestNpuIouSimilarityOp): + def set_init_config(self): + super(TestNpuIouSimilarityOpWithBoxNormalized, self).set_init_config() + self.box_normalized = True + self.use_lod = True + + +def TestNpuIouSimilarityOpFp16(TestNpuIouSimilarityOp): + def init_dtype(self): + self.dtype = np.float16 + + +if __name__ == '__main__': + unittest.main() -- GitLab