From 922f0868ecd3ba40264b5df04b323cd3ce2b8149 Mon Sep 17 00:00:00 2001
From: zhangyikun02 <48021248+zhangyk0314@users.noreply.github.com>
Date: Mon, 19 Dec 2022 14:38:29 +0800
Subject: [PATCH] add diag_v2 op for xpu, test=kunlun (#49088)

---
 cmake/external/xpu.cmake                      |   2 +-
 paddle/phi/backends/xpu/xpu2_op_list.cc       |   5 +
 paddle/phi/kernels/xpu/diag_kernel.cc         |  60 ++++
 .../unittests/xpu/test_diag_v2_op_xpu.py      | 324 ++++++++++++++++++
 4 files changed, 390 insertions(+), 1 deletion(-)
 create mode 100644 paddle/phi/kernels/xpu/diag_kernel.cc
 create mode 100644 python/paddle/fluid/tests/unittests/xpu/test_diag_v2_op_xpu.py

diff --git a/cmake/external/xpu.cmake b/cmake/external/xpu.cmake
index e13009cca8a..daaf0666892 100644
--- a/cmake/external/xpu.cmake
+++ b/cmake/external/xpu.cmake
@@ -10,7 +10,7 @@ set(XPU_RT_LIB_NAME "libxpurt.so")
 if(NOT DEFINED XPU_BASE_URL)
   set(XPU_BASE_URL_WITHOUT_DATE
       "https://baidu-kunlun-product.su.bcebos.com/KL-SDK/klsdk-dev")
-  set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20221201")
+  set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20221215")
 else()
   set(XPU_BASE_URL "${XPU_BASE_URL}")
 endif()
diff --git a/paddle/phi/backends/xpu/xpu2_op_list.cc b/paddle/phi/backends/xpu/xpu2_op_list.cc
index 05b4991b858..aa9714ee36e 100644
--- a/paddle/phi/backends/xpu/xpu2_op_list.cc
+++ b/paddle/phi/backends/xpu/xpu2_op_list.cc
@@ -122,6 +122,11 @@ XPUOpMap& get_kl2_ops() {
       {"deformable_conv", XPUKernelSet({phi::DataType::FLOAT32})},
       {"depthwise_conv2d_grad", XPUKernelSet({phi::DataType::FLOAT32})},
       {"depthwise_conv2d", XPUKernelSet({phi::DataType::FLOAT32})},
+      {"diag_v2",
+       XPUKernelSet({phi::DataType::FLOAT32,
+                     phi::DataType::FLOAT16,
+                     phi::DataType::INT32,
+                     phi::DataType::INT64})},
       {"dropout_grad",
        XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},
       {"dropout",
diff --git a/paddle/phi/kernels/xpu/diag_kernel.cc b/paddle/phi/kernels/xpu/diag_kernel.cc
new file mode 100644
index 00000000000..fe7495f471d
--- /dev/null
+++ b/paddle/phi/kernels/xpu/diag_kernel.cc
@@ -0,0 +1,60 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/diag_kernel.h"
+
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/backends/xpu/xpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void DiagKernel(const Context& dev_ctx,
+                const DenseTensor& x,
+                int offset,
+                float padding_value,
+                DenseTensor* out) {
+  using XPUType = typename XPUTypeTrait<T>::Type;
+  auto* x_data = reinterpret_cast<const XPUType*>(x.data<T>());
+  dev_ctx.template Alloc<T>(out);
+  auto* out_data = reinterpret_cast<XPUType*>(out->data<T>());
+
+  auto x_shape = vectorize<int>(x.dims());
+  auto out_shape = vectorize<int>(out->dims());
+
+  // A 0-D input is treated as a single-element vector for the XDNN call.
+  if (x.dims().size() == 0) {
+    x_shape = std::vector<int>({1});
+  }
+
+  int r = xpu::diag(dev_ctx.x_context(),
+                    x_data,
+                    out_data,
+                    x_shape,
+                    out_shape,
+                    offset,
+                    padding_value);
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "diag");
+}
+
+}  // namespace phi
+
+PD_REGISTER_KERNEL(diag,
+                   XPU,
+                   ALL_LAYOUT,
+                   phi::DiagKernel,
+                   phi::dtype::float16,
+                   int,
+                   float,
+                   int64_t) {}
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_diag_v2_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_diag_v2_op_xpu.py
new file mode 100644
index 00000000000..4f1e1050927
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/xpu/test_diag_v2_op_xpu.py
@@ -0,0 +1,324 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import unittest
+
+sys.path.append("..")
+
+import numpy as np
+from op_test_xpu import XPUOpTest
+from xpu.get_test_cover_info import (
+    XPUOpTestWrapper,
+    create_test_class,
+    get_xpu_op_support_types,
+)
+
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid import Program, program_guard
+from paddle.fluid.framework import _test_eager_guard
+
+paddle.enable_static()
+
+
+class XPUTestDiagV2Op(XPUOpTestWrapper):
+    def __init__(self):
+        self.op_name = 'diag_v2'
+        self.use_dynamic_create_class = False
+
+    class TestDiagV2Op(XPUOpTest):
+        def setUp(self):
+            self.op_type = "diag_v2"
+            self.dtype = self.in_type
+            self.place = paddle.XPUPlace(0)
+            self.python_api = paddle.diag
+            self.x = np.random.rand(10, 10).astype(self.dtype)
+            self.offset = 0
+            self.padding_value = 0.0
+            self.out = np.diag(self.x, self.offset)
+
+            self.init_config()
+            self.inputs = {'X': self.x}
+            self.attrs = {
+                'offset': self.offset,
+                'padding_value': self.padding_value,
+            }
+            self.outputs = {'Out': self.out}
+
+        def test_check_output(self):
+            paddle.enable_static()
+            self.check_output(check_eager=False)
+
+        def test_check_grad(self):
+            paddle.enable_static()
+            self.check_grad(['X'], 'Out', check_eager=False)
+
+        def init_config(self):
+            pass
+
+    class TestDiagV2OpCase1(TestDiagV2Op):
+        def init_config(self):
+            self.offset = 1
+            self.out = np.diag(self.x, self.offset)
+
+    class TestDiagV2OpCase2(TestDiagV2Op):
+        def init_config(self):
+            self.offset = -1
+            self.out = np.diag(self.x, self.offset)
+
+    class TestDiagV2OpCase3(TestDiagV2Op):
+        def init_config(self):
+            self.x = np.random.rand(100).astype(self.dtype)
+            self.padding_value = 2
+            n = self.x.size
+            self.out = (
+                self.padding_value * np.ones((n, n))
+                + np.diag(self.x, self.offset)
+                - np.diag(self.padding_value * np.ones(n))
+            )
+
+    class TestDiagV2Error(unittest.TestCase):
+        def test_errors(self):
+            paddle.enable_static()
+            with program_guard(Program(), Program()):
+
+                def test_diag_v2_type():
+                    x = [1, 2, 3]
+                    output = paddle.diag(x)
+
+                self.assertRaises(TypeError, test_diag_v2_type)
+
+                x = paddle.static.data('data', [3, 3])
+                self.assertRaises(TypeError, paddle.diag, x, offset=2.5)
+
+                self.assertRaises(TypeError, paddle.diag, x, padding_value=[9])
+
+                x = paddle.static.data('data2', [3, 3, 3])
+                self.assertRaises(ValueError, paddle.diag, x)
+
+    class TestDiagV2API(unittest.TestCase):
+        def setUp(self):
+            self.dtype = self.in_type
+            self.input_np = np.random.random(size=(10, 10)).astype(self.dtype)
+            self.expected0 = np.diag(self.input_np)
+            self.expected1 = np.diag(self.input_np, k=1)
+            self.expected2 = np.diag(self.input_np, k=-1)
+
+            self.input_np2 = np.random.rand(100).astype(self.dtype)
+            self.offset = 0
+            self.padding_value = 8
+            n = self.input_np2.size
+            self.expected3 = (
+                self.padding_value * np.ones((n, n))
+                + np.diag(self.input_np2, self.offset)
+                - np.diag(self.padding_value * np.ones(n))
+            )
+
+            self.input_np3 = np.random.randint(-10, 10, size=(100)).astype(
+                self.dtype
+            )
+            self.padding_value = 8.0
+            n = self.input_np3.size
+            self.expected4 = (
+                self.padding_value * np.ones((n, n))
+                + np.diag(self.input_np3, self.offset)
+                - np.diag(self.padding_value * np.ones(n))
+            )
+
+            self.padding_value = -8
+            self.expected5 = (
+                self.padding_value * np.ones((n, n))
+                + np.diag(self.input_np3, self.offset)
+                - np.diag(self.padding_value * np.ones(n))
+            )
+
+            self.input_np4 = np.random.random(size=(2000, 2000)).astype(
+                self.dtype
+            )
+            self.expected6 = np.diag(self.input_np4)
+            self.expected7 = np.diag(self.input_np4, k=1)
+            self.expected8 = np.diag(self.input_np4, k=-1)
+
+            self.input_np5 = np.random.random(size=(2000)).astype(self.dtype)
+            self.expected9 = np.diag(self.input_np5)
+            self.expected10 = np.diag(self.input_np5, k=1)
+            self.expected11 = np.diag(self.input_np5, k=-1)
+
+            self.input_np6 = np.random.random(size=(2000, 1500)).astype(
+                self.dtype
+            )
+            self.expected12 = np.diag(self.input_np6, k=-1)
+
+        def run_imperative(self):
+            x = paddle.to_tensor(self.input_np)
+            y = paddle.diag(x)
+            np.testing.assert_allclose(y.numpy(), self.expected0, rtol=1e-05)
+
+            y = paddle.diag(x, offset=1)
+            np.testing.assert_allclose(y.numpy(), self.expected1, rtol=1e-05)
+
+            y = paddle.diag(x, offset=-1)
+            np.testing.assert_allclose(y.numpy(), self.expected2, rtol=1e-05)
+
+            x = paddle.to_tensor(self.input_np2)
+            y = paddle.diag(x, padding_value=8)
+            np.testing.assert_allclose(y.numpy(), self.expected3, rtol=1e-05)
+
+            x = paddle.to_tensor(self.input_np3)
+            y = paddle.diag(x, padding_value=8.0)
+            np.testing.assert_allclose(y.numpy(), self.expected4, rtol=1e-05)
+
+            y = paddle.diag(x, padding_value=-8)
+            np.testing.assert_allclose(y.numpy(), self.expected5, rtol=1e-05)
+
+            x = paddle.to_tensor(self.input_np4)
+            y = paddle.diag(x)
+            np.testing.assert_allclose(y.numpy(), self.expected6, rtol=1e-05)
+
+            y = paddle.diag(x, offset=1)
+            np.testing.assert_allclose(y.numpy(), self.expected7, rtol=1e-05)
+
+            y = paddle.diag(x, offset=-1)
+            np.testing.assert_allclose(y.numpy(), self.expected8, rtol=1e-05)
+
+            x = paddle.to_tensor(self.input_np5)
+            y = paddle.diag(x)
+            np.testing.assert_allclose(y.numpy(), self.expected9, rtol=1e-05)
+
+            y = paddle.diag(x, offset=1)
+            np.testing.assert_allclose(y.numpy(), self.expected10, rtol=1e-05)
+
+            y = paddle.diag(x, offset=-1)
+            np.testing.assert_allclose(y.numpy(), self.expected11, rtol=1e-05)
+
+            x = paddle.to_tensor(self.input_np6)
+            y = paddle.diag(x, offset=-1)
+            np.testing.assert_allclose(y.numpy(), self.expected12, rtol=1e-05)
+
+        def run_static(self, use_gpu=False):
+            if self.dtype == np.float16:
+                return
+            np.random.seed(1024)
+            x = paddle.static.data(
+                name='input', shape=[10, 10], dtype=self.dtype
+            )
+            x2 = paddle.static.data(
+                name='input2', shape=[100], dtype=self.dtype
+            )
+            x3 = paddle.static.data(
+                name='input3', shape=[100], dtype=self.dtype
+            )
+            x4 = paddle.static.data(
+                name='input4', shape=[2000, 2000], dtype=self.dtype
+            )
+            x5 = paddle.static.data(
+                name='input5', shape=[2000], dtype=self.dtype
+            )
+            x6 = paddle.static.data(
+                name='input6', shape=[2000, 1500], dtype=self.dtype
+            )
+            result0 = paddle.diag(x)
+            result1 = paddle.diag(x, offset=1)
+            result2 = paddle.diag(x, offset=-1)
+            result3 = paddle.diag(x, name='aaa')
+            result4 = paddle.diag(x2, padding_value=8)
+            result5 = paddle.diag(x3, padding_value=8.0)
+            result6 = paddle.diag(x3, padding_value=-8)
+            result7 = paddle.diag(x4)
+            result8 = paddle.diag(x4, offset=1)
+            result9 = paddle.diag(x4, offset=-1)
+            result10 = paddle.diag(x5)
+            result11 = paddle.diag(x5, offset=1)
+            result12 = paddle.diag(x5, offset=-1)
+            result13 = paddle.diag(x6, offset=-1)
+
+            place = fluid.XPUPlace(0)
+            exe = fluid.Executor(place)
+            exe.run(fluid.default_startup_program())
+            (
+                res0,
+                res1,
+                res2,
+                res4,
+                res5,
+                res6,
+                res7,
+                res8,
+                res9,
+                res10,
+                res11,
+                res12,
+                res13,
+            ) = exe.run(
+                feed={
+                    "input": self.input_np,
+                    "input2": self.input_np2,
+                    'input3': self.input_np3,
+                    'input4': self.input_np4,
+                    'input5': self.input_np5,
+                    'input6': self.input_np6,
+                },
+                fetch_list=[
+                    result0,
+                    result1,
+                    result2,
+                    result4,
+                    result5,
+                    result6,
+                    result7,
+                    result8,
+                    result9,
+                    result10,
+                    result11,
+                    result12,
+                    result13,
+                ],
+            )
+
+            np.testing.assert_allclose(res0, self.expected0, rtol=1e-05)
+            np.testing.assert_allclose(res1, self.expected1, rtol=1e-05)
+            np.testing.assert_allclose(res2, self.expected2, rtol=1e-05)
+            self.assertTrue('aaa' in result3.name)
+            np.testing.assert_allclose(res4, self.expected3, rtol=1e-05)
+            np.testing.assert_allclose(res5, self.expected4, rtol=1e-05)
+            np.testing.assert_allclose(res6, self.expected5, rtol=1e-05)
+            np.testing.assert_allclose(res7, self.expected6, rtol=1e-05)
+            np.testing.assert_allclose(res8, self.expected7, rtol=1e-05)
+            np.testing.assert_allclose(res9, self.expected8, rtol=1e-05)
+            np.testing.assert_allclose(res10, self.expected9, rtol=1e-05)
+            np.testing.assert_allclose(res11, self.expected10, rtol=1e-05)
+            np.testing.assert_allclose(res12, self.expected11, rtol=1e-05)
+            np.testing.assert_allclose(res13, self.expected12, rtol=1e-05)
+
+        def test_xpu(self):
+            paddle.disable_static(place=paddle.fluid.XPUPlace(0))
+            self.run_imperative()
+            with _test_eager_guard():
+                self.run_imperative()
+
+            paddle.enable_static()
+
+            with fluid.program_guard(fluid.Program()):
+                self.run_static()
+
+
+support_types = get_xpu_op_support_types('diag_v2')
+for stype in support_types:
+    create_test_class(globals(), XPUTestDiagV2Op, stype)
+
+
+if __name__ == "__main__":
+    paddle.enable_static()
+    unittest.main()
--
GitLab
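
For quick verification on a Kunlun device, the new kernel can also be exercised outside the
unit-test harness. Below is a minimal sketch, not part of the patch: it assumes an XPU build
of Paddle with device `xpu:0` available, and checks the same behavior the tests above cover
(diagonal extraction for 2-D input, offset, and padding_value for 1-D input).

    import numpy as np
    import paddle

    # Route subsequent ops to XPU so paddle.diag dispatches to the
    # phi::DiagKernel registered for XPU in this patch (assumes xpu:0 exists).
    paddle.set_device('xpu:0')

    # 2-D input: extract the main diagonal and the first superdiagonal.
    x = paddle.to_tensor(np.arange(9, dtype='float32').reshape(3, 3))
    print(paddle.diag(x))            # -> [0., 4., 8.]
    print(paddle.diag(x, offset=1))  # -> [1., 5.]

    # 1-D input: build a matrix; off-diagonal cells take padding_value.
    v = paddle.to_tensor([1.0, 2.0, 3.0])
    print(paddle.diag(v, padding_value=8.0))

The expected values mirror the np.diag reference used throughout
test_diag_v2_op_xpu.py, so any mismatch points at the XPU kernel rather than
at the Python API.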