From 35c48c75316cbefc0fcc3a582ea57deac70660ec Mon Sep 17 00:00:00 2001 From: DesmonDay <908660116@qq.com> Date: Tue, 3 Jan 2023 07:22:06 +0000 Subject: [PATCH] support 0D for paddle.sort/argsort --- paddle/phi/infermeta/unary.cc | 32 +++++---- paddle/phi/kernels/cpu/argsort_grad_kernel.cc | 7 ++ paddle/phi/kernels/cpu/argsort_kernel.cc | 9 +++ paddle/phi/kernels/gpu/argsort_grad_kernel.cu | 8 +++ paddle/phi/kernels/gpu/argsort_kernel.cu | 8 +++ .../tests/unittests/test_zero_dim_tensor.py | 72 +++++++++++++++++++ .../unittests/xpu/test_zero_dim_tensor_xpu.py | 36 ++++++++++ 7 files changed, 160 insertions(+), 12 deletions(-) diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc index c3b96b813b8..895d50c7bbd 100644 --- a/paddle/phi/infermeta/unary.cc +++ b/paddle/phi/infermeta/unary.cc @@ -220,18 +220,26 @@ void ArgsortInferMeta(const MetaTensor& input, MetaTensor* indices) { auto in_dims = input.dims(); auto num_dims = in_dims.size(); - PADDLE_ENFORCE_GE( - axis, - -num_dims, - phi::errors::InvalidArgument("'axis'(%d) must be greater than or equal to" - " -num_dims(%d).", - axis, - -num_dims)); - PADDLE_ENFORCE_LT( - axis, - num_dims, - phi::errors::InvalidArgument( - "'axis'(%d) must be less than num_dims(%d).", axis, num_dims)); + if (num_dims > 0) { + PADDLE_ENFORCE_GE(axis, + -num_dims, + phi::errors::InvalidArgument( + "'axis'(%d) must be greater than or equal to" + " -num_dims(%d).", + axis, + -num_dims)); + PADDLE_ENFORCE_LT( + axis, + num_dims, + phi::errors::InvalidArgument( + "'axis'(%d) must be less than num_dims(%d).", axis, num_dims)); + } else { // 0-dim tensor + PADDLE_ENFORCE_EQ( + axis == 0 || axis == -1, + 1, + phi::errors::InvalidArgument( + "'axis'(%d) must be 0 or -1 if input tensor is 0-dim.", axis)); + } output->share_dims(input); output->set_dtype(input.dtype()); diff --git a/paddle/phi/kernels/cpu/argsort_grad_kernel.cc b/paddle/phi/kernels/cpu/argsort_grad_kernel.cc index 1e60847232c..f866b62a2bd 100644 --- a/paddle/phi/kernels/cpu/argsort_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/argsort_grad_kernel.cc @@ -18,6 +18,7 @@ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/eigen/eigen_function.h" +#include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/transpose_kernel.h" namespace phi { @@ -58,6 +59,7 @@ void ArgsortGradKernel(const Context& dev_ctx, bool descending, DenseTensor* in_grad) { auto in_dims = indices.dims(); + auto rank = input.dims().size(); axis = (axis < 0) ? (in_dims.size() + axis) : axis; dev_ctx.template Alloc(in_grad); auto dxt = EigenVector::Flatten(*in_grad); @@ -65,6 +67,11 @@ void ArgsortGradKernel(const Context& dev_ctx, dxt.device(place) = dxt.constant(static_cast(0)); if (out_grad.numel() == 0) return; + if (rank == 0) { + phi::funcs::set_constant(dev_ctx, in_grad, 1.0); + return; + } + // Do full assign if (axis == -1 || axis + 1 == in_dims.size()) { const int64_t input_height = diff --git a/paddle/phi/kernels/cpu/argsort_kernel.cc b/paddle/phi/kernels/cpu/argsort_kernel.cc index 8621a717e10..07b61fa3f35 100644 --- a/paddle/phi/kernels/cpu/argsort_kernel.cc +++ b/paddle/phi/kernels/cpu/argsort_kernel.cc @@ -18,6 +18,7 @@ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/eigen/eigen_function.h" +#include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/transpose_kernel.h" namespace phi { @@ -75,9 +76,17 @@ void ArgsortKernel(const Context& dev_ctx, DenseTensor* output, DenseTensor* indices) { auto in_dims = input.dims(); + auto rank = in_dims.size(); axis = (axis < 0) ? (in_dims.size() + axis) : axis; T* out_data = dev_ctx.template Alloc(output); + // For 0D Tensor + if (rank == 0) { + phi::Copy(dev_ctx, input, dev_ctx.GetPlace(), false, output); + phi::funcs::set_constant(dev_ctx, indices, 0); + return; + } + // Do full sort if (axis == -1 || axis + 1 == in_dims.size()) { const int64_t input_height = diff --git a/paddle/phi/kernels/gpu/argsort_grad_kernel.cu b/paddle/phi/kernels/gpu/argsort_grad_kernel.cu index a2d149cb2e4..f28da8704cb 100644 --- a/paddle/phi/kernels/gpu/argsort_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/argsort_grad_kernel.cu @@ -28,6 +28,7 @@ namespace cub = hipcub; #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/primitive/functor_primitives.h" #include "paddle/phi/kernels/transpose_kernel.h" @@ -141,11 +142,18 @@ void ArgsortGradKernel(const Context& dev_ctx, bool descending, DenseTensor* in_grad) { dev_ctx.template Alloc(in_grad); + phi::funcs::set_constant(dev_ctx, in_grad, 0.0); if (out_grad.numel() == 0) return; auto in_dims = in_grad->dims(); + auto rank = in_dims.size(); axis = (axis < 0) ? (in_dims.size() + axis) : axis; int64_t size = in_grad->numel(); + if (rank == 0) { + phi::funcs::set_constant(dev_ctx, in_grad, 1.0); + return; + } + // Parallel acceleration when the input size is equal to the length of the // ‘axis’ dimension. // Compared to 'special case for full sort' below, the gradient calculation diff --git a/paddle/phi/kernels/gpu/argsort_kernel.cu b/paddle/phi/kernels/gpu/argsort_kernel.cu index 1c3825b90e2..13455a7639c 100644 --- a/paddle/phi/kernels/gpu/argsort_kernel.cu +++ b/paddle/phi/kernels/gpu/argsort_kernel.cu @@ -30,6 +30,7 @@ namespace cub = hipcub; #include "paddle/phi/backends/gpu/gpu_info.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/blas/blas.h" +#include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/primitive/functor_primitives.h" #include "paddle/phi/kernels/transpose_kernel.h" @@ -396,6 +397,7 @@ void ArgsortKernel(const Context &dev_ctx, DenseTensor *output, DenseTensor *indices) { auto in_dims = input.dims(); + auto rank = in_dims.size(); axis = (axis < 0) ? (in_dims.size() + axis) : axis; const T *in_data = input.data(); @@ -403,6 +405,12 @@ void ArgsortKernel(const Context &dev_ctx, T *out_data = dev_ctx.template Alloc(output); int64_t *ids_data = dev_ctx.template Alloc(indices); + if (rank == 0) { + phi::Copy(dev_ctx, input, dev_ctx.GetPlace(), false, output); + phi::funcs::set_constant(dev_ctx, indices, 0); + return; + } + // Use thrust for parallel acceleration when the input size is equal to the // length of the ‘axis’ dimension. // Compared to the following 'Special case for full sort', ascending sort is diff --git a/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py b/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py index b8a11510480..c99aabbf9b5 100644 --- a/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py @@ -747,6 +747,42 @@ class TestSundryAPI(unittest.TestCase): np.testing.assert_array_equal(out3_1.numpy(), out3_2.numpy()) np.testing.assert_array_equal(out3_2.numpy(), np.asarray(1)) + def test_sort(self): + x1 = paddle.rand([]) + x2 = paddle.rand([]) + x1.stop_gradient = False + x2.stop_gradient = False + out1 = paddle.sort(x1, axis=-1) + out2 = paddle.sort(x2, axis=0) + + out1.backward() + out2.backward() + + self.assertEqual(out1.shape, []) + self.assertEqual(out2.shape, []) + self.assertEqual(out1.grad.shape, []) + self.assertEqual(out2.grad.shape, []) + self.assertEqual(x1.grad.shape, []) + self.assertEqual(x2.grad.shape, []) + + def test_argsort(self): + x1 = paddle.rand([]) + x2 = paddle.rand([]) + x1.stop_gradient = False + x2.stop_gradient = False + out1 = paddle.argsort(x1, axis=-1) + out2 = paddle.argsort(x2, axis=0) + + out1.backward() + out2.backward() + + self.assertEqual(out1.shape, []) + self.assertEqual(out2.shape, []) + self.assertEqual(out1.grad.shape, []) + self.assertEqual(out2.grad.shape, []) + self.assertEqual(x1.grad.shape, []) + self.assertEqual(x2.grad.shape, []) + class TestSundryAPIStatic(unittest.TestCase): def setUp(self): @@ -990,6 +1026,42 @@ class TestSundryAPIStatic(unittest.TestCase): np.testing.assert_array_equal(out3_1, out3_2) np.testing.assert_array_equal(out3_2, np.asarray(1)) + @prog_scope() + def test_sort(self): + x1 = paddle.rand([]) + x1.stop_gradient = False + out1 = paddle.sort(x1, axis=-1) + paddle.static.append_backward(out1) + + x2 = paddle.rand([]) + x2.stop_gradient = False + out2 = paddle.sort(x2, axis=0) + paddle.static.append_backward(out2) + + prog = paddle.static.default_main_program() + res = self.exe.run(prog, fetch_list=[out1, out2]) + + self.assertEqual(res[0].shape, ()) + self.assertEqual(res[1].shape, ()) + + @prog_scope() + def test_argsort(self): + x1 = paddle.rand([]) + x1.stop_gradient = False + out1 = paddle.argsort(x1, axis=-1) + paddle.static.append_backward(out1) + + x2 = paddle.rand([]) + x2.stop_gradient = False + out2 = paddle.argsort(x2, axis=0) + paddle.static.append_backward(out2) + + prog = paddle.static.default_main_program() + res = self.exe.run(prog, fetch_list=[out1, out2]) + + self.assertEqual(res[0].shape, ()) + self.assertEqual(res[1].shape, ()) + # Use to test API whose zero-dim input tensors don't have grad and not need to test backward in OpTest. class TestNoBackwardAPI(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/xpu/test_zero_dim_tensor_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_zero_dim_tensor_xpu.py index b561b775f29..ebd4354593d 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_zero_dim_tensor_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_zero_dim_tensor_xpu.py @@ -556,6 +556,42 @@ class TestSundryAPI(unittest.TestCase): np.testing.assert_array_equal(out3_1.numpy(), out3_2.numpy()) np.testing.assert_array_equal(out3_2.numpy(), np.asarray(1)) + def test_sort(self): + x1 = paddle.rand([]) + x2 = paddle.rand([]) + x1.stop_gradient = False + x2.stop_gradient = False + out1 = paddle.sort(x1, axis=-1) + out2 = paddle.sort(x2, axis=0) + + out1.backward() + out2.backward() + + self.assertEqual(out1.shape, []) + self.assertEqual(out2.shape, []) + self.assertEqual(out1.grad.shape, []) + self.assertEqual(out2.grad.shape, []) + self.assertEqual(x1.grad.shape, []) + self.assertEqual(x2.grad.shape, []) + + def test_argsort(self): + x1 = paddle.rand([]) + x2 = paddle.rand([]) + x1.stop_gradient = False + x2.stop_gradient = False + out1 = paddle.argsort(x1, axis=-1) + out2 = paddle.argsort(x2, axis=0) + + out1.backward() + out2.backward() + + self.assertEqual(out1.shape, []) + self.assertEqual(out2.shape, []) + self.assertEqual(out1.grad.shape, []) + self.assertEqual(out2.grad.shape, []) + self.assertEqual(x1.grad.shape, []) + self.assertEqual(x2.grad.shape, []) + # Use to test API whose zero-dim input tensors don't have grad and not need to test backward in OpTest. class TestNoBackwardAPI(unittest.TestCase): -- GitLab