From 73aa98cf37a6167d10e1c0516422e79e3bef590f Mon Sep 17 00:00:00 2001
From: Roc <30228238+sljlp@users.noreply.github.com>
Date: Mon, 26 Dec 2022 11:44:49 +0800
Subject: [PATCH] [0d Tensor] update scatter for zero-dimension tensor (#49279)

* revert concat and change concat to stack

* let stack kernel support int8, uint8 and bool type
---
 paddle/fluid/pybind/distributed_py.cc       | 40 ++++++++++-----------
 paddle/phi/infermeta/multiary.cc            |  3 +-
 paddle/phi/kernels/cpu/stack_grad_kernel.cc |  4 +++
 paddle/phi/kernels/cpu/stack_kernel.cc      |  6 +++-
 paddle/phi/kernels/funcs/concat_funcs.h     |  3 +-
 paddle/phi/kernels/gpu/concat_kernel.cu     | 29 ---------------
 paddle/phi/kernels/gpu/stack_grad_kernel.cu |  3 ++
 paddle/phi/kernels/gpu/stack_kernel.cu      |  3 ++
 8 files changed, 37 insertions(+), 54 deletions(-)

diff --git a/paddle/fluid/pybind/distributed_py.cc b/paddle/fluid/pybind/distributed_py.cc
index 94b9b36f50..9515ca7f64 100644
--- a/paddle/fluid/pybind/distributed_py.cc
+++ b/paddle/fluid/pybind/distributed_py.cc
@@ -255,9 +255,9 @@ void BindDistributed(py::module *m) {
                  bool sync_op) {
                 auto out_tensor_list =
                     CastPyArg2VectorOfTensor(py_out_tensor_list.ptr(), 0);
-                Tensor concat_out_tensor = paddle::concat(out_tensor_list, 0);
+                Tensor stack_out_tensor = paddle::stack(out_tensor_list, 0);
                 auto p_out_tensor = std::dynamic_pointer_cast<phi::DenseTensor>(
-                    concat_out_tensor.impl());
+                    stack_out_tensor.impl());
                 auto *out_dense = p_out_tensor.get();
 
                 auto in_tensor = CastPyArg2Tensor(py_in_tensor.ptr(), 0);
@@ -307,16 +307,16 @@ void BindDistributed(py::module *m) {
                  bool sync_op) {
                 auto out_tensor_list =
                     CastPyArg2VectorOfTensor(py_out_tensor_list.ptr(), 0);
-                Tensor concat_out_tensor = paddle::concat(out_tensor_list, 0);
+                Tensor stack_out_tensor = paddle::stack(out_tensor_list, 0);
                 auto p_out_tensor = std::dynamic_pointer_cast<phi::DenseTensor>(
-                    concat_out_tensor.impl());
+                    stack_out_tensor.impl());
                 auto *out_dense = p_out_tensor.get();
 
                 auto in_tensor_list =
                     CastPyArg2VectorOfTensor(py_in_tensor_list.ptr(), 0);
-                Tensor concat_in_tensor = paddle::concat(in_tensor_list, 0);
+                Tensor stack_in_tensor = paddle::stack(in_tensor_list, 0);
                 auto p_in_tensor = std::dynamic_pointer_cast<phi::DenseTensor>(
-                    concat_in_tensor.impl());
+                    stack_in_tensor.impl());
                 auto in_dense = *p_in_tensor;
 
                 // in_tensor_list should not be empty
@@ -430,9 +430,9 @@ void BindDistributed(py::module *m) {
 
                 auto in_tensor_list =
                     CastPyArg2VectorOfTensor(py_in_tensor_list.ptr(), 0);
-                Tensor concat_in_tensor = paddle::concat(in_tensor_list, 0);
+                Tensor stack_in_tensor = paddle::stack(in_tensor_list, 0);
                 auto p_in_tensor = std::dynamic_pointer_cast<phi::DenseTensor>(
-                    concat_in_tensor.impl());
+                    stack_in_tensor.impl());
                 auto in_dense = *p_in_tensor;
 
                 distributed::ReduceScatterOptions opts{op};
@@ -484,9 +484,9 @@ void BindDistributed(py::module *m) {
 
                 auto in_tensor_list =
                     CastPyArg2VectorOfTensor(py_in_tensor_list.ptr(), 0);
-                Tensor concat_in_tensor = paddle::concat(in_tensor_list, 0);
+                Tensor stack_in_tensor = paddle::stack(in_tensor_list, 0);
                 auto p_in_tensor = std::dynamic_pointer_cast<phi::DenseTensor>(
-                    concat_in_tensor.impl());
+                    stack_in_tensor.impl());
                 auto in_dense = *p_in_tensor;
 
                 distributed::ScatterOptions opts{src};
@@ -746,9 +746,9 @@ void BindDistributed(py::module *m) {
                  py::handle py_in_tensor) {
                 auto out_tensor_list =
                     CastPyArg2VectorOfTensor(py_out_tensor_list.ptr(), 0);
-                Tensor concat_out_tensor = paddle::concat(out_tensor_list, 0);
+                Tensor stack_out_tensor = paddle::stack(out_tensor_list, 0);
                 auto p_out_tensor = std::dynamic_pointer_cast<phi::DenseTensor>(
-                    concat_out_tensor.impl());
+                    stack_out_tensor.impl());
                 auto *out_dense = p_out_tensor.get();
 
                 auto in_tensor = CastPyArg2Tensor(py_in_tensor.ptr(), 0);
@@ -854,16 +854,16 @@ void BindDistributed(py::module *m) {
                  py::handle py_in_tensor_list) {
                 auto out_tensor_list =
                     CastPyArg2VectorOfTensor(py_out_tensor_list.ptr(), 0);
-                Tensor concat_out_tensor = paddle::concat(out_tensor_list, 0);
+                Tensor stack_out_tensor = paddle::stack(out_tensor_list, 0);
                 auto p_out_tensor = std::dynamic_pointer_cast<phi::DenseTensor>(
-                    concat_out_tensor.impl());
+                    stack_out_tensor.impl());
                 auto *out_dense = p_out_tensor.get();
 
                 auto in_tensor_list =
                     CastPyArg2VectorOfTensor(py_in_tensor_list.ptr(), 0);
-                Tensor concat_in_tensor = paddle::concat(in_tensor_list, 0);
+                Tensor stack_in_tensor = paddle::stack(in_tensor_list, 0);
                 auto p_in_tensor = std::dynamic_pointer_cast<phi::DenseTensor>(
-                    concat_in_tensor.impl());
+                    stack_in_tensor.impl());
                 auto in_dense = *p_in_tensor;
 
                 // in_tensor_list should not be empty
@@ -999,9 +999,9 @@ void BindDistributed(py::module *m) {
 
                 auto in_tensor_list =
                     CastPyArg2VectorOfTensor(py_in_tensor_list.ptr(), 0);
-                Tensor concat_in_tensor = paddle::concat(in_tensor_list, 0);
+                Tensor stack_in_tensor = paddle::stack(in_tensor_list, 0);
                 auto p_in_tensor = std::dynamic_pointer_cast<phi::DenseTensor>(
-                    concat_in_tensor.impl());
+                    stack_in_tensor.impl());
                 auto in_dense = *p_in_tensor;
 
                 distributed::ReduceScatterOptions opts{op};
@@ -1057,9 +1057,9 @@ void BindDistributed(py::module *m) {
 
                 auto in_tensor_list =
                     CastPyArg2VectorOfTensor(py_in_tensor_list.ptr(), 0);
-                Tensor concat_in_tensor = paddle::concat(in_tensor_list, 0);
+                Tensor stack_in_tensor = paddle::stack(in_tensor_list, 0);
                 auto p_in_tensor = std::dynamic_pointer_cast<phi::DenseTensor>(
-                    concat_in_tensor.impl());
+                    stack_in_tensor.impl());
                 auto in_dense = *p_in_tensor;
 
                 distributed::ScatterOptions opts{src};
diff --git a/paddle/phi/infermeta/multiary.cc b/paddle/phi/infermeta/multiary.cc
index a45a036b29..319e173adb 100644
--- a/paddle/phi/infermeta/multiary.cc
+++ b/paddle/phi/infermeta/multiary.cc
@@ -911,14 +911,13 @@ void ConcatInferMeta(const std::vector<const MetaTensor*>& x,
   // 1. calculate axis
   int rank = x.at(0)->dims().size();
   PADDLE_ENFORCE_EQ(
-      !rank || (axis >= -rank && axis < rank),
+      axis >= -rank && axis < rank,
       true,
       phi::errors::InvalidArgument(
           "The axis is expected to be in range of [%d, %d), but got %d",
           -rank,
           rank,
           axis));
-  axis = rank ? axis : 0;
   if (axis < 0) {
     axis = axis + rank;
   }
diff --git a/paddle/phi/kernels/cpu/stack_grad_kernel.cc b/paddle/phi/kernels/cpu/stack_grad_kernel.cc
index 018705333e..e3190b2c74 100644
--- a/paddle/phi/kernels/cpu/stack_grad_kernel.cc
+++ b/paddle/phi/kernels/cpu/stack_grad_kernel.cc
@@ -54,6 +54,10 @@ PD_REGISTER_KERNEL(stack_grad,
                    phi::StackGradKernel,
                    float,
                    double,
+                   bool,
                    int64_t,
                    int,
+                   uint8_t,
+                   int8_t,
+                   phi::dtype::float16,
                    phi::dtype::bfloat16) {}
diff --git a/paddle/phi/kernels/cpu/stack_kernel.cc b/paddle/phi/kernels/cpu/stack_kernel.cc
index 5eb1cf061b..a9c428c680 100644
--- a/paddle/phi/kernels/cpu/stack_kernel.cc
+++ b/paddle/phi/kernels/cpu/stack_kernel.cc
@@ -57,6 +57,10 @@ PD_REGISTER_KERNEL(stack,
                    phi::StackKernel,
                    float,
                    double,
-                   int,
+                   bool,
                    int64_t,
+                   int,
+                   uint8_t,
+                   int8_t,
+                   phi::dtype::float16,
                    phi::dtype::bfloat16) {}
diff --git a/paddle/phi/kernels/funcs/concat_funcs.h b/paddle/phi/kernels/funcs/concat_funcs.h
index 61a0e6ad7e..db965c2ef9 100644
--- a/paddle/phi/kernels/funcs/concat_funcs.h
+++ b/paddle/phi/kernels/funcs/concat_funcs.h
@@ -21,14 +21,13 @@ namespace funcs {
 
 static inline int64_t ComputeAxis(int64_t axis, int64_t rank) {
   PADDLE_ENFORCE_EQ(
-      !rank || (axis >= -rank && axis < rank),
+      axis >= -rank && axis < rank,
       true,
       phi::errors::InvalidArgument(
           "The axis is expected to be in range of [%d, %d), but got %d",
           -rank,
           rank,
           axis));
-  axis = rank ? axis : 0;
   if (axis < 0) {
     axis = axis + rank;
   }
diff --git a/paddle/phi/kernels/gpu/concat_kernel.cu b/paddle/phi/kernels/gpu/concat_kernel.cu
index 497f78ca9f..80ff71b215 100644
--- a/paddle/phi/kernels/gpu/concat_kernel.cu
+++ b/paddle/phi/kernels/gpu/concat_kernel.cu
@@ -34,35 +34,6 @@ void ConcatKernel(const Context& dev_ctx,
                   DenseTensor* out) {
   int64_t axis = axis_scalar.to<int64_t>();
 
-  if (UNLIKELY(x[0]->dims().size() == 0)) {
-    // for dims is 0 specially
-    phi::DDim tmp_1dim, out_dims;
-    out_dims[0] = x.size();
-    tmp_1dim[0] = 1;
-
-    out->Resize(out_dims);
-    dev_ctx.template Alloc<T>(out);
-
-    size_t output_offset = 0;
-    for (auto* in : x) {
-      if (in->numel() == 0UL) {
-        continue;
-      }
-      auto in_stride = phi::stride_numel(tmp_1dim);
-      auto out_stride = phi::stride_numel(out->dims());
-      paddle::operators::StridedNumelCopyWithAxis<T>(
-          dev_ctx,
-          axis,
-          out->data<T>() + output_offset,
-          out_stride,
-          in->data<T>(),
-          in_stride,
-          in_stride[axis]);
-      output_offset += in_stride[axis];
-    }
-    return;
-  }
-
   axis = phi::funcs::ComputeAxis(axis, x[0]->dims().size());
 
   std::vector<phi::DDim> x_dims;
diff --git a/paddle/phi/kernels/gpu/stack_grad_kernel.cu b/paddle/phi/kernels/gpu/stack_grad_kernel.cu
index f99747b059..ea61be0abf 100644
--- a/paddle/phi/kernels/gpu/stack_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/stack_grad_kernel.cu
@@ -139,7 +139,10 @@ PD_REGISTER_KERNEL(stack_grad,
                    phi::StackGradKernel,
                    float,
                    double,
+                   bool,
                    int64_t,
                    int,
+                   uint8_t,
+                   int8_t,
                    phi::dtype::float16,
                    phi::dtype::bfloat16) {}
diff --git a/paddle/phi/kernels/gpu/stack_kernel.cu b/paddle/phi/kernels/gpu/stack_kernel.cu
index 5cad80288b..3cfb98beca 100644
--- a/paddle/phi/kernels/gpu/stack_kernel.cu
+++ b/paddle/phi/kernels/gpu/stack_kernel.cu
@@ -175,7 +175,10 @@ PD_REGISTER_KERNEL(stack,
                    phi::StackKernel,
                    float,
                    double,
+                   bool,
                    int64_t,
                    int,
+                   uint8_t,
+                   int8_t,
                    phi::dtype::float16,
                    phi::dtype::bfloat16) {}
--
GitLab
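
Background on the change, as a hedged sketch rather than part of the applied patch: paddle::concat joins tensors along an existing axis, so zero-dimensional inputs previously required the special-case branch removed from concat_kernel.cu and the `!rank ||` escape in the axis checks, whereas paddle::stack inserts a new leading axis and handles 0-D inputs uniformly. A minimal Python illustration, assuming a Paddle build with 0-D tensor support (the sample values are made up for illustration):

    import paddle

    # Three 0-D (zero-dimensional) tensors, e.g. one scalar per rank.
    xs = [paddle.to_tensor(float(i)) for i in range(3)]  # each has shape []

    # stack creates a new axis 0, so 0-D inputs need no special casing:
    out = paddle.stack(xs, axis=0)
    print(out.shape)  # [3]

    # concat joins along an existing axis; a 0-D tensor has none, so under
    # the tightened check (axis must lie in [-rank, rank) with rank == 0)
    # it is rejected:
    # paddle.concat(xs, axis=0)  # raises InvalidArgument after this patch

This is also why the stack and stack_grad kernels gain bool, uint8_t, int8_t, and float16 registrations: every dtype that the collective bindings may pass through paddle::stack needs a matching kernel.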