diff --git a/paddle/fluid/pybind/distributed_py.cc b/paddle/fluid/pybind/distributed_py.cc
index 94b9b36f50adff1598fe3b0ca95ff743f587bb1c..9515ca7f645ec0b5a97b39680cc51d0a3aa36efd 100644
--- a/paddle/fluid/pybind/distributed_py.cc
+++ b/paddle/fluid/pybind/distributed_py.cc
@@ -255,9 +255,9 @@ void BindDistributed(py::module *m) {
                bool sync_op) {
               auto out_tensor_list =
                   CastPyArg2VectorOfTensor(py_out_tensor_list.ptr(), 0);
-              Tensor concat_out_tensor = paddle::concat(out_tensor_list, 0);
+              Tensor stack_out_tensor = paddle::stack(out_tensor_list, 0);
               auto p_out_tensor = std::dynamic_pointer_cast<phi::DenseTensor>(
-                  concat_out_tensor.impl());
+                  stack_out_tensor.impl());
               auto *out_dense = p_out_tensor.get();
 
               auto in_tensor = CastPyArg2Tensor(py_in_tensor.ptr(), 0);
@@ -307,16 +307,16 @@ void BindDistributed(py::module *m) {
                bool sync_op) {
               auto out_tensor_list =
                   CastPyArg2VectorOfTensor(py_out_tensor_list.ptr(), 0);
-              Tensor concat_out_tensor = paddle::concat(out_tensor_list, 0);
+              Tensor stack_out_tensor = paddle::stack(out_tensor_list, 0);
               auto p_out_tensor = std::dynamic_pointer_cast<phi::DenseTensor>(
-                  concat_out_tensor.impl());
+                  stack_out_tensor.impl());
               auto *out_dense = p_out_tensor.get();
 
               auto in_tensor_list =
                   CastPyArg2VectorOfTensor(py_in_tensor_list.ptr(), 0);
-              Tensor concat_in_tensor = paddle::concat(in_tensor_list, 0);
+              Tensor stack_in_tensor = paddle::stack(in_tensor_list, 0);
               auto p_in_tensor = std::dynamic_pointer_cast<phi::DenseTensor>(
-                  concat_in_tensor.impl());
+                  stack_in_tensor.impl());
               auto in_dense = *p_in_tensor;
 
               // in_tensor_list should not be empty
@@ -430,9 +430,9 @@ void BindDistributed(py::module *m) {
 
               auto in_tensor_list =
                   CastPyArg2VectorOfTensor(py_in_tensor_list.ptr(), 0);
-              Tensor concat_in_tensor = paddle::concat(in_tensor_list, 0);
+              Tensor stack_in_tensor = paddle::stack(in_tensor_list, 0);
               auto p_in_tensor = std::dynamic_pointer_cast<phi::DenseTensor>(
-                  concat_in_tensor.impl());
+                  stack_in_tensor.impl());
               auto in_dense = *p_in_tensor;
 
               distributed::ReduceScatterOptions opts{op};
@@ -484,9 +484,9 @@ void BindDistributed(py::module *m) {
 
               auto in_tensor_list =
                   CastPyArg2VectorOfTensor(py_in_tensor_list.ptr(), 0);
-              Tensor concat_in_tensor = paddle::concat(in_tensor_list, 0);
+              Tensor stack_in_tensor = paddle::stack(in_tensor_list, 0);
               auto p_in_tensor = std::dynamic_pointer_cast<phi::DenseTensor>(
-                  concat_in_tensor.impl());
+                  stack_in_tensor.impl());
               auto in_dense = *p_in_tensor;
 
               distributed::ScatterOptions opts{src};
@@ -746,9 +746,9 @@ void BindDistributed(py::module *m) {
                py::handle py_in_tensor) {
               auto out_tensor_list =
                   CastPyArg2VectorOfTensor(py_out_tensor_list.ptr(), 0);
-              Tensor concat_out_tensor = paddle::concat(out_tensor_list, 0);
+              Tensor stack_out_tensor = paddle::stack(out_tensor_list, 0);
               auto p_out_tensor = std::dynamic_pointer_cast<phi::DenseTensor>(
-                  concat_out_tensor.impl());
+                  stack_out_tensor.impl());
               auto *out_dense = p_out_tensor.get();
 
               auto in_tensor = CastPyArg2Tensor(py_in_tensor.ptr(), 0);
@@ -854,16 +854,16 @@ void BindDistributed(py::module *m) {
                py::handle py_in_tensor_list) {
               auto out_tensor_list =
                   CastPyArg2VectorOfTensor(py_out_tensor_list.ptr(), 0);
-              Tensor concat_out_tensor = paddle::concat(out_tensor_list, 0);
+              Tensor stack_out_tensor = paddle::stack(out_tensor_list, 0);
               auto p_out_tensor = std::dynamic_pointer_cast<phi::DenseTensor>(
-                  concat_out_tensor.impl());
+                  stack_out_tensor.impl());
               auto *out_dense = p_out_tensor.get();
 
               auto in_tensor_list =
                   CastPyArg2VectorOfTensor(py_in_tensor_list.ptr(), 0);
-              Tensor concat_in_tensor = paddle::concat(in_tensor_list, 0);
+              Tensor stack_in_tensor = paddle::stack(in_tensor_list, 0);
               auto p_in_tensor = std::dynamic_pointer_cast<phi::DenseTensor>(
-                  concat_in_tensor.impl());
+                  stack_in_tensor.impl());
               auto in_dense = *p_in_tensor;
 
               // in_tensor_list should not be empty
@@ -999,9 +999,9 @@ void BindDistributed(py::module *m) {
 
               auto in_tensor_list =
                   CastPyArg2VectorOfTensor(py_in_tensor_list.ptr(), 0);
-              Tensor concat_in_tensor = paddle::concat(in_tensor_list, 0);
+              Tensor stack_in_tensor = paddle::stack(in_tensor_list, 0);
               auto p_in_tensor = std::dynamic_pointer_cast<phi::DenseTensor>(
-                  concat_in_tensor.impl());
+                  stack_in_tensor.impl());
               auto in_dense = *p_in_tensor;
 
               distributed::ReduceScatterOptions opts{op};
@@ -1057,9 +1057,9 @@ void BindDistributed(py::module *m) {
 
               auto in_tensor_list =
                   CastPyArg2VectorOfTensor(py_in_tensor_list.ptr(), 0);
-              Tensor concat_in_tensor = paddle::concat(in_tensor_list, 0);
+              Tensor stack_in_tensor = paddle::stack(in_tensor_list, 0);
               auto p_in_tensor = std::dynamic_pointer_cast<phi::DenseTensor>(
-                  concat_in_tensor.impl());
+                  stack_in_tensor.impl());
               auto in_dense = *p_in_tensor;
 
               distributed::ScatterOptions opts{src};
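The binding changes above swap `paddle::concat` for `paddle::stack` when building the temporary communication buffer. The practical difference: `stack` inserts a new leading axis rather than joining along an existing one, so it still works when every per-rank tensor is 0-D. A minimal Python-side sketch of the two semantics (an illustration assuming a Paddle build with 0-D tensor support; not part of this diff):

```python
import paddle

# stack inserts a new leading axis, so a list of 0-D tensors becomes a
# 1-D buffer with one slot per rank -- exactly what the collectives need.
a = paddle.to_tensor(1.0)  # 0-D tensor, shape []
b = paddle.to_tensor(2.0)  # 0-D tensor, shape []
print(paddle.stack([a, b], axis=0).shape)  # [2]

# concat joins along an *existing* axis, so its inputs must be at least
# 1-D; it cannot build the buffer once 0-D tensors are allowed here.
c = paddle.to_tensor([1.0])
d = paddle.to_tensor([2.0])
print(paddle.concat([c, d], axis=0).shape)  # [2]
```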
diff --git a/paddle/phi/infermeta/multiary.cc b/paddle/phi/infermeta/multiary.cc
index a45a036b298c32e19431a6a0ca8f0f9ea58ed653..319e173adb3b55983f59773e2e5f598ef63eb6f2 100644
--- a/paddle/phi/infermeta/multiary.cc
+++ b/paddle/phi/infermeta/multiary.cc
@@ -911,14 +911,13 @@ void ConcatInferMeta(const std::vector<const MetaTensor*>& x,
   // 1. calculate axis
   int rank = x.at(0)->dims().size();
   PADDLE_ENFORCE_EQ(
-      !rank || (axis >= -rank && axis < rank),
+      axis >= -rank && axis < rank,
       true,
       phi::errors::InvalidArgument(
           "The axis is expected to be in range of [%d, %d), but got %d",
           -rank,
           rank,
           axis));
-  axis = rank ? axis : 0;
   if (axis < 0) {
     axis = axis + rank;
   }
diff --git a/paddle/phi/kernels/cpu/stack_grad_kernel.cc b/paddle/phi/kernels/cpu/stack_grad_kernel.cc
index 018705333e962d6e9852b7937cde58e2aff12cdc..e3190b2c74fbe8afc27f5c4a0f9883214f18795d 100644
--- a/paddle/phi/kernels/cpu/stack_grad_kernel.cc
+++ b/paddle/phi/kernels/cpu/stack_grad_kernel.cc
@@ -54,6 +54,10 @@ PD_REGISTER_KERNEL(stack_grad,
                    phi::StackGradKernel,
                    float,
                    double,
+                   bool,
                    int64_t,
                    int,
+                   uint8_t,
+                   int8_t,
+                   phi::dtype::float16,
                    phi::dtype::bfloat16) {}
diff --git a/paddle/phi/kernels/cpu/stack_kernel.cc b/paddle/phi/kernels/cpu/stack_kernel.cc
index 5eb1cf061be2b072937b8a7da0b882ee4512156c..a9c428c68047d417c09b8a375650b718b4aaa259 100644
--- a/paddle/phi/kernels/cpu/stack_kernel.cc
+++ b/paddle/phi/kernels/cpu/stack_kernel.cc
@@ -57,6 +57,10 @@ PD_REGISTER_KERNEL(stack,
                    phi::StackKernel,
                    float,
                    double,
-                   int,
+                   bool,
                    int64_t,
+                   int,
+                   uint8_t,
+                   int8_t,
+                   phi::dtype::float16,
                    phi::dtype::bfloat16) {}
diff --git a/paddle/phi/kernels/funcs/concat_funcs.h b/paddle/phi/kernels/funcs/concat_funcs.h
index 61a0e6ad7e59ee94710375515650f47a4a75bbd5..db965c2ef9b65413aa8faf2b0dd7813cd8487dd8 100644
--- a/paddle/phi/kernels/funcs/concat_funcs.h
+++ b/paddle/phi/kernels/funcs/concat_funcs.h
@@ -21,14 +21,13 @@ namespace funcs {
 
 static inline int64_t ComputeAxis(int64_t axis, int64_t rank) {
   PADDLE_ENFORCE_EQ(
-      !rank || (axis >= -rank && axis < rank),
+      axis >= -rank && axis < rank,
       true,
       phi::errors::InvalidArgument(
           "The axis is expected to be in range of [%d, %d), but got %d",
           -rank,
           rank,
           axis));
-  axis = rank ? axis : 0;
   if (axis < 0) {
     axis = axis + rank;
   }
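Both `ConcatInferMeta` and `phi::funcs::ComputeAxis` previously let `rank == 0` bypass the range check via `!rank || ...` and then coerced `axis` to 0. With that escape hatch removed, the valid interval for a 0-D input is `[-0, 0)`, which is empty, so concatenating 0-D tensors is now rejected during validation instead of being silently special-cased. A standalone Python mirror of the updated check (a hypothetical helper, for illustration only):

```python
def compute_axis(axis: int, rank: int) -> int:
    # Mirrors the updated phi::funcs::ComputeAxis. With `!rank ||` gone,
    # rank == 0 leaves the empty interval [-0, 0): every axis is rejected.
    if not (-rank <= axis < rank):
        raise ValueError(
            f"The axis is expected to be in range of [{-rank}, {rank}), "
            f"but got {axis}")
    # Normalize a negative axis to its positive equivalent.
    return axis + rank if axis < 0 else axis
```

For example, `compute_axis(-1, 2)` normalizes to `1`, while `compute_axis(0, 0)` now raises instead of being coerced to `0`.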
diff --git a/paddle/phi/kernels/gpu/concat_kernel.cu b/paddle/phi/kernels/gpu/concat_kernel.cu
index 497f78ca9fc8fa17f4bbf145c4fedb60d8bf5edb..80ff71b2158241370cde44dc581e7b3f388877fa 100644
--- a/paddle/phi/kernels/gpu/concat_kernel.cu
+++ b/paddle/phi/kernels/gpu/concat_kernel.cu
@@ -34,35 +34,6 @@ void ConcatKernel(const Context& dev_ctx,
                   DenseTensor* out) {
   int64_t axis = axis_scalar.to<int64_t>();
 
-  if (UNLIKELY(x[0]->dims().size() == 0)) {
-    // for dims is 0 specially
-    phi::DDim tmp_1dim, out_dims;
-    out_dims[0] = x.size();
-    tmp_1dim[0] = 1;
-
-    out->Resize(out_dims);
-    dev_ctx.template Alloc<T>(out);
-
-    size_t output_offset = 0;
-    for (auto* in : x) {
-      if (in->numel() == 0UL) {
-        continue;
-      }
-      auto in_stride = phi::stride_numel(tmp_1dim);
-      auto out_stride = phi::stride_numel(out->dims());
-      paddle::operators::StridedNumelCopyWithAxis<T>(
-          dev_ctx,
-          axis,
-          out->data<T>() + output_offset,
-          out_stride,
-          in->data<T>(),
-          in_stride,
-          in_stride[axis]);
-      output_offset += in_stride[axis];
-    }
-    return;
-  }
-
   axis = phi::funcs::ComputeAxis(axis, x[0]->dims().size());
 
   std::vector<phi::DDim> x_dims;
diff --git a/paddle/phi/kernels/gpu/stack_grad_kernel.cu b/paddle/phi/kernels/gpu/stack_grad_kernel.cu
index f99747b05933c2b539b25b3693961f959b350932..ea61be0abf1a56ca40c6c52d72b20357f6c2e9e7 100644
--- a/paddle/phi/kernels/gpu/stack_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/stack_grad_kernel.cu
@@ -139,7 +139,10 @@ PD_REGISTER_KERNEL(stack_grad,
                    phi::StackGradKernel,
                    float,
                    double,
+                   bool,
                    int64_t,
                    int,
+                   uint8_t,
+                   int8_t,
                    phi::dtype::float16,
                    phi::dtype::bfloat16) {}
diff --git a/paddle/phi/kernels/gpu/stack_kernel.cu b/paddle/phi/kernels/gpu/stack_kernel.cu
index 5cad80288bf691c3b5c9349d9eb51f1c67f9ab47..3cfb98beca3514dab0707a1556398e67fb1b7d53 100644
--- a/paddle/phi/kernels/gpu/stack_kernel.cu
+++ b/paddle/phi/kernels/gpu/stack_kernel.cu
@@ -175,7 +175,10 @@ PD_REGISTER_KERNEL(stack,
                    phi::StackKernel,
                    float,
                    double,
+                   bool,
                    int64_t,
                    int,
+                   uint8_t,
+                   int8_t,
                    phi::dtype::float16,
                    phi::dtype::bfloat16) {}
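With 0-D inputs now rejected by `ComputeAxis`, the hand-rolled 0-D branch in the GPU `ConcatKernel` (resize the output to `[x.size()]`, then strided element copies) is unreachable, so it is deleted rather than kept as dead code. Callers that want to combine 0-D tensors use `stack`, which is exactly the pattern the pybind changes above follow. A Python-side sketch of the resulting behavior (the exact exception type surfaced by the enforce check may vary by build, hence the broad `except`):

```python
import paddle

x = paddle.to_tensor(1.0)  # 0-D
y = paddle.to_tensor(2.0)  # 0-D

try:
    paddle.concat([x, y], axis=0)  # now fails the [-rank, rank) check
except Exception as err:  # exact type depends on the build
    print(type(err).__name__)

print(paddle.stack([x, y]).shape)  # [2] -- the supported replacement
```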
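The remaining hunks widen the dtype coverage of the `stack` and `stack_grad` kernels on both CPU and GPU, adding `bool`, `uint8_t`, and `int8_t` everywhere and `phi::dtype::float16` on CPU (the GPU kernels already had it). This lets the binding layer stack whatever dtype the collective is handed. A quick sketch of the newly covered dtypes from Python (coverage ultimately depends on how your build registered the kernels):

```python
import paddle

paddle.set_device("cpu")

# bool is now registered for the CPU stack kernel.
flags = [paddle.to_tensor([True, False]), paddle.to_tensor([False, True])]
print(paddle.stack(flags).dtype)  # paddle.bool

# int8 likewise; stacking two [2] tensors yields shape [2, 2].
small = [paddle.ones([2], dtype="int8"), paddle.zeros([2], dtype="int8")]
print(paddle.stack(small).shape)  # [2, 2]
```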