diff --git a/paddle/phi/kernels/cpu/size_kernel.cc b/paddle/phi/kernels/cpu/size_kernel.cc deleted file mode 100644 index 4019976ecec9c61014d9f508367be7ed93c08327..0000000000000000000000000000000000000000 --- a/paddle/phi/kernels/cpu/size_kernel.cc +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/phi/kernels/size_kernel.h" - -#include "paddle/phi/backends/cpu/cpu_context.h" -#include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/impl/size_kernel_impl.h" - -PD_REGISTER_KERNEL(size, - CPU, - ALL_LAYOUT, - phi::SizeKernel, - uint8_t, - int16_t, - int, - int64_t, - phi::dtype::float16, - float, - double, - bool) {} diff --git a/paddle/phi/kernels/gpu/size_kernel.cu b/paddle/phi/kernels/gpu/size_kernel.cu deleted file mode 100644 index fb6acd5599a8e5c9b81923c73dc045f0dc1a6f90..0000000000000000000000000000000000000000 --- a/paddle/phi/kernels/gpu/size_kernel.cu +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/phi/kernels/size_kernel.h" - -#include "paddle/phi/backends/gpu/gpu_context.h" -#include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/impl/size_kernel_impl.h" - -PD_REGISTER_KERNEL(size, - GPU, - ALL_LAYOUT, - phi::SizeKernel, - int16_t, - int, - int64_t, - phi::dtype::float16, - float, - double, - bool) {} diff --git a/paddle/phi/kernels/impl/size_kernel_impl.h b/paddle/phi/kernels/size_kernel.cc similarity index 56% rename from paddle/phi/kernels/impl/size_kernel_impl.h rename to paddle/phi/kernels/size_kernel.cc index f9757bc4477569247c1aac11fd6523cca8945951..e197d3de28645e12a88bbeb2e505a26ed98a3666 100644 --- a/paddle/phi/kernels/impl/size_kernel_impl.h +++ b/paddle/phi/kernels/size_kernel.cc @@ -12,28 +12,33 @@ // See the License for the specific language governing permissions and // limitations under the License. -#pragma once +#include "paddle/phi/kernels/size_kernel.h" +#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" namespace phi { -template +template void SizeKernel(const Context& ctx, const DenseTensor& input, DenseTensor* out) { - auto place = ctx.GetPlace(); - auto out_data = ctx.template Alloc(out); - auto cpu_place = phi::CPUPlace(); - if (place == cpu_place) { - out_data[0] = input.numel(); - } else { - DenseTensor cpu_tensor; - cpu_tensor.Resize(out->dims()); - auto cpu_data = ctx.template HostAlloc(&cpu_tensor); - cpu_data[0] = input.numel(); - phi::Copy(ctx, cpu_tensor, place, false, out); - } + auto* out_data = ctx.template HostAlloc(out); + out_data[0] = input.numel(); } } // namespace phi + +PD_REGISTER_GENERAL_KERNEL( + size, CPU, ALL_LAYOUT, phi::SizeKernel, ALL_DTYPE) { + kernel->OutputAt(0).SetDataType(phi::DataType::INT64); +} + +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +PD_REGISTER_GENERAL_KERNEL( + size, GPU, ALL_LAYOUT, phi::SizeKernel, ALL_DTYPE) { + kernel->OutputAt(0) + .SetBackend(phi::Backend::CPU) + .SetDataType(phi::DataType::INT64); +} +#endif diff --git a/paddle/phi/kernels/size_kernel.h b/paddle/phi/kernels/size_kernel.h index 2d7a29104db0813f4d4dca340575d0c1a5885d4c..6b4871778ea180e9fff54b2a26454246266d4575 100644 --- a/paddle/phi/kernels/size_kernel.h +++ b/paddle/phi/kernels/size_kernel.h @@ -18,7 +18,7 @@ namespace phi { -template +template void SizeKernel(const Context& ctx, const DenseTensor& input, DenseTensor* out); } // namespace phi diff --git a/python/paddle/distributed/collective.py b/python/paddle/distributed/collective.py index f656cb076cb2f66e1949abdd3aa6429a2e7d737d..3a06c2f84af7dce4ea4e1fe724248611997e0cf7 100644 --- a/python/paddle/distributed/collective.py +++ b/python/paddle/distributed/collective.py @@ -1140,6 +1140,9 @@ def all_gather_object(object_list, obj, group=None): ), "all_gather_object doesn't support static graph mode." tensor, len_of_tensor = _convert_object_to_tensor(obj) + if paddle.get_device() != "cpu": + len_of_tensor = len_of_tensor._copy_to( + paddle.framework._current_expected_place(), False) # gather len_of_tensor from all ranks list_len_of_tensor = []