diff --git a/paddle/phi/kernels/cpu/size_kernel.cc b/paddle/phi/kernels/cpu/size_kernel.cc
new file mode 100644
index 0000000000000000000000000000000000000000..4019976ecec9c61014d9f508367be7ed93c08327
--- /dev/null
+++ b/paddle/phi/kernels/cpu/size_kernel.cc
@@ -0,0 +1,32 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/size_kernel.h"
+
+#include "paddle/phi/backends/cpu/cpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/impl/size_kernel_impl.h"
+
+PD_REGISTER_KERNEL(size,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::SizeKernel,
+                   uint8_t,
+                   int16_t,
+                   int,
+                   int64_t,
+                   phi::dtype::float16,
+                   float,
+                   double,
+                   bool) {}
diff --git a/paddle/phi/kernels/gpu/size_kernel.cu b/paddle/phi/kernels/gpu/size_kernel.cu
new file mode 100644
index 0000000000000000000000000000000000000000..fb6acd5599a8e5c9b81923c73dc045f0dc1a6f90
--- /dev/null
+++ b/paddle/phi/kernels/gpu/size_kernel.cu
@@ -0,0 +1,31 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/size_kernel.h"
+
+#include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/impl/size_kernel_impl.h"
+
+PD_REGISTER_KERNEL(size,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::SizeKernel,
+                   int16_t,
+                   int,
+                   int64_t,
+                   phi::dtype::float16,
+                   float,
+                   double,
+                   bool) {}
diff --git a/paddle/phi/kernels/size_kernel.cc b/paddle/phi/kernels/impl/size_kernel_impl.h
similarity index 56%
rename from paddle/phi/kernels/size_kernel.cc
rename to paddle/phi/kernels/impl/size_kernel_impl.h
index e197d3de28645e12a88bbeb2e505a26ed98a3666..f9757bc4477569247c1aac11fd6523cca8945951 100644
--- a/paddle/phi/kernels/size_kernel.cc
+++ b/paddle/phi/kernels/impl/size_kernel_impl.h
@@ -12,33 +12,28 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/phi/kernels/size_kernel.h"
+#pragma once
 
-#include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/core/tensor_utils.h"
 
 namespace phi {
 
-template <typename Context>
+template <typename T, typename Context>
 void SizeKernel(const Context& ctx,
                 const DenseTensor& input,
                 DenseTensor* out) {
-  auto* out_data = ctx.template HostAlloc<int64_t>(out);
-  out_data[0] = input.numel();
+  auto place = ctx.GetPlace();
+  auto out_data = ctx.template Alloc<int64_t>(out);
+  auto cpu_place = phi::CPUPlace();
+  if (place == cpu_place) {
+    out_data[0] = input.numel();
+  } else {
+    DenseTensor cpu_tensor;
+    cpu_tensor.Resize(out->dims());
+    auto cpu_data = ctx.template HostAlloc<int64_t>(&cpu_tensor);
+    cpu_data[0] = input.numel();
+    phi::Copy(ctx, cpu_tensor, place, false, out);
+  }
 }
 
 }  // namespace phi
-
-PD_REGISTER_GENERAL_KERNEL(
-    size, CPU, ALL_LAYOUT, phi::SizeKernel<phi::CPUContext>, ALL_DTYPE) {
-  kernel->OutputAt(0).SetDataType(phi::DataType::INT64);
-}
-
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-PD_REGISTER_GENERAL_KERNEL(
-    size, GPU, ALL_LAYOUT, phi::SizeKernel<phi::GPUContext>, ALL_DTYPE) {
-  kernel->OutputAt(0)
-      .SetBackend(phi::Backend::CPU)
-      .SetDataType(phi::DataType::INT64);
-}
-#endif
diff --git a/paddle/phi/kernels/size_kernel.h b/paddle/phi/kernels/size_kernel.h
index 6b4871778ea180e9fff54b2a26454246266d4575..2d7a29104db0813f4d4dca340575d0c1a5885d4c 100644
--- a/paddle/phi/kernels/size_kernel.h
+++ b/paddle/phi/kernels/size_kernel.h
@@ -18,7 +18,7 @@
 
 namespace phi {
 
-template <typename Context>
+template <typename T, typename Context>
 void SizeKernel(const Context& ctx, const DenseTensor& input, DenseTensor* out);
 
 }  // namespace phi
diff --git a/python/paddle/distributed/collective.py b/python/paddle/distributed/collective.py
index 3a06c2f84af7dce4ea4e1fe724248611997e0cf7..f656cb076cb2f66e1949abdd3aa6429a2e7d737d 100644
--- a/python/paddle/distributed/collective.py
+++ b/python/paddle/distributed/collective.py
@@ -1140,9 +1140,6 @@ def all_gather_object(object_list, obj, group=None):
     ), "all_gather_object doesn't support static graph mode."
 
     tensor, len_of_tensor = _convert_object_to_tensor(obj)
-    if paddle.get_device() != "cpu":
-        len_of_tensor = len_of_tensor._copy_to(
-            paddle.framework._current_expected_place(), False)
 
     # gather len_of_tensor from all ranks
     list_len_of_tensor = []