From 01b26b128ca39ac099a29075d91a39fb1392047a Mon Sep 17 00:00:00 2001
From: Chen Weihang <sunny_cwh@163.com>
Date: Fri, 16 Sep 2022 15:50:33 +0800
Subject: [PATCH] Revert "Simplify size op impl (#45808)"

This reverts commit c252b1de0fdbdb8ee40e50fc023ba432f1828dc9.
---
 paddle/phi/kernels/cpu/size_kernel.cc         | 32 ++++++++++++++++++
 paddle/phi/kernels/gpu/size_kernel.cu         | 31 +++++++++++++++++
 .../size_kernel_impl.h}                       | 33 ++++++++-----------
 paddle/phi/kernels/size_kernel.h              |  2 +-
 python/paddle/distributed/collective.py       |  3 --
 5 files changed, 78 insertions(+), 23 deletions(-)
 create mode 100644 paddle/phi/kernels/cpu/size_kernel.cc
 create mode 100644 paddle/phi/kernels/gpu/size_kernel.cu
 rename paddle/phi/kernels/{size_kernel.cc => impl/size_kernel_impl.h} (56%)

diff --git a/paddle/phi/kernels/cpu/size_kernel.cc b/paddle/phi/kernels/cpu/size_kernel.cc
new file mode 100644
index 00000000000..4019976ecec
--- /dev/null
+++ b/paddle/phi/kernels/cpu/size_kernel.cc
@@ -0,0 +1,32 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/size_kernel.h"
+
+#include "paddle/phi/backends/cpu/cpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/impl/size_kernel_impl.h"
+
+PD_REGISTER_KERNEL(size,  // registers phi::SizeKernel under the name `size`
+                   CPU,  // backend tag for this registration
+                   ALL_LAYOUT,
+                   phi::SizeKernel,  // template from impl/size_kernel_impl.h
+                   uint8_t,  // remaining arguments are the registered dtypes
+                   int16_t,
+                   int,
+                   int64_t,
+                   phi::dtype::float16,
+                   float,
+                   double,
+                   bool) {}  // empty body: no extra kernel configuration here
diff --git a/paddle/phi/kernels/gpu/size_kernel.cu b/paddle/phi/kernels/gpu/size_kernel.cu
new file mode 100644
index 00000000000..fb6acd5599a
--- /dev/null
+++ b/paddle/phi/kernels/gpu/size_kernel.cu
@@ -0,0 +1,31 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/size_kernel.h"
+
+#include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/impl/size_kernel_impl.h"
+
+PD_REGISTER_KERNEL(size,  // registers phi::SizeKernel under the name `size`
+                   GPU,  // backend tag for this registration
+                   ALL_LAYOUT,
+                   phi::SizeKernel,  // template from impl/size_kernel_impl.h
+                   int16_t,  // NOTE(review): CPU also lists uint8_t - confirm
+                   int,
+                   int64_t,
+                   phi::dtype::float16,
+                   float,
+                   double,
+                   bool) {}  // empty body: no extra kernel configuration here
diff --git a/paddle/phi/kernels/size_kernel.cc b/paddle/phi/kernels/impl/size_kernel_impl.h
similarity index 56%
rename from paddle/phi/kernels/size_kernel.cc
rename to paddle/phi/kernels/impl/size_kernel_impl.h
index e197d3de286..f9757bc4477 100644
--- a/paddle/phi/kernels/size_kernel.cc
+++ b/paddle/phi/kernels/impl/size_kernel_impl.h
@@ -12,33 +12,28 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/phi/kernels/size_kernel.h"
+#pragma once
 
-#include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/core/tensor_utils.h"
 
 namespace phi {
 
-template <typename Context>
+template <typename T, typename Context>  // T is not referenced in the body
 void SizeKernel(const Context& ctx,
                 const DenseTensor& input,
                 DenseTensor* out) {
-  auto* out_data = ctx.template HostAlloc<int64_t>(out);
-  out_data[0] = input.numel();
+  auto place = ctx.GetPlace();  // place reported by the context
+  auto out_data = ctx.template Alloc<int64_t>(out);  // int64_t buffer for out
+  auto cpu_place = phi::CPUPlace();
+  if (place == cpu_place) {
+    out_data[0] = input.numel();  // CPU place: store the element count directly
+  } else {
+    DenseTensor cpu_tensor;  // local staging tensor, filled via HostAlloc below
+    cpu_tensor.Resize(out->dims());
+    auto cpu_data = ctx.template HostAlloc<int64_t>(&cpu_tensor);
+    cpu_data[0] = input.numel();
+    phi::Copy(ctx, cpu_tensor, place, false, out);  // NOTE(review): `false` is presumably non-blocking; confirm the copy cannot outlive cpu_tensor
+  }
 }
 
 }  // namespace phi
-
-PD_REGISTER_GENERAL_KERNEL(
-    size, CPU, ALL_LAYOUT, phi::SizeKernel<phi::CPUContext>, ALL_DTYPE) {
-  kernel->OutputAt(0).SetDataType(phi::DataType::INT64);
-}
-
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-PD_REGISTER_GENERAL_KERNEL(
-    size, GPU, ALL_LAYOUT, phi::SizeKernel<phi::GPUContext>, ALL_DTYPE) {
-  kernel->OutputAt(0)
-      .SetBackend(phi::Backend::CPU)
-      .SetDataType(phi::DataType::INT64);
-}
-#endif
diff --git a/paddle/phi/kernels/size_kernel.h b/paddle/phi/kernels/size_kernel.h
index 6b4871778ea..2d7a29104db 100644
--- a/paddle/phi/kernels/size_kernel.h
+++ b/paddle/phi/kernels/size_kernel.h
@@ -18,7 +18,7 @@
 
 namespace phi {
 
-template <typename Context>
+template <typename T, typename Context>  // T: presumably the registered dtype; the impl body never uses it
 void SizeKernel(const Context& ctx, const DenseTensor& input, DenseTensor* out);
 
 }  // namespace phi
diff --git a/python/paddle/distributed/collective.py b/python/paddle/distributed/collective.py
index 5ceb046f550..2835f7b3e32 100644
--- a/python/paddle/distributed/collective.py
+++ b/python/paddle/distributed/collective.py
@@ -1117,9 +1117,6 @@ def all_gather_object(object_list, obj, group=None):
     ), "all_gather_object doesn't support static graph mode."
 
     tensor, len_of_tensor = _convert_object_to_tensor(obj)
-    if paddle.get_device() != "cpu":
-        len_of_tensor = len_of_tensor._copy_to(
-            paddle.framework._current_expected_place(), False)
 
     # gather len_of_tensor from all ranks
     list_len_of_tensor = []
-- 
GitLab