From 64538c8d12f12c77d2373b66c25e9499e8acb3d0 Mon Sep 17 00:00:00 2001
From: Chen Weihang
Date: Tue, 4 Jan 2022 10:17:55 +0800
Subject: [PATCH] [PTen] Move inner empty and cast api to kernel.h (#38587)

* move inner cast api to cast_kernel.h

* resolve conflit
---
 paddle/fluid/operators/cast_op.h              |  2 +-
 paddle/pten/include/creation.h                | 31 ------------
 paddle/pten/include/manipulation.h            | 13 -----
 paddle/pten/kernels/CMakeLists.txt            |  2 +
 paddle/pten/kernels/cast_kernel.h             | 21 ++++++--
 paddle/pten/kernels/cpu/cast_kernel.cc        | 12 ++---
 paddle/pten/kernels/empty_kernel.cc           | 16 +++---
 paddle/pten/kernels/empty_kernel.h            | 49 +++++++++++++++++--
 paddle/pten/kernels/gpu/cast_kernel.cu        | 12 ++---
 .../hybird/cuda/reduce/reduce_cuda_impl.h     |  2 +-
 .../pten/kernels/hybird/general/reduce_impl.h |  2 +-
 11 files changed, 86 insertions(+), 76 deletions(-)

diff --git a/paddle/fluid/operators/cast_op.h b/paddle/fluid/operators/cast_op.h
index 466adfa5f36..4f7fe2854ae 100644
--- a/paddle/fluid/operators/cast_op.h
+++ b/paddle/fluid/operators/cast_op.h
@@ -71,7 +71,7 @@ class CastOpKernel : public framework::OpKernel<InT> {
         static_cast<framework::proto::VarType::Type>(out_dtype));
 
     // call new kernel
-    pten::Cast<InT>(dev_ctx, *pt_x.get(), pt_out_dtype, pt_out.get());
+    pten::CastKernel<InT>(dev_ctx, *pt_x.get(), pt_out_dtype, pt_out.get());
   }
 };
 
diff --git a/paddle/pten/include/creation.h b/paddle/pten/include/creation.h
index 73c5999ca92..fa5bd49ca30 100644
--- a/paddle/pten/include/creation.h
+++ b/paddle/pten/include/creation.h
@@ -23,37 +23,6 @@ namespace pten {
 
 // TODO(YuanRisheng) This function name should be same as User API name.
 // TODO(zyfncg) Automatic code generation
-template <typename T, typename ContextT>
-DenseTensor Empty(const ContextT& dev_ctx,
-                  const ScalarArray& shape,
-                  DataType dtype = DataType::FLOAT32,
-                  Backend backend = Backend::CPU,  // Is backend needed here?
-                  DataLayout layout = DataLayout::NCHW) {
-  auto out_meta = CreateInferMeta(shape, dtype, layout);
-  pten::DenseTensor dense_out(
-      pten::make_intrusive<paddle::experimental::SharedStorage>(
-          dev_ctx.GetPlace()),
-      std::move(out_meta));
-  Empty<T>(dev_ctx, shape, &dense_out);
-  return dense_out;
-}
-
-template <typename T, typename ContextT>
-DenseTensor EmptyLike(
-    const ContextT& dev_ctx,
-    const DenseTensor& x,
-    DataType dtype = DataType::UNDEFINED,
-    Backend backend = Backend::UNDEFINED,  // Is backend needed here?
-    DataLayout layout = DataLayout::UNDEFINED) {
-  auto out_meta = CreateLikeInferMeta(x.meta(), dtype, layout);
-  pten::DenseTensor dense_out(
-      pten::make_intrusive<paddle::experimental::SharedStorage>(
-          dev_ctx.GetPlace()),
-      std::move(out_meta));
-  EmptyLike<T>(dev_ctx, &dense_out);
-  return dense_out;
-}
-
 template <typename T, typename ContextT>
 DenseTensor Full(const ContextT& dev_ctx,
                  const ScalarArray& shape,
diff --git a/paddle/pten/include/manipulation.h b/paddle/pten/include/manipulation.h
index e317964dd1e..a8625e52f56 100644
--- a/paddle/pten/include/manipulation.h
+++ b/paddle/pten/include/manipulation.h
@@ -37,19 +37,6 @@ DenseTensor Flatten(const ContextT& dev_ctx,
   return dense_out;
 }
 
-template <typename T, typename ContextT>
-DenseTensor Cast(const ContextT& dev_ctx,
-                 const DenseTensor& x,
-                 DataType out_dtype) {
-  auto out_meta = CastInferMeta(x.meta(), out_dtype);
-  pten::DenseTensor dense_out(
-      pten::make_intrusive<paddle::experimental::SharedStorage>(
-          dev_ctx.GetPlace()),
-      std::move(out_meta));
-  Cast<T>(dev_ctx, x, out_dtype, &dense_out);
-  return dense_out;
-}
-
 template <typename T, typename ContextT>
 DenseTensor Reshape(const ContextT& dev_ctx,
                     const DenseTensor& x,
diff --git a/paddle/pten/kernels/CMakeLists.txt b/paddle/pten/kernels/CMakeLists.txt
index 4c705767f4c..b76d408f89e 100644
--- a/paddle/pten/kernels/CMakeLists.txt
+++ b/paddle/pten/kernels/CMakeLists.txt
@@ -26,6 +26,8 @@ set_property(GLOBAL PROPERTY PTEN_KERNELS "")
 
 set(COMMON_KERNEL_DEPS dense_tensor kernel_context kernel_factory convert_utils)
 set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} eigen_function blas)
+# remove this dep after removing fluid deps on tensor creation
+set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} pten_api_utils)
 set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} infermeta)
 
 set(MATH_KERNEL_DEPS ${COMMON_KERNEL_DEPS} cast_kernel copy_kernel pten_transpose_cpu)
diff --git a/paddle/pten/kernels/cast_kernel.h b/paddle/pten/kernels/cast_kernel.h
index 5243fa05fac..8fdce9cda6f 100644
--- a/paddle/pten/kernels/cast_kernel.h
+++ b/paddle/pten/kernels/cast_kernel.h
@@ -15,13 +15,24 @@ limitations under the License. */
 #pragma once
 
 #include "paddle/pten/core/dense_tensor.h"
+#include "paddle/pten/kernels/empty_kernel.h"
 
 namespace pten {
 
-template <typename T, typename ContextT>
-void Cast(const ContextT& dev_ctx,
-          const DenseTensor& x,
-          DataType out_dtype,
-          DenseTensor* out);
+template <typename T, typename Context>
+void CastKernel(const Context& dev_ctx,
+                const DenseTensor& x,
+                DataType out_dtype,
+                DenseTensor* out);
+
+template <typename T, typename Context>
+DenseTensor Cast(const Context& dev_ctx,
+                 const DenseTensor& x,
+                 DataType out_dtype) {
+  auto out_meta = CastInferMeta(x.meta(), out_dtype);
+  auto dense_out = pten::Empty<T, Context>(dev_ctx, std::move(out_meta));
+  CastKernel<T, Context>(dev_ctx, x, out_dtype, &dense_out);
+  return dense_out;
+}
 
 }  // namespace pten
diff --git a/paddle/pten/kernels/cpu/cast_kernel.cc b/paddle/pten/kernels/cpu/cast_kernel.cc
index a9964d99eef..c6736cdd1bc 100644
--- a/paddle/pten/kernels/cpu/cast_kernel.cc
+++ b/paddle/pten/kernels/cpu/cast_kernel.cc
@@ -46,11 +46,11 @@ void CastKernelImpl(const CPUContext& dev_ctx,
         CastOpTransformFunctor<InT, OutT>());
 }
 
-template <typename T, typename ContextT>
-void Cast(const ContextT& dev_ctx,
-          const DenseTensor& x,
-          DataType out_dtype,
-          DenseTensor* out) {
+template <typename T, typename Context>
+void CastKernel(const Context& dev_ctx,
+                const DenseTensor& x,
+                DataType out_dtype,
+                DenseTensor* out) {
   PD_VISIT_ALL_TYPES(out_dtype, "CastKernelImpl", ([&] {
                        CastKernelImpl<T, data_t>(dev_ctx, x, out);
                      }));
@@ -61,7 +61,7 @@ void Cast(const ContextT& dev_ctx,
 PT_REGISTER_CTX_KERNEL(cast,
                        CPU,
                        ALL_LAYOUT,
-                       pten::Cast,
+                       pten::CastKernel,
                        float,
                        double,
                        int,
diff --git a/paddle/pten/kernels/empty_kernel.cc b/paddle/pten/kernels/empty_kernel.cc
index 4c6d8706e0f..94886806bcc 100644
--- a/paddle/pten/kernels/empty_kernel.cc
+++ b/paddle/pten/kernels/empty_kernel.cc
@@ -20,14 +20,14 @@ limitations under the License. */
 namespace pten {
 
 template <typename T, typename ContextT>
-void Empty(const ContextT& dev_ctx,
-           const ScalarArray& shape,
-           DenseTensor* out) {
+void EmptyKernel(const ContextT& dev_ctx,
+                 const ScalarArray& shape,
+                 DenseTensor* out) {
   out->Resize(paddle::framework::make_ddim(shape.GetData()));
 }
 
 template <typename T, typename ContextT>
-void EmptyLike(const ContextT& dev_ctx, DenseTensor* out) {
+void EmptyLikeKernel(const ContextT& dev_ctx, DenseTensor* out) {
   out->mutable_data<T>();
 }
 
@@ -36,7 +36,7 @@ void EmptyLike(const ContextT& dev_ctx, DenseTensor* out) {
 PT_REGISTER_CTX_KERNEL(empty,
                        CPU,
                        ALL_LAYOUT,
-                       pten::Empty,
+                       pten::EmptyKernel,
                        bool,
                        int,
                        int64_t,
@@ -47,7 +47,7 @@ PT_REGISTER_CTX_KERNEL(empty,
 PT_REGISTER_CTX_KERNEL(empty_like,
                        CPU,
                        ALL_LAYOUT,
-                       pten::EmptyLike,
+                       pten::EmptyLikeKernel,
                        bool,
                        int,
                        int64_t,
@@ -59,7 +59,7 @@ PT_REGISTER_CTX_KERNEL(empty_like,
 PT_REGISTER_CTX_KERNEL(empty,
                        GPU,
                        ALL_LAYOUT,
-                       pten::Empty,
+                       pten::EmptyKernel,
                        bool,
                        int,
                        int64_t,
@@ -70,7 +70,7 @@ PT_REGISTER_CTX_KERNEL(empty,
 PT_REGISTER_CTX_KERNEL(empty_like,
                        GPU,
                        ALL_LAYOUT,
-                       pten::EmptyLike,
+                       pten::EmptyLikeKernel,
                        bool,
                        int,
                        int64_t,
diff --git a/paddle/pten/kernels/empty_kernel.h b/paddle/pten/kernels/empty_kernel.h
index 7aa5a27765a..3249526805b 100644
--- a/paddle/pten/kernels/empty_kernel.h
+++ b/paddle/pten/kernels/empty_kernel.h
@@ -14,15 +14,56 @@
 
 #pragma once
 
+#include "paddle/pten/api/lib/utils/storage.h"
 #include "paddle/pten/common/scalar_array.h"
 #include "paddle/pten/core/dense_tensor.h"
+#include "paddle/pten/infermeta/nary.h"
+#include "paddle/pten/infermeta/unary.h"
 
 namespace pten {
 
-template <typename T, typename ContextT>
-void Empty(const ContextT& dev_ctx, const ScalarArray& shape, DenseTensor* out);
+template <typename T, typename Context>
+void EmptyKernel(const Context& dev_ctx,
+                 const ScalarArray& shape,
+                 DenseTensor* out);
 
-template <typename T, typename ContextT>
-void EmptyLike(const ContextT& dev_ctx, DenseTensor* out);
+template <typename T, typename Context>
+void EmptyLikeKernel(const Context& dev_ctx, DenseTensor* out);
+
+// TODO(chenweihang): the tensor creation method need to be replaced later,
+// all kernel api call Empty here instead of making tensor self
+template <typename T, typename Context>
+DenseTensor Empty(const Context& dev_ctx, DenseTensorMeta&& meta) {
+  pten::DenseTensor dense_out(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      std::move(meta));
+  return dense_out;
+}
+
+template <typename T, typename Context>
+DenseTensor Empty(const Context& dev_ctx,
+                  const ScalarArray& shape,
+                  DataType dtype = DataType::FLOAT32,
+                  Backend backend = Backend::CPU,  // Is backend needed here?
+                  DataLayout layout = DataLayout::NCHW) {
+  auto out_meta = CreateInferMeta(shape, dtype, layout);
+  auto dense_out = Empty<T, Context>(dev_ctx, std::move(out_meta));
+  EmptyKernel<T, Context>(dev_ctx, shape, &dense_out);
+  return dense_out;
+}
+
+template <typename T, typename Context>
+DenseTensor EmptyLike(
+    const Context& dev_ctx,
+    const DenseTensor& x,
+    DataType dtype = DataType::UNDEFINED,
+    Backend backend = Backend::UNDEFINED,  // Is backend needed here?
+    DataLayout layout = DataLayout::UNDEFINED) {
+  auto out_meta = CreateLikeInferMeta(x.meta(), dtype, layout);
+  auto dense_out = Empty<T, Context>(dev_ctx, std::move(out_meta));
+  EmptyLikeKernel<T, Context>(dev_ctx, &dense_out);
+  return dense_out;
+}
 
 }  // namespace pten
diff --git a/paddle/pten/kernels/gpu/cast_kernel.cu b/paddle/pten/kernels/gpu/cast_kernel.cu
index e413a38d5e0..58adbcc6f35 100644
--- a/paddle/pten/kernels/gpu/cast_kernel.cu
+++ b/paddle/pten/kernels/gpu/cast_kernel.cu
@@ -85,11 +85,11 @@ void CastCUDAKernelImpl(const GPUContext& dev_ctx,
   CastCUDAKernelImplWithPtr(dev_ctx, in_data, out_data, size);
 }
 
-template <typename T, typename ContextT>
-void Cast(const ContextT& dev_ctx,
-          const DenseTensor& x,
-          DataType out_dtype,
-          DenseTensor* out) {
+template <typename T, typename Context>
+void CastKernel(const Context& dev_ctx,
+                const DenseTensor& x,
+                DataType out_dtype,
+                DenseTensor* out) {
   PD_VISIT_ALL_TYPES(out_dtype, "CastCUDAKernelImpl", ([&] {
                        CastCUDAKernelImpl<T, data_t>(dev_ctx, x, out);
                      }));
@@ -101,7 +101,7 @@ void Cast(const ContextT& dev_ctx,
   PT_REGISTER_CTX_KERNEL(cast,             \
                          GPU,              \
                          ALL_LAYOUT,       \
-                         pten::Cast,       \
+                         pten::CastKernel, \
                          float,            \
                          double,           \
                          int,              \
diff --git a/paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h b/paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h
index d4fdb477633..4cfcad9149a 100644
--- a/paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h
+++ b/paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h
@@ -1112,7 +1112,7 @@ void TensorReduceFunctorImpl(const pten::DenseTensor& x,
       AsyncCopy(x, y);
       y->Resize(out_dims);
     } else {
-      pten::Cast(*dev_ctx, x, y->dtype(), y);
+      pten::CastKernel(*dev_ctx, x, y->dtype(), y);
     }
     return;
   }
diff --git a/paddle/pten/kernels/hybird/general/reduce_impl.h b/paddle/pten/kernels/hybird/general/reduce_impl.h
index 5dddccd11f2..631ad7f6125 100644
--- a/paddle/pten/kernels/hybird/general/reduce_impl.h
+++ b/paddle/pten/kernels/hybird/general/reduce_impl.h
@@ -60,7 +60,7 @@ void Reduce(const DeviceContext& dev_ctx,
         pten::DenseTensorMeta(out_dtype, x.dims(), x.layout()));
 
     // cast x tensor to out_dtype
-    pten::Cast(dev_ctx, x, out_dtype, &tmp_tensor);
+    pten::CastKernel(dev_ctx, x, out_dtype, &tmp_tensor);
 
     // do reduce sum
     PD_VISIT_ALL_TYPES(
-- 
GitLab
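
Usage note (illustrative, not part of the patch): after this change the header-only helpers pten::Empty and pten::Cast sit next to the kernel declarations, so a caller can create and cast a DenseTensor without constructing it by hand. The sketch below is a minimal, hypothetical example assuming the helper signatures shown in the diff above; UpcastToFloat64 is not a function in the Paddle codebase.

// Hypothetical usage sketch, assuming the helper signatures in this patch.
#include "paddle/pten/kernels/cast_kernel.h"   // pten::Cast / pten::CastKernel
#include "paddle/pten/kernels/empty_kernel.h"  // pten::Empty / pten::EmptyLike

namespace pten {

// Returns a float64 copy of `x`. Cast builds the output meta via
// CastInferMeta, allocates the result through pten::Empty, and then
// runs CastKernel on it, so no DenseTensor is constructed manually here.
template <typename T, typename Context>
DenseTensor UpcastToFloat64(const Context& dev_ctx, const DenseTensor& x) {
  return pten::Cast<T>(dev_ctx, x, DataType::FLOAT64);
}

}  // namespace pten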