Unverified commit 06128b9f authored by zyfncg, committed by GitHub

move the directory of fill kernels in pten (#38219)

Parent 327e5050
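Summary of the diff below: the per-backend fill kernels in paddle/pten/kernels/cpu/creation.* and paddle/pten/kernels/cuda/creation.* are removed, and a single context-templated implementation of Full / FullLike is added under paddle/pten/kernels/eigen/, with the public declarations in paddle/pten/kernels/full_kernel.h. Build dependencies switch from creation_cpu / creation_cuda to full_kernel_eigen accordingly.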
@@ -275,7 +275,7 @@ if(WITH_PYTHON)
if(NOT ON_INFER)
cc_library(paddle_eager
SRCS eager.cc eager_functions.cc eager_method.cc eager_properties.cc eager_utils.cc
-    DEPS eager_api autograd_meta backward grad_node_info pten op_function_common dygraph_function dygraph_node math_cpu linalg_cpu creation_cpu utils_cpu manipulation_cpu accumulation_node global_utils utils python)
+    DEPS eager_api autograd_meta backward grad_node_info pten op_function_common dygraph_function dygraph_node math_cpu linalg_cpu utils_cpu manipulation_cpu accumulation_node global_utils utils python)
add_dependencies(paddle_eager eager_codegen)
add_dependencies(paddle_eager eager_op_function_generator_cmd)
list(APPEND PYBIND_DEPS paddle_eager)
@@ -24,11 +24,11 @@ add_subdirectory(tests)
# make an unity target for compile deps
set(PTEN_DEPS convert_utils dense_tensor pten_context kernel_factory kernel_context)
-set(PTEN_DEPS ${PTEN_DEPS} scale_kernel_eigen)
-set(PTEN_DEPS ${PTEN_DEPS} math_cpu linalg_cpu creation_cpu manipulation_cpu)
+set(PTEN_DEPS ${PTEN_DEPS} scale_kernel_eigen full_kernel_eigen)
+set(PTEN_DEPS ${PTEN_DEPS} math_cpu linalg_cpu manipulation_cpu)
set(PTEN_DEPS ${PTEN_DEPS} nary unary binary)
if(WITH_GPU OR WITH_ROCM)
-  set(PTEN_DEPS ${PTEN_DEPS} math_cuda linalg_cuda creation_cuda manipulation_cuda)
+  set(PTEN_DEPS ${PTEN_DEPS} math_cuda linalg_cuda manipulation_cuda)
endif()
if(WITH_XPU)
set(PTEN_DEPS ${PTEN_DEPS} manipulation_xpu)
@@ -16,8 +16,7 @@
#include "paddle/pten/api/lib/utils/storage.h"
#include "paddle/pten/include/infermeta.h"
#include "paddle/pten/kernels/cpu/creation.h"
#include "paddle/pten/kernels/cuda/creation.h"
#include "paddle/pten/kernels/full_kernel.h"
namespace pten {
@@ -36,7 +35,7 @@ DenseTensor FullLike(
pten::make_intrusive<paddle::experimental::SharedStorage>(
dev_ctx.GetPlace()),
std::move(out_meta));
-  FullLike<T>(dev_ctx, val, &dense_out);
+  FullLike<T, ContextT>(dev_ctx, val, &dense_out);
return dense_out;
}
cc_library(math_cpu SRCS math.cc DEPS dense_tensor kernel_context kernel_factory eigen_function blas pten_transpose_cpu)
cc_library(linalg_cpu SRCS linalg.cc DEPS dense_tensor kernel_context kernel_factory)
-cc_library(creation_cpu SRCS creation.cc DEPS dense_tensor kernel_context kernel_factory eigen_function)
cc_library(utils_cpu SRCS utils.cc DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
cc_library(manipulation_cpu SRCS manipulation.cc DEPS dense_tensor kernel_context kernel_factory utils_cpu unary)
-// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "paddle/pten/kernels/cpu/creation.h"
-#include "paddle/pten/core/kernel_registry.h"
-#include "paddle/pten/kernels/hybird/eigen/fill.h"
-namespace pten {
-template <typename T>
-void FullLike(const CPUContext& dev_ctx, const Scalar& val, DenseTensor* out) {
-  auto value = val.to<float>();
-  using CommonType = typename std::common_type<
-      float,
-      typename std::conditional<
-          std::is_same<T, paddle::platform::float16>::value,
-          float,
-          T>::type>::type;
-  auto common_type_value = static_cast<CommonType>(value);
-  PADDLE_ENFORCE_EQ(
-      (common_type_value >=
-       static_cast<CommonType>(std::numeric_limits<T>::lowest())) &&
-          (common_type_value <=
-           static_cast<CommonType>(std::numeric_limits<T>::max())),
-      true,
-      paddle::platform::errors::InvalidArgument(
-          "The filled value is out of range for target type, "
-          "current kernel type is %s, the range should between %f "
-          "and %f, but now value is %f.",
-          typeid(T).name(),
-          static_cast<CommonType>(std::numeric_limits<T>::lowest()),
-          static_cast<CommonType>(std::numeric_limits<T>::max()),
-          static_cast<float>(value)));
-  eigen::fill<CPUContext, T>(dev_ctx, out, value);
-}
-template <typename T>
-void Full(const CPUContext& dev_ctx,
-          const ScalarArray& shape,
-          const Scalar& val,
-          DenseTensor* out) {
-  out->Resize(paddle::framework::make_ddim(shape.GetData()));
-  eigen::fill<CPUContext, T>(dev_ctx, out, val.to<T>());
-}
-}  // namespace pten
-PT_REGISTER_KERNEL(full_like,
-                   CPU,
-                   ALL_LAYOUT,
-                   pten::FullLike,
-                   float,
-                   double,
-                   int,
-                   int64_t,
-                   bool,
-                   paddle::platform::float16) {}
-PT_REGISTER_KERNEL(full,
-                   CPU,
-                   ALL_LAYOUT,
-                   pten::Full,
-                   float,
-                   double,
-                   uint8_t,
-                   int16_t,
-                   int,
-                   int64_t,
-                   bool,
-                   paddle::platform::float16,
-                   paddle::platform::bfloat16,
-                   paddle::platform::complex<float>,
-                   paddle::platform::complex<double>) {}
if(WITH_GPU)
nv_library(math_cuda SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory pten_transpose_cuda)
nv_library(linalg_cuda SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory)
-nv_library(creation_cuda SRCS creation.cu DEPS eigen_function dense_tensor kernel_context kernel_factory)
nv_library(utils_cuda SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
nv_library(manipulation_cuda SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_cuda unary)
elseif(WITH_ROCM)
hip_library(math_cuda SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory pten_transpose_cuda)
hip_library(linalg_cuda SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory)
-hip_library(creation_cuda SRCS creation.cu DEPS eigen_function dense_tensor kernel_context kernel_factory)
hip_library(utils_cuda SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
hip_library(manipulation_cuda SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_cuda unary)
endif()
-// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "paddle/pten/kernels/cuda/creation.h"
-#include "paddle/pten/core/kernel_registry.h"
-#include "paddle/pten/kernels/hybird/eigen/fill.h"
-namespace pten {
-template <typename T>
-void FullLike(const CUDAContext& dev_ctx, const Scalar& val, DenseTensor* out) {
-  auto value = val.to<float>();
-  using CommonType = typename std::common_type<
-      float,
-      typename std::conditional<
-          std::is_same<T, paddle::platform::float16>::value,
-          float,
-          T>::type>::type;
-  auto common_type_value = static_cast<CommonType>(value);
-  PADDLE_ENFORCE_EQ(
-      (common_type_value >=
-       static_cast<CommonType>(std::numeric_limits<T>::lowest())) &&
-          (common_type_value <=
-           static_cast<CommonType>(std::numeric_limits<T>::max())),
-      true,
-      paddle::platform::errors::InvalidArgument(
-          "The filled value is out of range for target type, "
-          "current kernel type is %s, the range should between %f "
-          "and %f, but now value is %f.",
-          typeid(T).name(),
-          static_cast<CommonType>(std::numeric_limits<T>::lowest()),
-          static_cast<CommonType>(std::numeric_limits<T>::max()),
-          static_cast<float>(value)));
-  eigen::fill<CUDAContext, T>(dev_ctx, out, val.to<float>());
-}
-template <typename T>
-void Full(const CUDAContext& dev_ctx,
-          const ScalarArray& shape,
-          const Scalar& val,
-          DenseTensor* out) {
-  out->Resize(paddle::framework::make_ddim(shape.GetData()));
-  eigen::fill<CUDAContext, T>(dev_ctx, out, val.to<T>());
-}
-}  // namespace pten
-PT_REGISTER_KERNEL(full_like,
-                   CUDA,
-                   ALL_LAYOUT,
-                   pten::FullLike,
-                   float,
-                   double,
-                   int,
-                   int64_t,
-                   bool,
-                   paddle::platform::float16) {}
-PT_REGISTER_KERNEL(full,
-                   CUDA,
-                   ALL_LAYOUT,
-                   pten::Full,
-                   float,
-                   double,
-                   uint8_t,
-                   int16_t,
-                   int,
-                   int64_t,
-                   bool,
-                   paddle::platform::float16,
-                   paddle::platform::complex<float>,
-                   paddle::platform::complex<double>) {}
-// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#pragma once
-// CUDA and HIP use same api
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-#include "paddle/pten/backends/cuda/cuda_context.h"
-#include "paddle/pten/common/scalar.h"
-#include "paddle/pten/common/scalar_array.h"
-#include "paddle/pten/core/dense_tensor.h"
-namespace pten {
-template <typename T>
-void FullLike(const CUDAContext& dev_ctx, const Scalar& val, DenseTensor* out);
-template <typename T>
-void Full(const CUDAContext& dev_ctx,
-          const ScalarArray& shape,
-          const Scalar& val,
-          DenseTensor* out);
-}  // namespace pten
-#endif
if(WITH_GPU)
nv_library(scale_kernel_eigen SRCS scale_kernel.cc DEPS dense_tensor kernel_context kernel_factory eigen_function)
+nv_library(full_kernel_eigen SRCS full_kernel.cc full_kernel.cu DEPS kernel_context kernel_factory dense_tensor eigen_function)
elseif(WITH_ROCM)
hip_library(scale_kernel_eigen SRCS scale_kernel.cc DEPS dense_tensor kernel_context kernel_factory eigen_function)
+hip_library(full_kernel_eigen SRCS full_kernel.cc full_kernel.cu DEPS kernel_context kernel_factory dense_tensor eigen_function)
else()
cc_library(scale_kernel_eigen SRCS scale_kernel.cc DEPS dense_tensor kernel_context kernel_factory eigen_function)
+cc_library(full_kernel_eigen SRCS full_kernel.cc DEPS dense_tensor kernel_context kernel_factory eigen_function)
endif()
@@ -14,21 +14,60 @@ limitations under the License. */
#pragma once
+#include "paddle/pten/common/scalar.h"
+#include "paddle/pten/common/scalar_array.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/kernels/hybird/eigen/common.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/operators/eigen/eigen_function.h"
namespace pten {
-namespace eigen {
template <typename DeviceContext, typename T, typename VType>
-void fill(const DeviceContext& context, DenseTensor* tensor, VType val) {
+void fill_(const DeviceContext& context, DenseTensor* tensor, VType val) {
  tensor->mutable_data<T>();
  auto t = pten::EigenVector<T>::Flatten(*tensor);
  t.device(*context.eigen_device()) = t.constant(static_cast<T>(val));
}
-}  // namespace eigen
+template <typename T, typename ContextT>
+void Full(const ContextT& dev_ctx,
+          const ScalarArray& shape,
+          const Scalar& val,
+          DenseTensor* out) {
+  out->Resize(paddle::framework::make_ddim(shape.GetData()));
+  fill_<ContextT, T>(dev_ctx, out, val.to<T>());
+}
+template <typename T, typename ContextT>
+void FullLike(const ContextT& dev_ctx, const Scalar& val, DenseTensor* out) {
+  auto value = val.to<float>();
+  using CommonType = typename std::common_type<
+      float,
+      typename std::conditional<
+          std::is_same<T, paddle::platform::float16>::value,
+          float,
+          T>::type>::type;
+  auto common_type_value = static_cast<CommonType>(value);
+  PADDLE_ENFORCE_EQ(
+      (common_type_value >=
+       static_cast<CommonType>(std::numeric_limits<T>::lowest())) &&
+          (common_type_value <=
+           static_cast<CommonType>(std::numeric_limits<T>::max())),
+      true,
+      paddle::platform::errors::InvalidArgument(
+          "The filled value is out of range for target type, "
+          "current kernel type is %s, the range should between %f "
+          "and %f, but now value is %f.",
+          typeid(T).name(),
+          static_cast<CommonType>(std::numeric_limits<T>::lowest()),
+          static_cast<CommonType>(std::numeric_limits<T>::max()),
+          static_cast<float>(value)));
+  fill_<ContextT, T>(dev_ctx, out, value);
+}
} // namespace pten
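Every FullLike variant in this diff (the removed CPU/CUDA ones and the shared one above) guards the fill value with the same promoted-type range check before casting: the value and the target dtype's limits are compared in a common type, so a value that would overflow the target type raises InvalidArgument instead of being silently truncated. A minimal self-contained sketch of that check, with the float16 special case omitted and illustrative names:

```cpp
// Sketch of the FullLike range guard: compare the fill value against the
// target dtype's limits in std::common_type<float, T>::type, so overflow
// is detected before the value is truncated by the cast.
#include <cstdint>
#include <cstdio>
#include <limits>
#include <type_traits>

template <typename T>
bool FillValueInRange(float value) {  // illustrative helper, not in pten
  using CommonType = typename std::common_type<float, T>::type;
  auto v = static_cast<CommonType>(value);
  return v >= static_cast<CommonType>(std::numeric_limits<T>::lowest()) &&
         v <= static_cast<CommonType>(std::numeric_limits<T>::max());
}

int main() {
  std::printf("%d\n", FillValueInRange<int8_t>(300.0f));  // 0: overflows int8_t
  std::printf("%d\n", FillValueInRange<int8_t>(100.0f));  // 1: fits
  return 0;
}
```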
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/pten/kernels/full_kernel.h"
+#include "paddle/pten/kernels/eigen/full.h"
+#include "paddle/pten/core/kernel_registry.h"
+#include "paddle/pten/backends/all_context.h"
+PT_REGISTER_CTX_KERNEL(full,
+                       CPU,
+                       ALL_LAYOUT,
+                       pten::Full,
+                       float,
+                       double,
+                       uint8_t,
+                       int16_t,
+                       int,
+                       int64_t,
+                       bool,
+                       paddle::platform::float16,
+                       paddle::platform::bfloat16,
+                       paddle::platform::complex<float>,
+                       paddle::platform::complex<double>) {}
+PT_REGISTER_CTX_KERNEL(full_like,
+                       CPU,
+                       ALL_LAYOUT,
+                       pten::FullLike,
+                       float,
+                       double,
+                       int,
+                       int64_t,
+                       bool,
+                       paddle::platform::float16) {}
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/pten/kernels/eigen/full.h"
+#include "paddle/pten/kernels/full_kernel.h"
+#include "paddle/pten/core/kernel_registry.h"
+#include "paddle/pten/backends/all_context.h"
+PT_REGISTER_CTX_KERNEL(full,
+                       CUDA,
+                       ALL_LAYOUT,
+                       pten::Full,
+                       float,
+                       double,
+                       uint8_t,
+                       int16_t,
+                       int,
+                       int64_t,
+                       bool,
+                       paddle::platform::float16,
+                       paddle::platform::complex<float>,
+                       paddle::platform::complex<double>) {}
+PT_REGISTER_CTX_KERNEL(full_like,
+                       CUDA,
+                       ALL_LAYOUT,
+                       pten::FullLike,
+                       float,
+                       double,
+                       int,
+                       int64_t,
+                       bool,
+                       paddle::platform::float16) {}
@@ -21,13 +21,13 @@
namespace pten {
-template <typename T>
-void FullLike(const CPUContext& dev_ctx, const Scalar& val, DenseTensor* out);
-template <typename T>
-void Full(const CPUContext& dev_ctx,
+template <typename T, typename ContextT>
+void Full(const ContextT& dev_ctx,
          const ScalarArray& shape,
          const Scalar& val,
          DenseTensor* out);
+template <typename T, typename ContextT>
+void FullLike(const ContextT& dev_ctx, const Scalar& val, DenseTensor* out);
}  // namespace pten
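Since Full and FullLike are now templated on the device context, one definition serves every backend, which is also why the new files register them via PT_REGISTER_CTX_KERNEL rather than the per-backend PT_REGISTER_KERNEL. A usage sketch under stated assumptions (dev_ctx and out are prepared by the caller; the ScalarArray constructor shown is an assumption for illustration):

```cpp
#include <vector>

#include "paddle/pten/kernels/full_kernel.h"

// Sketch only: invoking the relocated, context-templated kernels.
void FillExamples(const pten::CPUContext& dev_ctx, pten::DenseTensor* out) {
  pten::ScalarArray shape(std::vector<int64_t>{2, 3});  // assumed ctor
  pten::Scalar value(1.0f);
  // Resizes *out to {2, 3} and fills it with 1.0f.
  pten::Full<float, pten::CPUContext>(dev_ctx, shape, value, out);
  // Fills *out with 1.0f, keeping its existing shape.
  pten::FullLike<float, pten::CPUContext>(dev_ctx, value, out);
}
```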