[PTen] Remove eigen and blas directory (#38291)

* remove eigen and blas dir
* fix declare error

[PTen] Remove eigen and blas directory (#38291)
* remove eigen and blas dir
* fix declare error
d9fcdc3a · Chen Weihang · GitHub · 0d12aa64 · d9fcdc3a · d9fcdc3a
14 changed files
--- a/paddle/pten/CMakeLists.txt
+++ b/paddle/pten/CMakeLists.txt
@@ -24,11 +24,10 @@ add_subdirectory(tests)

 # make an unity target for compile deps
 set(PTEN_DEPS convert_utils dense_tensor pten_context kernel_factory kernel_context)
-set(PTEN_DEPS ${PTEN_DEPS} scale_kernel_eigen full_kernel_eigen)
-set(PTEN_DEPS ${PTEN_DEPS} math_cpu linalg_cpu manipulation_cpu conj_kernel_cpu)
+set(PTEN_DEPS ${PTEN_DEPS} math_cpu linalg_cpu manipulation_cpu conj_kernel_cpu scale_kernel_cpu full_kernel_cpu)
 set(PTEN_DEPS ${PTEN_DEPS} nary unary binary)
 if(WITH_GPU OR WITH_ROCM)
-  set(PTEN_DEPS ${PTEN_DEPS} math_cuda linalg_cuda manipulation_cuda conj_kernel_cuda)
+  set(PTEN_DEPS ${PTEN_DEPS} math_cuda linalg_cuda manipulation_cuda conj_kernel_cuda scale_kernel_cuda full_kernel_cuda)
 endif()
 if(WITH_XPU)
  set(PTEN_DEPS ${PTEN_DEPS} manipulation_xpu)

--- a/paddle/pten/api/lib/kernel_declare.h
+++ b/paddle/pten/api/lib/kernel_declare.h
@@ -32,6 +32,7 @@ PT_DECLARE_KERNEL(full_like, CUDA, ALL_LAYOUT);
 PT_DECLARE_KERNEL(dot, CUDA, ALL_LAYOUT);
 PT_DECLARE_KERNEL(flatten, CUDA, ALL_LAYOUT);
 PT_DECLARE_KERNEL(sign, CUDA, ALL_LAYOUT);
+PT_DECLARE_KERNEL(scale, CUDA, ALL_LAYOUT);
 PT_DECLARE_KERNEL(conj, CUDA, ALL_LAYOUT);
 #endif


--- a/paddle/pten/kernels/CMakeLists.txt
+++ b/paddle/pten/kernels/CMakeLists.txt
@@ -3,14 +3,6 @@ add_subdirectory(primitive)
 # pten hybird functors and functions called by kernels
 add_subdirectory(hybird)

-# pten kernels for different backends
-# NOTE(chenweihang): We need to increase the compilation option of WITH_EIGEN,
-# which will support splitting eigen at compile time on demand in the future
-add_subdirectory(eigen)
-# NOTE(chenweihang): We need to increase the compilation option of WITH_BLAS,
-# which will support splitting eigen at compile time on demand in the future,
-# and if necessary, blas can be split into openblas and cublas
-add_subdirectory(blas)
 add_subdirectory(cpu)
 if(WITH_GPU OR WITH_ROCM)
  # NOTE(chenweihang): if hip can split from cuda impl, we should add hip dir

--- a/paddle/pten/kernels/blas/CMakeLists.txt
+++ b/paddle/pten/kernels/blas/CMakeLists.txt
--- a/paddle/pten/kernels/cpu/CMakeLists.txt
+++ b/paddle/pten/kernels/cpu/CMakeLists.txt
@@ -2,4 +2,6 @@ cc_library(math_cpu SRCS math.cc DEPS dense_tensor kernel_context kernel_factory
 cc_library(linalg_cpu SRCS linalg.cc DEPS dense_tensor kernel_context kernel_factory)
 cc_library(utils_cpu SRCS utils.cc DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
 cc_library(manipulation_cpu SRCS manipulation.cc DEPS dense_tensor kernel_context kernel_factory utils_cpu unary)
+cc_library(scale_kernel_cpu SRCS scale_kernel.cc DEPS dense_tensor kernel_context kernel_factory eigen_function)
+cc_library(full_kernel_cpu SRCS full_kernel.cc DEPS dense_tensor kernel_context kernel_factory eigen_function)
 cc_library(conj_kernel_cpu SRCS conj_kernel.cc DEPS dense_tensor kernel_context kernel_factory)
--- a/paddle/pten/kernels/eigen/full_kernel.cc
+++ b/paddle/pten/kernels/eigen/full_kernel.cc
@@ -13,11 +13,10 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/pten/kernels/full_kernel.h"
-#include "paddle/pten/kernels/eigen/full.h"

+#include "paddle/pten/backends/cpu/cpu_context.h"
 #include "paddle/pten/core/kernel_registry.h"
-
-#include "paddle/pten/backends/all_context.h"
+#include "paddle/pten/kernels/impl/full_kernel_impl.h"

 PT_REGISTER_CTX_KERNEL(full,
                       CPU,

--- a/paddle/pten/kernels/cpu/scale_kernel.cc
+++ b/paddle/pten/kernels/cpu/scale_kernel.cc
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/pten/kernels/scale_kernel.h"
+#include "paddle/pten/kernels/impl/scale_kernel_impl.h"
+
+#include "paddle/pten/backends/cpu/cpu_context.h"
+#include "paddle/pten/core/kernel_registry.h"
+
+// See Note [ Why still include the fluid headers? ]
+#include "paddle/fluid/platform/bfloat16.h"
+
+PT_REGISTER_CTX_KERNEL(scale,
+                       CPU,
+                       ALL_LAYOUT,
+                       pten::Scale,
+                       float,
+                       double,
+                       paddle::platform::bfloat16,
+                       uint8_t,
+                       int8_t,
+                       int16_t,
+                       int,
+                       int64_t) {}
--- a/paddle/pten/kernels/cuda/CMakeLists.txt
+++ b/paddle/pten/kernels/cuda/CMakeLists.txt
@@ -3,11 +3,15 @@ if(WITH_GPU)
  nv_library(linalg_cuda SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory)
  nv_library(utils_cuda SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
  nv_library(manipulation_cuda SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_cuda unary)
+  nv_library(scale_kernel_cuda SRCS scale_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
+  nv_library(full_kernel_cuda SRCS full_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
  nv_library(conj_kernel_cuda SRCS conj_kernel.cu DEPS dense_tensor kernel_context kernel_factory)
 elseif(WITH_ROCM)
  hip_library(math_cuda SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory pten_transpose_cuda)
  hip_library(linalg_cuda SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory)
  hip_library(utils_cuda SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
  hip_library(manipulation_cuda SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_cuda unary)
+  hip_library(scale_kernel_cuda SRCS scale_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
+  hip_library(full_kernel_cuda SRCS full_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
  hip_library(conj_kernel_cuda SRCS conj_kernel.cu DEPS dense_tensor kernel_context kernel_factory)
 endif()
--- a/paddle/pten/kernels/eigen/full_kernel.cu
+++ b/paddle/pten/kernels/eigen/full_kernel.cu
@@ -12,12 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "paddle/pten/kernels/eigen/full.h"
 #include "paddle/pten/kernels/full_kernel.h"

+#include "paddle/pten/backends/cuda/cuda_context.h"
 #include "paddle/pten/core/kernel_registry.h"
-
-#include "paddle/pten/backends/all_context.h"
+#include "paddle/pten/kernels/impl/full_kernel_impl.h"

 PT_REGISTER_CTX_KERNEL(full,
                       CUDA,

--- a/paddle/pten/kernels/cuda/scale_kernel.cu
+++ b/paddle/pten/kernels/cuda/scale_kernel.cu
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/pten/kernels/scale_kernel.h"
+
+#include "paddle/pten/backends/cuda/cuda_context.h"
+#include "paddle/pten/core/kernel_registry.h"
+#include "paddle/pten/kernels/impl/scale_kernel_impl.h"
+
+// See Note [ Why still include the fluid headers? ]
+#include "paddle/fluid/platform/float16.h"
+
+PT_REGISTER_CTX_KERNEL(scale,
+                       CUDA,
+                       ALL_LAYOUT,
+                       pten::Scale,
+                       float,
+                       double,
+                       paddle::platform::float16,
+                       uint8_t,
+                       int8_t,
+                       int16_t,
+                       int,
+                       int64_t) {}
--- a/paddle/pten/kernels/eigen/CMakeLists.txt
+++ b/paddle/pten/kernels/eigen/CMakeLists.txt
-if(WITH_GPU)
-  nv_library(scale_kernel_eigen SRCS scale_kernel.cc DEPS dense_tensor kernel_context kernel_factory eigen_function)
-  nv_library(full_kernel_eigen SRCS full_kernel.cc full_kernel.cu DEPS kernel_context kernel_factory dense_tensor eigen_function)
-elseif(WITH_ROCM)
-  hip_library(scale_kernel_eigen SRCS scale_kernel.cc DEPS dense_tensor kernel_context kernel_factory eigen_function)
-  hip_library(full_kernel_eigen SRCS full_kernel.cc full_kernel.cu DEPS kernel_context kernel_factory dense_tensor eigen_function)
-else()
-  cc_library(scale_kernel_eigen SRCS scale_kernel.cc DEPS dense_tensor kernel_context kernel_factory eigen_function)
-  cc_library(full_kernel_eigen SRCS full_kernel.cc DEPS dense_tensor kernel_context kernel_factory eigen_function)
-endif()
--- a/paddle/pten/kernels/eigen/full.h
+++ b/paddle/pten/kernels/eigen/full.h
@@ -17,7 +17,6 @@ limitations under the License. */
 #include "paddle/pten/common/scalar.h"
 #include "paddle/pten/common/scalar_array.h"
 #include "paddle/pten/core/dense_tensor.h"
-
 #include "paddle/pten/kernels/hybird/eigen/common.h"

 // See Note [ Why still include the fluid headers? ]

--- a/paddle/pten/kernels/eigen/scale_kernel.cc
+++ b/paddle/pten/kernels/eigen/scale_kernel.cc
@@ -12,16 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "paddle/pten/kernels/scale_kernel.h"
+#pragma once

-#include "paddle/pten/core/kernel_registry.h"
+#include "paddle/pten/common/scalar.h"
+#include "paddle/pten/core/dense_tensor.h"
 #include "paddle/pten/kernels/hybird/eigen/common.h"

 // See Note [ Why still include the fluid headers? ]
 #include "paddle/fluid/operators/eigen/eigen_function.h"
-#include "paddle/fluid/platform/bfloat16.h"
-#include "paddle/fluid/platform/device_context.h"
-#include "paddle/fluid/platform/float16.h"

 namespace pten {

@@ -50,35 +48,3 @@ void Scale(const ContextT& dev_ctx,
 }

 }  // namespace pten
-
-// TODO(chenweihang): Use EigenContext to specialize the ContextT parameter,
-// and only register the backend as Eigen's kernel during registration,
-// instead of using macros to register the CPU and CUDA kernels separately
-
-PT_REGISTER_CTX_KERNEL(scale,
-                       CPU,
-                       ALL_LAYOUT,
-                       pten::Scale,
-                       float,
-                       double,
-                       paddle::platform::bfloat16,
-                       uint8_t,
-                       int8_t,
-                       int16_t,
-                       int,
-                       int64_t) {}
-
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-PT_REGISTER_CTX_KERNEL(scale,
-                       CUDA,
-                       ALL_LAYOUT,
-                       pten::Scale,
-                       float,
-                       double,
-                       paddle::platform::float16,
-                       uint8_t,
-                       int8_t,
-                       int16_t,
-                       int,
-                       int64_t) {}
-#endif
--- a/paddle/pten/tests/api/CMakeLists.txt
+++ b/paddle/pten/tests/api/CMakeLists.txt
@@ -21,5 +21,5 @@ cc_test(test_to_api SRCS test_to_api.cc DEPS pten_tensor pten_api pten_api_utils
 cc_test(test_slice_api SRCS test_slice_api.cc DEPS pten_tensor pten_api pten_api_utils)
 cc_test(test_sum_api SRCS test_sum_api.cc DEPS pten_tensor pten_api pten_api_utils)
 cc_test(test_scale_api SRCS test_scale_api.cc DEPS pten_tensor pten_api pten_api_utils)
-cc_test(test_scale_benchmark SRCS test_scale_benchmark.cc DEPS pten_tensor pten_api pten_api_utils scale_kernel_eigen)
+cc_test(test_scale_benchmark SRCS test_scale_benchmark.cc DEPS pten_tensor pten_api pten_api_utils)
 cc_test(test_conj_api SRCS test_conj_api.cc DEPS pten_tensor pten_api pten_api_utils)