Unverified · Commit e02537f9, authored by Chen Weihang, committed by GitHub

[PTen] Unify device context entrance in pten part 2 (#38182)

* unify device context entrance

* move all_context include to header

* polish cmake relay for device_context

* fix npu compile failed

* fix npu compile failed
Parent 55509ae7
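The same pattern repeats in every file below: drop the `#include "paddle/fluid/platform/device_context.h"` line and the per-file `using CPUContext/CUDAContext/XPUContext = paddle::platform::...DeviceContext;` alias, and instead include the matching pten backend header (`paddle/pten/backends/cpu/cpu_context.h`, `.../cuda/cuda_context.h`, `.../xpu/xpu_context.h`, or `.../all_context.h`), so the device context types enter pten through a single entrance. A minimal sketch of what a kernel declaration header looks like after the change (the file name and the `Scale` kernel are illustrative only, not part of this commit):

// scale.h (hypothetical) -- kernel declaration after the unification.
#pragma once

// The backend header is now the single entrance for the device context;
// it already provides pten::CPUContext, so no local alias is needed.
#include "paddle/pten/backends/cpu/cpu_context.h"
#include "paddle/pten/core/dense_tensor.h"

namespace pten {

// Kernels keep taking the backend context by const reference as the first
// argument; only the place CPUContext comes from has changed.
template <typename T>
void Scale(const CPUContext& dev_ctx,
           const DenseTensor& x,
           float scale,
           DenseTensor* out);

}  // namespace pten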
@@ -14,16 +14,13 @@
 #pragma once
+#include "paddle/pten/backends/cpu/cpu_context.h"
 #include "paddle/pten/common/scalar.h"
 #include "paddle/pten/common/scalar_array.h"
 #include "paddle/pten/core/dense_tensor.h"
-#include "paddle/fluid/platform/device_context.h"
 namespace pten {
-using CPUContext = paddle::platform::CPUDeviceContext;
 template <typename T>
 void FullLike(const CPUContext& dev_ctx, const Scalar& val, DenseTensor* out);
...
@@ -14,6 +14,7 @@
 #pragma once
+#include "paddle/pten/backends/cpu/cpu_context.h"
 #include "paddle/pten/core/dense_tensor.h"
 // See Note [ Why still include the fluid headers? ]
@@ -21,8 +22,6 @@
 namespace pten {
-using CPUContext = paddle::platform::CPUDeviceContext;
 template <typename T>
 void Dot(const CPUContext& dev_ctx,
          const DenseTensor& x,
...
@@ -14,17 +14,13 @@ limitations under the License. */
 #pragma once
+#include "paddle/pten/backends/cpu/cpu_context.h"
 #include "paddle/pten/common/scalar_array.h"
 #include "paddle/pten/core/dense_tensor.h"
 #include "paddle/pten/core/kernel_registry.h"
-// See Note [ Why still include the fluid headers? ]
-#include "paddle/fluid/platform/device_context.h"
 namespace pten {
-using CPUContext = paddle::platform::CPUDeviceContext;
 template <typename T>
 void Flatten(const CPUContext& dev_ctx,
              const DenseTensor& x,
...
@@ -14,17 +14,13 @@ limitations under the License. */
 #pragma once
+#include "paddle/pten/backends/cpu/cpu_context.h"
 #include "paddle/pten/common/scalar.h"
 #include "paddle/pten/core/dense_tensor.h"
 #include "paddle/pten/core/kernel_registry.h"
-// See Note [ Why still include the fluid headers? ]
-#include "paddle/fluid/platform/device_context.h"
 namespace pten {
-using CPUContext = paddle::platform::CPUDeviceContext;
 template <typename T>
 void Sign(const CPUContext& dev_ctx, const DenseTensor& x, DenseTensor* out);
...
@@ -14,15 +14,12 @@ limitations under the License. */
 #pragma once
+#include "paddle/pten/backends/cpu/cpu_context.h"
 #include "paddle/pten/core/dense_tensor.h"
 #include "paddle/pten/core/kernel_registry.h"
-// See Note [ Why still include the fluid headers? ]
-#include "paddle/fluid/platform/device_context.h"
 namespace pten {
-using CPUContext = paddle::platform::CPUDeviceContext;
 void Copy(const CPUContext& dev_ctx,
           const DenseTensor& src,
           bool blocking,
...
@@ -17,16 +17,13 @@
 // CUDA and HIP use same api
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+#include "paddle/pten/backends/cuda/cuda_context.h"
 #include "paddle/pten/common/scalar.h"
 #include "paddle/pten/common/scalar_array.h"
 #include "paddle/pten/core/dense_tensor.h"
-#include "paddle/fluid/platform/device_context.h"
 namespace pten {
-using CUDAContext = paddle::platform::CUDADeviceContext;
 template <typename T>
 void FullLike(const CUDAContext& dev_ctx, const Scalar& val, DenseTensor* out);
...
@@ -17,15 +17,11 @@
 // CUDA and HIP use same api
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+#include "paddle/pten/backends/cuda/cuda_context.h"
 #include "paddle/pten/core/dense_tensor.h"
-// See Note [ Why still include the fluid headers? ]
-#include "paddle/fluid/platform/device_context.h"
 namespace pten {
-using CUDAContext = paddle::platform::CUDADeviceContext;
 template <typename T>
 void Dot(const CUDAContext& dev_ctx,
          const DenseTensor& x,
...
@@ -17,17 +17,13 @@
 // CUDA and HIP use same api
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+#include "paddle/pten/backends/cuda/cuda_context.h"
 #include "paddle/pten/common/scalar_array.h"
 #include "paddle/pten/core/dense_tensor.h"
 #include "paddle/pten/core/kernel_registry.h"
-// See Note [ Why still include the fluid headers? ]
-#include "paddle/fluid/platform/device_context.h"
 namespace pten {
-using CUDAContext = paddle::platform::CUDADeviceContext;
 template <typename T>
 void Flatten(const CUDAContext& dev_ctx,
              const DenseTensor& x,
...
@@ -17,16 +17,12 @@ limitations under the License. */
 // CUDA and HIP use same api
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+#include "paddle/pten/backends/cuda/cuda_context.h"
 #include "paddle/pten/common/scalar.h"
 #include "paddle/pten/core/dense_tensor.h"
-// See Note [ Why still include the fluid headers? ]
-#include "paddle/fluid/platform/device_context.h"
 namespace pten {
-using CUDAContext = paddle::platform::CUDADeviceContext;
 template <typename T>
 void Sign(const CUDAContext& dev_ctx, const DenseTensor& x, DenseTensor* out);
...
@@ -17,15 +17,12 @@ limitations under the License. */
 // CUDA and HIP use same api
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+#include "paddle/pten/backends/cuda/cuda_context.h"
 #include "paddle/pten/core/dense_tensor.h"
 #include "paddle/pten/core/kernel_registry.h"
-// See Note [ Why still include the fluid headers? ]
-#include "paddle/fluid/platform/device_context.h"
 namespace pten {
-using CUDAContext = paddle::platform::CUDADeviceContext;
 void Copy(const CUDAContext& dev_ctx,
           const DenseTensor& src,
           bool blocking,
...
@@ -2,9 +2,9 @@ add_subdirectory(eigen)
 add_subdirectory(blas)
 add_subdirectory(general)
-cc_library(pten_transpose_cpu SRCS transpose.cc DEPS dense_tensor device_context)
+cc_library(pten_transpose_cpu SRCS transpose.cc DEPS dense_tensor pten_context)
 if(WITH_GPU)
-  nv_library(pten_transpose_cuda SRCS transpose.cu DEPS dense_tensor malloc device_context)
+  nv_library(pten_transpose_cuda SRCS transpose.cu DEPS dense_tensor malloc pten_context)
 elseif(WITH_ROCM)
-  hip_library(pten_transpose_cuda SRCS transpose.cu DEPS dense_tensor malloc device_context)
+  hip_library(pten_transpose_cuda SRCS transpose.cu DEPS dense_tensor malloc pten_context)
 endif()
@@ -15,13 +15,13 @@
 #pragma once
 #include "paddle/fluid/platform/device/gpu/gpu_helper.h"
 #include "paddle/fluid/platform/float16.h"
+#include "paddle/pten/backends/cuda/cuda_context.h"
 #include "paddle/pten/core/dense_tensor.h"
 #include "paddle/fluid/platform/aligned_vector.h"
 #include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
 namespace pten {
 namespace detail {
-using CUDAContext = paddle::platform::CUDADeviceContext;
 template <typename InT, typename OutT, int VecSize>
 __global__ void VecCastCUDAKernel(const InT* in, const int64_t N, OutT* out) {
...
@@ -17,16 +17,13 @@
 // CUDA and HIP use same api
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+#include "paddle/pten/backends/cuda/cuda_context.h"
 #include "paddle/pten/common/scalar.h"
 #include "paddle/pten/core/dense_tensor.h"
-#include "paddle/fluid/platform/device_context.h"
 #include "paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h"
 namespace pten {
-using CUDAContext = paddle::platform::CUDADeviceContext;
 static inline std::vector<int64_t> GetReduceDim(
     const std::vector<int64_t>& dims, int dim_size, bool reduce_all) {
   std::vector<int64_t> reduce_dims;
...
@@ -15,13 +15,13 @@ limitations under the License. */
 #pragma once
 #include "paddle/fluid/platform/transform.h"
+#include "paddle/pten/backends/all_context.h"
 #include "paddle/pten/core/dense_tensor.h"
 namespace pten {
 namespace general {
 using DDim = paddle::framework::DDim;
-using CPUContext = paddle::platform::CPUDeviceContext;
 template <typename T, typename DeviceContext>
 class RowwiseTransformIterator;
@@ -131,7 +131,6 @@ class MidWiseTransformIterator<T, CPUContext>
 };
 #if defined(__NVCC__) || defined(__HIPCC__)
-using CUDAContext = paddle::platform::CUDADeviceContext;
 template <typename T>
 class RowwiseTransformIterator<T, CUDAContext>
     : public thrust::iterator_adaptor<RowwiseTransformIterator<T, CUDAContext>,
...
@@ -14,16 +14,16 @@
 #include "paddle/pten/kernels/hybird/transpose.h"
 #include "paddle/fluid/framework/ddim.h"
+#include "paddle/pten/backends/cpu/cpu_context.h"
 #include "paddle/pten/core/dense_tensor.h"
 // See Note [ Why still include the fluid headers? ]
 #include "paddle/fluid/platform/bfloat16.h"
 #include "paddle/fluid/platform/complex.h"
-#include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/float16.h"
 namespace pten {
 namespace math {
-using CPUContext = paddle::platform::CPUDeviceContext;
 template <typename T>
 struct TransposeNormal<CPUContext, T> {
...
@@ -14,6 +14,7 @@
 #include "paddle/fluid/framework/ddim.h"
 #include "paddle/fluid/memory/memcpy.h"
+#include "paddle/pten/backends/cuda/cuda_context.h"
 #include "paddle/pten/core/dense_tensor.h"
 #include "paddle/pten/kernels/hybird/math/cast_func.h"
 #include "paddle/pten/kernels/hybird/transpose.h"
@@ -21,13 +22,11 @@
 // See Note [ Why still include the fluid headers? ]
 #include "paddle/fluid/platform/bfloat16.h"
 #include "paddle/fluid/platform/complex.h"
-#include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/float16.h"
 namespace pten {
 namespace math {
-using CUDAContext = paddle::platform::CUDADeviceContext;
 #define REINTERPRET(T, DST_PTR, SRC_PTR) \
   T* DST_PTR = reinterpret_cast<T*>(SRC_PTR)
...
@@ -16,17 +16,13 @@ limitations under the License. */
 #ifdef PADDLE_WITH_XPU
+#include "paddle/pten/backends/xpu/xpu_context.h"
 #include "paddle/pten/common/scalar_array.h"
 #include "paddle/pten/core/dense_tensor.h"
 #include "paddle/pten/core/kernel_registry.h"
-// See Note [ Why still include the fluid headers? ]
-#include "paddle/fluid/platform/device_context.h"
 namespace pten {
-using XPUContext = paddle::platform::XPUDeviceContext;
 template <typename T>
 void Flatten(const XPUContext& dev_ctx,
              const DenseTensor& x,
...