From e02537f9f7dcd51ca3ad8db556c67f57500f126f Mon Sep 17 00:00:00 2001 From: Chen Weihang Date: Thu, 16 Dec 2021 00:41:02 -0600 Subject: [PATCH] [PTen] Unify device context entrance in pten part 2 (#38182) * unify device context entrance * move all_context include to header * polish cmake relay for device_context * fix npu compile failed * fix npu compile failed --- paddle/pten/kernels/cpu/creation.h | 5 +---- paddle/pten/kernels/cpu/linalg.h | 3 +-- paddle/pten/kernels/cpu/manipulation.h | 6 +----- paddle/pten/kernels/cpu/math.h | 6 +----- paddle/pten/kernels/cpu/utils.h | 5 +---- paddle/pten/kernels/cuda/creation.h | 5 +---- paddle/pten/kernels/cuda/linalg.h | 6 +----- paddle/pten/kernels/cuda/manipulation.h | 6 +----- paddle/pten/kernels/cuda/math.h | 6 +----- paddle/pten/kernels/cuda/utils.h | 5 +---- paddle/pten/kernels/hybird/CMakeLists.txt | 6 +++--- paddle/pten/kernels/hybird/cuda/cast_kernel_impl.h | 2 +- paddle/pten/kernels/hybird/cuda/reduce/reduce.h | 5 +---- paddle/pten/kernels/hybird/general/elementwise_base.h | 3 +-- paddle/pten/kernels/hybird/transpose.cc | 4 ++-- paddle/pten/kernels/hybird/transpose.cu | 3 +-- paddle/pten/kernels/xpu/manipulation.h | 6 +----- 17 files changed, 20 insertions(+), 62 deletions(-) diff --git a/paddle/pten/kernels/cpu/creation.h b/paddle/pten/kernels/cpu/creation.h index 9d53f78026a..75d99c0794c 100644 --- a/paddle/pten/kernels/cpu/creation.h +++ b/paddle/pten/kernels/cpu/creation.h @@ -14,16 +14,13 @@ #pragma once +#include "paddle/pten/backends/cpu/cpu_context.h" #include "paddle/pten/common/scalar.h" #include "paddle/pten/common/scalar_array.h" #include "paddle/pten/core/dense_tensor.h" -#include "paddle/fluid/platform/device_context.h" - namespace pten { -using CPUContext = paddle::platform::CPUDeviceContext; - template void FullLike(const CPUContext& dev_ctx, const Scalar& val, DenseTensor* out); diff --git a/paddle/pten/kernels/cpu/linalg.h b/paddle/pten/kernels/cpu/linalg.h index a954033866f..29c6cd16cf8 100644 --- a/paddle/pten/kernels/cpu/linalg.h +++ b/paddle/pten/kernels/cpu/linalg.h @@ -14,6 +14,7 @@ #pragma once +#include "paddle/pten/backends/cpu/cpu_context.h" #include "paddle/pten/core/dense_tensor.h" // See Note [ Why still include the fluid headers? ] @@ -21,8 +22,6 @@ namespace pten { -using CPUContext = paddle::platform::CPUDeviceContext; - template void Dot(const CPUContext& dev_ctx, const DenseTensor& x, diff --git a/paddle/pten/kernels/cpu/manipulation.h b/paddle/pten/kernels/cpu/manipulation.h index cc583547875..1a219dc79e6 100644 --- a/paddle/pten/kernels/cpu/manipulation.h +++ b/paddle/pten/kernels/cpu/manipulation.h @@ -14,17 +14,13 @@ limitations under the License. */ #pragma once +#include "paddle/pten/backends/cpu/cpu_context.h" #include "paddle/pten/common/scalar_array.h" #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/core/kernel_registry.h" -// See Note [ Why still include the fluid headers? ] -#include "paddle/fluid/platform/device_context.h" - namespace pten { -using CPUContext = paddle::platform::CPUDeviceContext; - template void Flatten(const CPUContext& dev_ctx, const DenseTensor& x, diff --git a/paddle/pten/kernels/cpu/math.h b/paddle/pten/kernels/cpu/math.h index c53e659cf83..67a2feb4eef 100644 --- a/paddle/pten/kernels/cpu/math.h +++ b/paddle/pten/kernels/cpu/math.h @@ -14,17 +14,13 @@ limitations under the License. */ #pragma once +#include "paddle/pten/backends/cpu/cpu_context.h" #include "paddle/pten/common/scalar.h" #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/core/kernel_registry.h" -// See Note [ Why still include the fluid headers? ] -#include "paddle/fluid/platform/device_context.h" - namespace pten { -using CPUContext = paddle::platform::CPUDeviceContext; - template void Sign(const CPUContext& dev_ctx, const DenseTensor& x, DenseTensor* out); diff --git a/paddle/pten/kernels/cpu/utils.h b/paddle/pten/kernels/cpu/utils.h index 527346f005f..93730692079 100644 --- a/paddle/pten/kernels/cpu/utils.h +++ b/paddle/pten/kernels/cpu/utils.h @@ -14,15 +14,12 @@ limitations under the License. */ #pragma once +#include "paddle/pten/backends/cpu/cpu_context.h" #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/core/kernel_registry.h" -// See Note [ Why still include the fluid headers? ] -#include "paddle/fluid/platform/device_context.h" namespace pten { -using CPUContext = paddle::platform::CPUDeviceContext; - void Copy(const CPUContext& dev_ctx, const DenseTensor& src, bool blocking, diff --git a/paddle/pten/kernels/cuda/creation.h b/paddle/pten/kernels/cuda/creation.h index a8fa7dc84d4..72e8fbd0174 100644 --- a/paddle/pten/kernels/cuda/creation.h +++ b/paddle/pten/kernels/cuda/creation.h @@ -17,16 +17,13 @@ // CUDA and HIP use same api #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +#include "paddle/pten/backends/cuda/cuda_context.h" #include "paddle/pten/common/scalar.h" #include "paddle/pten/common/scalar_array.h" #include "paddle/pten/core/dense_tensor.h" -#include "paddle/fluid/platform/device_context.h" - namespace pten { -using CUDAContext = paddle::platform::CUDADeviceContext; - template void FullLike(const CUDAContext& dev_ctx, const Scalar& val, DenseTensor* out); diff --git a/paddle/pten/kernels/cuda/linalg.h b/paddle/pten/kernels/cuda/linalg.h index a6489efa72e..84f48ca609b 100644 --- a/paddle/pten/kernels/cuda/linalg.h +++ b/paddle/pten/kernels/cuda/linalg.h @@ -17,15 +17,11 @@ // CUDA and HIP use same api #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +#include "paddle/pten/backends/cuda/cuda_context.h" #include "paddle/pten/core/dense_tensor.h" -// See Note [ Why still include the fluid headers? ] -#include "paddle/fluid/platform/device_context.h" - namespace pten { -using CUDAContext = paddle::platform::CUDADeviceContext; - template void Dot(const CUDAContext& dev_ctx, const DenseTensor& x, diff --git a/paddle/pten/kernels/cuda/manipulation.h b/paddle/pten/kernels/cuda/manipulation.h index be935a045f9..165b08ad59a 100644 --- a/paddle/pten/kernels/cuda/manipulation.h +++ b/paddle/pten/kernels/cuda/manipulation.h @@ -17,17 +17,13 @@ // CUDA and HIP use same api #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +#include "paddle/pten/backends/cuda/cuda_context.h" #include "paddle/pten/common/scalar_array.h" #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/core/kernel_registry.h" -// See Note [ Why still include the fluid headers? ] -#include "paddle/fluid/platform/device_context.h" - namespace pten { -using CUDAContext = paddle::platform::CUDADeviceContext; - template void Flatten(const CUDAContext& dev_ctx, const DenseTensor& x, diff --git a/paddle/pten/kernels/cuda/math.h b/paddle/pten/kernels/cuda/math.h index 37f61c51249..c9f2f04494f 100644 --- a/paddle/pten/kernels/cuda/math.h +++ b/paddle/pten/kernels/cuda/math.h @@ -17,16 +17,12 @@ limitations under the License. */ // CUDA and HIP use same api #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +#include "paddle/pten/backends/cuda/cuda_context.h" #include "paddle/pten/common/scalar.h" #include "paddle/pten/core/dense_tensor.h" -// See Note [ Why still include the fluid headers? ] -#include "paddle/fluid/platform/device_context.h" - namespace pten { -using CUDAContext = paddle::platform::CUDADeviceContext; - template void Sign(const CUDAContext& dev_ctx, const DenseTensor& x, DenseTensor* out); diff --git a/paddle/pten/kernels/cuda/utils.h b/paddle/pten/kernels/cuda/utils.h index d375c1cea23..bd29347e313 100644 --- a/paddle/pten/kernels/cuda/utils.h +++ b/paddle/pten/kernels/cuda/utils.h @@ -17,15 +17,12 @@ limitations under the License. */ // CUDA and HIP use same api #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +#include "paddle/pten/backends/cuda/cuda_context.h" #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/core/kernel_registry.h" -// See Note [ Why still include the fluid headers? ] -#include "paddle/fluid/platform/device_context.h" namespace pten { -using CUDAContext = paddle::platform::CUDADeviceContext; - void Copy(const CUDAContext& dev_ctx, const DenseTensor& src, bool blocking, diff --git a/paddle/pten/kernels/hybird/CMakeLists.txt b/paddle/pten/kernels/hybird/CMakeLists.txt index c82cbd1ef9e..9d4d86f0674 100644 --- a/paddle/pten/kernels/hybird/CMakeLists.txt +++ b/paddle/pten/kernels/hybird/CMakeLists.txt @@ -2,9 +2,9 @@ add_subdirectory(eigen) add_subdirectory(blas) add_subdirectory(general) -cc_library(pten_transpose_cpu SRCS transpose.cc DEPS dense_tensor device_context) +cc_library(pten_transpose_cpu SRCS transpose.cc DEPS dense_tensor pten_context) if(WITH_GPU) - nv_library(pten_transpose_cuda SRCS transpose.cu DEPS dense_tensor malloc device_context) + nv_library(pten_transpose_cuda SRCS transpose.cu DEPS dense_tensor malloc pten_context) elseif(WITH_ROCM) - hip_library(pten_transpose_cuda SRCS transpose.cu DEPS dense_tensor malloc device_context) + hip_library(pten_transpose_cuda SRCS transpose.cu DEPS dense_tensor malloc pten_context) endif() diff --git a/paddle/pten/kernels/hybird/cuda/cast_kernel_impl.h b/paddle/pten/kernels/hybird/cuda/cast_kernel_impl.h index 1bf5bb288e8..54a584d78d2 100644 --- a/paddle/pten/kernels/hybird/cuda/cast_kernel_impl.h +++ b/paddle/pten/kernels/hybird/cuda/cast_kernel_impl.h @@ -15,13 +15,13 @@ #pragma once #include "paddle/fluid/platform/device/gpu/gpu_helper.h" #include "paddle/fluid/platform/float16.h" +#include "paddle/pten/backends/cuda/cuda_context.h" #include "paddle/pten/core/dense_tensor.h" #include "paddle/fluid/platform/aligned_vector.h" #include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" namespace pten { namespace detail { -using CUDAContext = paddle::platform::CUDADeviceContext; template __global__ void VecCastCUDAKernel(const InT* in, const int64_t N, OutT* out) { diff --git a/paddle/pten/kernels/hybird/cuda/reduce/reduce.h b/paddle/pten/kernels/hybird/cuda/reduce/reduce.h index 1e47726333b..c88965e6def 100644 --- a/paddle/pten/kernels/hybird/cuda/reduce/reduce.h +++ b/paddle/pten/kernels/hybird/cuda/reduce/reduce.h @@ -17,16 +17,13 @@ // CUDA and HIP use same api #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +#include "paddle/pten/backends/cuda/cuda_context.h" #include "paddle/pten/common/scalar.h" #include "paddle/pten/core/dense_tensor.h" - -#include "paddle/fluid/platform/device_context.h" #include "paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h" namespace pten { -using CUDAContext = paddle::platform::CUDADeviceContext; - static inline std::vector GetReduceDim( const std::vector& dims, int dim_size, bool reduce_all) { std::vector reduce_dims; diff --git a/paddle/pten/kernels/hybird/general/elementwise_base.h b/paddle/pten/kernels/hybird/general/elementwise_base.h index 8c8a0342f6a..827af86812c 100644 --- a/paddle/pten/kernels/hybird/general/elementwise_base.h +++ b/paddle/pten/kernels/hybird/general/elementwise_base.h @@ -15,13 +15,13 @@ limitations under the License. */ #pragma once #include "paddle/fluid/platform/transform.h" +#include "paddle/pten/backends/all_context.h" #include "paddle/pten/core/dense_tensor.h" namespace pten { namespace general { using DDim = paddle::framework::DDim; -using CPUContext = paddle::platform::CPUDeviceContext; template class RowwiseTransformIterator; @@ -131,7 +131,6 @@ class MidWiseTransformIterator }; #if defined(__NVCC__) || defined(__HIPCC__) -using CUDAContext = paddle::platform::CUDADeviceContext; template class RowwiseTransformIterator : public thrust::iterator_adaptor, diff --git a/paddle/pten/kernels/hybird/transpose.cc b/paddle/pten/kernels/hybird/transpose.cc index 73375a6a379..d1d4350c931 100644 --- a/paddle/pten/kernels/hybird/transpose.cc +++ b/paddle/pten/kernels/hybird/transpose.cc @@ -14,16 +14,16 @@ #include "paddle/pten/kernels/hybird/transpose.h" #include "paddle/fluid/framework/ddim.h" +#include "paddle/pten/backends/cpu/cpu_context.h" #include "paddle/pten/core/dense_tensor.h" // See Note [ Why still include the fluid headers? ] #include "paddle/fluid/platform/bfloat16.h" #include "paddle/fluid/platform/complex.h" -#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/float16.h" + namespace pten { namespace math { -using CPUContext = paddle::platform::CPUDeviceContext; template struct TransposeNormal { diff --git a/paddle/pten/kernels/hybird/transpose.cu b/paddle/pten/kernels/hybird/transpose.cu index bf7a1409938..ad2eda4edd9 100644 --- a/paddle/pten/kernels/hybird/transpose.cu +++ b/paddle/pten/kernels/hybird/transpose.cu @@ -14,6 +14,7 @@ #include "paddle/fluid/framework/ddim.h" #include "paddle/fluid/memory/memcpy.h" +#include "paddle/pten/backends/cuda/cuda_context.h" #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/kernels/hybird/math/cast_func.h" #include "paddle/pten/kernels/hybird/transpose.h" @@ -21,13 +22,11 @@ // See Note [ Why still include the fluid headers? ] #include "paddle/fluid/platform/bfloat16.h" #include "paddle/fluid/platform/complex.h" -#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/float16.h" namespace pten { namespace math { -using CUDAContext = paddle::platform::CUDADeviceContext; #define REINTERPRET(T, DST_PTR, SRC_PTR) \ T* DST_PTR = reinterpret_cast(SRC_PTR) diff --git a/paddle/pten/kernels/xpu/manipulation.h b/paddle/pten/kernels/xpu/manipulation.h index a9f57025e1e..0b68ae41951 100644 --- a/paddle/pten/kernels/xpu/manipulation.h +++ b/paddle/pten/kernels/xpu/manipulation.h @@ -16,17 +16,13 @@ limitations under the License. */ #ifdef PADDLE_WITH_XPU +#include "paddle/pten/backends/xpu/xpu_context.h" #include "paddle/pten/common/scalar_array.h" #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/core/kernel_registry.h" -// See Note [ Why still include the fluid headers? ] -#include "paddle/fluid/platform/device_context.h" - namespace pten { -using XPUContext = paddle::platform::XPUDeviceContext; - template void Flatten(const XPUContext& dev_ctx, const DenseTensor& x, -- GitLab