Unverified · Commit e02537f9, authored by Chen Weihang, committed by GitHub

[PTen] Unify device context entrance in pten part 2 (#38182)

* unify device context entrance

* move all_context include to header

* polish cmake relay for device_context

* fix npu compile failed

* fix npu compile failed
Parent 55509ae7
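The same pattern repeats in every file below: drop the `#include "paddle/fluid/platform/device_context.h"` line and the per-file `using CPUContext/CUDAContext/XPUContext = paddle::platform::...DeviceContext;` alias, and instead include the matching pten backend header (`paddle/pten/backends/cpu/cpu_context.h`, `.../cuda/cuda_context.h`, `.../xpu/xpu_context.h`, or `.../all_context.h`), so the device context types enter pten through a single entrance. A minimal sketch of what a kernel declaration header looks like after the change (the file name and the `Scale` kernel are illustrative only, not part of this commit):

// scale.h (hypothetical) -- kernel declaration after the unification.
#pragma once

// The backend header is now the single entrance for the device context;
// it already provides pten::CPUContext, so no local alias is needed.
#include "paddle/pten/backends/cpu/cpu_context.h"
#include "paddle/pten/core/dense_tensor.h"

namespace pten {

// Kernels keep taking the backend context by const reference as the first
// argument; only the place CPUContext comes from has changed.
template <typename T>
void Scale(const CPUContext& dev_ctx,
           const DenseTensor& x,
           float scale,
           DenseTensor* out);

}  // namespace pten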
@@ -14,16 +14,13 @@
 #pragma once
+#include "paddle/pten/backends/cpu/cpu_context.h"
 #include "paddle/pten/common/scalar.h"
 #include "paddle/pten/common/scalar_array.h"
 #include "paddle/pten/core/dense_tensor.h"
-#include "paddle/fluid/platform/device_context.h"
 namespace pten {
-using CPUContext = paddle::platform::CPUDeviceContext;
 template <typename T>
 void FullLike(const CPUContext& dev_ctx, const Scalar& val, DenseTensor* out);
...
@@ -14,6 +14,7 @@
 #pragma once
+#include "paddle/pten/backends/cpu/cpu_context.h"
 #include "paddle/pten/core/dense_tensor.h"
 // See Note [ Why still include the fluid headers? ]
@@ -21,8 +22,6 @@
 namespace pten {
-using CPUContext = paddle::platform::CPUDeviceContext;
 template <typename T>
 void Dot(const CPUContext& dev_ctx,
          const DenseTensor& x,
...
@@ -14,17 +14,13 @@ limitations under the License. */
 #pragma once
+#include "paddle/pten/backends/cpu/cpu_context.h"
 #include "paddle/pten/common/scalar_array.h"
 #include "paddle/pten/core/dense_tensor.h"
 #include "paddle/pten/core/kernel_registry.h"
-// See Note [ Why still include the fluid headers? ]
-#include "paddle/fluid/platform/device_context.h"
 namespace pten {
-using CPUContext = paddle::platform::CPUDeviceContext;
 template <typename T>
 void Flatten(const CPUContext& dev_ctx,
              const DenseTensor& x,
...
@@ -14,17 +14,13 @@ limitations under the License. */
 #pragma once
+#include "paddle/pten/backends/cpu/cpu_context.h"
 #include "paddle/pten/common/scalar.h"
 #include "paddle/pten/core/dense_tensor.h"
 #include "paddle/pten/core/kernel_registry.h"
-// See Note [ Why still include the fluid headers? ]
-#include "paddle/fluid/platform/device_context.h"
 namespace pten {
-using CPUContext = paddle::platform::CPUDeviceContext;
 template <typename T>
 void Sign(const CPUContext& dev_ctx, const DenseTensor& x, DenseTensor* out);
...
@@ -14,15 +14,12 @@ limitations under the License. */
 #pragma once
+#include "paddle/pten/backends/cpu/cpu_context.h"
 #include "paddle/pten/core/dense_tensor.h"
 #include "paddle/pten/core/kernel_registry.h"
-// See Note [ Why still include the fluid headers? ]
-#include "paddle/fluid/platform/device_context.h"
 namespace pten {
-using CPUContext = paddle::platform::CPUDeviceContext;
 void Copy(const CPUContext& dev_ctx,
           const DenseTensor& src,
           bool blocking,
...
@@ -17,16 +17,13 @@
 // CUDA and HIP use same api
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+#include "paddle/pten/backends/cuda/cuda_context.h"
 #include "paddle/pten/common/scalar.h"
 #include "paddle/pten/common/scalar_array.h"
 #include "paddle/pten/core/dense_tensor.h"
-#include "paddle/fluid/platform/device_context.h"
 namespace pten {
-using CUDAContext = paddle::platform::CUDADeviceContext;
 template <typename T>
 void FullLike(const CUDAContext& dev_ctx, const Scalar& val, DenseTensor* out);
...
@@ -17,15 +17,11 @@
 // CUDA and HIP use same api
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+#include "paddle/pten/backends/cuda/cuda_context.h"
 #include "paddle/pten/core/dense_tensor.h"
-// See Note [ Why still include the fluid headers? ]
-#include "paddle/fluid/platform/device_context.h"
 namespace pten {
-using CUDAContext = paddle::platform::CUDADeviceContext;
 template <typename T>
 void Dot(const CUDAContext& dev_ctx,
          const DenseTensor& x,
...
@@ -17,17 +17,13 @@
 // CUDA and HIP use same api
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+#include "paddle/pten/backends/cuda/cuda_context.h"
 #include "paddle/pten/common/scalar_array.h"
 #include "paddle/pten/core/dense_tensor.h"
 #include "paddle/pten/core/kernel_registry.h"
-// See Note [ Why still include the fluid headers? ]
-#include "paddle/fluid/platform/device_context.h"
 namespace pten {
-using CUDAContext = paddle::platform::CUDADeviceContext;
 template <typename T>
 void Flatten(const CUDAContext& dev_ctx,
              const DenseTensor& x,
...
@@ -17,16 +17,12 @@ limitations under the License. */
 // CUDA and HIP use same api
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+#include "paddle/pten/backends/cuda/cuda_context.h"
 #include "paddle/pten/common/scalar.h"
 #include "paddle/pten/core/dense_tensor.h"
-// See Note [ Why still include the fluid headers? ]
-#include "paddle/fluid/platform/device_context.h"
 namespace pten {
-using CUDAContext = paddle::platform::CUDADeviceContext;
 template <typename T>
 void Sign(const CUDAContext& dev_ctx, const DenseTensor& x, DenseTensor* out);
...
@@ -17,15 +17,12 @@ limitations under the License. */
 // CUDA and HIP use same api
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+#include "paddle/pten/backends/cuda/cuda_context.h"
 #include "paddle/pten/core/dense_tensor.h"
 #include "paddle/pten/core/kernel_registry.h"
-// See Note [ Why still include the fluid headers? ]
-#include "paddle/fluid/platform/device_context.h"
 namespace pten {
-using CUDAContext = paddle::platform::CUDADeviceContext;
 void Copy(const CUDAContext& dev_ctx,
           const DenseTensor& src,
           bool blocking,
...
@@ -2,9 +2,9 @@ add_subdirectory(eigen)
 add_subdirectory(blas)
 add_subdirectory(general)
-cc_library(pten_transpose_cpu SRCS transpose.cc DEPS dense_tensor device_context)
+cc_library(pten_transpose_cpu SRCS transpose.cc DEPS dense_tensor pten_context)
 if(WITH_GPU)
-  nv_library(pten_transpose_cuda SRCS transpose.cu DEPS dense_tensor malloc device_context)
+  nv_library(pten_transpose_cuda SRCS transpose.cu DEPS dense_tensor malloc pten_context)
 elseif(WITH_ROCM)
-  hip_library(pten_transpose_cuda SRCS transpose.cu DEPS dense_tensor malloc device_context)
+  hip_library(pten_transpose_cuda SRCS transpose.cu DEPS dense_tensor malloc pten_context)
 endif()
@@ -15,13 +15,13 @@
 #pragma once
 #include "paddle/fluid/platform/device/gpu/gpu_helper.h"
 #include "paddle/fluid/platform/float16.h"
+#include "paddle/pten/backends/cuda/cuda_context.h"
 #include "paddle/pten/core/dense_tensor.h"
 #include "paddle/fluid/platform/aligned_vector.h"
 #include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
 namespace pten {
 namespace detail {
-using CUDAContext = paddle::platform::CUDADeviceContext;
 template <typename InT, typename OutT, int VecSize>
 __global__ void VecCastCUDAKernel(const InT* in, const int64_t N, OutT* out) {
...
@@ -17,16 +17,13 @@
 // CUDA and HIP use same api
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+#include "paddle/pten/backends/cuda/cuda_context.h"
 #include "paddle/pten/common/scalar.h"
 #include "paddle/pten/core/dense_tensor.h"
-#include "paddle/fluid/platform/device_context.h"
 #include "paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h"
 namespace pten {
-using CUDAContext = paddle::platform::CUDADeviceContext;
 static inline std::vector<int64_t> GetReduceDim(
     const std::vector<int64_t>& dims, int dim_size, bool reduce_all) {
   std::vector<int64_t> reduce_dims;
...
@@ -15,13 +15,13 @@ limitations under the License. */
 #pragma once
 #include "paddle/fluid/platform/transform.h"
+#include "paddle/pten/backends/all_context.h"
 #include "paddle/pten/core/dense_tensor.h"
 namespace pten {
 namespace general {
 using DDim = paddle::framework::DDim;
-using CPUContext = paddle::platform::CPUDeviceContext;
 template <typename T, typename DeviceContext>
 class RowwiseTransformIterator;
@@ -131,7 +131,6 @@ class MidWiseTransformIterator<T, CPUContext>
 };
 #if defined(__NVCC__) || defined(__HIPCC__)
-using CUDAContext = paddle::platform::CUDADeviceContext;
 template <typename T>
 class RowwiseTransformIterator<T, CUDAContext>
     : public thrust::iterator_adaptor<RowwiseTransformIterator<T, CUDAContext>,
...
@@ -14,16 +14,16 @@
 #include "paddle/pten/kernels/hybird/transpose.h"
 #include "paddle/fluid/framework/ddim.h"
+#include "paddle/pten/backends/cpu/cpu_context.h"
 #include "paddle/pten/core/dense_tensor.h"
 // See Note [ Why still include the fluid headers? ]
 #include "paddle/fluid/platform/bfloat16.h"
 #include "paddle/fluid/platform/complex.h"
-#include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/float16.h"
 namespace pten {
 namespace math {
-using CPUContext = paddle::platform::CPUDeviceContext;
 template <typename T>
 struct TransposeNormal<CPUContext, T> {
...
@@ -14,6 +14,7 @@
 #include "paddle/fluid/framework/ddim.h"
 #include "paddle/fluid/memory/memcpy.h"
+#include "paddle/pten/backends/cuda/cuda_context.h"
 #include "paddle/pten/core/dense_tensor.h"
 #include "paddle/pten/kernels/hybird/math/cast_func.h"
 #include "paddle/pten/kernels/hybird/transpose.h"
@@ -21,13 +22,11 @@
 // See Note [ Why still include the fluid headers? ]
 #include "paddle/fluid/platform/bfloat16.h"
 #include "paddle/fluid/platform/complex.h"
-#include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/float16.h"
 namespace pten {
 namespace math {
-using CUDAContext = paddle::platform::CUDADeviceContext;
 #define REINTERPRET(T, DST_PTR, SRC_PTR) \
   T* DST_PTR = reinterpret_cast<T*>(SRC_PTR)
...
@@ -16,17 +16,13 @@ limitations under the License. */
 #ifdef PADDLE_WITH_XPU
+#include "paddle/pten/backends/xpu/xpu_context.h"
 #include "paddle/pten/common/scalar_array.h"
 #include "paddle/pten/core/dense_tensor.h"
 #include "paddle/pten/core/kernel_registry.h"
-// See Note [ Why still include the fluid headers? ]
-#include "paddle/fluid/platform/device_context.h"
 namespace pten {
-using XPUContext = paddle::platform::XPUDeviceContext;
 template <typename T>
 void Flatten(const XPUContext& dev_ctx,
              const DenseTensor& x,
...