未验证 提交 e02537f9 编写于 作者: C Chen Weihang 提交者: GitHub

[PTen] Unify device context entrance in pten part 2 (#38182)

* unify device context entrance

* move all_context include to header

* polish cmake dependencies for device_context

* fix NPU compile failure

* fix NPU compile failure
上级 55509ae7
......@@ -14,16 +14,13 @@
#pragma once
#include "paddle/pten/backends/cpu/cpu_context.h"
#include "paddle/pten/common/scalar.h"
#include "paddle/pten/common/scalar_array.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/fluid/platform/device_context.h"
namespace pten {
using CPUContext = paddle::platform::CPUDeviceContext;
template <typename T>
void FullLike(const CPUContext& dev_ctx, const Scalar& val, DenseTensor* out);
......
......@@ -14,6 +14,7 @@
#pragma once
#include "paddle/pten/backends/cpu/cpu_context.h"
#include "paddle/pten/core/dense_tensor.h"
// See Note [ Why still include the fluid headers? ]
......@@ -21,8 +22,6 @@
namespace pten {
using CPUContext = paddle::platform::CPUDeviceContext;
template <typename T>
void Dot(const CPUContext& dev_ctx,
const DenseTensor& x,
......
......@@ -14,17 +14,13 @@ limitations under the License. */
#pragma once
#include "paddle/pten/backends/cpu/cpu_context.h"
#include "paddle/pten/common/scalar_array.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/kernel_registry.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/platform/device_context.h"
namespace pten {
using CPUContext = paddle::platform::CPUDeviceContext;
template <typename T>
void Flatten(const CPUContext& dev_ctx,
const DenseTensor& x,
......
......@@ -14,17 +14,13 @@ limitations under the License. */
#pragma once
#include "paddle/pten/backends/cpu/cpu_context.h"
#include "paddle/pten/common/scalar.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/kernel_registry.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/platform/device_context.h"
namespace pten {
using CPUContext = paddle::platform::CPUDeviceContext;
template <typename T>
void Sign(const CPUContext& dev_ctx, const DenseTensor& x, DenseTensor* out);
......
......@@ -14,15 +14,12 @@ limitations under the License. */
#pragma once
#include "paddle/pten/backends/cpu/cpu_context.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/kernel_registry.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/platform/device_context.h"
namespace pten {
using CPUContext = paddle::platform::CPUDeviceContext;
void Copy(const CPUContext& dev_ctx,
const DenseTensor& src,
bool blocking,
......
......@@ -17,16 +17,13 @@
// CUDA and HIP use same api
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/pten/backends/cuda/cuda_context.h"
#include "paddle/pten/common/scalar.h"
#include "paddle/pten/common/scalar_array.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/fluid/platform/device_context.h"
namespace pten {
using CUDAContext = paddle::platform::CUDADeviceContext;
template <typename T>
void FullLike(const CUDAContext& dev_ctx, const Scalar& val, DenseTensor* out);
......
......@@ -17,15 +17,11 @@
// CUDA and HIP use same api
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/pten/backends/cuda/cuda_context.h"
#include "paddle/pten/core/dense_tensor.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/platform/device_context.h"
namespace pten {
using CUDAContext = paddle::platform::CUDADeviceContext;
template <typename T>
void Dot(const CUDAContext& dev_ctx,
const DenseTensor& x,
......
......@@ -17,17 +17,13 @@
// CUDA and HIP use same api
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/pten/backends/cuda/cuda_context.h"
#include "paddle/pten/common/scalar_array.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/kernel_registry.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/platform/device_context.h"
namespace pten {
using CUDAContext = paddle::platform::CUDADeviceContext;
template <typename T>
void Flatten(const CUDAContext& dev_ctx,
const DenseTensor& x,
......
......@@ -17,16 +17,12 @@ limitations under the License. */
// CUDA and HIP use same api
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/pten/backends/cuda/cuda_context.h"
#include "paddle/pten/common/scalar.h"
#include "paddle/pten/core/dense_tensor.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/platform/device_context.h"
namespace pten {
using CUDAContext = paddle::platform::CUDADeviceContext;
template <typename T>
void Sign(const CUDAContext& dev_ctx, const DenseTensor& x, DenseTensor* out);
......
......@@ -17,15 +17,12 @@ limitations under the License. */
// CUDA and HIP use same api
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/pten/backends/cuda/cuda_context.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/kernel_registry.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/platform/device_context.h"
namespace pten {
using CUDAContext = paddle::platform::CUDADeviceContext;
void Copy(const CUDAContext& dev_ctx,
const DenseTensor& src,
bool blocking,
......
......@@ -2,9 +2,9 @@ add_subdirectory(eigen)
add_subdirectory(blas)
add_subdirectory(general)
cc_library(pten_transpose_cpu SRCS transpose.cc DEPS dense_tensor device_context)
cc_library(pten_transpose_cpu SRCS transpose.cc DEPS dense_tensor pten_context)
if(WITH_GPU)
nv_library(pten_transpose_cuda SRCS transpose.cu DEPS dense_tensor malloc device_context)
nv_library(pten_transpose_cuda SRCS transpose.cu DEPS dense_tensor malloc pten_context)
elseif(WITH_ROCM)
hip_library(pten_transpose_cuda SRCS transpose.cu DEPS dense_tensor malloc device_context)
hip_library(pten_transpose_cuda SRCS transpose.cu DEPS dense_tensor malloc pten_context)
endif()
......@@ -15,13 +15,13 @@
#pragma once
#include "paddle/fluid/platform/device/gpu/gpu_helper.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/pten/backends/cuda/cuda_context.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/fluid/platform/aligned_vector.h"
#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
namespace pten {
namespace detail {
using CUDAContext = paddle::platform::CUDADeviceContext;
template <typename InT, typename OutT, int VecSize>
__global__ void VecCastCUDAKernel(const InT* in, const int64_t N, OutT* out) {
......
......@@ -17,16 +17,13 @@
// CUDA and HIP use same api
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/pten/backends/cuda/cuda_context.h"
#include "paddle/pten/common/scalar.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h"
namespace pten {
using CUDAContext = paddle::platform::CUDADeviceContext;
static inline std::vector<int64_t> GetReduceDim(
const std::vector<int64_t>& dims, int dim_size, bool reduce_all) {
std::vector<int64_t> reduce_dims;
......
......@@ -15,13 +15,13 @@ limitations under the License. */
#pragma once
#include "paddle/fluid/platform/transform.h"
#include "paddle/pten/backends/all_context.h"
#include "paddle/pten/core/dense_tensor.h"
namespace pten {
namespace general {
using DDim = paddle::framework::DDim;
using CPUContext = paddle::platform::CPUDeviceContext;
template <typename T, typename DeviceContext>
class RowwiseTransformIterator;
......@@ -131,7 +131,6 @@ class MidWiseTransformIterator<T, CPUContext>
};
#if defined(__NVCC__) || defined(__HIPCC__)
using CUDAContext = paddle::platform::CUDADeviceContext;
template <typename T>
class RowwiseTransformIterator<T, CUDAContext>
: public thrust::iterator_adaptor<RowwiseTransformIterator<T, CUDAContext>,
......
......@@ -14,16 +14,16 @@
#include "paddle/pten/kernels/hybird/transpose.h"
#include "paddle/fluid/framework/ddim.h"
#include "paddle/pten/backends/cpu/cpu_context.h"
#include "paddle/pten/core/dense_tensor.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/platform/bfloat16.h"
#include "paddle/fluid/platform/complex.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/float16.h"
namespace pten {
namespace math {
using CPUContext = paddle::platform::CPUDeviceContext;
template <typename T>
struct TransposeNormal<CPUContext, T> {
......
......@@ -14,6 +14,7 @@
#include "paddle/fluid/framework/ddim.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/pten/backends/cuda/cuda_context.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/kernels/hybird/math/cast_func.h"
#include "paddle/pten/kernels/hybird/transpose.h"
......@@ -21,13 +22,11 @@
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/platform/bfloat16.h"
#include "paddle/fluid/platform/complex.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/float16.h"
namespace pten {
namespace math {
using CUDAContext = paddle::platform::CUDADeviceContext;
#define REINTERPRET(T, DST_PTR, SRC_PTR) \
T* DST_PTR = reinterpret_cast<T*>(SRC_PTR)
......
......@@ -16,17 +16,13 @@ limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include "paddle/pten/backends/xpu/xpu_context.h"
#include "paddle/pten/common/scalar_array.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/kernel_registry.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/platform/device_context.h"
namespace pten {
using XPUContext = paddle::platform::XPUDeviceContext;
template <typename T>
void Flatten(const XPUContext& dev_ctx,
const DenseTensor& x,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册