From dc7597e3775a8a08b235e184bd780bcb1a1eae9d Mon Sep 17 00:00:00 2001
From: Chen Weihang
Date: Tue, 21 Dec 2021 05:26:53 -0600
Subject: [PATCH] [PTen] Rename cuda dir and context to gpu (#38296)

* rename cuda to gpu

* revert CMake change

* resolve conflict

* rename other cuda to gpu

* polish details
---
 paddle/fluid/operators/conj_op.h              |  2 +-
 paddle/pten/CMakeLists.txt                    |  2 +-
 paddle/pten/api/include/tensor.h              |  2 +-
 paddle/pten/api/lib/ext_compat_utils.cc       |  2 +-
 paddle/pten/api/lib/kernel_declare.h          | 12 ++++----
 paddle/pten/api/lib/utils.cc                  |  2 +-
 paddle/pten/backends/all_context.h            |  2 +-
 .../cuda_context.h => gpu/gpu_context.h}      |  2 +-
 paddle/pten/common/backend.h                  |  6 ++--
 paddle/pten/core/convert_utils.cc             |  4 +--
 paddle/pten/core/kernel_registry.h            |  2 +-
 paddle/pten/core/kernel_utils.h               |  2 +-
 paddle/pten/include/linalg.h                  |  2 +-
 paddle/pten/include/manipulation.h            |  2 +-
 paddle/pten/include/math.h                    |  4 +--
 paddle/pten/kernels/CMakeLists.txt            |  3 +-
 paddle/pten/kernels/cuda/CMakeLists.txt       | 17 -----------
 paddle/pten/kernels/gpu/CMakeLists.txt        | 17 +++++++++++
 .../pten/kernels/{cuda => gpu}/conj_kernel.cu | 10 +++----
 .../pten/kernels/{cuda => gpu}/conj_kernel.h  |  4 +--
 .../pten/kernels/{cuda => gpu}/full_kernel.cu |  6 ++--
 paddle/pten/kernels/{cuda => gpu}/linalg.cu   | 14 ++++-----
 paddle/pten/kernels/{cuda => gpu}/linalg.h    |  6 ++--
 .../kernels/{cuda => gpu}/manipulation.cu     | 24 +++++++--------
 .../pten/kernels/{cuda => gpu}/manipulation.h | 10 +++----
 paddle/pten/kernels/{cuda => gpu}/math.cu     | 25 ++++++++--------
 paddle/pten/kernels/{cuda => gpu}/math.h      | 18 +++++------
 .../kernels/{cuda => gpu}/scale_kernel.cu     |  4 +--
 paddle/pten/kernels/{cuda => gpu}/utils.cu    |  6 ++--
 paddle/pten/kernels/{cuda => gpu}/utils.h     |  4 +--
 paddle/pten/kernels/hybird/CMakeLists.txt     |  4 +--
 .../kernels/hybird/cuda/cast_kernel_impl.h    |  4 +--
 .../pten/kernels/hybird/cuda/reduce/reduce.h  |  4 +--
 .../hybird/cuda/reduce/reduce_cuda_impl.h     |  4 +--
 .../kernels/hybird/general/elementwise_base.h | 12 ++++----
 paddle/pten/kernels/hybird/transpose.cu       |  8 ++---
 paddle/pten/tests/api/scale_api.h             | 30 +++++++++----------
 paddle/pten/tests/api/test_matmul_api.cc      |  2 +-
 paddle/pten/tests/api/test_tensor_utils.cc    |  2 +-
 paddle/pten/tests/api/test_to_api.cc          |  4 +--
 paddle/pten/tests/common/test_backend.cc      |  4 +--
 41 files changed, 146 insertions(+), 148 deletions(-)
 rename paddle/pten/backends/{cuda/cuda_context.h => gpu/gpu_context.h} (93%)
 delete mode 100644 paddle/pten/kernels/cuda/CMakeLists.txt
 create mode 100644 paddle/pten/kernels/gpu/CMakeLists.txt
 rename paddle/pten/kernels/{cuda => gpu}/conj_kernel.cu (81%)
 rename paddle/pten/kernels/{cuda => gpu}/conj_kernel.h (86%)
 rename paddle/pten/kernels/{cuda => gpu}/full_kernel.cu (93%)
 rename paddle/pten/kernels/{cuda => gpu}/linalg.cu (90%)
 rename paddle/pten/kernels/{cuda => gpu}/linalg.h (89%)
 rename paddle/pten/kernels/{cuda => gpu}/manipulation.cu (88%)
 rename paddle/pten/kernels/{cuda => gpu}/manipulation.h (86%)
 rename paddle/pten/kernels/{cuda => gpu}/math.cu (89%)
 rename paddle/pten/kernels/{cuda => gpu}/math.h (86%)
 rename paddle/pten/kernels/{cuda => gpu}/scale_kernel.cu (93%)
 rename paddle/pten/kernels/{cuda => gpu}/utils.cu (98%)
 rename paddle/pten/kernels/{cuda => gpu}/utils.h (91%)

diff --git a/paddle/fluid/operators/conj_op.h b/paddle/fluid/operators/conj_op.h
index 90724403d4..0b5a35f515 100644
--- a/paddle/fluid/operators/conj_op.h
+++ b/paddle/fluid/operators/conj_op.h
@@ -21,7 +21,7 @@
 #include "paddle/pten/api/lib/utils/tensor_utils.h"
 #include "paddle/pten/include/core.h"
 #include "paddle/pten/kernels/cpu/conj_kernel.h"
-#include "paddle/pten/kernels/cuda/conj_kernel.h"
+#include "paddle/pten/kernels/gpu/conj_kernel.h"
 
 namespace paddle {
 namespace operators {
diff --git a/paddle/pten/CMakeLists.txt b/paddle/pten/CMakeLists.txt
index eb9a149dd6..799ec885b9 100644
--- a/paddle/pten/CMakeLists.txt
+++ b/paddle/pten/CMakeLists.txt
@@ -27,7 +27,7 @@ set(PTEN_DEPS convert_utils dense_tensor pten_context kernel_factory kernel_cont
 set(PTEN_DEPS ${PTEN_DEPS} math_cpu linalg_cpu manipulation_cpu conj_kernel_cpu scale_kernel_cpu full_kernel_cpu)
 set(PTEN_DEPS ${PTEN_DEPS} nary unary binary)
 if(WITH_GPU OR WITH_ROCM)
-  set(PTEN_DEPS ${PTEN_DEPS} math_cuda linalg_cuda manipulation_cuda conj_kernel_cuda scale_kernel_cuda full_kernel_cuda)
+  set(PTEN_DEPS ${PTEN_DEPS} math_gpu linalg_gpu manipulation_gpu conj_kernel_gpu scale_kernel_gpu full_kernel_gpu)
 endif()
 if(WITH_XPU)
   set(PTEN_DEPS ${PTEN_DEPS} manipulation_xpu)
diff --git a/paddle/pten/api/include/tensor.h b/paddle/pten/api/include/tensor.h
index c8ef22c2ec..935c7d8e32 100644
--- a/paddle/pten/api/include/tensor.h
+++ b/paddle/pten/api/include/tensor.h
@@ -464,7 +464,7 @@ class PADDLE_API Tensor final {
    * unified to Tensor, but Tensor itself is heterogeneous.
    *
    * Tensor can generally be represented by void* and size_t, place.
-   * This is suitable for most scenarios including CPU, CUDA, HIP, CPU, etc.,
+   * This is suitable for most scenarios including CPU, GPU, HIP, CPU, etc.,
    * but there are a few cases where this definition cannot be described,
    * such as the Tensor representation in third-party lib such as Metal,
    * OpenCL, etc., as well as some special Tensor implementations, including
diff --git a/paddle/pten/api/lib/ext_compat_utils.cc b/paddle/pten/api/lib/ext_compat_utils.cc
index 791a8526f3..6f1763eac3 100644
--- a/paddle/pten/api/lib/ext_compat_utils.cc
+++ b/paddle/pten/api/lib/ext_compat_utils.cc
@@ -56,7 +56,7 @@ Backend ConvertExtPlaceToBackend(PlaceType p) {
       return Backend::CPU;
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
     case PlaceType::kGPU:
-      return Backend::CUDA;
+      return Backend::GPU;
 #endif
     default:
       PADDLE_THROW(
diff --git a/paddle/pten/api/lib/kernel_declare.h b/paddle/pten/api/lib/kernel_declare.h
index e748a51082..4dbd46bff6 100644
--- a/paddle/pten/api/lib/kernel_declare.h
+++ b/paddle/pten/api/lib/kernel_declare.h
@@ -28,12 +28,12 @@ PT_DECLARE_KERNEL(scale, CPU, ALL_LAYOUT);
 PT_DECLARE_KERNEL(conj, CPU, ALL_LAYOUT);
 
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-PT_DECLARE_KERNEL(full_like, CUDA, ALL_LAYOUT);
-PT_DECLARE_KERNEL(dot, CUDA, ALL_LAYOUT);
-PT_DECLARE_KERNEL(flatten, CUDA, ALL_LAYOUT);
-PT_DECLARE_KERNEL(sign, CUDA, ALL_LAYOUT);
-PT_DECLARE_KERNEL(scale, CUDA, ALL_LAYOUT);
-PT_DECLARE_KERNEL(conj, CUDA, ALL_LAYOUT);
+PT_DECLARE_KERNEL(full_like, GPU, ALL_LAYOUT);
+PT_DECLARE_KERNEL(dot, GPU, ALL_LAYOUT);
+PT_DECLARE_KERNEL(flatten, GPU, ALL_LAYOUT);
+PT_DECLARE_KERNEL(sign, GPU, ALL_LAYOUT);
+PT_DECLARE_KERNEL(scale, GPU, ALL_LAYOUT);
+PT_DECLARE_KERNEL(conj, GPU, ALL_LAYOUT);
 #endif
 
 #ifdef PADDLE_WITH_XPU
diff --git a/paddle/pten/api/lib/utils.cc b/paddle/pten/api/lib/utils.cc
index 06d604cb83..ddb29c8833 100644
--- a/paddle/pten/api/lib/utils.cc
+++ b/paddle/pten/api/lib/utils.cc
@@ -28,7 +28,7 @@ limitations under the License. */
 PT_DECLARE_KERNEL(copy, CPU, ALL_LAYOUT);
 
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-PT_DECLARE_KERNEL(copy, CUDA, ALL_LAYOUT);
+PT_DECLARE_KERNEL(copy, GPU, ALL_LAYOUT);
 #endif
 
 #ifdef PADDLE_WITH_XPU
diff --git a/paddle/pten/backends/all_context.h b/paddle/pten/backends/all_context.h
index d056af1b3a..a7cb4abc2f 100644
--- a/paddle/pten/backends/all_context.h
+++ b/paddle/pten/backends/all_context.h
@@ -21,7 +21,7 @@ limitations under the License. */
 
 // path replacement after implementing pten DeviceContext
 #include "paddle/pten/backends/cpu/cpu_context.h"
-#include "paddle/pten/backends/cuda/cuda_context.h"
+#include "paddle/pten/backends/gpu/gpu_context.h"
 #include "paddle/pten/backends/npu/npu_context.h"
 #include "paddle/pten/backends/xpu/xpu_context.h"
diff --git a/paddle/pten/backends/cuda/cuda_context.h b/paddle/pten/backends/gpu/gpu_context.h
similarity index 93%
rename from paddle/pten/backends/cuda/cuda_context.h
rename to paddle/pten/backends/gpu/gpu_context.h
index 332fdd2fda..1adfd155ce 100644
--- a/paddle/pten/backends/cuda/cuda_context.h
+++ b/paddle/pten/backends/gpu/gpu_context.h
@@ -20,7 +20,7 @@ limitations under the License. */
 #include "paddle/fluid/platform/device_context.h"
 
 namespace pten {
-using CUDAContext = paddle::platform::CUDADeviceContext;
+using GPUContext = paddle::platform::CUDADeviceContext;
 }  // namespace pten
 
 #endif
diff --git a/paddle/pten/common/backend.h b/paddle/pten/common/backend.h
index 95bbc88681..9944083248 100644
--- a/paddle/pten/common/backend.h
+++ b/paddle/pten/common/backend.h
@@ -43,7 +43,7 @@ enum class Backend : uint8_t {
   CPU,
 
   // various acceleration devices' backends
-  CUDA,
+  GPU,
   XPU,  // XPU currently does not exist at the same time as CUDA
   NPU,  // NPU currently does not exist at the same time as CUDA
 
@@ -99,8 +99,8 @@ inline std::ostream& operator<<(std::ostream& os, Backend backend) {
     case Backend::CPU:
      os << "CPU";
      break;
-    case Backend::CUDA:
-      os << "CUDA";
+    case Backend::GPU:
+      os << "GPU";
      break;
     case Backend::XPU:
      os << "XPU";
diff --git a/paddle/pten/core/convert_utils.cc b/paddle/pten/core/convert_utils.cc
index 936d4effdf..bb8b416128 100644
--- a/paddle/pten/core/convert_utils.cc
+++ b/paddle/pten/core/convert_utils.cc
@@ -23,7 +23,7 @@ Backend TransToPtenBackend(const paddle::platform::Place& place) {
   if (paddle::platform::is_cpu_place(place)) {
     return Backend::CPU;
   } else if (paddle::platform::is_gpu_place(place)) {
-    return Backend::CUDA;
+    return Backend::GPU;
   } else {
     return Backend::UNDEFINED;
   }
@@ -84,7 +84,7 @@ paddle::platform::Place TransToFluidPlace(const Backend& backend) {
     case pten::Backend::CPU:
      return paddle::platform::CPUPlace();
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-    case pten::Backend::CUDA:
+    case pten::Backend::GPU:
      return paddle::platform::CUDAPlace(
          paddle::platform::GetCurrentDeviceId());
 #endif
diff --git a/paddle/pten/core/kernel_registry.h b/paddle/pten/core/kernel_registry.h
index 62a46e128e..a33b13dac2 100644
--- a/paddle/pten/core/kernel_registry.h
+++ b/paddle/pten/core/kernel_registry.h
@@ -57,7 +57,7 @@ struct KernelArgsParseFunctor {
       if (arg_type == std::type_index(typeid(const CPUContext&))
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
          ||
-          arg_type == std::type_index(typeid(const CUDAContext&))) {
+          arg_type == std::type_index(typeid(const GPUContext&))) {
 #else
      ) {
 #endif
diff --git a/paddle/pten/core/kernel_utils.h b/paddle/pten/core/kernel_utils.h
index 82ffa57387..7a7ae28330 100644
--- a/paddle/pten/core/kernel_utils.h
+++ b/paddle/pten/core/kernel_utils.h
@@ -181,7 +181,7 @@ struct KernelImpl {
   PT_SPECIALIZE_KernelCallHelper_FOR_DEVICE_CONTEXT(CPUContext);
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-  PT_SPECIALIZE_KernelCallHelper_FOR_DEVICE_CONTEXT(CUDAContext);
+  PT_SPECIALIZE_KernelCallHelper_FOR_DEVICE_CONTEXT(GPUContext);
 #endif
 #ifdef PADDLE_WITH_ASCEND_CL
   PT_SPECIALIZE_KernelCallHelper_FOR_DEVICE_CONTEXT(NPUContext);
diff --git a/paddle/pten/include/linalg.h b/paddle/pten/include/linalg.h
index 60ec451be2..8f627f5fc8 100644
--- a/paddle/pten/include/linalg.h
+++ b/paddle/pten/include/linalg.h
@@ -18,7 +18,7 @@
 #include "paddle/pten/api/lib/utils/storage.h"
 #include "paddle/pten/include/infermeta.h"
 #include "paddle/pten/kernels/cpu/linalg.h"
-#include "paddle/pten/kernels/cuda/linalg.h"
+#include "paddle/pten/kernels/gpu/linalg.h"
 
 namespace pten {
diff --git a/paddle/pten/include/manipulation.h b/paddle/pten/include/manipulation.h
index e138c51e30..e94f2a6180 100644
--- a/paddle/pten/include/manipulation.h
+++ b/paddle/pten/include/manipulation.h
@@ -18,7 +18,7 @@
 #include "paddle/pten/api/lib/utils/storage.h"
 #include "paddle/pten/include/infermeta.h"
 #include "paddle/pten/kernels/cpu/manipulation.h"
-#include "paddle/pten/kernels/cuda/manipulation.h"
+#include "paddle/pten/kernels/gpu/manipulation.h"
 #include "paddle/pten/kernels/xpu/manipulation.h"
 
 namespace pten {
diff --git a/paddle/pten/include/math.h b/paddle/pten/include/math.h
index 8295c57654..83471692c8 100644
--- a/paddle/pten/include/math.h
+++ b/paddle/pten/include/math.h
@@ -19,8 +19,8 @@ limitations under the License. */
 #include "paddle/pten/include/infermeta.h"
 #include "paddle/pten/kernels/cpu/conj_kernel.h"
 #include "paddle/pten/kernels/cpu/math.h"
-#include "paddle/pten/kernels/cuda/conj_kernel.h"
-#include "paddle/pten/kernels/cuda/math.h"
+#include "paddle/pten/kernels/gpu/conj_kernel.h"
+#include "paddle/pten/kernels/gpu/math.h"
 #include "paddle/pten/kernels/scale_kernel.h"
 
 namespace pten {
diff --git a/paddle/pten/kernels/CMakeLists.txt b/paddle/pten/kernels/CMakeLists.txt
index d87def812d..818ce6cb77 100644
--- a/paddle/pten/kernels/CMakeLists.txt
+++ b/paddle/pten/kernels/CMakeLists.txt
@@ -5,8 +5,7 @@ add_subdirectory(hybird)
 add_subdirectory(cpu)
 
 if(WITH_GPU OR WITH_ROCM)
-  # NOTE(chenweihang): if hip can split from cuda impl, we should add hip dir
-  add_subdirectory(cuda)
+  add_subdirectory(gpu)
 endif()
 if(WITH_MKLDNN)
   # mkldnn will be deprecated and use the new name dnnl
diff --git a/paddle/pten/kernels/cuda/CMakeLists.txt b/paddle/pten/kernels/cuda/CMakeLists.txt
deleted file mode 100644
index 428b2762ca..0000000000
--- a/paddle/pten/kernels/cuda/CMakeLists.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-if(WITH_GPU)
-  nv_library(math_cuda SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory pten_transpose_cuda)
-  nv_library(linalg_cuda SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory)
-  nv_library(utils_cuda SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
-  nv_library(manipulation_cuda SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_cuda unary)
-  nv_library(scale_kernel_cuda SRCS scale_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
-  nv_library(full_kernel_cuda SRCS full_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
-  nv_library(conj_kernel_cuda SRCS conj_kernel.cu DEPS dense_tensor kernel_context kernel_factory)
-elseif(WITH_ROCM)
-  hip_library(math_cuda SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory pten_transpose_cuda)
-  hip_library(linalg_cuda SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory)
-  hip_library(utils_cuda SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
-  hip_library(manipulation_cuda SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_cuda unary)
-  hip_library(scale_kernel_cuda SRCS scale_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
-  hip_library(full_kernel_cuda SRCS full_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
-  hip_library(conj_kernel_cuda SRCS conj_kernel.cu DEPS dense_tensor kernel_context kernel_factory)
-endif()
diff --git a/paddle/pten/kernels/gpu/CMakeLists.txt b/paddle/pten/kernels/gpu/CMakeLists.txt
new file mode 100644
index 0000000000..11ff1608b8
--- /dev/null
+++ b/paddle/pten/kernels/gpu/CMakeLists.txt
@@ -0,0 +1,17 @@
+if(WITH_GPU)
+  nv_library(math_gpu SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory pten_transpose_gpu)
+  nv_library(linalg_gpu SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory)
+  nv_library(utils_gpu SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
+  nv_library(manipulation_gpu SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_gpu unary)
+  nv_library(scale_kernel_gpu SRCS scale_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
+  nv_library(full_kernel_gpu SRCS full_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
+  nv_library(conj_kernel_gpu SRCS conj_kernel.cu DEPS dense_tensor kernel_context kernel_factory)
+elseif(WITH_ROCM)
+  hip_library(math_gpu SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory pten_transpose_gpu)
+  hip_library(linalg_gpu SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory)
+  hip_library(utils_gpu SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
+  hip_library(manipulation_gpu SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_gpu unary)
+  hip_library(scale_kernel_gpu SRCS scale_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
+  hip_library(full_kernel_gpu SRCS full_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
+  hip_library(conj_kernel_gpu SRCS conj_kernel.cu DEPS dense_tensor kernel_context kernel_factory)
+endif()
diff --git a/paddle/pten/kernels/cuda/conj_kernel.cu b/paddle/pten/kernels/gpu/conj_kernel.cu
similarity index 81%
rename from paddle/pten/kernels/cuda/conj_kernel.cu
rename to paddle/pten/kernels/gpu/conj_kernel.cu
index f3d2296f56..cb4fef883f 100644
--- a/paddle/pten/kernels/cuda/conj_kernel.cu
+++ b/paddle/pten/kernels/gpu/conj_kernel.cu
@@ -12,23 +12,23 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/pten/kernels/cuda/conj_kernel.h"
+#include "paddle/pten/kernels/gpu/conj_kernel.h"
 
-#include "paddle/pten/backends/cuda/cuda_context.h"
+#include "paddle/pten/backends/gpu/gpu_context.h"
 #include "paddle/pten/core/kernel_registry.h"
 #include "paddle/pten/kernels/hybird/math/conj_impl.h"
 
 namespace pten {
 
 template <typename T>
-void Conj(const CUDAContext& dev_ctx, const DenseTensor& x, DenseTensor* out) {
-  ConjImpl<T, CUDAContext>(dev_ctx, x, out);
+void Conj(const GPUContext& dev_ctx, const DenseTensor& x, DenseTensor* out) {
+  ConjImpl<T, GPUContext>(dev_ctx, x, out);
 }
 
 }  // namespace pten
 
 PT_REGISTER_KERNEL(conj,
-                   CUDA,
+                   GPU,
                    ALL_LAYOUT,
                    pten::Conj,
                    paddle::platform::complex<float>,
diff --git a/paddle/pten/kernels/cuda/conj_kernel.h b/paddle/pten/kernels/gpu/conj_kernel.h
similarity index 86%
rename from paddle/pten/kernels/cuda/conj_kernel.h
rename to paddle/pten/kernels/gpu/conj_kernel.h
index 8ed0049d87..7541f9290d 100644
--- a/paddle/pten/kernels/cuda/conj_kernel.h
+++ b/paddle/pten/kernels/gpu/conj_kernel.h
@@ -17,13 +17,13 @@ limitations under the License. */
 
 // CUDA and HIP use same api
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 
-#include "paddle/pten/backends/cuda/cuda_context.h"
+#include "paddle/pten/backends/gpu/gpu_context.h"
 #include "paddle/pten/core/dense_tensor.h"
 
 namespace pten {
 
 template <typename T>
-void Conj(const CUDAContext& dev_ctx, const DenseTensor& x, DenseTensor* out);
+void Conj(const GPUContext& dev_ctx, const DenseTensor& x, DenseTensor* out);
 
 }  // namespace pten
diff --git a/paddle/pten/kernels/cuda/full_kernel.cu b/paddle/pten/kernels/gpu/full_kernel.cu
similarity index 93%
rename from paddle/pten/kernels/cuda/full_kernel.cu
rename to paddle/pten/kernels/gpu/full_kernel.cu
index 8a6639a2dc..16389d7749 100644
--- a/paddle/pten/kernels/cuda/full_kernel.cu
+++ b/paddle/pten/kernels/gpu/full_kernel.cu
@@ -14,12 +14,12 @@ limitations under the License. */
 
 #include "paddle/pten/kernels/full_kernel.h"
 
-#include "paddle/pten/backends/cuda/cuda_context.h"
+#include "paddle/pten/backends/gpu/gpu_context.h"
 #include "paddle/pten/core/kernel_registry.h"
 #include "paddle/pten/kernels/impl/full_kernel_impl.h"
 
 PT_REGISTER_CTX_KERNEL(full,
-                       CUDA,
+                       GPU,
                        ALL_LAYOUT,
                        pten::Full,
                        float,
@@ -34,7 +34,7 @@ PT_REGISTER_CTX_KERNEL(full,
                        paddle::platform::complex<double>) {}
 
 PT_REGISTER_CTX_KERNEL(full_like,
-                       CUDA,
+                       GPU,
                        ALL_LAYOUT,
                        pten::FullLike,
                        float,
diff --git a/paddle/pten/kernels/cuda/linalg.cu b/paddle/pten/kernels/gpu/linalg.cu
similarity index 90%
rename from paddle/pten/kernels/cuda/linalg.cu
rename to paddle/pten/kernels/gpu/linalg.cu
index da6511e2c8..c9bc4cbd07 100644
--- a/paddle/pten/kernels/cuda/linalg.cu
+++ b/paddle/pten/kernels/gpu/linalg.cu
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/pten/kernels/cuda/linalg.h" +#include "paddle/pten/kernels/gpu/linalg.h" #include "paddle/pten/core/kernel_registry.h" #include "paddle/pten/kernels/hybird/eigen/dot.h" @@ -24,15 +24,15 @@ namespace pten { template -void Dot(const CUDAContext& dev_ctx, +void Dot(const GPUContext& dev_ctx, const DenseTensor& x, const DenseTensor& y, DenseTensor* out) { - eigen::Dot(dev_ctx, x, y, out); + eigen::Dot(dev_ctx, x, y, out); } template -void Matmul(const CUDAContext& dev_ctx, +void Matmul(const GPUContext& dev_ctx, const DenseTensor& x, const DenseTensor& y, bool transpose_x, @@ -48,7 +48,7 @@ void Matmul(const CUDAContext& dev_ctx, paddle::platform::errors::InvalidArgument( "The Input(Y) dims size must not be equal 0," " but reviced dims size is 0. ")); - math::MatMulFunction( + math::MatMulFunction( dev_ctx, x, y, out, transpose_x, transpose_y); } @@ -59,7 +59,7 @@ using complex64 = ::paddle::platform::complex; using complex128 = ::paddle::platform::complex; PT_REGISTER_KERNEL(dot, - CUDA, + GPU, ALL_LAYOUT, pten::Dot, float, @@ -70,7 +70,7 @@ PT_REGISTER_KERNEL(dot, complex128) {} PT_REGISTER_KERNEL(matmul, - CUDA, + GPU, ALL_LAYOUT, pten::Matmul, float, diff --git a/paddle/pten/kernels/cuda/linalg.h b/paddle/pten/kernels/gpu/linalg.h similarity index 89% rename from paddle/pten/kernels/cuda/linalg.h rename to paddle/pten/kernels/gpu/linalg.h index 84f48ca609..a848f55c7b 100644 --- a/paddle/pten/kernels/cuda/linalg.h +++ b/paddle/pten/kernels/gpu/linalg.h @@ -17,19 +17,19 @@ // CUDA and HIP use same api #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -#include "paddle/pten/backends/cuda/cuda_context.h" +#include "paddle/pten/backends/gpu/gpu_context.h" #include "paddle/pten/core/dense_tensor.h" namespace pten { template -void Dot(const CUDAContext& dev_ctx, +void Dot(const GPUContext& dev_ctx, const DenseTensor& x, const DenseTensor& y, DenseTensor* out); template -void Matmul(const CUDAContext& dev_ctx, +void Matmul(const GPUContext& dev_ctx, const DenseTensor& x, const DenseTensor& y, bool transpose_x, diff --git a/paddle/pten/kernels/cuda/manipulation.cu b/paddle/pten/kernels/gpu/manipulation.cu similarity index 88% rename from paddle/pten/kernels/cuda/manipulation.cu rename to paddle/pten/kernels/gpu/manipulation.cu index 49bbf1b61c..5a82e3e030 100644 --- a/paddle/pten/kernels/cuda/manipulation.cu +++ b/paddle/pten/kernels/gpu/manipulation.cu @@ -14,15 +14,15 @@ #include "paddle/pten/api/ext/dispatch.h" #include "paddle/pten/infermeta/unary.h" -#include "paddle/pten/kernels/cuda/manipulation.h" -#include "paddle/pten/kernels/cuda/utils.h" +#include "paddle/pten/kernels/gpu/manipulation.h" +#include "paddle/pten/kernels/gpu/utils.h" #include "paddle/pten/kernels/hybird/cuda/cast_kernel_impl.h" #include "paddle/pten/kernels/hybird/general/manipulation.h" namespace pten { template -void Flatten(const CUDAContext& dev_ctx, +void Flatten(const GPUContext& dev_ctx, const DenseTensor& x, int start_axis, int stop_axis, @@ -36,7 +36,7 @@ void Flatten(const CUDAContext& dev_ctx, // Output Tensor, // is there a more flexible way to deal with this case? 
template -void FlattenWithXShape(const CUDAContext& dev_ctx, +void FlattenWithXShape(const GPUContext& dev_ctx, const DenseTensor& x, int start_axis, int stop_axis, @@ -46,7 +46,7 @@ void FlattenWithXShape(const CUDAContext& dev_ctx, general::SetXShape(x, xshape); } -void Reshape(const CUDAContext& dev_ctx, +void Reshape(const GPUContext& dev_ctx, const DenseTensor& x, const ScalarArray& shape, DenseTensor* out) { @@ -60,7 +60,7 @@ void Reshape(const CUDAContext& dev_ctx, out->ResetLoD(x.lod()); } -void ReshapeWithXShape(const CUDAContext& dev_ctx, +void ReshapeWithXShape(const GPUContext& dev_ctx, const DenseTensor& x, const ScalarArray& shape, DenseTensor* xshape, @@ -70,7 +70,7 @@ void ReshapeWithXShape(const CUDAContext& dev_ctx, } template -void Cast(const CUDAContext& dev_ctx, +void Cast(const GPUContext& dev_ctx, const DenseTensor& x, DataType out_dtype, DataType in_dtype, @@ -85,7 +85,7 @@ void Cast(const CUDAContext& dev_ctx, using float16 = paddle::platform::float16; PT_REGISTER_KERNEL(flatten, - CUDA, + GPU, ALL_LAYOUT, pten::Flatten, float, @@ -96,7 +96,7 @@ PT_REGISTER_KERNEL(flatten, int, int64_t) {} PT_REGISTER_KERNEL(flatten_with_xshape, - CUDA, + GPU, ALL_LAYOUT, pten::FlattenWithXShape, float, @@ -108,7 +108,7 @@ PT_REGISTER_KERNEL(flatten_with_xshape, #define PTEN_REGISTER_CAST_CUDA_BASE_TYPE(op_name, ...) \ PT_REGISTER_KERNEL(cast, \ - CUDA, \ + GPU, \ ALL_LAYOUT, \ pten::Cast, \ float, \ @@ -132,6 +132,6 @@ PTEN_REGISTER_CAST_CUDA_BASE_TYPE(cast, paddle::platform::bfloat16) PTEN_REGISTER_CAST_CUDA_BASE_TYPE(cast) #endif -PT_REGISTER_NO_TEMPLATE_KERNEL(reshape, CUDA, ANY, pten::Reshape, ALL_DTYPE) {} +PT_REGISTER_NO_TEMPLATE_KERNEL(reshape, GPU, ANY, pten::Reshape, ALL_DTYPE) {} PT_REGISTER_NO_TEMPLATE_KERNEL( - reshape_with_xshape, CUDA, ANY, pten::ReshapeWithXShape, ALL_DTYPE) {} + reshape_with_xshape, GPU, ANY, pten::ReshapeWithXShape, ALL_DTYPE) {} diff --git a/paddle/pten/kernels/cuda/manipulation.h b/paddle/pten/kernels/gpu/manipulation.h similarity index 86% rename from paddle/pten/kernels/cuda/manipulation.h rename to paddle/pten/kernels/gpu/manipulation.h index 165b08ad59..b47fadd70b 100644 --- a/paddle/pten/kernels/cuda/manipulation.h +++ b/paddle/pten/kernels/gpu/manipulation.h @@ -17,7 +17,7 @@ // CUDA and HIP use same api #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -#include "paddle/pten/backends/cuda/cuda_context.h" +#include "paddle/pten/backends/gpu/gpu_context.h" #include "paddle/pten/common/scalar_array.h" #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/core/kernel_registry.h" @@ -25,25 +25,25 @@ namespace pten { template -void Flatten(const CUDAContext& dev_ctx, +void Flatten(const GPUContext& dev_ctx, const DenseTensor& x, int start_axis, int stop_axis, DenseTensor* out); template -void Cast(const CUDAContext& dev_ctx, +void Cast(const GPUContext& dev_ctx, const DenseTensor& x, DataType out_dtype, DataType in_dtype, DenseTensor* out); -void Reshape(const CUDAContext& dev_ctx, +void Reshape(const GPUContext& dev_ctx, const DenseTensor& x, const ScalarArray& shape, DenseTensor* out); -void ReshapeWithXShape(const CUDAContext& dev_ctx, +void ReshapeWithXShape(const GPUContext& dev_ctx, const DenseTensor& x, const ScalarArray& shape, DenseTensor* xshape, diff --git a/paddle/pten/kernels/cuda/math.cu b/paddle/pten/kernels/gpu/math.cu similarity index 89% rename from paddle/pten/kernels/cuda/math.cu rename to paddle/pten/kernels/gpu/math.cu index 3dacc01e8b..59d816d237 100644 --- a/paddle/pten/kernels/cuda/math.cu +++ 
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/pten/kernels/cuda/math.h"
+#include "paddle/pten/kernels/gpu/math.h"
 
 #include "paddle/fluid/operators/reduce_ops/reduce_functor_op.h"
 #include "paddle/pten/kernels/hybird/cuda/elementwise/elementwise.h"
@@ -58,12 +58,12 @@ struct DivideFunctor {
  */
 
 template <typename T>
-void Sign(const CUDAContext& dev_ctx, const DenseTensor& x, DenseTensor* out) {
-  eigen::Sign<CUDAContext, T>(dev_ctx, x, out);
+void Sign(const GPUContext& dev_ctx, const DenseTensor& x, DenseTensor* out) {
+  eigen::Sign<GPUContext, T>(dev_ctx, x, out);
 }
 
 template <typename T>
-void Mean(const CUDAContext& dev_ctx,
+void Mean(const GPUContext& dev_ctx,
           const DenseTensor& x,
           const std::vector<int64_t>& dims,
           bool keep_dim,
@@ -84,7 +84,7 @@ DEFINE_CUDA_ELEMENTWISE_OP(Multiply)
 DEFINE_CUDA_ELEMENTWISE_OP(Divide)
 
 template <typename T>
-void Sum(const CUDAContext& dev_ctx,
+void Sum(const GPUContext& dev_ctx,
          const DenseTensor& x,
          const std::vector<int64_t>& dims,
          bool keep_dim,
@@ -101,11 +101,10 @@ using float16 = paddle::platform::float16;
 using complex64 = ::paddle::platform::complex<float>;
 using complex128 = ::paddle::platform::complex<double>;
 
-PT_REGISTER_KERNEL(sign, CUDA, ALL_LAYOUT, pten::Sign, float, double, float16) {
-}
-PT_REGISTER_KERNEL(mean, CUDA, ALL_LAYOUT, pten::Mean, float, double, bool) {}
+PT_REGISTER_KERNEL(sign, GPU, ALL_LAYOUT, pten::Sign, float, double, float16) {}
+PT_REGISTER_KERNEL(mean, GPU, ALL_LAYOUT, pten::Mean, float, double, bool) {}
 PT_REGISTER_KERNEL(add,
-                   CUDA,
+                   GPU,
                    ALL_LAYOUT,
                    pten::Add,
                    float,
@@ -116,7 +115,7 @@ PT_REGISTER_KERNEL(add,
                    complex64,
                    complex128) {}
 PT_REGISTER_KERNEL(subtract,
-                   CUDA,
+                   GPU,
                    ALL_LAYOUT,
                    pten::Subtract,
                    float,
@@ -127,7 +126,7 @@ PT_REGISTER_KERNEL(subtract,
                    complex64,
                    complex128) {}
 PT_REGISTER_KERNEL(divide,
-                   CUDA,
+                   GPU,
                    ALL_LAYOUT,
                    pten::Divide,
                    float,
@@ -138,7 +137,7 @@ PT_REGISTER_KERNEL(divide,
                    complex64,
                    complex128) {}
 PT_REGISTER_KERNEL(multiply,
-                   CUDA,
+                   GPU,
                    ALL_LAYOUT,
                    pten::Multiply,
                    float,
@@ -150,7 +149,7 @@ PT_REGISTER_KERNEL(multiply,
                    complex64,
                    complex128) {}
 PT_REGISTER_KERNEL(sum,
-                   CUDA,
+                   GPU,
                    ALL_LAYOUT,
                    pten::Sum,
                    bool,
diff --git a/paddle/pten/kernels/cuda/math.h b/paddle/pten/kernels/gpu/math.h
similarity index 86%
rename from paddle/pten/kernels/cuda/math.h
rename to paddle/pten/kernels/gpu/math.h
index 9cb379bcf7..5a872542fb 100644
--- a/paddle/pten/kernels/cuda/math.h
+++ b/paddle/pten/kernels/gpu/math.h
@@ -17,17 +17,17 @@ limitations under the License. */
 // CUDA and HIP use same api
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 
-#include "paddle/pten/backends/cuda/cuda_context.h"
+#include "paddle/pten/backends/gpu/gpu_context.h"
 #include "paddle/pten/common/scalar.h"
 #include "paddle/pten/core/dense_tensor.h"
 
 namespace pten {
 
 template <typename T>
-void Sign(const CUDAContext& dev_ctx, const DenseTensor& x, DenseTensor* out);
+void Sign(const GPUContext& dev_ctx, const DenseTensor& x, DenseTensor* out);
 
 template <typename T>
-void Mean(const CUDAContext& dev_ctx,
+void Mean(const GPUContext& dev_ctx,
           const DenseTensor& x,
           const std::vector<int64_t>& dims,
           bool keep_dim,
@@ -35,35 +35,35 @@ void Mean(const CUDAContext& dev_ctx,
           DenseTensor* out);
 
 template <typename T>
-void Add(const CUDAContext& dev_ctx,
+void Add(const GPUContext& dev_ctx,
          const DenseTensor& x,
          const DenseTensor& y,
          int axis,
          DenseTensor* out);
 
 template <typename T>
-void Subtract(const CUDAContext& dev_ctx,
+void Subtract(const GPUContext& dev_ctx,
               const DenseTensor& x,
               const DenseTensor& y,
               int axis,
               DenseTensor* out);
 
 template <typename T>
-void Divide(const CUDAContext& dev_ctx,
+void Divide(const GPUContext& dev_ctx,
             const DenseTensor& x,
             const DenseTensor& y,
             int axis,
             DenseTensor* out);
 
 template <typename T>
-void Multiply(const CUDAContext& dev_ctx,
+void Multiply(const GPUContext& dev_ctx,
              const DenseTensor& x,
              const DenseTensor& y,
              int axis,
              DenseTensor* out);
 
 template <typename T>
-void Sum(const CUDAContext& dev_ctx,
+void Sum(const GPUContext& dev_ctx,
          const DenseTensor& x,
          const std::vector<int64_t>& dims,
          bool keep_dim,
@@ -75,7 +75,7 @@ void Sum(const CUDAContext& dev_ctx,
 
 #define DEFINE_CUDA_ELEMENTWISE_OP(name)   \
   template <typename T>                    \
-  void name(const CUDAContext& dev_ctx,    \
+  void name(const GPUContext& dev_ctx,     \
             const DenseTensor& x,          \
             const DenseTensor& y,          \
             int axis,                      \
diff --git a/paddle/pten/kernels/cuda/scale_kernel.cu b/paddle/pten/kernels/gpu/scale_kernel.cu
similarity index 93%
rename from paddle/pten/kernels/cuda/scale_kernel.cu
rename to paddle/pten/kernels/gpu/scale_kernel.cu
index 904976ae95..e67fd4cfdc 100644
--- a/paddle/pten/kernels/cuda/scale_kernel.cu
+++ b/paddle/pten/kernels/gpu/scale_kernel.cu
@@ -14,7 +14,7 @@ limitations under the License. */
 
 #include "paddle/pten/kernels/scale_kernel.h"
 
-#include "paddle/pten/backends/cuda/cuda_context.h"
+#include "paddle/pten/backends/gpu/gpu_context.h"
 #include "paddle/pten/core/kernel_registry.h"
 #include "paddle/pten/kernels/impl/scale_kernel_impl.h"
 
@@ -22,7 +22,7 @@ limitations under the License. */
 #include "paddle/fluid/platform/float16.h"
 
 PT_REGISTER_CTX_KERNEL(scale,
-                       CUDA,
+                       GPU,
                        ALL_LAYOUT,
                        pten::Scale,
                        float,
diff --git a/paddle/pten/kernels/cuda/utils.cu b/paddle/pten/kernels/gpu/utils.cu
similarity index 98%
rename from paddle/pten/kernels/cuda/utils.cu
rename to paddle/pten/kernels/gpu/utils.cu
index cf1407e720..4d080be11e 100644
--- a/paddle/pten/kernels/cuda/utils.cu
+++ b/paddle/pten/kernels/gpu/utils.cu
@@ -16,11 +16,11 @@ limitations under the License. */
*/ #include "paddle/pten/common/data_type.h" #include "paddle/pten/core/convert_utils.h" #include "paddle/pten/core/kernel_registry.h" -#include "paddle/pten/kernels/cuda/utils.h" +#include "paddle/pten/kernels/gpu/utils.h" namespace pten { -void Copy(const CUDAContext& dev_ctx, +void Copy(const GPUContext& dev_ctx, const DenseTensor& src, bool blocking, DenseTensor* dst) { @@ -234,4 +234,4 @@ void Copy(const CUDAContext& dev_ctx, } } // namespace pten -PT_REGISTER_NO_TEMPLATE_KERNEL(copy, CUDA, ALL_LAYOUT, pten::Copy, ALL_DTYPE) {} +PT_REGISTER_NO_TEMPLATE_KERNEL(copy, GPU, ALL_LAYOUT, pten::Copy, ALL_DTYPE) {} diff --git a/paddle/pten/kernels/cuda/utils.h b/paddle/pten/kernels/gpu/utils.h similarity index 91% rename from paddle/pten/kernels/cuda/utils.h rename to paddle/pten/kernels/gpu/utils.h index bd29347e31..3a455ad70c 100644 --- a/paddle/pten/kernels/cuda/utils.h +++ b/paddle/pten/kernels/gpu/utils.h @@ -17,13 +17,13 @@ limitations under the License. */ // CUDA and HIP use same api #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -#include "paddle/pten/backends/cuda/cuda_context.h" +#include "paddle/pten/backends/gpu/gpu_context.h" #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/core/kernel_registry.h" namespace pten { -void Copy(const CUDAContext& dev_ctx, +void Copy(const GPUContext& dev_ctx, const DenseTensor& src, bool blocking, DenseTensor* dst); diff --git a/paddle/pten/kernels/hybird/CMakeLists.txt b/paddle/pten/kernels/hybird/CMakeLists.txt index 9d4d86f067..1304aa1798 100644 --- a/paddle/pten/kernels/hybird/CMakeLists.txt +++ b/paddle/pten/kernels/hybird/CMakeLists.txt @@ -4,7 +4,7 @@ add_subdirectory(general) cc_library(pten_transpose_cpu SRCS transpose.cc DEPS dense_tensor pten_context) if(WITH_GPU) - nv_library(pten_transpose_cuda SRCS transpose.cu DEPS dense_tensor malloc pten_context) + nv_library(pten_transpose_gpu SRCS transpose.cu DEPS dense_tensor malloc pten_context) elseif(WITH_ROCM) - hip_library(pten_transpose_cuda SRCS transpose.cu DEPS dense_tensor malloc pten_context) + hip_library(pten_transpose_gpu SRCS transpose.cu DEPS dense_tensor malloc pten_context) endif() diff --git a/paddle/pten/kernels/hybird/cuda/cast_kernel_impl.h b/paddle/pten/kernels/hybird/cuda/cast_kernel_impl.h index 54a584d78d..d8c58448c9 100644 --- a/paddle/pten/kernels/hybird/cuda/cast_kernel_impl.h +++ b/paddle/pten/kernels/hybird/cuda/cast_kernel_impl.h @@ -15,7 +15,7 @@ #pragma once #include "paddle/fluid/platform/device/gpu/gpu_helper.h" #include "paddle/fluid/platform/float16.h" -#include "paddle/pten/backends/cuda/cuda_context.h" +#include "paddle/pten/backends/gpu/gpu_context.h" #include "paddle/pten/core/dense_tensor.h" #include "paddle/fluid/platform/aligned_vector.h" @@ -50,7 +50,7 @@ __global__ void CastCUDAKernel(const InT* in, const int64_t N, OutT* out) { } template -void CastCUDAKernelImpl(const CUDAContext& dev_ctx, +void CastCUDAKernelImpl(const GPUContext& dev_ctx, const DenseTensor& x, DenseTensor* out) { auto* in_data = x.data(); diff --git a/paddle/pten/kernels/hybird/cuda/reduce/reduce.h b/paddle/pten/kernels/hybird/cuda/reduce/reduce.h index f55d483de1..793e8505ec 100644 --- a/paddle/pten/kernels/hybird/cuda/reduce/reduce.h +++ b/paddle/pten/kernels/hybird/cuda/reduce/reduce.h @@ -17,7 +17,7 @@ // CUDA and HIP use same api #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -#include "paddle/pten/backends/cuda/cuda_context.h" +#include "paddle/pten/backends/gpu/gpu_context.h" #include "paddle/pten/common/scalar.h" #include 
"paddle/pten/core/dense_tensor.h" #include "paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h" @@ -49,7 +49,7 @@ static inline std::vector GetReduceDim( } template class ReduceFunctor> -void Reduce(const CUDAContext& dev_ctx, +void Reduce(const GPUContext& dev_ctx, const DenseTensor& x, bool reduce_all, const std::vector& dims, diff --git a/paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h b/paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h index e7aecf3b27..bdb883c1df 100644 --- a/paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h +++ b/paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h @@ -42,7 +42,7 @@ namespace cub = hipcub; #include "paddle/fluid/operators/kernel_primitives/compute_primitives.h" #include "paddle/pten/api/ext/dispatch.h" #include "paddle/pten/api/include/tensor.h" -#include "paddle/pten/kernels/cuda/utils.h" +#include "paddle/pten/kernels/gpu/utils.h" #include "paddle/pten/kernels/hybird/math/cast_func.h" // Reduce split or not, Whether to use ReduceHigherDim @@ -820,7 +820,7 @@ void TensorReduceFunctorImpl(const pten::DenseTensor& x, y->Resize(out_dims); } else { PD_VISIT_ALL_TYPES(y->dtype(), "CastKernelImpl", ([&] { - pten::math::CastKernelImpl( + pten::math::CastKernelImpl( *dev_ctx, x, y); })); } diff --git a/paddle/pten/kernels/hybird/general/elementwise_base.h b/paddle/pten/kernels/hybird/general/elementwise_base.h index 827af86812..20154a8744 100644 --- a/paddle/pten/kernels/hybird/general/elementwise_base.h +++ b/paddle/pten/kernels/hybird/general/elementwise_base.h @@ -132,11 +132,11 @@ class MidWiseTransformIterator #if defined(__NVCC__) || defined(__HIPCC__) template -class RowwiseTransformIterator - : public thrust::iterator_adaptor, +class RowwiseTransformIterator + : public thrust::iterator_adaptor, const T *> { public: - typedef thrust::iterator_adaptor, + typedef thrust::iterator_adaptor, const T *> super_t; HOSTDEVICE RowwiseTransformIterator(const T *x, int n) @@ -152,11 +152,11 @@ class RowwiseTransformIterator }; template -class MidWiseTransformIterator - : public thrust::iterator_adaptor, +class MidWiseTransformIterator + : public thrust::iterator_adaptor, const T *> { public: - typedef thrust::iterator_adaptor, + typedef thrust::iterator_adaptor, const T *> super_t; HOSTDEVICE MidWiseTransformIterator(const T *x, int n, int post) diff --git a/paddle/pten/kernels/hybird/transpose.cu b/paddle/pten/kernels/hybird/transpose.cu index 5c5bd29117..195277c216 100644 --- a/paddle/pten/kernels/hybird/transpose.cu +++ b/paddle/pten/kernels/hybird/transpose.cu @@ -14,7 +14,7 @@ #include "paddle/fluid/framework/ddim.h" #include "paddle/fluid/memory/memcpy.h" -#include "paddle/pten/backends/cuda/cuda_context.h" +#include "paddle/pten/backends/gpu/gpu_context.h" #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/kernels/hybird/math/cast_func.h" #include "paddle/pten/kernels/hybird/transpose.h" @@ -52,9 +52,9 @@ __global__ void TransposeNormalKernel(const T* in_ptr, } template -struct TransposeNormal { +struct TransposeNormal { // for dims >= 7 situation - void operator()(const CUDAContext& dev_ctx, + void operator()(const GPUContext& dev_ctx, const pten::DenseTensor& in, pten::DenseTensor* out, const std::vector& axis) { @@ -106,7 +106,7 @@ struct TransposeNormal { // define transpose normal #define DEFINE_GPU_TRANS_NORMAL(TYPE) \ - template struct TransposeNormal + template struct TransposeNormal DEFINE_GPU_TRANS_NORMAL(bool); DEFINE_GPU_TRANS_NORMAL(int8_t); diff --git a/paddle/pten/tests/api/scale_api.h 
index 1defbd02dd..d525b305c7 100644
--- a/paddle/pten/tests/api/scale_api.h
+++ b/paddle/pten/tests/api/scale_api.h
@@ -142,13 +142,13 @@ static void ScaleCPU(DataType kernel_dtype,
 }
 
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-static void ScaleCUDA(DataType kernel_dtype,
-                      const pten::CUDAContext& dev_ctx,
-                      const pten::DenseTensor& x,
-                      const Scalar& scale,
-                      float bias,
-                      bool bias_after_scale,
-                      pten::DenseTensor* dense_out) {
+static void ScaleGPU(DataType kernel_dtype,
+                     const pten::GPUContext& dev_ctx,
+                     const pten::DenseTensor& x,
+                     const Scalar& scale,
+                     float bias,
+                     bool bias_after_scale,
+                     pten::DenseTensor* dense_out) {
   switch (kernel_dtype) {
     case pten::DataType::FLOAT64: {
      pten::Scale<double>(
@@ -255,14 +255,14 @@ Tensor scale_switch_case(const Tensor& x,
                dense_out.get());
      break;
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-    case Backend::CUDA:
-      ScaleCUDA(kernel_data_type,
-                static_cast<const pten::CUDAContext&>(*dev_ctx),
-                *dense_x,
-                scale,
-                bias,
-                bias_after_scale,
-                dense_out.get());
+    case Backend::GPU:
+      ScaleGPU(kernel_data_type,
+               static_cast<const pten::GPUContext&>(*dev_ctx),
+               *dense_x,
+               scale,
+               bias,
+               bias_after_scale,
+               dense_out.get());
      break;
 #endif
     default:
diff --git a/paddle/pten/tests/api/test_matmul_api.cc b/paddle/pten/tests/api/test_matmul_api.cc
index 01ca4aad64..e29fa11d58 100644
--- a/paddle/pten/tests/api/test_matmul_api.cc
+++ b/paddle/pten/tests/api/test_matmul_api.cc
@@ -20,7 +20,7 @@ limitations under the License. */
 #include "paddle/pten/api/lib/utils/allocator.h"
 #include "paddle/pten/core/dense_tensor.h"
 #include "paddle/pten/core/kernel_registry.h"
-#include "paddle/pten/kernels/cuda/utils.h"
+#include "paddle/pten/kernels/gpu/utils.h"
 
 namespace paddle {
 namespace tests {
diff --git a/paddle/pten/tests/api/test_tensor_utils.cc b/paddle/pten/tests/api/test_tensor_utils.cc
index c445e18679..b59cee5dc7 100644
--- a/paddle/pten/tests/api/test_tensor_utils.cc
+++ b/paddle/pten/tests/api/test_tensor_utils.cc
@@ -110,7 +110,7 @@ TEST(PtenUtils, VarToPtTensor) {
   pten::Backend expect_backend = pten::Backend::CPU;
 
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-  expect_backend = pten::Backend::CUDA;
+  expect_backend = pten::Backend::GPU;
 #endif
   auto tensor_def = pten::TensorArgDef(
      expect_backend, pten::DataLayout::NCHW, pten::DataType::INT32);
diff --git a/paddle/pten/tests/api/test_to_api.cc b/paddle/pten/tests/api/test_to_api.cc
index 5bb6f386c7..47e8ff7c2c 100644
--- a/paddle/pten/tests/api/test_to_api.cc
+++ b/paddle/pten/tests/api/test_to_api.cc
@@ -64,7 +64,7 @@ TEST(API, copy_to) {
 
   // 2. test API
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-  auto tmp = paddle::experimental::copy_to(x, pten::Backend::CUDA, false);
+  auto tmp = paddle::experimental::copy_to(x, pten::Backend::GPU, false);
   auto out = paddle::experimental::copy_to(tmp, pten::Backend::CPU, true);
 #else
   auto out = paddle::experimental::copy_to(x, pten::Backend::CPU, false);
@@ -80,7 +80,7 @@ TEST(Tensor, copy_to) {
 
   // 2. test API
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-  auto tmp = x.copy_to(pten::Backend::CUDA, false);
+  auto tmp = x.copy_to(pten::Backend::GPU, false);
   auto out = tmp.copy_to(pten::Backend::CPU, true);
 #else
   auto out = x.copy_to(pten::Backend::CPU, false);
diff --git a/paddle/pten/tests/common/test_backend.cc b/paddle/pten/tests/common/test_backend.cc
index 148785d4b6..8b006ee0aa 100644
--- a/paddle/pten/tests/common/test_backend.cc
+++ b/paddle/pten/tests/common/test_backend.cc
@@ -29,8 +29,8 @@ TEST(Backend, OStream) {
   oss << pten::Backend::CPU;
   EXPECT_EQ(oss.str(), "CPU");
   oss.str("");
-  oss << pten::Backend::CUDA;
-  EXPECT_EQ(oss.str(), "CUDA");
+  oss << pten::Backend::GPU;
+  EXPECT_EQ(oss.str(), "GPU");
   oss.str("");
   oss << pten::Backend::XPU;
   EXPECT_EQ(oss.str(), "XPU");
-- 
GitLab