Unverified commit dc7597e3, authored by Chen Weihang, committed via GitHub

[PTen] Rename cuda dir and context to gpu (#38296)

* rename cuda to gpu

* revert CMake change

* resolve conflict

* rename other cuda to gpu

* polish details
Parent aff43684
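
Before the per-file hunks, here is the pattern this commit applies across the PTen kernel layer, condensed into a minimal before/after sketch (the identifiers are taken from the hunks below; the surrounding lines are illustrative only, not the actual file contents):

// Before: the pten-facing context alias, backend enumerator, and kernel
// registrations were named after CUDA.
namespace pten {
using CUDAContext = paddle::platform::CUDADeviceContext;
}  // namespace pten
PT_REGISTER_KERNEL(conj, CUDA, ALL_LAYOUT, pten::Conj, paddle::platform::complex<float>) {}

// After: the same entities use the vendor-neutral name GPU; the underlying
// fluid CUDADeviceContext type itself is unchanged.
namespace pten {
using GPUContext = paddle::platform::CUDADeviceContext;
}  // namespace pten
PT_REGISTER_KERNEL(conj, GPU, ALL_LAYOUT, pten::Conj, paddle::platform::complex<float>) {}

Directory and header paths move the same way: paddle/pten/kernels/cuda/ becomes paddle/pten/kernels/gpu/, paddle/pten/backends/cuda/cuda_context.h becomes paddle/pten/backends/gpu/gpu_context.h, and the corresponding CMake targets are renamed from *_cuda to *_gpu.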
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
#include "paddle/pten/api/lib/utils/tensor_utils.h" #include "paddle/pten/api/lib/utils/tensor_utils.h"
#include "paddle/pten/include/core.h" #include "paddle/pten/include/core.h"
#include "paddle/pten/kernels/cpu/conj_kernel.h" #include "paddle/pten/kernels/cpu/conj_kernel.h"
#include "paddle/pten/kernels/cuda/conj_kernel.h" #include "paddle/pten/kernels/gpu/conj_kernel.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
......
...@@ -27,7 +27,7 @@ set(PTEN_DEPS convert_utils dense_tensor pten_context kernel_factory kernel_cont ...@@ -27,7 +27,7 @@ set(PTEN_DEPS convert_utils dense_tensor pten_context kernel_factory kernel_cont
set(PTEN_DEPS ${PTEN_DEPS} math_cpu linalg_cpu manipulation_cpu conj_kernel_cpu scale_kernel_cpu full_kernel_cpu) set(PTEN_DEPS ${PTEN_DEPS} math_cpu linalg_cpu manipulation_cpu conj_kernel_cpu scale_kernel_cpu full_kernel_cpu)
set(PTEN_DEPS ${PTEN_DEPS} nary unary binary) set(PTEN_DEPS ${PTEN_DEPS} nary unary binary)
if(WITH_GPU OR WITH_ROCM) if(WITH_GPU OR WITH_ROCM)
set(PTEN_DEPS ${PTEN_DEPS} math_cuda linalg_cuda manipulation_cuda conj_kernel_cuda scale_kernel_cuda full_kernel_cuda) set(PTEN_DEPS ${PTEN_DEPS} math_gpu linalg_gpu manipulation_gpu conj_kernel_gpu scale_kernel_gpu full_kernel_gpu)
endif() endif()
if(WITH_XPU) if(WITH_XPU)
set(PTEN_DEPS ${PTEN_DEPS} manipulation_xpu) set(PTEN_DEPS ${PTEN_DEPS} manipulation_xpu)
......
...@@ -464,7 +464,7 @@ class PADDLE_API Tensor final { ...@@ -464,7 +464,7 @@ class PADDLE_API Tensor final {
* unified to Tensor, but Tensor itself is heterogeneous. * unified to Tensor, but Tensor itself is heterogeneous.
* *
* Tensor can generally be represented by void* and size_t, place. * Tensor can generally be represented by void* and size_t, place.
* This is suitable for most scenarios including CPU, CUDA, HIP, CPU, etc., * This is suitable for most scenarios including CPU, GPU, HIP, CPU, etc.,
* but there are a few cases where this definition cannot be described, * but there are a few cases where this definition cannot be described,
* such as the Tensor representation in third-party lib such as Metal, * such as the Tensor representation in third-party lib such as Metal,
* OpenCL, etc., as well as some special Tensor implementations, including * OpenCL, etc., as well as some special Tensor implementations, including
......
...@@ -56,7 +56,7 @@ Backend ConvertExtPlaceToBackend(PlaceType p) { ...@@ -56,7 +56,7 @@ Backend ConvertExtPlaceToBackend(PlaceType p) {
return Backend::CPU; return Backend::CPU;
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
case PlaceType::kGPU: case PlaceType::kGPU:
return Backend::CUDA; return Backend::GPU;
#endif #endif
default: default:
PADDLE_THROW( PADDLE_THROW(
......
...@@ -28,12 +28,12 @@ PT_DECLARE_KERNEL(scale, CPU, ALL_LAYOUT); ...@@ -28,12 +28,12 @@ PT_DECLARE_KERNEL(scale, CPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(conj, CPU, ALL_LAYOUT); PT_DECLARE_KERNEL(conj, CPU, ALL_LAYOUT);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PT_DECLARE_KERNEL(full_like, CUDA, ALL_LAYOUT); PT_DECLARE_KERNEL(full_like, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(dot, CUDA, ALL_LAYOUT); PT_DECLARE_KERNEL(dot, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(flatten, CUDA, ALL_LAYOUT); PT_DECLARE_KERNEL(flatten, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(sign, CUDA, ALL_LAYOUT); PT_DECLARE_KERNEL(sign, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(scale, CUDA, ALL_LAYOUT); PT_DECLARE_KERNEL(scale, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(conj, CUDA, ALL_LAYOUT); PT_DECLARE_KERNEL(conj, GPU, ALL_LAYOUT);
#endif #endif
#ifdef PADDLE_WITH_XPU #ifdef PADDLE_WITH_XPU
......
...@@ -28,7 +28,7 @@ limitations under the License. */ ...@@ -28,7 +28,7 @@ limitations under the License. */
PT_DECLARE_KERNEL(copy, CPU, ALL_LAYOUT); PT_DECLARE_KERNEL(copy, CPU, ALL_LAYOUT);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PT_DECLARE_KERNEL(copy, CUDA, ALL_LAYOUT); PT_DECLARE_KERNEL(copy, GPU, ALL_LAYOUT);
#endif #endif
#ifdef PADDLE_WITH_XPU #ifdef PADDLE_WITH_XPU
......
...@@ -21,7 +21,7 @@ limitations under the License. */ ...@@ -21,7 +21,7 @@ limitations under the License. */
// path replacement after implementing pten DeviceContext // path replacement after implementing pten DeviceContext
#include "paddle/pten/backends/cpu/cpu_context.h" #include "paddle/pten/backends/cpu/cpu_context.h"
#include "paddle/pten/backends/cuda/cuda_context.h" #include "paddle/pten/backends/gpu/gpu_context.h"
#include "paddle/pten/backends/npu/npu_context.h" #include "paddle/pten/backends/npu/npu_context.h"
#include "paddle/pten/backends/xpu/xpu_context.h" #include "paddle/pten/backends/xpu/xpu_context.h"
......
...@@ -20,7 +20,7 @@ limitations under the License. */ ...@@ -20,7 +20,7 @@ limitations under the License. */
#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/device_context.h"
namespace pten { namespace pten {
using CUDAContext = paddle::platform::CUDADeviceContext; using GPUContext = paddle::platform::CUDADeviceContext;
} // namespace pten } // namespace pten
#endif #endif
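
Because GPUContext remains an alias of the fluid CUDADeviceContext (see the hunk above), an existing device context can be passed to the renamed kernels directly. A minimal illustrative sketch, assuming the conj kernel header from this diff is included; the wrapper function itself is hypothetical, not part of the commit:

#include "paddle/pten/kernels/gpu/conj_kernel.h"  // declares pten::Conj against GPUContext

// Hypothetical helper: runs the renamed conj kernel with a fluid device context.
void RunConjOnGpu(const paddle::platform::CUDADeviceContext& ctx,
                  const pten::DenseTensor& x,
                  pten::DenseTensor* out) {
  // pten::GPUContext is the same type as CUDADeviceContext, so ctx binds directly.
  pten::Conj<paddle::platform::complex<float>>(ctx, x, out);
}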
...@@ -43,7 +43,7 @@ enum class Backend : uint8_t { ...@@ -43,7 +43,7 @@ enum class Backend : uint8_t {
CPU, CPU,
// various acceleration devices' backends // various acceleration devices' backends
CUDA, GPU,
XPU, // XPU currently does not exist at the same time as CUDA XPU, // XPU currently does not exist at the same time as CUDA
NPU, // NPU currently does not exist at the same time as CUDA NPU, // NPU currently does not exist at the same time as CUDA
...@@ -99,8 +99,8 @@ inline std::ostream& operator<<(std::ostream& os, Backend backend) { ...@@ -99,8 +99,8 @@ inline std::ostream& operator<<(std::ostream& os, Backend backend) {
case Backend::CPU: case Backend::CPU:
os << "CPU"; os << "CPU";
break; break;
case Backend::CUDA: case Backend::GPU:
os << "CUDA"; os << "GPU";
break; break;
case Backend::XPU: case Backend::XPU:
os << "XPU"; os << "XPU";
......
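
The stream-output branch above is exercised by the backend test updated near the end of this diff; a minimal usage sketch of the renamed enumerator (gtest macro as in that test):

std::ostringstream oss;
oss << pten::Backend::GPU;   // operator<< now prints "GPU" where it previously printed "CUDA"
EXPECT_EQ(oss.str(), "GPU");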
...@@ -23,7 +23,7 @@ Backend TransToPtenBackend(const paddle::platform::Place& place) { ...@@ -23,7 +23,7 @@ Backend TransToPtenBackend(const paddle::platform::Place& place) {
if (paddle::platform::is_cpu_place(place)) { if (paddle::platform::is_cpu_place(place)) {
return Backend::CPU; return Backend::CPU;
} else if (paddle::platform::is_gpu_place(place)) { } else if (paddle::platform::is_gpu_place(place)) {
return Backend::CUDA; return Backend::GPU;
} else { } else {
return Backend::UNDEFINED; return Backend::UNDEFINED;
} }
...@@ -84,7 +84,7 @@ paddle::platform::Place TransToFluidPlace(const Backend& backend) { ...@@ -84,7 +84,7 @@ paddle::platform::Place TransToFluidPlace(const Backend& backend) {
case pten::Backend::CPU: case pten::Backend::CPU:
return paddle::platform::CPUPlace(); return paddle::platform::CPUPlace();
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
case pten::Backend::CUDA: case pten::Backend::GPU:
return paddle::platform::CUDAPlace( return paddle::platform::CUDAPlace(
paddle::platform::GetCurrentDeviceId()); paddle::platform::GetCurrentDeviceId());
#endif #endif
......
...@@ -57,7 +57,7 @@ struct KernelArgsParseFunctor<Return_ (*)(Args_...)> { ...@@ -57,7 +57,7 @@ struct KernelArgsParseFunctor<Return_ (*)(Args_...)> {
if (arg_type == std::type_index(typeid(const CPUContext&)) if (arg_type == std::type_index(typeid(const CPUContext&))
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
|| ||
arg_type == std::type_index(typeid(const CUDAContext&))) { arg_type == std::type_index(typeid(const GPUContext&))) {
#else #else
) { ) {
#endif #endif
......
...@@ -181,7 +181,7 @@ struct KernelImpl<Return (*)(DevCtx, Args...), kernel_fn> { ...@@ -181,7 +181,7 @@ struct KernelImpl<Return (*)(DevCtx, Args...), kernel_fn> {
PT_SPECIALIZE_KernelCallHelper_FOR_DEVICE_CONTEXT(CPUContext); PT_SPECIALIZE_KernelCallHelper_FOR_DEVICE_CONTEXT(CPUContext);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PT_SPECIALIZE_KernelCallHelper_FOR_DEVICE_CONTEXT(CUDAContext); PT_SPECIALIZE_KernelCallHelper_FOR_DEVICE_CONTEXT(GPUContext);
#endif #endif
#ifdef PADDLE_WITH_ASCEND_CL #ifdef PADDLE_WITH_ASCEND_CL
PT_SPECIALIZE_KernelCallHelper_FOR_DEVICE_CONTEXT(NPUContext); PT_SPECIALIZE_KernelCallHelper_FOR_DEVICE_CONTEXT(NPUContext);
......
...@@ -18,7 +18,7 @@ ...@@ -18,7 +18,7 @@
#include "paddle/pten/api/lib/utils/storage.h" #include "paddle/pten/api/lib/utils/storage.h"
#include "paddle/pten/include/infermeta.h" #include "paddle/pten/include/infermeta.h"
#include "paddle/pten/kernels/cpu/linalg.h" #include "paddle/pten/kernels/cpu/linalg.h"
#include "paddle/pten/kernels/cuda/linalg.h" #include "paddle/pten/kernels/gpu/linalg.h"
namespace pten { namespace pten {
......
...@@ -18,7 +18,7 @@ ...@@ -18,7 +18,7 @@
#include "paddle/pten/api/lib/utils/storage.h" #include "paddle/pten/api/lib/utils/storage.h"
#include "paddle/pten/include/infermeta.h" #include "paddle/pten/include/infermeta.h"
#include "paddle/pten/kernels/cpu/manipulation.h" #include "paddle/pten/kernels/cpu/manipulation.h"
#include "paddle/pten/kernels/cuda/manipulation.h" #include "paddle/pten/kernels/gpu/manipulation.h"
#include "paddle/pten/kernels/xpu/manipulation.h" #include "paddle/pten/kernels/xpu/manipulation.h"
namespace pten { namespace pten {
......
...@@ -19,8 +19,8 @@ limitations under the License. */ ...@@ -19,8 +19,8 @@ limitations under the License. */
#include "paddle/pten/include/infermeta.h" #include "paddle/pten/include/infermeta.h"
#include "paddle/pten/kernels/cpu/conj_kernel.h" #include "paddle/pten/kernels/cpu/conj_kernel.h"
#include "paddle/pten/kernels/cpu/math.h" #include "paddle/pten/kernels/cpu/math.h"
#include "paddle/pten/kernels/cuda/conj_kernel.h" #include "paddle/pten/kernels/gpu/conj_kernel.h"
#include "paddle/pten/kernels/cuda/math.h" #include "paddle/pten/kernels/gpu/math.h"
#include "paddle/pten/kernels/scale_kernel.h" #include "paddle/pten/kernels/scale_kernel.h"
namespace pten { namespace pten {
......
...@@ -5,8 +5,7 @@ add_subdirectory(hybird) ...@@ -5,8 +5,7 @@ add_subdirectory(hybird)
add_subdirectory(cpu) add_subdirectory(cpu)
if(WITH_GPU OR WITH_ROCM) if(WITH_GPU OR WITH_ROCM)
# NOTE(chenweihang): if hip can split from cuda impl, we should add hip dir add_subdirectory(gpu)
add_subdirectory(cuda)
endif() endif()
if(WITH_MKLDNN) if(WITH_MKLDNN)
# mkldnn will be deprecated and use the new name dnnl # mkldnn will be deprecated and use the new name dnnl
......
if(WITH_GPU)
nv_library(math_cuda SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory pten_transpose_cuda)
nv_library(linalg_cuda SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory)
nv_library(utils_cuda SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
nv_library(manipulation_cuda SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_cuda unary)
nv_library(scale_kernel_cuda SRCS scale_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
nv_library(full_kernel_cuda SRCS full_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
nv_library(conj_kernel_cuda SRCS conj_kernel.cu DEPS dense_tensor kernel_context kernel_factory)
elseif(WITH_ROCM)
hip_library(math_cuda SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory pten_transpose_cuda)
hip_library(linalg_cuda SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory)
hip_library(utils_cuda SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
hip_library(manipulation_cuda SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_cuda unary)
hip_library(scale_kernel_cuda SRCS scale_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
hip_library(full_kernel_cuda SRCS full_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
hip_library(conj_kernel_cuda SRCS conj_kernel.cu DEPS dense_tensor kernel_context kernel_factory)
endif()
if(WITH_GPU)
nv_library(math_gpu SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory pten_transpose_gpu)
nv_library(linalg_gpu SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory)
nv_library(utils_gpu SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
nv_library(manipulation_gpu SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_gpu unary)
nv_library(scale_kernel_gpu SRCS scale_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
nv_library(full_kernel_gpu SRCS full_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
nv_library(conj_kernel_gpu SRCS conj_kernel.cu DEPS dense_tensor kernel_context kernel_factory)
elseif(WITH_ROCM)
hip_library(math_gpu SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory pten_transpose_gpu)
hip_library(linalg_gpu SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory)
hip_library(utils_gpu SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
hip_library(manipulation_gpu SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_gpu unary)
hip_library(scale_kernel_gpu SRCS scale_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
hip_library(full_kernel_gpu SRCS full_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
hip_library(conj_kernel_gpu SRCS conj_kernel.cu DEPS dense_tensor kernel_context kernel_factory)
endif()
...@@ -12,23 +12,23 @@ ...@@ -12,23 +12,23 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "paddle/pten/kernels/cuda/conj_kernel.h" #include "paddle/pten/kernels/gpu/conj_kernel.h"
#include "paddle/pten/backends/cuda/cuda_context.h" #include "paddle/pten/backends/gpu/gpu_context.h"
#include "paddle/pten/core/kernel_registry.h" #include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/kernels/hybird/math/conj_impl.h" #include "paddle/pten/kernels/hybird/math/conj_impl.h"
namespace pten { namespace pten {
template <typename T> template <typename T>
void Conj(const CUDAContext& dev_ctx, const DenseTensor& x, DenseTensor* out) { void Conj(const GPUContext& dev_ctx, const DenseTensor& x, DenseTensor* out) {
ConjImpl<T, CUDAContext>(dev_ctx, x, out); ConjImpl<T, GPUContext>(dev_ctx, x, out);
} }
} // namespace pten } // namespace pten
PT_REGISTER_KERNEL(conj, PT_REGISTER_KERNEL(conj,
CUDA, GPU,
ALL_LAYOUT, ALL_LAYOUT,
pten::Conj, pten::Conj,
paddle::platform::complex<float>, paddle::platform::complex<float>,
......
...@@ -17,13 +17,13 @@ limitations under the License. */ ...@@ -17,13 +17,13 @@ limitations under the License. */
// CUDA and HIP use same api // CUDA and HIP use same api
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/pten/backends/cuda/cuda_context.h" #include "paddle/pten/backends/gpu/gpu_context.h"
#include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/core/dense_tensor.h"
namespace pten { namespace pten {
template <typename T> template <typename T>
void Conj(const CUDAContext& dev_ctx, const DenseTensor& x, DenseTensor* out); void Conj(const GPUContext& dev_ctx, const DenseTensor& x, DenseTensor* out);
} // namespace pten } // namespace pten
......
...@@ -14,12 +14,12 @@ limitations under the License. */ ...@@ -14,12 +14,12 @@ limitations under the License. */
#include "paddle/pten/kernels/full_kernel.h" #include "paddle/pten/kernels/full_kernel.h"
#include "paddle/pten/backends/cuda/cuda_context.h" #include "paddle/pten/backends/gpu/gpu_context.h"
#include "paddle/pten/core/kernel_registry.h" #include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/kernels/impl/full_kernel_impl.h" #include "paddle/pten/kernels/impl/full_kernel_impl.h"
PT_REGISTER_CTX_KERNEL(full, PT_REGISTER_CTX_KERNEL(full,
CUDA, GPU,
ALL_LAYOUT, ALL_LAYOUT,
pten::Full, pten::Full,
float, float,
...@@ -34,7 +34,7 @@ PT_REGISTER_CTX_KERNEL(full, ...@@ -34,7 +34,7 @@ PT_REGISTER_CTX_KERNEL(full,
paddle::platform::complex<double>) {} paddle::platform::complex<double>) {}
PT_REGISTER_CTX_KERNEL(full_like, PT_REGISTER_CTX_KERNEL(full_like,
CUDA, GPU,
ALL_LAYOUT, ALL_LAYOUT,
pten::FullLike, pten::FullLike,
float, float,
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "paddle/pten/kernels/cuda/linalg.h" #include "paddle/pten/kernels/gpu/linalg.h"
#include "paddle/pten/core/kernel_registry.h" #include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/kernels/hybird/eigen/dot.h" #include "paddle/pten/kernels/hybird/eigen/dot.h"
...@@ -24,15 +24,15 @@ ...@@ -24,15 +24,15 @@
namespace pten { namespace pten {
template <typename T> template <typename T>
void Dot(const CUDAContext& dev_ctx, void Dot(const GPUContext& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
const DenseTensor& y, const DenseTensor& y,
DenseTensor* out) { DenseTensor* out) {
eigen::Dot<CUDAContext, T>(dev_ctx, x, y, out); eigen::Dot<GPUContext, T>(dev_ctx, x, y, out);
} }
template <typename T> template <typename T>
void Matmul(const CUDAContext& dev_ctx, void Matmul(const GPUContext& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
const DenseTensor& y, const DenseTensor& y,
bool transpose_x, bool transpose_x,
...@@ -48,7 +48,7 @@ void Matmul(const CUDAContext& dev_ctx, ...@@ -48,7 +48,7 @@ void Matmul(const CUDAContext& dev_ctx,
paddle::platform::errors::InvalidArgument( paddle::platform::errors::InvalidArgument(
"The Input(Y) dims size must not be equal 0," "The Input(Y) dims size must not be equal 0,"
" but reviced dims size is 0. ")); " but reviced dims size is 0. "));
math::MatMulFunction<CUDAContext, T>( math::MatMulFunction<GPUContext, T>(
dev_ctx, x, y, out, transpose_x, transpose_y); dev_ctx, x, y, out, transpose_x, transpose_y);
} }
...@@ -59,7 +59,7 @@ using complex64 = ::paddle::platform::complex<float>; ...@@ -59,7 +59,7 @@ using complex64 = ::paddle::platform::complex<float>;
using complex128 = ::paddle::platform::complex<double>; using complex128 = ::paddle::platform::complex<double>;
PT_REGISTER_KERNEL(dot, PT_REGISTER_KERNEL(dot,
CUDA, GPU,
ALL_LAYOUT, ALL_LAYOUT,
pten::Dot, pten::Dot,
float, float,
...@@ -70,7 +70,7 @@ PT_REGISTER_KERNEL(dot, ...@@ -70,7 +70,7 @@ PT_REGISTER_KERNEL(dot,
complex128) {} complex128) {}
PT_REGISTER_KERNEL(matmul, PT_REGISTER_KERNEL(matmul,
CUDA, GPU,
ALL_LAYOUT, ALL_LAYOUT,
pten::Matmul, pten::Matmul,
float, float,
......
...@@ -17,19 +17,19 @@ ...@@ -17,19 +17,19 @@
// CUDA and HIP use same api // CUDA and HIP use same api
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/pten/backends/cuda/cuda_context.h" #include "paddle/pten/backends/gpu/gpu_context.h"
#include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/core/dense_tensor.h"
namespace pten { namespace pten {
template <typename T> template <typename T>
void Dot(const CUDAContext& dev_ctx, void Dot(const GPUContext& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
const DenseTensor& y, const DenseTensor& y,
DenseTensor* out); DenseTensor* out);
template <typename T> template <typename T>
void Matmul(const CUDAContext& dev_ctx, void Matmul(const GPUContext& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
const DenseTensor& y, const DenseTensor& y,
bool transpose_x, bool transpose_x,
......
...@@ -14,15 +14,15 @@ ...@@ -14,15 +14,15 @@
#include "paddle/pten/api/ext/dispatch.h" #include "paddle/pten/api/ext/dispatch.h"
#include "paddle/pten/infermeta/unary.h" #include "paddle/pten/infermeta/unary.h"
#include "paddle/pten/kernels/cuda/manipulation.h" #include "paddle/pten/kernels/gpu/manipulation.h"
#include "paddle/pten/kernels/cuda/utils.h" #include "paddle/pten/kernels/gpu/utils.h"
#include "paddle/pten/kernels/hybird/cuda/cast_kernel_impl.h" #include "paddle/pten/kernels/hybird/cuda/cast_kernel_impl.h"
#include "paddle/pten/kernels/hybird/general/manipulation.h" #include "paddle/pten/kernels/hybird/general/manipulation.h"
namespace pten { namespace pten {
template <typename T> template <typename T>
void Flatten(const CUDAContext& dev_ctx, void Flatten(const GPUContext& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
int start_axis, int start_axis,
int stop_axis, int stop_axis,
...@@ -36,7 +36,7 @@ void Flatten(const CUDAContext& dev_ctx, ...@@ -36,7 +36,7 @@ void Flatten(const CUDAContext& dev_ctx,
// Output Tensor, // Output Tensor,
// is there a more flexible way to deal with this case? // is there a more flexible way to deal with this case?
template <typename T> template <typename T>
void FlattenWithXShape(const CUDAContext& dev_ctx, void FlattenWithXShape(const GPUContext& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
int start_axis, int start_axis,
int stop_axis, int stop_axis,
...@@ -46,7 +46,7 @@ void FlattenWithXShape(const CUDAContext& dev_ctx, ...@@ -46,7 +46,7 @@ void FlattenWithXShape(const CUDAContext& dev_ctx,
general::SetXShape(x, xshape); general::SetXShape(x, xshape);
} }
void Reshape(const CUDAContext& dev_ctx, void Reshape(const GPUContext& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
const ScalarArray& shape, const ScalarArray& shape,
DenseTensor* out) { DenseTensor* out) {
...@@ -60,7 +60,7 @@ void Reshape(const CUDAContext& dev_ctx, ...@@ -60,7 +60,7 @@ void Reshape(const CUDAContext& dev_ctx,
out->ResetLoD(x.lod()); out->ResetLoD(x.lod());
} }
void ReshapeWithXShape(const CUDAContext& dev_ctx, void ReshapeWithXShape(const GPUContext& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
const ScalarArray& shape, const ScalarArray& shape,
DenseTensor* xshape, DenseTensor* xshape,
...@@ -70,7 +70,7 @@ void ReshapeWithXShape(const CUDAContext& dev_ctx, ...@@ -70,7 +70,7 @@ void ReshapeWithXShape(const CUDAContext& dev_ctx,
} }
template <typename T> template <typename T>
void Cast(const CUDAContext& dev_ctx, void Cast(const GPUContext& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
DataType out_dtype, DataType out_dtype,
DataType in_dtype, DataType in_dtype,
...@@ -85,7 +85,7 @@ void Cast(const CUDAContext& dev_ctx, ...@@ -85,7 +85,7 @@ void Cast(const CUDAContext& dev_ctx,
using float16 = paddle::platform::float16; using float16 = paddle::platform::float16;
PT_REGISTER_KERNEL(flatten, PT_REGISTER_KERNEL(flatten,
CUDA, GPU,
ALL_LAYOUT, ALL_LAYOUT,
pten::Flatten, pten::Flatten,
float, float,
...@@ -96,7 +96,7 @@ PT_REGISTER_KERNEL(flatten, ...@@ -96,7 +96,7 @@ PT_REGISTER_KERNEL(flatten,
int, int,
int64_t) {} int64_t) {}
PT_REGISTER_KERNEL(flatten_with_xshape, PT_REGISTER_KERNEL(flatten_with_xshape,
CUDA, GPU,
ALL_LAYOUT, ALL_LAYOUT,
pten::FlattenWithXShape, pten::FlattenWithXShape,
float, float,
...@@ -108,7 +108,7 @@ PT_REGISTER_KERNEL(flatten_with_xshape, ...@@ -108,7 +108,7 @@ PT_REGISTER_KERNEL(flatten_with_xshape,
#define PTEN_REGISTER_CAST_CUDA_BASE_TYPE(op_name, ...) \ #define PTEN_REGISTER_CAST_CUDA_BASE_TYPE(op_name, ...) \
PT_REGISTER_KERNEL(cast, \ PT_REGISTER_KERNEL(cast, \
CUDA, \ GPU, \
ALL_LAYOUT, \ ALL_LAYOUT, \
pten::Cast, \ pten::Cast, \
float, \ float, \
...@@ -132,6 +132,6 @@ PTEN_REGISTER_CAST_CUDA_BASE_TYPE(cast, paddle::platform::bfloat16) ...@@ -132,6 +132,6 @@ PTEN_REGISTER_CAST_CUDA_BASE_TYPE(cast, paddle::platform::bfloat16)
PTEN_REGISTER_CAST_CUDA_BASE_TYPE(cast) PTEN_REGISTER_CAST_CUDA_BASE_TYPE(cast)
#endif #endif
PT_REGISTER_NO_TEMPLATE_KERNEL(reshape, CUDA, ANY, pten::Reshape, ALL_DTYPE) {} PT_REGISTER_NO_TEMPLATE_KERNEL(reshape, GPU, ANY, pten::Reshape, ALL_DTYPE) {}
PT_REGISTER_NO_TEMPLATE_KERNEL( PT_REGISTER_NO_TEMPLATE_KERNEL(
reshape_with_xshape, CUDA, ANY, pten::ReshapeWithXShape, ALL_DTYPE) {} reshape_with_xshape, GPU, ANY, pten::ReshapeWithXShape, ALL_DTYPE) {}
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
// CUDA and HIP use same api // CUDA and HIP use same api
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/pten/backends/cuda/cuda_context.h" #include "paddle/pten/backends/gpu/gpu_context.h"
#include "paddle/pten/common/scalar_array.h" #include "paddle/pten/common/scalar_array.h"
#include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/kernel_registry.h" #include "paddle/pten/core/kernel_registry.h"
...@@ -25,25 +25,25 @@ ...@@ -25,25 +25,25 @@
namespace pten { namespace pten {
template <typename T> template <typename T>
void Flatten(const CUDAContext& dev_ctx, void Flatten(const GPUContext& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
int start_axis, int start_axis,
int stop_axis, int stop_axis,
DenseTensor* out); DenseTensor* out);
template <typename T> template <typename T>
void Cast(const CUDAContext& dev_ctx, void Cast(const GPUContext& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
DataType out_dtype, DataType out_dtype,
DataType in_dtype, DataType in_dtype,
DenseTensor* out); DenseTensor* out);
void Reshape(const CUDAContext& dev_ctx, void Reshape(const GPUContext& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
const ScalarArray& shape, const ScalarArray& shape,
DenseTensor* out); DenseTensor* out);
void ReshapeWithXShape(const CUDAContext& dev_ctx, void ReshapeWithXShape(const GPUContext& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
const ScalarArray& shape, const ScalarArray& shape,
DenseTensor* xshape, DenseTensor* xshape,
......
...@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/pten/kernels/cuda/math.h" #include "paddle/pten/kernels/gpu/math.h"
#include "paddle/fluid/operators/reduce_ops/reduce_functor_op.h" #include "paddle/fluid/operators/reduce_ops/reduce_functor_op.h"
#include "paddle/pten/kernels/hybird/cuda/elementwise/elementwise.h" #include "paddle/pten/kernels/hybird/cuda/elementwise/elementwise.h"
...@@ -58,12 +58,12 @@ struct DivideFunctor { ...@@ -58,12 +58,12 @@ struct DivideFunctor {
*/ */
template <typename T> template <typename T>
void Sign(const CUDAContext& dev_ctx, const DenseTensor& x, DenseTensor* out) { void Sign(const GPUContext& dev_ctx, const DenseTensor& x, DenseTensor* out) {
eigen::Sign<CUDAContext, T>(dev_ctx, x, out); eigen::Sign<GPUContext, T>(dev_ctx, x, out);
} }
template <typename T> template <typename T>
void Mean(const CUDAContext& dev_ctx, void Mean(const GPUContext& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
const std::vector<int64_t>& dims, const std::vector<int64_t>& dims,
bool keep_dim, bool keep_dim,
...@@ -84,7 +84,7 @@ DEFINE_CUDA_ELEMENTWISE_OP(Multiply) ...@@ -84,7 +84,7 @@ DEFINE_CUDA_ELEMENTWISE_OP(Multiply)
DEFINE_CUDA_ELEMENTWISE_OP(Divide) DEFINE_CUDA_ELEMENTWISE_OP(Divide)
template <typename T> template <typename T>
void Sum(const CUDAContext& dev_ctx, void Sum(const GPUContext& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
const std::vector<int64_t>& dims, const std::vector<int64_t>& dims,
bool keep_dim, bool keep_dim,
...@@ -101,11 +101,10 @@ using float16 = paddle::platform::float16; ...@@ -101,11 +101,10 @@ using float16 = paddle::platform::float16;
using complex64 = ::paddle::platform::complex<float>; using complex64 = ::paddle::platform::complex<float>;
using complex128 = ::paddle::platform::complex<double>; using complex128 = ::paddle::platform::complex<double>;
PT_REGISTER_KERNEL(sign, CUDA, ALL_LAYOUT, pten::Sign, float, double, float16) { PT_REGISTER_KERNEL(sign, GPU, ALL_LAYOUT, pten::Sign, float, double, float16) {}
} PT_REGISTER_KERNEL(mean, GPU, ALL_LAYOUT, pten::Mean, float, double, bool) {}
PT_REGISTER_KERNEL(mean, CUDA, ALL_LAYOUT, pten::Mean, float, double, bool) {}
PT_REGISTER_KERNEL(add, PT_REGISTER_KERNEL(add,
CUDA, GPU,
ALL_LAYOUT, ALL_LAYOUT,
pten::Add, pten::Add,
float, float,
...@@ -116,7 +115,7 @@ PT_REGISTER_KERNEL(add, ...@@ -116,7 +115,7 @@ PT_REGISTER_KERNEL(add,
complex64, complex64,
complex128) {} complex128) {}
PT_REGISTER_KERNEL(subtract, PT_REGISTER_KERNEL(subtract,
CUDA, GPU,
ALL_LAYOUT, ALL_LAYOUT,
pten::Subtract, pten::Subtract,
float, float,
...@@ -127,7 +126,7 @@ PT_REGISTER_KERNEL(subtract, ...@@ -127,7 +126,7 @@ PT_REGISTER_KERNEL(subtract,
complex64, complex64,
complex128) {} complex128) {}
PT_REGISTER_KERNEL(divide, PT_REGISTER_KERNEL(divide,
CUDA, GPU,
ALL_LAYOUT, ALL_LAYOUT,
pten::Divide, pten::Divide,
float, float,
...@@ -138,7 +137,7 @@ PT_REGISTER_KERNEL(divide, ...@@ -138,7 +137,7 @@ PT_REGISTER_KERNEL(divide,
complex64, complex64,
complex128) {} complex128) {}
PT_REGISTER_KERNEL(multiply, PT_REGISTER_KERNEL(multiply,
CUDA, GPU,
ALL_LAYOUT, ALL_LAYOUT,
pten::Multiply, pten::Multiply,
float, float,
...@@ -150,7 +149,7 @@ PT_REGISTER_KERNEL(multiply, ...@@ -150,7 +149,7 @@ PT_REGISTER_KERNEL(multiply,
complex64, complex64,
complex128) {} complex128) {}
PT_REGISTER_KERNEL(sum, PT_REGISTER_KERNEL(sum,
CUDA, GPU,
ALL_LAYOUT, ALL_LAYOUT,
pten::Sum, pten::Sum,
bool, bool,
......
...@@ -17,17 +17,17 @@ limitations under the License. */ ...@@ -17,17 +17,17 @@ limitations under the License. */
// CUDA and HIP use same api // CUDA and HIP use same api
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/pten/backends/cuda/cuda_context.h" #include "paddle/pten/backends/gpu/gpu_context.h"
#include "paddle/pten/common/scalar.h" #include "paddle/pten/common/scalar.h"
#include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/core/dense_tensor.h"
namespace pten { namespace pten {
template <typename T> template <typename T>
void Sign(const CUDAContext& dev_ctx, const DenseTensor& x, DenseTensor* out); void Sign(const GPUContext& dev_ctx, const DenseTensor& x, DenseTensor* out);
template <typename T> template <typename T>
void Mean(const CUDAContext& dev_ctx, void Mean(const GPUContext& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
const std::vector<int64_t>& dims, const std::vector<int64_t>& dims,
bool keep_dim, bool keep_dim,
...@@ -35,35 +35,35 @@ void Mean(const CUDAContext& dev_ctx, ...@@ -35,35 +35,35 @@ void Mean(const CUDAContext& dev_ctx,
DenseTensor* out); DenseTensor* out);
template <typename T> template <typename T>
void Add(const CUDAContext& dev_ctx, void Add(const GPUContext& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
const DenseTensor& y, const DenseTensor& y,
int axis, int axis,
DenseTensor* out); DenseTensor* out);
template <typename T> template <typename T>
void Subtract(const CUDAContext& dev_ctx, void Subtract(const GPUContext& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
const DenseTensor& y, const DenseTensor& y,
int axis, int axis,
DenseTensor* out); DenseTensor* out);
template <typename T> template <typename T>
void Divide(const CUDAContext& dev_ctx, void Divide(const GPUContext& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
const DenseTensor& y, const DenseTensor& y,
int axis, int axis,
DenseTensor* out); DenseTensor* out);
template <typename T> template <typename T>
void Multiply(const CUDAContext& dev_ctx, void Multiply(const GPUContext& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
const DenseTensor& y, const DenseTensor& y,
int axis, int axis,
DenseTensor* out); DenseTensor* out);
template <typename T> template <typename T>
void Sum(const CUDAContext& dev_ctx, void Sum(const GPUContext& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
const std::vector<int64_t>& dims, const std::vector<int64_t>& dims,
bool keep_dim, bool keep_dim,
...@@ -75,7 +75,7 @@ void Sum(const CUDAContext& dev_ctx, ...@@ -75,7 +75,7 @@ void Sum(const CUDAContext& dev_ctx,
#define DEFINE_CUDA_ELEMENTWISE_OP(name) \ #define DEFINE_CUDA_ELEMENTWISE_OP(name) \
template <typename T> \ template <typename T> \
void name(const CUDAContext& dev_ctx, \ void name(const GPUContext& dev_ctx, \
const DenseTensor& x, \ const DenseTensor& x, \
const DenseTensor& y, \ const DenseTensor& y, \
int axis, \ int axis, \
......
...@@ -14,7 +14,7 @@ limitations under the License. */ ...@@ -14,7 +14,7 @@ limitations under the License. */
#include "paddle/pten/kernels/scale_kernel.h" #include "paddle/pten/kernels/scale_kernel.h"
#include "paddle/pten/backends/cuda/cuda_context.h" #include "paddle/pten/backends/gpu/gpu_context.h"
#include "paddle/pten/core/kernel_registry.h" #include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/kernels/impl/scale_kernel_impl.h" #include "paddle/pten/kernels/impl/scale_kernel_impl.h"
...@@ -22,7 +22,7 @@ limitations under the License. */ ...@@ -22,7 +22,7 @@ limitations under the License. */
#include "paddle/fluid/platform/float16.h" #include "paddle/fluid/platform/float16.h"
PT_REGISTER_CTX_KERNEL(scale, PT_REGISTER_CTX_KERNEL(scale,
CUDA, GPU,
ALL_LAYOUT, ALL_LAYOUT,
pten::Scale, pten::Scale,
float, float,
......
...@@ -16,11 +16,11 @@ limitations under the License. */ ...@@ -16,11 +16,11 @@ limitations under the License. */
#include "paddle/pten/common/data_type.h" #include "paddle/pten/common/data_type.h"
#include "paddle/pten/core/convert_utils.h" #include "paddle/pten/core/convert_utils.h"
#include "paddle/pten/core/kernel_registry.h" #include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/kernels/cuda/utils.h" #include "paddle/pten/kernels/gpu/utils.h"
namespace pten { namespace pten {
void Copy(const CUDAContext& dev_ctx, void Copy(const GPUContext& dev_ctx,
const DenseTensor& src, const DenseTensor& src,
bool blocking, bool blocking,
DenseTensor* dst) { DenseTensor* dst) {
...@@ -234,4 +234,4 @@ void Copy(const CUDAContext& dev_ctx, ...@@ -234,4 +234,4 @@ void Copy(const CUDAContext& dev_ctx,
} }
} // namespace pten } // namespace pten
PT_REGISTER_NO_TEMPLATE_KERNEL(copy, CUDA, ALL_LAYOUT, pten::Copy, ALL_DTYPE) {} PT_REGISTER_NO_TEMPLATE_KERNEL(copy, GPU, ALL_LAYOUT, pten::Copy, ALL_DTYPE) {}
...@@ -17,13 +17,13 @@ limitations under the License. */ ...@@ -17,13 +17,13 @@ limitations under the License. */
// CUDA and HIP use same api // CUDA and HIP use same api
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/pten/backends/cuda/cuda_context.h" #include "paddle/pten/backends/gpu/gpu_context.h"
#include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/kernel_registry.h" #include "paddle/pten/core/kernel_registry.h"
namespace pten { namespace pten {
void Copy(const CUDAContext& dev_ctx, void Copy(const GPUContext& dev_ctx,
const DenseTensor& src, const DenseTensor& src,
bool blocking, bool blocking,
DenseTensor* dst); DenseTensor* dst);
......
...@@ -4,7 +4,7 @@ add_subdirectory(general) ...@@ -4,7 +4,7 @@ add_subdirectory(general)
cc_library(pten_transpose_cpu SRCS transpose.cc DEPS dense_tensor pten_context) cc_library(pten_transpose_cpu SRCS transpose.cc DEPS dense_tensor pten_context)
if(WITH_GPU) if(WITH_GPU)
nv_library(pten_transpose_cuda SRCS transpose.cu DEPS dense_tensor malloc pten_context) nv_library(pten_transpose_gpu SRCS transpose.cu DEPS dense_tensor malloc pten_context)
elseif(WITH_ROCM) elseif(WITH_ROCM)
hip_library(pten_transpose_cuda SRCS transpose.cu DEPS dense_tensor malloc pten_context) hip_library(pten_transpose_gpu SRCS transpose.cu DEPS dense_tensor malloc pten_context)
endif() endif()
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
#pragma once #pragma once
#include "paddle/fluid/platform/device/gpu/gpu_helper.h" #include "paddle/fluid/platform/device/gpu/gpu_helper.h"
#include "paddle/fluid/platform/float16.h" #include "paddle/fluid/platform/float16.h"
#include "paddle/pten/backends/cuda/cuda_context.h" #include "paddle/pten/backends/gpu/gpu_context.h"
#include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/core/dense_tensor.h"
#include "paddle/fluid/platform/aligned_vector.h" #include "paddle/fluid/platform/aligned_vector.h"
...@@ -50,7 +50,7 @@ __global__ void CastCUDAKernel(const InT* in, const int64_t N, OutT* out) { ...@@ -50,7 +50,7 @@ __global__ void CastCUDAKernel(const InT* in, const int64_t N, OutT* out) {
} }
template <typename InT, typename OutT> template <typename InT, typename OutT>
void CastCUDAKernelImpl(const CUDAContext& dev_ctx, void CastCUDAKernelImpl(const GPUContext& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
DenseTensor* out) { DenseTensor* out) {
auto* in_data = x.data<InT>(); auto* in_data = x.data<InT>();
......
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
// CUDA and HIP use same api // CUDA and HIP use same api
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/pten/backends/cuda/cuda_context.h" #include "paddle/pten/backends/gpu/gpu_context.h"
#include "paddle/pten/common/scalar.h" #include "paddle/pten/common/scalar.h"
#include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h" #include "paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h"
...@@ -49,7 +49,7 @@ static inline std::vector<int64_t> GetReduceDim( ...@@ -49,7 +49,7 @@ static inline std::vector<int64_t> GetReduceDim(
} }
template <typename T, template <typename, typename> class ReduceFunctor> template <typename T, template <typename, typename> class ReduceFunctor>
void Reduce(const CUDAContext& dev_ctx, void Reduce(const GPUContext& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
bool reduce_all, bool reduce_all,
const std::vector<int64_t>& dims, const std::vector<int64_t>& dims,
......
...@@ -42,7 +42,7 @@ namespace cub = hipcub; ...@@ -42,7 +42,7 @@ namespace cub = hipcub;
#include "paddle/fluid/operators/kernel_primitives/compute_primitives.h" #include "paddle/fluid/operators/kernel_primitives/compute_primitives.h"
#include "paddle/pten/api/ext/dispatch.h" #include "paddle/pten/api/ext/dispatch.h"
#include "paddle/pten/api/include/tensor.h" #include "paddle/pten/api/include/tensor.h"
#include "paddle/pten/kernels/cuda/utils.h" #include "paddle/pten/kernels/gpu/utils.h"
#include "paddle/pten/kernels/hybird/math/cast_func.h" #include "paddle/pten/kernels/hybird/math/cast_func.h"
// Reduce split or not, Whether to use ReduceHigherDim // Reduce split or not, Whether to use ReduceHigherDim
...@@ -820,7 +820,7 @@ void TensorReduceFunctorImpl(const pten::DenseTensor& x, ...@@ -820,7 +820,7 @@ void TensorReduceFunctorImpl(const pten::DenseTensor& x,
y->Resize(out_dims); y->Resize(out_dims);
} else { } else {
PD_VISIT_ALL_TYPES(y->dtype(), "CastKernelImpl", ([&] { PD_VISIT_ALL_TYPES(y->dtype(), "CastKernelImpl", ([&] {
pten::math::CastKernelImpl<CUDAContext, Tx, data_t>( pten::math::CastKernelImpl<GPUContext, Tx, data_t>(
*dev_ctx, x, y); *dev_ctx, x, y);
})); }));
} }
......
...@@ -132,11 +132,11 @@ class MidWiseTransformIterator<T, CPUContext> ...@@ -132,11 +132,11 @@ class MidWiseTransformIterator<T, CPUContext>
#if defined(__NVCC__) || defined(__HIPCC__) #if defined(__NVCC__) || defined(__HIPCC__)
template <typename T> template <typename T>
class RowwiseTransformIterator<T, CUDAContext> class RowwiseTransformIterator<T, GPUContext>
: public thrust::iterator_adaptor<RowwiseTransformIterator<T, CUDAContext>, : public thrust::iterator_adaptor<RowwiseTransformIterator<T, GPUContext>,
const T *> { const T *> {
public: public:
typedef thrust::iterator_adaptor<RowwiseTransformIterator<T, CUDAContext>, typedef thrust::iterator_adaptor<RowwiseTransformIterator<T, GPUContext>,
const T *> const T *>
super_t; super_t;
HOSTDEVICE RowwiseTransformIterator(const T *x, int n) HOSTDEVICE RowwiseTransformIterator(const T *x, int n)
...@@ -152,11 +152,11 @@ class RowwiseTransformIterator<T, CUDAContext> ...@@ -152,11 +152,11 @@ class RowwiseTransformIterator<T, CUDAContext>
}; };
template <typename T> template <typename T>
class MidWiseTransformIterator<T, CUDAContext> class MidWiseTransformIterator<T, GPUContext>
: public thrust::iterator_adaptor<MidWiseTransformIterator<T, CUDAContext>, : public thrust::iterator_adaptor<MidWiseTransformIterator<T, GPUContext>,
const T *> { const T *> {
public: public:
typedef thrust::iterator_adaptor<MidWiseTransformIterator<T, CUDAContext>, typedef thrust::iterator_adaptor<MidWiseTransformIterator<T, GPUContext>,
const T *> const T *>
super_t; super_t;
HOSTDEVICE MidWiseTransformIterator(const T *x, int n, int post) HOSTDEVICE MidWiseTransformIterator(const T *x, int n, int post)
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
#include "paddle/fluid/framework/ddim.h" #include "paddle/fluid/framework/ddim.h"
#include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/memory/memcpy.h"
#include "paddle/pten/backends/cuda/cuda_context.h" #include "paddle/pten/backends/gpu/gpu_context.h"
#include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/kernels/hybird/math/cast_func.h" #include "paddle/pten/kernels/hybird/math/cast_func.h"
#include "paddle/pten/kernels/hybird/transpose.h" #include "paddle/pten/kernels/hybird/transpose.h"
...@@ -52,9 +52,9 @@ __global__ void TransposeNormalKernel(const T* in_ptr, ...@@ -52,9 +52,9 @@ __global__ void TransposeNormalKernel(const T* in_ptr,
} }
template <typename T> template <typename T>
struct TransposeNormal<CUDAContext, T> { struct TransposeNormal<GPUContext, T> {
// for dims >= 7 situation // for dims >= 7 situation
void operator()(const CUDAContext& dev_ctx, void operator()(const GPUContext& dev_ctx,
const pten::DenseTensor& in, const pten::DenseTensor& in,
pten::DenseTensor* out, pten::DenseTensor* out,
const std::vector<int64_t>& axis) { const std::vector<int64_t>& axis) {
...@@ -106,7 +106,7 @@ struct TransposeNormal<CUDAContext, T> { ...@@ -106,7 +106,7 @@ struct TransposeNormal<CUDAContext, T> {
// define transpose normal // define transpose normal
#define DEFINE_GPU_TRANS_NORMAL(TYPE) \ #define DEFINE_GPU_TRANS_NORMAL(TYPE) \
template struct TransposeNormal<CUDAContext, TYPE> template struct TransposeNormal<GPUContext, TYPE>
DEFINE_GPU_TRANS_NORMAL(bool); DEFINE_GPU_TRANS_NORMAL(bool);
DEFINE_GPU_TRANS_NORMAL(int8_t); DEFINE_GPU_TRANS_NORMAL(int8_t);
......
...@@ -142,13 +142,13 @@ static void ScaleCPU(DataType kernel_dtype, ...@@ -142,13 +142,13 @@ static void ScaleCPU(DataType kernel_dtype,
} }
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
static void ScaleCUDA(DataType kernel_dtype, static void ScaleGPU(DataType kernel_dtype,
const pten::CUDAContext& dev_ctx, const pten::GPUContext& dev_ctx,
const pten::DenseTensor& x, const pten::DenseTensor& x,
const Scalar& scale, const Scalar& scale,
float bias, float bias,
bool bias_after_scale, bool bias_after_scale,
pten::DenseTensor* dense_out) { pten::DenseTensor* dense_out) {
switch (kernel_dtype) { switch (kernel_dtype) {
case pten::DataType::FLOAT64: { case pten::DataType::FLOAT64: {
pten::Scale<double>( pten::Scale<double>(
...@@ -255,14 +255,14 @@ Tensor scale_switch_case(const Tensor& x, ...@@ -255,14 +255,14 @@ Tensor scale_switch_case(const Tensor& x,
dense_out.get()); dense_out.get());
break; break;
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
case Backend::CUDA: case Backend::GPU:
ScaleCUDA(kernel_data_type, ScaleGPU(kernel_data_type,
static_cast<const pten::CUDAContext&>(*dev_ctx), static_cast<const pten::GPUContext&>(*dev_ctx),
*dense_x, *dense_x,
scale, scale,
bias, bias,
bias_after_scale, bias_after_scale,
dense_out.get()); dense_out.get());
break; break;
#endif #endif
default: default:
......
...@@ -20,7 +20,7 @@ limitations under the License. */ ...@@ -20,7 +20,7 @@ limitations under the License. */
#include "paddle/pten/api/lib/utils/allocator.h" #include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/kernel_registry.h" #include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/kernels/cuda/utils.h" #include "paddle/pten/kernels/gpu/utils.h"
namespace paddle { namespace paddle {
namespace tests { namespace tests {
......
...@@ -110,7 +110,7 @@ TEST(PtenUtils, VarToPtTensor) { ...@@ -110,7 +110,7 @@ TEST(PtenUtils, VarToPtTensor) {
pten::Backend expect_backend = pten::Backend::CPU; pten::Backend expect_backend = pten::Backend::CPU;
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
expect_backend = pten::Backend::CUDA; expect_backend = pten::Backend::GPU;
#endif #endif
auto tensor_def = pten::TensorArgDef( auto tensor_def = pten::TensorArgDef(
expect_backend, pten::DataLayout::NCHW, pten::DataType::INT32); expect_backend, pten::DataLayout::NCHW, pten::DataType::INT32);
......
...@@ -64,7 +64,7 @@ TEST(API, copy_to) { ...@@ -64,7 +64,7 @@ TEST(API, copy_to) {
// 2. test API // 2. test API
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
auto tmp = paddle::experimental::copy_to(x, pten::Backend::CUDA, false); auto tmp = paddle::experimental::copy_to(x, pten::Backend::GPU, false);
auto out = paddle::experimental::copy_to(tmp, pten::Backend::CPU, true); auto out = paddle::experimental::copy_to(tmp, pten::Backend::CPU, true);
#else #else
auto out = paddle::experimental::copy_to(x, pten::Backend::CPU, false); auto out = paddle::experimental::copy_to(x, pten::Backend::CPU, false);
...@@ -80,7 +80,7 @@ TEST(Tensor, copy_to) { ...@@ -80,7 +80,7 @@ TEST(Tensor, copy_to) {
// 2. test API // 2. test API
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
auto tmp = x.copy_to(pten::Backend::CUDA, false); auto tmp = x.copy_to(pten::Backend::GPU, false);
auto out = tmp.copy_to(pten::Backend::CPU, true); auto out = tmp.copy_to(pten::Backend::CPU, true);
#else #else
auto out = x.copy_to(pten::Backend::CPU, false); auto out = x.copy_to(pten::Backend::CPU, false);
......
...@@ -29,8 +29,8 @@ TEST(Backend, OStream) { ...@@ -29,8 +29,8 @@ TEST(Backend, OStream) {
oss << pten::Backend::CPU; oss << pten::Backend::CPU;
EXPECT_EQ(oss.str(), "CPU"); EXPECT_EQ(oss.str(), "CPU");
oss.str(""); oss.str("");
oss << pten::Backend::CUDA; oss << pten::Backend::GPU;
EXPECT_EQ(oss.str(), "CUDA"); EXPECT_EQ(oss.str(), "GPU");
oss.str(""); oss.str("");
oss << pten::Backend::XPU; oss << pten::Backend::XPU;
EXPECT_EQ(oss.str(), "XPU"); EXPECT_EQ(oss.str(), "XPU");
......