Unverified commit dc7597e3, authored by Chen Weihang, committed by GitHub

[PTen] Rename cuda dir and context to gpu (#38296)

* rename cuda to gpu

* revert CMake change

* resolve conflict

* rename other cuda to gpu

* polish details
Parent aff43684
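In short, the commit replaces every CUDA-specific name in the PTen layer with a device-neutral GPU name while keeping the underlying fluid CUDA types: the directory paddle/pten/kernels/cuda becomes paddle/pten/kernels/gpu, CUDAContext becomes GPUContext (still an alias of paddle::platform::CUDADeviceContext), Backend::CUDA becomes Backend::GPU, and the *_cuda CMake targets become *_gpu. A condensed, illustrative sketch of the pattern, assembled from the hunks below (not a self-contained translation unit):

// Context alias: the fluid device context is now exposed under a GPU name.
using GPUContext = paddle::platform::CUDADeviceContext;

// Kernel declarations and registrations switch from the CUDA backend key
// to the GPU backend key, e.g. for the sign kernel:
template <typename T>
void Sign(const GPUContext& dev_ctx, const DenseTensor& x, DenseTensor* out);

PT_REGISTER_KERNEL(sign, GPU, ALL_LAYOUT, pten::Sign, float, double, float16) {}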
......@@ -21,7 +21,7 @@
#include "paddle/pten/api/lib/utils/tensor_utils.h"
#include "paddle/pten/include/core.h"
#include "paddle/pten/kernels/cpu/conj_kernel.h"
#include "paddle/pten/kernels/cuda/conj_kernel.h"
#include "paddle/pten/kernels/gpu/conj_kernel.h"
namespace paddle {
namespace operators {
......
......@@ -27,7 +27,7 @@ set(PTEN_DEPS convert_utils dense_tensor pten_context kernel_factory kernel_cont
set(PTEN_DEPS ${PTEN_DEPS} math_cpu linalg_cpu manipulation_cpu conj_kernel_cpu scale_kernel_cpu full_kernel_cpu)
set(PTEN_DEPS ${PTEN_DEPS} nary unary binary)
if(WITH_GPU OR WITH_ROCM)
set(PTEN_DEPS ${PTEN_DEPS} math_cuda linalg_cuda manipulation_cuda conj_kernel_cuda scale_kernel_cuda full_kernel_cuda)
set(PTEN_DEPS ${PTEN_DEPS} math_gpu linalg_gpu manipulation_gpu conj_kernel_gpu scale_kernel_gpu full_kernel_gpu)
endif()
if(WITH_XPU)
set(PTEN_DEPS ${PTEN_DEPS} manipulation_xpu)
......
......@@ -464,7 +464,7 @@ class PADDLE_API Tensor final {
* unified to Tensor, but Tensor itself is heterogeneous.
*
* Tensor can generally be represented by void* and size_t, place.
* This is suitable for most scenarios including CPU, CUDA, HIP, CPU, etc.,
* This is suitable for most scenarios including CPU, GPU, HIP, CPU, etc.,
* but there are a few cases where this definition cannot be described,
* such as the Tensor representation in third-party lib such as Metal,
* OpenCL, etc., as well as some special Tensor implementations, including
......
......@@ -56,7 +56,7 @@ Backend ConvertExtPlaceToBackend(PlaceType p) {
return Backend::CPU;
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
case PlaceType::kGPU:
return Backend::CUDA;
return Backend::GPU;
#endif
default:
PADDLE_THROW(
......
......@@ -28,12 +28,12 @@ PT_DECLARE_KERNEL(scale, CPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(conj, CPU, ALL_LAYOUT);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PT_DECLARE_KERNEL(full_like, CUDA, ALL_LAYOUT);
PT_DECLARE_KERNEL(dot, CUDA, ALL_LAYOUT);
PT_DECLARE_KERNEL(flatten, CUDA, ALL_LAYOUT);
PT_DECLARE_KERNEL(sign, CUDA, ALL_LAYOUT);
PT_DECLARE_KERNEL(scale, CUDA, ALL_LAYOUT);
PT_DECLARE_KERNEL(conj, CUDA, ALL_LAYOUT);
PT_DECLARE_KERNEL(full_like, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(dot, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(flatten, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(sign, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(scale, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(conj, GPU, ALL_LAYOUT);
#endif
#ifdef PADDLE_WITH_XPU
......
......@@ -28,7 +28,7 @@ limitations under the License. */
PT_DECLARE_KERNEL(copy, CPU, ALL_LAYOUT);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PT_DECLARE_KERNEL(copy, CUDA, ALL_LAYOUT);
PT_DECLARE_KERNEL(copy, GPU, ALL_LAYOUT);
#endif
#ifdef PADDLE_WITH_XPU
......
......@@ -21,7 +21,7 @@ limitations under the License. */
// path replacement after implementing pten DeviceContext
#include "paddle/pten/backends/cpu/cpu_context.h"
#include "paddle/pten/backends/cuda/cuda_context.h"
#include "paddle/pten/backends/gpu/gpu_context.h"
#include "paddle/pten/backends/npu/npu_context.h"
#include "paddle/pten/backends/xpu/xpu_context.h"
......
......@@ -20,7 +20,7 @@ limitations under the License. */
#include "paddle/fluid/platform/device_context.h"
namespace pten {
using CUDAContext = paddle::platform::CUDADeviceContext;
using GPUContext = paddle::platform::CUDADeviceContext;
} // namespace pten
#endif
......@@ -43,7 +43,7 @@ enum class Backend : uint8_t {
CPU,
// various acceleration devices' backends
CUDA,
GPU,
XPU, // XPU currently does not exist at the same time as CUDA
NPU, // NPU currently does not exist at the same time as CUDA
......@@ -99,8 +99,8 @@ inline std::ostream& operator<<(std::ostream& os, Backend backend) {
case Backend::CPU:
os << "CPU";
break;
case Backend::CUDA:
os << "CUDA";
case Backend::GPU:
os << "GPU";
break;
case Backend::XPU:
os << "XPU";
......
......@@ -23,7 +23,7 @@ Backend TransToPtenBackend(const paddle::platform::Place& place) {
if (paddle::platform::is_cpu_place(place)) {
return Backend::CPU;
} else if (paddle::platform::is_gpu_place(place)) {
return Backend::CUDA;
return Backend::GPU;
} else {
return Backend::UNDEFINED;
}
......@@ -84,7 +84,7 @@ paddle::platform::Place TransToFluidPlace(const Backend& backend) {
case pten::Backend::CPU:
return paddle::platform::CPUPlace();
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
case pten::Backend::CUDA:
case pten::Backend::GPU:
return paddle::platform::CUDAPlace(
paddle::platform::GetCurrentDeviceId());
#endif
......
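The two conversion helpers in this hunk now round-trip through the renamed backend. A minimal illustration, using only the functions shown above and assuming a CUDA/HIP build (the device id 0 is chosen arbitrarily for the example):

// Place -> Backend: any GPU place now maps to Backend::GPU.
pten::Backend b = pten::TransToPtenBackend(paddle::platform::CUDAPlace(0));  // Backend::GPU

// Backend -> Place: Backend::GPU resolves to a CUDAPlace on the current device.
paddle::platform::Place p = pten::TransToFluidPlace(pten::Backend::GPU);  // CUDAPlace(GetCurrentDeviceId())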
......@@ -57,7 +57,7 @@ struct KernelArgsParseFunctor<Return_ (*)(Args_...)> {
if (arg_type == std::type_index(typeid(const CPUContext&))
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
||
arg_type == std::type_index(typeid(const CUDAContext&))) {
arg_type == std::type_index(typeid(const GPUContext&))) {
#else
) {
#endif
......
......@@ -181,7 +181,7 @@ struct KernelImpl<Return (*)(DevCtx, Args...), kernel_fn> {
PT_SPECIALIZE_KernelCallHelper_FOR_DEVICE_CONTEXT(CPUContext);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PT_SPECIALIZE_KernelCallHelper_FOR_DEVICE_CONTEXT(CUDAContext);
PT_SPECIALIZE_KernelCallHelper_FOR_DEVICE_CONTEXT(GPUContext);
#endif
#ifdef PADDLE_WITH_ASCEND_CL
PT_SPECIALIZE_KernelCallHelper_FOR_DEVICE_CONTEXT(NPUContext);
......
......@@ -18,7 +18,7 @@
#include "paddle/pten/api/lib/utils/storage.h"
#include "paddle/pten/include/infermeta.h"
#include "paddle/pten/kernels/cpu/linalg.h"
#include "paddle/pten/kernels/cuda/linalg.h"
#include "paddle/pten/kernels/gpu/linalg.h"
namespace pten {
......
......@@ -18,7 +18,7 @@
#include "paddle/pten/api/lib/utils/storage.h"
#include "paddle/pten/include/infermeta.h"
#include "paddle/pten/kernels/cpu/manipulation.h"
#include "paddle/pten/kernels/cuda/manipulation.h"
#include "paddle/pten/kernels/gpu/manipulation.h"
#include "paddle/pten/kernels/xpu/manipulation.h"
namespace pten {
......
......@@ -19,8 +19,8 @@ limitations under the License. */
#include "paddle/pten/include/infermeta.h"
#include "paddle/pten/kernels/cpu/conj_kernel.h"
#include "paddle/pten/kernels/cpu/math.h"
#include "paddle/pten/kernels/cuda/conj_kernel.h"
#include "paddle/pten/kernels/cuda/math.h"
#include "paddle/pten/kernels/gpu/conj_kernel.h"
#include "paddle/pten/kernels/gpu/math.h"
#include "paddle/pten/kernels/scale_kernel.h"
namespace pten {
......
......@@ -5,8 +5,7 @@ add_subdirectory(hybird)
add_subdirectory(cpu)
if(WITH_GPU OR WITH_ROCM)
# NOTE(chenweihang): if hip can split from cuda impl, we should add hip dir
add_subdirectory(cuda)
add_subdirectory(gpu)
endif()
if(WITH_MKLDNN)
# mkldnn will be deprecated and use the new name dnnl
......
if(WITH_GPU)
nv_library(math_cuda SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory pten_transpose_cuda)
nv_library(linalg_cuda SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory)
nv_library(utils_cuda SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
nv_library(manipulation_cuda SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_cuda unary)
nv_library(scale_kernel_cuda SRCS scale_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
nv_library(full_kernel_cuda SRCS full_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
nv_library(conj_kernel_cuda SRCS conj_kernel.cu DEPS dense_tensor kernel_context kernel_factory)
elseif(WITH_ROCM)
hip_library(math_cuda SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory pten_transpose_cuda)
hip_library(linalg_cuda SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory)
hip_library(utils_cuda SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
hip_library(manipulation_cuda SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_cuda unary)
hip_library(scale_kernel_cuda SRCS scale_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
hip_library(full_kernel_cuda SRCS full_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
hip_library(conj_kernel_cuda SRCS conj_kernel.cu DEPS dense_tensor kernel_context kernel_factory)
endif()
if(WITH_GPU)
nv_library(math_gpu SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory pten_transpose_gpu)
nv_library(linalg_gpu SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory)
nv_library(utils_gpu SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
nv_library(manipulation_gpu SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_gpu unary)
nv_library(scale_kernel_gpu SRCS scale_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
nv_library(full_kernel_gpu SRCS full_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
nv_library(conj_kernel_gpu SRCS conj_kernel.cu DEPS dense_tensor kernel_context kernel_factory)
elseif(WITH_ROCM)
hip_library(math_gpu SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory pten_transpose_gpu)
hip_library(linalg_gpu SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory)
hip_library(utils_gpu SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
hip_library(manipulation_gpu SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_gpu unary)
hip_library(scale_kernel_gpu SRCS scale_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
hip_library(full_kernel_gpu SRCS full_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
hip_library(conj_kernel_gpu SRCS conj_kernel.cu DEPS dense_tensor kernel_context kernel_factory)
endif()
......@@ -12,23 +12,23 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/pten/kernels/cuda/conj_kernel.h"
#include "paddle/pten/kernels/gpu/conj_kernel.h"
#include "paddle/pten/backends/cuda/cuda_context.h"
#include "paddle/pten/backends/gpu/gpu_context.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/kernels/hybird/math/conj_impl.h"
namespace pten {
template <typename T>
void Conj(const CUDAContext& dev_ctx, const DenseTensor& x, DenseTensor* out) {
ConjImpl<T, CUDAContext>(dev_ctx, x, out);
void Conj(const GPUContext& dev_ctx, const DenseTensor& x, DenseTensor* out) {
ConjImpl<T, GPUContext>(dev_ctx, x, out);
}
} // namespace pten
PT_REGISTER_KERNEL(conj,
CUDA,
GPU,
ALL_LAYOUT,
pten::Conj,
paddle::platform::complex<float>,
......
......@@ -17,13 +17,13 @@ limitations under the License. */
// CUDA and HIP use same api
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/pten/backends/cuda/cuda_context.h"
#include "paddle/pten/backends/gpu/gpu_context.h"
#include "paddle/pten/core/dense_tensor.h"
namespace pten {
template <typename T>
void Conj(const CUDAContext& dev_ctx, const DenseTensor& x, DenseTensor* out);
void Conj(const GPUContext& dev_ctx, const DenseTensor& x, DenseTensor* out);
} // namespace pten
......
......@@ -14,12 +14,12 @@ limitations under the License. */
#include "paddle/pten/kernels/full_kernel.h"
#include "paddle/pten/backends/cuda/cuda_context.h"
#include "paddle/pten/backends/gpu/gpu_context.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/kernels/impl/full_kernel_impl.h"
PT_REGISTER_CTX_KERNEL(full,
CUDA,
GPU,
ALL_LAYOUT,
pten::Full,
float,
......@@ -34,7 +34,7 @@ PT_REGISTER_CTX_KERNEL(full,
paddle::platform::complex<double>) {}
PT_REGISTER_CTX_KERNEL(full_like,
CUDA,
GPU,
ALL_LAYOUT,
pten::FullLike,
float,
......
......@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/pten/kernels/cuda/linalg.h"
#include "paddle/pten/kernels/gpu/linalg.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/kernels/hybird/eigen/dot.h"
......@@ -24,15 +24,15 @@
namespace pten {
template <typename T>
void Dot(const CUDAContext& dev_ctx,
void Dot(const GPUContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
eigen::Dot<CUDAContext, T>(dev_ctx, x, y, out);
eigen::Dot<GPUContext, T>(dev_ctx, x, y, out);
}
template <typename T>
void Matmul(const CUDAContext& dev_ctx,
void Matmul(const GPUContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
bool transpose_x,
......@@ -48,7 +48,7 @@ void Matmul(const CUDAContext& dev_ctx,
paddle::platform::errors::InvalidArgument(
"The Input(Y) dims size must not be equal 0,"
" but reviced dims size is 0. "));
math::MatMulFunction<CUDAContext, T>(
math::MatMulFunction<GPUContext, T>(
dev_ctx, x, y, out, transpose_x, transpose_y);
}
......@@ -59,7 +59,7 @@ using complex64 = ::paddle::platform::complex<float>;
using complex128 = ::paddle::platform::complex<double>;
PT_REGISTER_KERNEL(dot,
CUDA,
GPU,
ALL_LAYOUT,
pten::Dot,
float,
......@@ -70,7 +70,7 @@ PT_REGISTER_KERNEL(dot,
complex128) {}
PT_REGISTER_KERNEL(matmul,
CUDA,
GPU,
ALL_LAYOUT,
pten::Matmul,
float,
......
......@@ -17,19 +17,19 @@
// CUDA and HIP use same api
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/pten/backends/cuda/cuda_context.h"
#include "paddle/pten/backends/gpu/gpu_context.h"
#include "paddle/pten/core/dense_tensor.h"
namespace pten {
template <typename T>
void Dot(const CUDAContext& dev_ctx,
void Dot(const GPUContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out);
template <typename T>
void Matmul(const CUDAContext& dev_ctx,
void Matmul(const GPUContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
bool transpose_x,
......
......@@ -14,15 +14,15 @@
#include "paddle/pten/api/ext/dispatch.h"
#include "paddle/pten/infermeta/unary.h"
#include "paddle/pten/kernels/cuda/manipulation.h"
#include "paddle/pten/kernels/cuda/utils.h"
#include "paddle/pten/kernels/gpu/manipulation.h"
#include "paddle/pten/kernels/gpu/utils.h"
#include "paddle/pten/kernels/hybird/cuda/cast_kernel_impl.h"
#include "paddle/pten/kernels/hybird/general/manipulation.h"
namespace pten {
template <typename T>
void Flatten(const CUDAContext& dev_ctx,
void Flatten(const GPUContext& dev_ctx,
const DenseTensor& x,
int start_axis,
int stop_axis,
......@@ -36,7 +36,7 @@ void Flatten(const CUDAContext& dev_ctx,
// Output Tensor,
// is there a more flexible way to deal with this case?
template <typename T>
void FlattenWithXShape(const CUDAContext& dev_ctx,
void FlattenWithXShape(const GPUContext& dev_ctx,
const DenseTensor& x,
int start_axis,
int stop_axis,
......@@ -46,7 +46,7 @@ void FlattenWithXShape(const CUDAContext& dev_ctx,
general::SetXShape(x, xshape);
}
void Reshape(const CUDAContext& dev_ctx,
void Reshape(const GPUContext& dev_ctx,
const DenseTensor& x,
const ScalarArray& shape,
DenseTensor* out) {
......@@ -60,7 +60,7 @@ void Reshape(const CUDAContext& dev_ctx,
out->ResetLoD(x.lod());
}
void ReshapeWithXShape(const CUDAContext& dev_ctx,
void ReshapeWithXShape(const GPUContext& dev_ctx,
const DenseTensor& x,
const ScalarArray& shape,
DenseTensor* xshape,
......@@ -70,7 +70,7 @@ void ReshapeWithXShape(const CUDAContext& dev_ctx,
}
template <typename T>
void Cast(const CUDAContext& dev_ctx,
void Cast(const GPUContext& dev_ctx,
const DenseTensor& x,
DataType out_dtype,
DataType in_dtype,
......@@ -85,7 +85,7 @@ void Cast(const CUDAContext& dev_ctx,
using float16 = paddle::platform::float16;
PT_REGISTER_KERNEL(flatten,
CUDA,
GPU,
ALL_LAYOUT,
pten::Flatten,
float,
......@@ -96,7 +96,7 @@ PT_REGISTER_KERNEL(flatten,
int,
int64_t) {}
PT_REGISTER_KERNEL(flatten_with_xshape,
CUDA,
GPU,
ALL_LAYOUT,
pten::FlattenWithXShape,
float,
......@@ -108,7 +108,7 @@ PT_REGISTER_KERNEL(flatten_with_xshape,
#define PTEN_REGISTER_CAST_CUDA_BASE_TYPE(op_name, ...) \
PT_REGISTER_KERNEL(cast, \
CUDA, \
GPU, \
ALL_LAYOUT, \
pten::Cast, \
float, \
......@@ -132,6 +132,6 @@ PTEN_REGISTER_CAST_CUDA_BASE_TYPE(cast, paddle::platform::bfloat16)
PTEN_REGISTER_CAST_CUDA_BASE_TYPE(cast)
#endif
PT_REGISTER_NO_TEMPLATE_KERNEL(reshape, CUDA, ANY, pten::Reshape, ALL_DTYPE) {}
PT_REGISTER_NO_TEMPLATE_KERNEL(reshape, GPU, ANY, pten::Reshape, ALL_DTYPE) {}
PT_REGISTER_NO_TEMPLATE_KERNEL(
reshape_with_xshape, CUDA, ANY, pten::ReshapeWithXShape, ALL_DTYPE) {}
reshape_with_xshape, GPU, ANY, pten::ReshapeWithXShape, ALL_DTYPE) {}
......@@ -17,7 +17,7 @@
// CUDA and HIP use same api
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/pten/backends/cuda/cuda_context.h"
#include "paddle/pten/backends/gpu/gpu_context.h"
#include "paddle/pten/common/scalar_array.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/kernel_registry.h"
......@@ -25,25 +25,25 @@
namespace pten {
template <typename T>
void Flatten(const CUDAContext& dev_ctx,
void Flatten(const GPUContext& dev_ctx,
const DenseTensor& x,
int start_axis,
int stop_axis,
DenseTensor* out);
template <typename T>
void Cast(const CUDAContext& dev_ctx,
void Cast(const GPUContext& dev_ctx,
const DenseTensor& x,
DataType out_dtype,
DataType in_dtype,
DenseTensor* out);
void Reshape(const CUDAContext& dev_ctx,
void Reshape(const GPUContext& dev_ctx,
const DenseTensor& x,
const ScalarArray& shape,
DenseTensor* out);
void ReshapeWithXShape(const CUDAContext& dev_ctx,
void ReshapeWithXShape(const GPUContext& dev_ctx,
const DenseTensor& x,
const ScalarArray& shape,
DenseTensor* xshape,
......
......@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/pten/kernels/cuda/math.h"
#include "paddle/pten/kernels/gpu/math.h"
#include "paddle/fluid/operators/reduce_ops/reduce_functor_op.h"
#include "paddle/pten/kernels/hybird/cuda/elementwise/elementwise.h"
......@@ -58,12 +58,12 @@ struct DivideFunctor {
*/
template <typename T>
void Sign(const CUDAContext& dev_ctx, const DenseTensor& x, DenseTensor* out) {
eigen::Sign<CUDAContext, T>(dev_ctx, x, out);
void Sign(const GPUContext& dev_ctx, const DenseTensor& x, DenseTensor* out) {
eigen::Sign<GPUContext, T>(dev_ctx, x, out);
}
template <typename T>
void Mean(const CUDAContext& dev_ctx,
void Mean(const GPUContext& dev_ctx,
const DenseTensor& x,
const std::vector<int64_t>& dims,
bool keep_dim,
......@@ -84,7 +84,7 @@ DEFINE_CUDA_ELEMENTWISE_OP(Multiply)
DEFINE_CUDA_ELEMENTWISE_OP(Divide)
template <typename T>
void Sum(const CUDAContext& dev_ctx,
void Sum(const GPUContext& dev_ctx,
const DenseTensor& x,
const std::vector<int64_t>& dims,
bool keep_dim,
......@@ -101,11 +101,10 @@ using float16 = paddle::platform::float16;
using complex64 = ::paddle::platform::complex<float>;
using complex128 = ::paddle::platform::complex<double>;
PT_REGISTER_KERNEL(sign, CUDA, ALL_LAYOUT, pten::Sign, float, double, float16) {
}
PT_REGISTER_KERNEL(mean, CUDA, ALL_LAYOUT, pten::Mean, float, double, bool) {}
PT_REGISTER_KERNEL(sign, GPU, ALL_LAYOUT, pten::Sign, float, double, float16) {}
PT_REGISTER_KERNEL(mean, GPU, ALL_LAYOUT, pten::Mean, float, double, bool) {}
PT_REGISTER_KERNEL(add,
CUDA,
GPU,
ALL_LAYOUT,
pten::Add,
float,
......@@ -116,7 +115,7 @@ PT_REGISTER_KERNEL(add,
complex64,
complex128) {}
PT_REGISTER_KERNEL(subtract,
CUDA,
GPU,
ALL_LAYOUT,
pten::Subtract,
float,
......@@ -127,7 +126,7 @@ PT_REGISTER_KERNEL(subtract,
complex64,
complex128) {}
PT_REGISTER_KERNEL(divide,
CUDA,
GPU,
ALL_LAYOUT,
pten::Divide,
float,
......@@ -138,7 +137,7 @@ PT_REGISTER_KERNEL(divide,
complex64,
complex128) {}
PT_REGISTER_KERNEL(multiply,
CUDA,
GPU,
ALL_LAYOUT,
pten::Multiply,
float,
......@@ -150,7 +149,7 @@ PT_REGISTER_KERNEL(multiply,
complex64,
complex128) {}
PT_REGISTER_KERNEL(sum,
CUDA,
GPU,
ALL_LAYOUT,
pten::Sum,
bool,
......
......@@ -17,17 +17,17 @@ limitations under the License. */
// CUDA and HIP use same api
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/pten/backends/cuda/cuda_context.h"
#include "paddle/pten/backends/gpu/gpu_context.h"
#include "paddle/pten/common/scalar.h"
#include "paddle/pten/core/dense_tensor.h"
namespace pten {
template <typename T>
void Sign(const CUDAContext& dev_ctx, const DenseTensor& x, DenseTensor* out);
void Sign(const GPUContext& dev_ctx, const DenseTensor& x, DenseTensor* out);
template <typename T>
void Mean(const CUDAContext& dev_ctx,
void Mean(const GPUContext& dev_ctx,
const DenseTensor& x,
const std::vector<int64_t>& dims,
bool keep_dim,
......@@ -35,35 +35,35 @@ void Mean(const CUDAContext& dev_ctx,
DenseTensor* out);
template <typename T>
void Add(const CUDAContext& dev_ctx,
void Add(const GPUContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
template <typename T>
void Subtract(const CUDAContext& dev_ctx,
void Subtract(const GPUContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
template <typename T>
void Divide(const CUDAContext& dev_ctx,
void Divide(const GPUContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
template <typename T>
void Multiply(const CUDAContext& dev_ctx,
void Multiply(const GPUContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
template <typename T>
void Sum(const CUDAContext& dev_ctx,
void Sum(const GPUContext& dev_ctx,
const DenseTensor& x,
const std::vector<int64_t>& dims,
bool keep_dim,
......@@ -75,7 +75,7 @@ void Sum(const CUDAContext& dev_ctx,
#define DEFINE_CUDA_ELEMENTWISE_OP(name) \
template <typename T> \
void name(const CUDAContext& dev_ctx, \
void name(const GPUContext& dev_ctx, \
const DenseTensor& x, \
const DenseTensor& y, \
int axis, \
......
......@@ -14,7 +14,7 @@ limitations under the License. */
#include "paddle/pten/kernels/scale_kernel.h"
#include "paddle/pten/backends/cuda/cuda_context.h"
#include "paddle/pten/backends/gpu/gpu_context.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/kernels/impl/scale_kernel_impl.h"
......@@ -22,7 +22,7 @@ limitations under the License. */
#include "paddle/fluid/platform/float16.h"
PT_REGISTER_CTX_KERNEL(scale,
CUDA,
GPU,
ALL_LAYOUT,
pten::Scale,
float,
......
......@@ -16,11 +16,11 @@ limitations under the License. */
#include "paddle/pten/common/data_type.h"
#include "paddle/pten/core/convert_utils.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/kernels/cuda/utils.h"
#include "paddle/pten/kernels/gpu/utils.h"
namespace pten {
void Copy(const CUDAContext& dev_ctx,
void Copy(const GPUContext& dev_ctx,
const DenseTensor& src,
bool blocking,
DenseTensor* dst) {
......@@ -234,4 +234,4 @@ void Copy(const CUDAContext& dev_ctx,
}
} // namespace pten
PT_REGISTER_NO_TEMPLATE_KERNEL(copy, CUDA, ALL_LAYOUT, pten::Copy, ALL_DTYPE) {}
PT_REGISTER_NO_TEMPLATE_KERNEL(copy, GPU, ALL_LAYOUT, pten::Copy, ALL_DTYPE) {}
......@@ -17,13 +17,13 @@ limitations under the License. */
// CUDA and HIP use same api
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/pten/backends/cuda/cuda_context.h"
#include "paddle/pten/backends/gpu/gpu_context.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/kernel_registry.h"
namespace pten {
void Copy(const CUDAContext& dev_ctx,
void Copy(const GPUContext& dev_ctx,
const DenseTensor& src,
bool blocking,
DenseTensor* dst);
......
......@@ -4,7 +4,7 @@ add_subdirectory(general)
cc_library(pten_transpose_cpu SRCS transpose.cc DEPS dense_tensor pten_context)
if(WITH_GPU)
nv_library(pten_transpose_cuda SRCS transpose.cu DEPS dense_tensor malloc pten_context)
nv_library(pten_transpose_gpu SRCS transpose.cu DEPS dense_tensor malloc pten_context)
elseif(WITH_ROCM)
hip_library(pten_transpose_cuda SRCS transpose.cu DEPS dense_tensor malloc pten_context)
hip_library(pten_transpose_gpu SRCS transpose.cu DEPS dense_tensor malloc pten_context)
endif()
......@@ -15,7 +15,7 @@
#pragma once
#include "paddle/fluid/platform/device/gpu/gpu_helper.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/pten/backends/cuda/cuda_context.h"
#include "paddle/pten/backends/gpu/gpu_context.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/fluid/platform/aligned_vector.h"
......@@ -50,7 +50,7 @@ __global__ void CastCUDAKernel(const InT* in, const int64_t N, OutT* out) {
}
template <typename InT, typename OutT>
void CastCUDAKernelImpl(const CUDAContext& dev_ctx,
void CastCUDAKernelImpl(const GPUContext& dev_ctx,
const DenseTensor& x,
DenseTensor* out) {
auto* in_data = x.data<InT>();
......
......@@ -17,7 +17,7 @@
// CUDA and HIP use same api
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/pten/backends/cuda/cuda_context.h"
#include "paddle/pten/backends/gpu/gpu_context.h"
#include "paddle/pten/common/scalar.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h"
......@@ -49,7 +49,7 @@ static inline std::vector<int64_t> GetReduceDim(
}
template <typename T, template <typename, typename> class ReduceFunctor>
void Reduce(const CUDAContext& dev_ctx,
void Reduce(const GPUContext& dev_ctx,
const DenseTensor& x,
bool reduce_all,
const std::vector<int64_t>& dims,
......
......@@ -42,7 +42,7 @@ namespace cub = hipcub;
#include "paddle/fluid/operators/kernel_primitives/compute_primitives.h"
#include "paddle/pten/api/ext/dispatch.h"
#include "paddle/pten/api/include/tensor.h"
#include "paddle/pten/kernels/cuda/utils.h"
#include "paddle/pten/kernels/gpu/utils.h"
#include "paddle/pten/kernels/hybird/math/cast_func.h"
// Reduce split or not, Whether to use ReduceHigherDim
......@@ -820,7 +820,7 @@ void TensorReduceFunctorImpl(const pten::DenseTensor& x,
y->Resize(out_dims);
} else {
PD_VISIT_ALL_TYPES(y->dtype(), "CastKernelImpl", ([&] {
pten::math::CastKernelImpl<CUDAContext, Tx, data_t>(
pten::math::CastKernelImpl<GPUContext, Tx, data_t>(
*dev_ctx, x, y);
}));
}
......
......@@ -132,11 +132,11 @@ class MidWiseTransformIterator<T, CPUContext>
#if defined(__NVCC__) || defined(__HIPCC__)
template <typename T>
class RowwiseTransformIterator<T, CUDAContext>
: public thrust::iterator_adaptor<RowwiseTransformIterator<T, CUDAContext>,
class RowwiseTransformIterator<T, GPUContext>
: public thrust::iterator_adaptor<RowwiseTransformIterator<T, GPUContext>,
const T *> {
public:
typedef thrust::iterator_adaptor<RowwiseTransformIterator<T, CUDAContext>,
typedef thrust::iterator_adaptor<RowwiseTransformIterator<T, GPUContext>,
const T *>
super_t;
HOSTDEVICE RowwiseTransformIterator(const T *x, int n)
......@@ -152,11 +152,11 @@ class RowwiseTransformIterator<T, CUDAContext>
};
template <typename T>
class MidWiseTransformIterator<T, CUDAContext>
: public thrust::iterator_adaptor<MidWiseTransformIterator<T, CUDAContext>,
class MidWiseTransformIterator<T, GPUContext>
: public thrust::iterator_adaptor<MidWiseTransformIterator<T, GPUContext>,
const T *> {
public:
typedef thrust::iterator_adaptor<MidWiseTransformIterator<T, CUDAContext>,
typedef thrust::iterator_adaptor<MidWiseTransformIterator<T, GPUContext>,
const T *>
super_t;
HOSTDEVICE MidWiseTransformIterator(const T *x, int n, int post)
......
......@@ -14,7 +14,7 @@
#include "paddle/fluid/framework/ddim.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/pten/backends/cuda/cuda_context.h"
#include "paddle/pten/backends/gpu/gpu_context.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/kernels/hybird/math/cast_func.h"
#include "paddle/pten/kernels/hybird/transpose.h"
......@@ -52,9 +52,9 @@ __global__ void TransposeNormalKernel(const T* in_ptr,
}
template <typename T>
struct TransposeNormal<CUDAContext, T> {
struct TransposeNormal<GPUContext, T> {
// for dims >= 7 situation
void operator()(const CUDAContext& dev_ctx,
void operator()(const GPUContext& dev_ctx,
const pten::DenseTensor& in,
pten::DenseTensor* out,
const std::vector<int64_t>& axis) {
......@@ -106,7 +106,7 @@ struct TransposeNormal<CUDAContext, T> {
// define transpose normal
#define DEFINE_GPU_TRANS_NORMAL(TYPE) \
template struct TransposeNormal<CUDAContext, TYPE>
template struct TransposeNormal<GPUContext, TYPE>
DEFINE_GPU_TRANS_NORMAL(bool);
DEFINE_GPU_TRANS_NORMAL(int8_t);
......
......@@ -142,13 +142,13 @@ static void ScaleCPU(DataType kernel_dtype,
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
static void ScaleCUDA(DataType kernel_dtype,
const pten::CUDAContext& dev_ctx,
const pten::DenseTensor& x,
const Scalar& scale,
float bias,
bool bias_after_scale,
pten::DenseTensor* dense_out) {
static void ScaleGPU(DataType kernel_dtype,
const pten::GPUContext& dev_ctx,
const pten::DenseTensor& x,
const Scalar& scale,
float bias,
bool bias_after_scale,
pten::DenseTensor* dense_out) {
switch (kernel_dtype) {
case pten::DataType::FLOAT64: {
pten::Scale<double>(
......@@ -255,14 +255,14 @@ Tensor scale_switch_case(const Tensor& x,
dense_out.get());
break;
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
case Backend::CUDA:
ScaleCUDA(kernel_data_type,
static_cast<const pten::CUDAContext&>(*dev_ctx),
*dense_x,
scale,
bias,
bias_after_scale,
dense_out.get());
case Backend::GPU:
ScaleGPU(kernel_data_type,
static_cast<const pten::GPUContext&>(*dev_ctx),
*dense_x,
scale,
bias,
bias_after_scale,
dense_out.get());
break;
#endif
default:
......
......@@ -20,7 +20,7 @@ limitations under the License. */
#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/kernels/cuda/utils.h"
#include "paddle/pten/kernels/gpu/utils.h"
namespace paddle {
namespace tests {
......
......@@ -110,7 +110,7 @@ TEST(PtenUtils, VarToPtTensor) {
pten::Backend expect_backend = pten::Backend::CPU;
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
expect_backend = pten::Backend::CUDA;
expect_backend = pten::Backend::GPU;
#endif
auto tensor_def = pten::TensorArgDef(
expect_backend, pten::DataLayout::NCHW, pten::DataType::INT32);
......
......@@ -64,7 +64,7 @@ TEST(API, copy_to) {
// 2. test API
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
auto tmp = paddle::experimental::copy_to(x, pten::Backend::CUDA, false);
auto tmp = paddle::experimental::copy_to(x, pten::Backend::GPU, false);
auto out = paddle::experimental::copy_to(tmp, pten::Backend::CPU, true);
#else
auto out = paddle::experimental::copy_to(x, pten::Backend::CPU, false);
......@@ -80,7 +80,7 @@ TEST(Tensor, copy_to) {
// 2. test API
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
auto tmp = x.copy_to(pten::Backend::CUDA, false);
auto tmp = x.copy_to(pten::Backend::GPU, false);
auto out = tmp.copy_to(pten::Backend::CPU, true);
#else
auto out = x.copy_to(pten::Backend::CPU, false);
......
......@@ -29,8 +29,8 @@ TEST(Backend, OStream) {
oss << pten::Backend::CPU;
EXPECT_EQ(oss.str(), "CPU");
oss.str("");
oss << pten::Backend::CUDA;
EXPECT_EQ(oss.str(), "CUDA");
oss << pten::Backend::GPU;
EXPECT_EQ(oss.str(), "GPU");
oss.str("");
oss << pten::Backend::XPU;
EXPECT_EQ(oss.str(), "XPU");
......