diff --git a/paddle/fluid/extension/include/ext_place.h b/paddle/fluid/extension/include/ext_place.h
index 91d4f41c2135145062fb98b2df77b14ec4e1a32b..c9ed40a382417f6a85c4213f1d5d965797863c92 100644
--- a/paddle/fluid/extension/include/ext_place.h
+++ b/paddle/fluid/extension/include/ext_place.h
@@ -17,6 +17,6 @@ limitations under the License. */
 namespace paddle {

 // TODO(yangjiabin): Add other place support in next PR
-enum class PlaceType { kUNK = -1, kCPU, kGPU };
+enum class PlaceType { kUNK = -1, kCPU, kGPU, kHIP };

 }  // namespace paddle
diff --git a/paddle/fluid/extension/include/ext_tensor.h b/paddle/fluid/extension/include/ext_tensor.h
index fa91490e6cd8af7c3a4ff4b2b3013b519c59aa2a..d40503409fbdc1dafe5476c742521da7792f0211 100644
--- a/paddle/fluid/extension/include/ext_tensor.h
+++ b/paddle/fluid/extension/include/ext_tensor.h
@@ -116,9 +116,11 @@ class PD_DLL_DECL Tensor {
   /// \brief Check Tensor is initialized
   bool is_initialized() const;

-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA)
   /// \brief Get current stream of Tensor
   cudaStream_t stream() const;
+#elif defined(PADDLE_WITH_HIP)
+  hipStream_t stream() const;
 #endif

  private:
diff --git a/paddle/fluid/extension/src/ext_tensor.cc b/paddle/fluid/extension/src/ext_tensor.cc
index ab98bdc0bfb47e07e5742ac1ee9cebe60f5c7a69..a9e286b4f9b231091c71cb06a2bc3996ab0fbbed 100644
--- a/paddle/fluid/extension/src/ext_tensor.cc
+++ b/paddle/fluid/extension/src/ext_tensor.cc
@@ -53,7 +53,7 @@ struct CastDataType {
       auto *context = static_cast<const platform::CPUDeviceContext *>(ctx_);
       trans(*context, in_begin, in_end, out_begin,
             CastDataTypeFunctor<InType, OutType>());
-#ifdef __NVCC__
+#if defined(__NVCC__) || defined(__HIPCC__)
     } else if (platform::is_gpu_place(in_.place())) {
       platform::Transform<platform::CUDADeviceContext> trans;
       auto *context = static_cast<const platform::CUDADeviceContext *>(ctx_);
@@ -67,10 +67,11 @@ struct CastDataType {
     }
   }
 };
+
 template <typename T>
-void GpuCopy(T *src, T *dst, PlaceType src_plc, PlaceType dst_plc,
-             int64_t ele_size) {
-#ifdef PADDLE_WITH_CUDA
+void DeviceCopy(T *src, T *dst, PlaceType src_plc, PlaceType dst_plc,
+                int64_t ele_size) {
+#if defined(PADDLE_WITH_CUDA)
   platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
   int device_num = paddle::platform::GetCurrentDeviceId();
   platform::CUDAPlace gpu_place(device_num);
@@ -90,6 +91,30 @@ void GpuCopy(T *src, T *dst, PlaceType src_plc, PlaceType dst_plc,
         "Only GPU related Copy can reach this func."));
   }
   cudaStreamSynchronize(dev_ctx->stream());
+#elif defined(PADDLE_WITH_HIP)
+  platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
+  int device_num = paddle::platform::GetCurrentDeviceId();
+  platform::CUDAPlace gpu_place(device_num);
+  auto *dev_ctx =
+      static_cast<const platform::CUDADeviceContext *>(pool.Get(gpu_place));
+  if ((src_plc == PlaceType::kHIP) && (dst_plc == PlaceType::kCPU)) {
+    memory::Copy(platform::CPUPlace(), static_cast<void *>(dst), gpu_place,
+                 src, ele_size, dev_ctx->stream());
+  } else if ((src_plc == PlaceType::kHIP) && (dst_plc == PlaceType::kHIP)) {
+    memory::Copy(gpu_place, static_cast<void *>(dst), gpu_place, src, ele_size,
+                 dev_ctx->stream());
+  } else if ((src_plc == PlaceType::kCPU) && (dst_plc == PlaceType::kHIP)) {
+    memory::Copy(gpu_place, static_cast<void *>(dst), platform::CPUPlace(),
+                 src, ele_size, dev_ctx->stream());
+  } else {
+    PADDLE_THROW(platform::errors::Unavailable(
+        "Only GPU related Copy can reach this func."));
+  }
+  hipStreamSynchronize(dev_ctx->stream());
+#else
+  PADDLE_THROW(platform::errors::Unavailable(
+      "This function can only be used if compiled with "
+      "either -DWITH_ROCM=ON or -DWITH_GPU=ON"));
 #endif
 }
@@ -137,11 +162,16 @@ T *Tensor::mutable_data() {
     case static_cast<int>(PlaceType::kCPU): {
       return tensor->mutable_data<T>(platform::CPUPlace());
     }
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA)
     case static_cast<int>(PlaceType::kGPU): {
       int device_num = platform::GetCurrentDeviceId();
       return tensor->mutable_data<T>(platform::CUDAPlace(device_num));
     }
+#elif defined(PADDLE_WITH_HIP)
+    case static_cast<int>(PlaceType::kHIP): {
+      int device_num = platform::GetCurrentDeviceId();
+      return tensor->mutable_data<T>(platform::CUDAPlace(device_num));
+    }
 #endif
     default:
       PADDLE_THROW(platform::errors::Unavailable(
@@ -202,17 +232,23 @@ Tensor Tensor::copy_to(const PlaceType &target_place) const {
   target.reshape(shape());
   auto *p_target_data = target.template mutable_data<T>();

+  bool supported_gpu_transform = false;
+#if defined(PADDLE_WITH_CUDA)
+  supported_gpu_transform =
+      (src_place == PlaceType::kGPU && target_place == PlaceType::kCPU) ||
+      (src_place == PlaceType::kCPU && target_place == PlaceType::kGPU) ||
+      (src_place == PlaceType::kGPU && target_place == PlaceType::kGPU);
+#elif defined(PADDLE_WITH_HIP)
+  supported_gpu_transform =
+      (src_place == PlaceType::kHIP && target_place == PlaceType::kCPU) ||
+      (src_place == PlaceType::kCPU && target_place == PlaceType::kHIP) ||
+      (src_place == PlaceType::kHIP && target_place == PlaceType::kHIP);
+#endif
+
   if ((src_place == PlaceType::kCPU) && (target_place == PlaceType::kCPU)) {
     std::memcpy(static_cast<void *>(p_target_data), p_src_data, ele_size);
-  } else if ((src_place == PlaceType::kGPU) &&
-             (target_place == PlaceType::kCPU)) {
-    GpuCopy<T>(p_src_data, p_target_data, src_place, target_place, ele_size);
-  } else if ((src_place == PlaceType::kCPU) &&
-             (target_place == PlaceType::kGPU)) {
-    GpuCopy<T>(p_src_data, p_target_data, src_place, target_place, ele_size);
-  } else if ((src_place == PlaceType::kGPU) &&
-             (target_place == PlaceType::kGPU)) {
-    GpuCopy<T>(p_src_data, p_target_data, src_place, target_place, ele_size);
+  } else if (supported_gpu_transform) {
+    DeviceCopy<T>(p_src_data, p_target_data, src_place, target_place, ele_size);
   } else {
     PADDLE_THROW(platform::errors::Unavailable(
         "Not supported place transform of place: %d to place: %d",
@@ -304,13 +340,18 @@ const PlaceType &Tensor::place() const {
   GET_CASTED_TENSOR;
   if (platform::is_cpu_place(tensor->place())) {
     place_ = PlaceType::kCPU;
+#if defined(PADDLE_WITH_CUDA)
   } else if (platform::is_gpu_place(tensor->place())) {
     place_ = PlaceType::kGPU;
+#elif defined(PADDLE_WITH_HIP)
+  } else if (platform::is_gpu_place(tensor->place())) {
+    place_ = PlaceType::kHIP;
+#endif
   } else {
     PADDLE_THROW(platform::errors::Unimplemented(
         "Current Tensor hold unsupported Place Type, Please Init it"
-        "using Tensor::mutable_data<T>(PaddlePlace) which T is"
-        "either Place::kCPU or Place::kGPU"));
+        "using Tensor::mutable_data<T>(PaddlePlace) with T among: "
+        "Place::kCPU or Place::kGPU or Place::kHIP"));
   }
   return place_;
 }
@@ -392,16 +433,21 @@ bool Tensor::is_initialized() const {
   }
 }

-#ifdef PADDLE_WITH_CUDA
-cudaStream_t Tensor::stream() const {
-  if (!stream_.IsStreamSet()) {
-    PADDLE_THROW(platform::errors::PreconditionNotMet(
-        "Stream is not Set, only input tensor will have "
-        "stream which is set by framework "));
-  } else {
-    return reinterpret_cast<cudaStream_t>(stream_.GetStream());
+#define DEFINE_STREAM(_stream_t_)                                  \
+  _stream_t_ Tensor::stream() const {                              \
+    if (!stream_.IsStreamSet()) {                                  \
+      PADDLE_THROW(platform::errors::PreconditionNotMet(           \
+          "Stream is not Set, only input tensor will have "        \
+          "stream which is set by framework "));                   \
+    } else {                                                       \
+      return reinterpret_cast<_stream_t_>(stream_.GetStream());    \
+    }                                                              \
   }
-}
+
+#if defined(PADDLE_WITH_CUDA)
+DEFINE_STREAM(cudaStream_t)
+#elif defined(PADDLE_WITH_HIP)
+DEFINE_STREAM(hipStream_t)
 #endif

 namespace framework {
diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt
index 284c438e395d38a0e8f98a564320adf92affe6d6..ed4d042062e368105d1e18d69419a001e94c4163 100644
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@@ -403,10 +403,20 @@ configure_file(commit.h.in commit.h)
 include_directories(${PADDLE_SOURCE_DIR}/paddle/fluid/platform)
 include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../extension/include)

-cc_library(custom_tensor SRCS ../extension/src/ext_tensor.cc DEPS lod_tensor memory enforce)
+if(WITH_ROCM)
+  hip_library(custom_tensor SRCS ../extension/src/ext_tensor.cc DEPS lod_tensor memory enforce)
+else()
+  cc_library(custom_tensor SRCS ../extension/src/ext_tensor.cc DEPS lod_tensor memory enforce)
+endif()
+
 cc_library(op_meta_info SRCS ../extension/src/ext_op_meta_info.cc DEPS custom_tensor)
 cc_library(custom_operator SRCS custom_operator.cc DEPS tensor attribute framework_proto op_registry operator dynamic_loader string_helper custom_tensor op_meta_info)
-cc_test(custom_tensor_test SRCS custom_tensor_test.cc DEPS custom_tensor glog)
+
+if(WITH_ROCM)
+  hip_test(custom_tensor_test SRCS custom_tensor_test.cc DEPS custom_tensor glog)
+else()
+  cc_test(custom_tensor_test SRCS custom_tensor_test.cc DEPS custom_tensor glog)
+endif()

 set(FLUID_FRAMEWORK_MODULES proto_desc memory lod_tensor executor data_feed_proto layer dynamic_loader custom_operator)
diff --git a/paddle/fluid/framework/custom_tensor_test.cc b/paddle/fluid/framework/custom_tensor_test.cc
index 733831263a184f5060cca58c26866ac3350c155c..b2896c74c39a6e27822446029a7884164369bb4f 100644
--- a/paddle/fluid/framework/custom_tensor_test.cc
+++ b/paddle/fluid/framework/custom_tensor_test.cc
@@ -45,7 +45,19 @@ void TestCopyTensor() {
   auto t1_gpu_cp_cp = t1_gpu_cp.template copy_to<T>(paddle::PlaceType::kGPU);
   CHECK((paddle::PlaceType::kGPU == t1_gpu_cp_cp.place()));
   auto t1_gpu_cp_cp_cpu =
-      t1_gpu_cp.template copy_to<T>(paddle::PlaceType::kCPU);
+      t1_gpu_cp_cp.template copy_to<T>(paddle::PlaceType::kCPU);
+  CHECK((paddle::PlaceType::kCPU == t1_gpu_cp_cp_cpu.place()));
+  for (int64_t i = 0; i < t1.size(); i++) {
+    CHECK_EQ(t1_gpu_cp_cp_cpu.template data<T>()[i], T(5));
+  }
+#elif defined(PADDLE_WITH_HIP)
+  VLOG(2) << "Do HIP copy test";
+  auto t1_gpu_cp = t1_cpu_cp.template copy_to<T>(paddle::PlaceType::kHIP);
+  CHECK((paddle::PlaceType::kHIP == t1_gpu_cp.place()));
+  auto t1_gpu_cp_cp = t1_gpu_cp.template copy_to<T>(paddle::PlaceType::kHIP);
+  CHECK((paddle::PlaceType::kHIP == t1_gpu_cp_cp.place()));
+  auto t1_gpu_cp_cp_cpu =
+      t1_gpu_cp_cp.template copy_to<T>(paddle::PlaceType::kCPU);
   CHECK((paddle::PlaceType::kCPU == t1_gpu_cp_cp_cpu.place()));
   for (int64_t i = 0; i < t1.size(); i++) {
     CHECK_EQ(t1_gpu_cp_cp_cpu.template data<T>()[i], T(5));
@@ -60,6 +72,11 @@ void TestAPIPlace() {
   t1.reshape(tensor_shape);
   t1.mutable_data<float>();
   CHECK((paddle::PlaceType::kGPU == t1.place()));
+#elif defined(PADDLE_WITH_HIP)
+  auto t1 = paddle::Tensor(paddle::PlaceType::kHIP);
+  t1.reshape(tensor_shape);
+  t1.mutable_data<float>();
+  CHECK((paddle::PlaceType::kHIP == t1.place()));
 #endif
   auto t2 = paddle::Tensor(paddle::PlaceType::kCPU);
   t2.reshape(tensor_shape);
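Taken together, the diff gives custom-operator code the same surface on ROCm that it already has on CUDA builds: `PlaceType::kHIP` for placement, `copy_to` backed by the renamed `DeviceCopy`, and a `hipStream_t` from `Tensor::stream()`. Below is a minimal usage sketch — our illustration, not code from this diff. It assumes a Paddle build configured with `-DWITH_ROCM=ON` and the public custom-op header `paddle/extension.h`; the function name `HipRoundTripSketch` is hypothetical.

```cpp
// Sketch (ours, not part of the diff): round-trip a tensor through the new
// kHIP place, mirroring the path TestCopyTensor exercises above.
// Assumption: built with -DWITH_ROCM=ON.
#include <cassert>
#include <cstdint>

#include "paddle/extension.h"

void HipRoundTripSketch() {
  // Fill a CPU tensor with ones.
  auto t_cpu = paddle::Tensor(paddle::PlaceType::kCPU);
  t_cpu.reshape({2, 3});
  auto *src = t_cpu.mutable_data<float>();
  for (int64_t i = 0; i < t_cpu.size(); ++i) {
    src[i] = 1.0f;
  }

  // CPU -> HIP -> CPU. Both hops dispatch to DeviceCopy, which issues
  // memory::Copy on the device stream and ends with hipStreamSynchronize,
  // so the data is visible once copy_to returns.
  auto t_hip = t_cpu.copy_to<float>(paddle::PlaceType::kHIP);
  auto t_back = t_hip.copy_to<float>(paddle::PlaceType::kCPU);

  // After the round trip every element should still be 1.0f. Note that
  // Tensor::stream() returns a hipStream_t on HIP builds, but only for
  // tensors whose stream was set by the framework (e.g. kernel inputs);
  // calling it on t_hip here would throw PreconditionNotMet.
  const float *result = t_back.data<float>();
  for (int64_t i = 0; i < t_back.size(); ++i) {
    assert(result[i] == 1.0f);
  }
}
```

The same sketch compiles against a CUDA build with `kGPU` substituted for `kHIP`; keeping the two paths symmetric is what motivates renaming `GpuCopy` to `DeviceCopy` and collapsing the per-pair branches in `copy_to` into the single `supported_gpu_transform` check.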