diff --git a/paddle/fluid/platform/device_event.h b/paddle/fluid/platform/device_event.h
index 57f45a40165d7a031d2b807dd436100a6c46718b..463329d32c936153ecce30691bb08c374a58f1dd 100644
--- a/paddle/fluid/platform/device_event.h
+++ b/paddle/fluid/platform/device_event.h
@@ -29,7 +29,7 @@ using ::paddle::platform::kCPU;
 USE_EVENT(kCPU)
 USE_EVENT_WAIT(kCPU, kCPU)
 
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 USE_EVENT(kCUDA);
 USE_EVENT_WAIT(kCUDA, kCUDA)
 USE_EVENT_WAIT(kCPU, kCUDA)
diff --git a/paddle/fluid/platform/device_event_gpu.cc b/paddle/fluid/platform/device_event_gpu.cc
index a811a5b9c130dcf6a732c8557b9867dcaa1d0fe8..f42ccc5a1db54efd70660beecd98bbda1a11dc41 100644
--- a/paddle/fluid/platform/device_event_gpu.cc
+++ b/paddle/fluid/platform/device_event_gpu.cc
@@ -15,7 +15,7 @@
 #include "paddle/fluid/platform/device_event_base.h"
 #include "paddle/fluid/platform/event.h"
 
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 namespace paddle {
 namespace platform {
 struct CUDADeviceEventWrapper {
diff --git a/paddle/fluid/platform/device_event_test.cc b/paddle/fluid/platform/device_event_test.cc
index 96e89f9257dd23039f822723c28682e42e4d86c8..d9f744b26256b1f00bd256319a5ab606fe7a0b4c 100644
--- a/paddle/fluid/platform/device_event_test.cc
+++ b/paddle/fluid/platform/device_event_test.cc
@@ -75,6 +75,58 @@ TEST(DeviceEvent, CUDA) {
 }
 #endif
 
+#ifdef PADDLE_WITH_HIP
+#include <hip/hip_runtime.h>
+
+TEST(DeviceEvent, CUDA) {
+  VLOG(1) << "In Test";
+  using paddle::platform::CUDAPlace;
+
+  auto& pool = DeviceContextPool::Instance();
+  auto place = CUDAPlace(0);
+  auto* context =
+      static_cast<paddle::platform::CUDADeviceContext*>(pool.Get(place));
+
+  ASSERT_NE(context, nullptr);
+  // case 1. test for event_creator
+  DeviceEvent event(place);
+  ASSERT_NE(event.GetEvent().get(), nullptr);
+  bool status = event.Query();
+  ASSERT_EQ(status, true);
+  // case 2. test for event_recorder
+  event.Record(context);
+  status = event.Query();
+  ASSERT_EQ(status, false);
+  // case 3. test for event_finisher
+  event.Finish();
+  status = event.Query();
+  ASSERT_EQ(status, true);
+
+  // case 4. test for event_waiter
+  float *src_fp32, *dst_fp32;
+  int size = 1000000 * sizeof(float);
+  hipMallocHost(reinterpret_cast<void**>(&src_fp32), size);
+  hipMalloc(reinterpret_cast<void**>(&dst_fp32), size);
+  hipMemcpyAsync(dst_fp32, src_fp32, size, hipMemcpyHostToDevice,
+                 context->stream());
+  event.Record(context);  // step 1. record it
+  status = event.Query();
+  ASSERT_EQ(status, false);
+
+  event.Wait(kCUDA, context);  // step 2. add streamWaitEvent
+  status = event.Query();
+  ASSERT_EQ(status, false);  // async
+
+  event.Wait(kCPU, context);  // step 3. EventSynchronize
+  status = event.Query();
+  ASSERT_EQ(status, true);  // sync
+
+  // release resource
+  hipFree(dst_fp32);
+  hipFreeHost(src_fp32);
+}
+#endif
+
 TEST(DeviceEvent, CPU) {
   using paddle::platform::CPUPlace;
   auto place = CPUPlace();