From ebe72b8833c3010cc60bc2d7879aa14b08e23ab7 Mon Sep 17 00:00:00 2001
From: Qi Li
Date: Fri, 8 Apr 2022 13:08:06 +0800
Subject: [PATCH] [Cherry-pick][ROCm] fix dcu error in device event base, test=develop (#41523)

Cherry-pick of #41521
---
 paddle/fluid/platform/device_event.h       |  2 +-
 paddle/fluid/platform/device_event_gpu.cc  |  2 +-
 paddle/fluid/platform/device_event_test.cc | 52 ++++++++++++++++++++++
 3 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/paddle/fluid/platform/device_event.h b/paddle/fluid/platform/device_event.h
index 57f45a40165..463329d32c9 100644
--- a/paddle/fluid/platform/device_event.h
+++ b/paddle/fluid/platform/device_event.h
@@ -29,7 +29,7 @@ using ::paddle::platform::kCPU;
 USE_EVENT(kCPU)
 USE_EVENT_WAIT(kCPU, kCPU)
 
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 USE_EVENT(kCUDA);
 USE_EVENT_WAIT(kCUDA, kCUDA)
 USE_EVENT_WAIT(kCPU, kCUDA)
diff --git a/paddle/fluid/platform/device_event_gpu.cc b/paddle/fluid/platform/device_event_gpu.cc
index a811a5b9c13..f42ccc5a1db 100644
--- a/paddle/fluid/platform/device_event_gpu.cc
+++ b/paddle/fluid/platform/device_event_gpu.cc
@@ -15,7 +15,7 @@
 #include "paddle/fluid/platform/device_event_base.h"
 #include "paddle/fluid/platform/event.h"
 
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 namespace paddle {
 namespace platform {
 struct CUDADeviceEventWrapper {
diff --git a/paddle/fluid/platform/device_event_test.cc b/paddle/fluid/platform/device_event_test.cc
index 96e89f9257d..d9f744b2625 100644
--- a/paddle/fluid/platform/device_event_test.cc
+++ b/paddle/fluid/platform/device_event_test.cc
@@ -75,6 +75,58 @@ TEST(DeviceEvent, CUDA) {
 }
 #endif
 
+#ifdef PADDLE_WITH_HIP
+#include <hip/hip_runtime.h>
+
+TEST(DeviceEvent, CUDA) {
+  VLOG(1) << "In Test";
+  using paddle::platform::CUDAPlace;
+
+  auto& pool = DeviceContextPool::Instance();
+  auto place = CUDAPlace(0);
+  auto* context =
+      static_cast<paddle::platform::CUDADeviceContext*>(pool.Get(place));
+
+  ASSERT_NE(context, nullptr);
+  // case 1. test for event_creator
+  DeviceEvent event(place);
+  ASSERT_NE(event.GetEvent().get(), nullptr);
+  bool status = event.Query();
+  ASSERT_EQ(status, true);
+  // case 2. test for event_recorder
+  event.Record(context);
+  status = event.Query();
+  ASSERT_EQ(status, false);
+  // case 3. test for event_finisher
+  event.Finish();
+  status = event.Query();
+  ASSERT_EQ(status, true);
+
+  // case 4. test for event_waiter
+  float *src_fp32, *dst_fp32;
+  int size = 1000000 * sizeof(float);
+  hipMallocHost(reinterpret_cast<void**>(&src_fp32), size);
+  hipMalloc(reinterpret_cast<void**>(&dst_fp32), size);
+  hipMemcpyAsync(dst_fp32, src_fp32, size, hipMemcpyHostToDevice,
+                 context->stream());
+  event.Record(context);  // step 1. record it
+  status = event.Query();
+  ASSERT_EQ(status, false);
+
+  event.Wait(kCUDA, context);  // step 2. add streamWaitEvent
+  status = event.Query();
+  ASSERT_EQ(status, false);  // async
+
+  event.Wait(kCPU, context);  // step 3. EventSynchronize
+  status = event.Query();
+  ASSERT_EQ(status, true);  // sync
+
+  // release resource
+  hipFree(dst_fp32);
+  hipFreeHost(src_fp32);
+}
+#endif
+
 TEST(DeviceEvent, CPU) {
   using paddle::platform::CPUPlace;
   auto place = CPUPlace();
-- 
GitLab