From ebe72b8833c3010cc60bc2d7879aa14b08e23ab7 Mon Sep 17 00:00:00 2001
From: Qi Li <qili93@qq.com>
Date: Fri, 8 Apr 2022 13:08:06 +0800
Subject: [PATCH] [Cherry-pick][ROCm] fix dcu error in device event base,
 test=develop (#41523)

Cherry-pick of #41521
---
 paddle/fluid/platform/device_event.h       |  2 +-
 paddle/fluid/platform/device_event_gpu.cc  |  2 +-
 paddle/fluid/platform/device_event_test.cc | 52 ++++++++++++++++++++++
 3 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/paddle/fluid/platform/device_event.h b/paddle/fluid/platform/device_event.h
index 57f45a40165..463329d32c9 100644
--- a/paddle/fluid/platform/device_event.h
+++ b/paddle/fluid/platform/device_event.h
@@ -29,7 +29,7 @@ using ::paddle::platform::kCPU;
 USE_EVENT(kCPU)
 USE_EVENT_WAIT(kCPU, kCPU)
 
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 USE_EVENT(kCUDA);
 USE_EVENT_WAIT(kCUDA, kCUDA)
 USE_EVENT_WAIT(kCPU, kCUDA)
diff --git a/paddle/fluid/platform/device_event_gpu.cc b/paddle/fluid/platform/device_event_gpu.cc
index a811a5b9c13..f42ccc5a1db 100644
--- a/paddle/fluid/platform/device_event_gpu.cc
+++ b/paddle/fluid/platform/device_event_gpu.cc
@@ -15,7 +15,7 @@
 #include "paddle/fluid/platform/device_event_base.h"
 #include "paddle/fluid/platform/event.h"
 
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 namespace paddle {
 namespace platform {
 struct CUDADeviceEventWrapper {
diff --git a/paddle/fluid/platform/device_event_test.cc b/paddle/fluid/platform/device_event_test.cc
index 96e89f9257d..d9f744b2625 100644
--- a/paddle/fluid/platform/device_event_test.cc
+++ b/paddle/fluid/platform/device_event_test.cc
@@ -75,6 +75,58 @@ TEST(DeviceEvent, CUDA) {
 }
 #endif
 
+#ifdef PADDLE_WITH_HIP
+#include <hip/hip_runtime.h>
+
+TEST(DeviceEvent, CUDA) {
+  // HIP build of the GPU DeviceEvent test: create, record, finish and wait.
+  VLOG(1) << "In Test";
+  using paddle::platform::CUDAPlace;
+  auto& pool = DeviceContextPool::Instance();
+  auto place = CUDAPlace(0);
+  auto* context =
+      static_cast<paddle::platform::CUDADeviceContext*>(pool.Get(place));
+
+  ASSERT_NE(context, nullptr);
+  // case 1. test for event_creator
+  DeviceEvent event(place);
+  ASSERT_NE(event.GetEvent().get(), nullptr);
+  bool status = event.Query();
+  ASSERT_EQ(status, true);
+  // case 2. test for event_recorder
+  event.Record(context);
+  status = event.Query();
+  ASSERT_EQ(status, false);
+  // case 3. test for event_finisher
+  event.Finish();
+  status = event.Query();
+  ASSERT_EQ(status, true);
+
+  // case 4. test for event_waiter; an async H2D copy is queued on the stream
+  float *h_src = nullptr, *d_dst = nullptr;  // pinned host / device buffers
+  const size_t size = 1000000 * sizeof(float);  // byte count, kept as size_t
+  ASSERT_EQ(hipHostMalloc(reinterpret_cast<void**>(&h_src), size), hipSuccess);
+  ASSERT_EQ(hipMalloc(reinterpret_cast<void**>(&d_dst), size), hipSuccess);
+  ASSERT_EQ(hipMemcpyAsync(d_dst, h_src, size, hipMemcpyHostToDevice,
+                           context->stream()), hipSuccess);
+  event.Record(context);  // step 1. record it
+  status = event.Query();
+  ASSERT_EQ(status, false);
+
+  event.Wait(kCUDA, context);  // step 2. add streamWaitEvent
+  status = event.Query();
+  ASSERT_EQ(status, false);  // async
+
+  event.Wait(kCPU, context);  // step 3. EventSynchronize
+  status = event.Query();
+  ASSERT_EQ(status, true);  // sync
+
+  // release resource
+  hipFree(d_dst);
+  hipHostFree(h_src);  // counterpart of hipHostMalloc
+}
+#endif
+
 TEST(DeviceEvent, CPU) {
   using paddle::platform::CPUPlace;
   auto place = CPUPlace();
-- 
GitLab