diff --git a/paddle/fluid/framework/new_executor/interpretercore.cc b/paddle/fluid/framework/new_executor/interpretercore.cc
index 0f2ad0ff3306191275b1a2e416f3fa20f793787e..c86a16cef08828f4fe10c38ec884b9d31dccfff0 100644
--- a/paddle/fluid/framework/new_executor/interpretercore.cc
+++ b/paddle/fluid/framework/new_executor/interpretercore.cc
@@ -77,7 +77,7 @@ void AssociateInputWithEvents(
     for (auto var_id : new_event_var_id) {
       if (var_id2event->count(var_id) == 0) {
         auto device_event = std::make_shared(
-            place, platform::get_cuda_flags(false, false, false));
+            place, platform::GenerateDeviceEventFlag());
         var_id2event->emplace(var_id, std::move(device_event));
       }
       // Add events for next_instr.inputs
diff --git a/paddle/fluid/platform/device_event_base.cc b/paddle/fluid/platform/device_event_base.cc
index 288052edccc4dcb6ab82d307ed542ac233e1c397..0cd1cff556b3a484982980ef60d9dd2006bca107 100644
--- a/paddle/fluid/platform/device_event_base.cc
+++ b/paddle/fluid/platform/device_event_base.cc
@@ -14,6 +14,7 @@
 
 #include "paddle/fluid/platform/device_event_base.h"
 #include "paddle/fluid/platform/device_event_cpu.h"
+#include "paddle/fluid/platform/event.h"
 
 namespace paddle {
 namespace platform {
@@ -25,6 +26,41 @@ EventFinishFunction DeviceEvent::event_finisher_[MaxDeviceTypes];
 EventFinishFunction DeviceEvent::event_finished_setter_[MaxDeviceTypes];
 EventWaitFunction DeviceEvent::event_waiter_[MaxDeviceTypes][MaxDeviceTypes];
 
+/*
+ * Generate the flag bitmask used when creating a device event.
+ *
+ * enable_timing: when false, the backend's "disable timing" bit is set.
+ * blocking:      when true, the backend's "blocking sync" bit is set.
+ * interprocess:  when true, the backend's "interprocess" bit is set.
+ *
+ * Returns the CUDA or HIP event flags depending on the build; a build with
+ * neither backend has no event flags and always gets 0.
+ * NOTE: Support CPU/CUDA/ROCM currently.
+ */
+unsigned int GenerateDeviceEventFlag(bool enable_timing, bool blocking,
+                                     bool interprocess) {
+#if defined(PADDLE_WITH_CUDA)
+  const unsigned int flags =
+      (blocking ? cudaEventBlockingSync : cudaEventDefault) |
+      (enable_timing ? cudaEventDefault : cudaEventDisableTiming) |
+      (interprocess ? cudaEventInterprocess : cudaEventDefault);
+  return flags;
+#elif defined(PADDLE_WITH_HIP)
+  const unsigned int flags =
+      (blocking ? hipEventBlockingSync : hipEventDefault) |
+      (enable_timing ? hipEventDefault : hipEventDisableTiming) |
+      (interprocess ? hipEventInterprocess : hipEventDefault);
+  return flags;
+#else
+  // Exactly one branch is compiled, so no return is ever unreachable. The
+  // arguments carry no meaning without a GPU backend; mark them as used.
+  (void)enable_timing;
+  (void)blocking;
+  (void)interprocess;
+  return 0;
+#endif
+}
+
 void DeviceEventCreateCPU(DeviceEvent* event, const platform::Place& place,
                           unsigned int flag) {
   event->InitEvent(std::make_shared(place, flag));
diff --git a/paddle/fluid/platform/device_event_base.h b/paddle/fluid/platform/device_event_base.h
index d713a638af8bc100fdc496ca8267cb35f7b359bb..e2e1fdf29d51aafdcfa25de72e6a3c532befe5d9 100644
--- a/paddle/fluid/platform/device_event_base.h
+++ b/paddle/fluid/platform/device_event_base.h
@@ -39,6 +39,10 @@ inline int DeviceTypeToId(const DeviceType& device_type) {
   return static_cast(device_type);
 }
 
+unsigned int GenerateDeviceEventFlag(bool enable_timing = false,
+                                     bool blocking = false,
+                                     bool interprocess = false);
+
 enum EventStatus {
   INITIALIZED = 0,
   SCHEDULED = 1,
diff --git a/paddle/fluid/platform/event.h b/paddle/fluid/platform/event.h
index a79ab22743d16680d2876c64b3568e5086e2f944..2b11de48a1ec70528f7aeddbe530869f10a779b9 100644
--- a/paddle/fluid/platform/event.h
+++ b/paddle/fluid/platform/event.h
@@ -195,30 +195,5 @@ class CudaEvent {
 #endif
 };
 
-static unsigned int get_cuda_flags(bool enable_timing, bool blocking,
-                                   bool interprocess) {
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-
-#ifdef PADDLE_WITH_HIP
-  unsigned int flags =
-      (blocking ? hipEventBlockingSync : hipEventDefault) |
-      (enable_timing ? hipEventDefault : hipEventDisableTiming) |
-      (interprocess ? hipEventInterprocess : hipEventDefault);
-  return flags;
-#else
-  unsigned int flags =
-      (blocking ? cudaEventBlockingSync : cudaEventDefault) |
-      (enable_timing ? cudaEventDefault : cudaEventDisableTiming) |
-      (interprocess ? cudaEventInterprocess : cudaEventDefault);
-  return flags;
-#endif
-
-#else
-  PADDLE_THROW(platform::errors::Unavailable(
-      "Paddle is not compiled with CUDA. Cannot get the cuda event flags."));
-  return 0;
-#endif
-}
-
 }  // namespace platform
 }  // namespace paddle
diff --git a/paddle/fluid/pybind/cuda_streams_py.cc b/paddle/fluid/pybind/cuda_streams_py.cc
index 5ea0a2553f7516f062a7d0577aaa319014ecd4fb..50c6d0e983939f4c708cbd1f8072c47214ca8022 100644
--- a/paddle/fluid/pybind/cuda_streams_py.cc
+++ b/paddle/fluid/pybind/cuda_streams_py.cc
@@ -15,6 +15,7 @@
 #include
 #include
 
+#include "paddle/fluid/platform/device_event_base.h"
 #include "paddle/fluid/platform/event.h"
 #include "paddle/fluid/platform/stream/cuda_stream.h"
 #include "paddle/fluid/pybind/cuda_streams_py.h"
@@ -331,7 +332,7 @@ void BindCudaStream(py::module *m_ptr) {
           [](paddle::platform::CudaEvent &self, bool enable_timing,
              bool blocking, bool interprocess) {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-            unsigned int flags = platform::get_cuda_flags(
+            unsigned int flags = platform::GenerateDeviceEventFlag(
                 enable_timing, blocking, interprocess);
             new (&self) paddle::platform::CudaEvent(flags);
 #else