diff --git a/paddle/fluid/platform/profiler/profiler.cc b/paddle/fluid/platform/profiler/profiler.cc
index 5957c4c24ca3be1357b6c5864b7493d9c115ed69..12099195143ae3a4b027a025d9ca7a9b2e8f8e7e 100644
--- a/paddle/fluid/platform/profiler/profiler.cc
+++ b/paddle/fluid/platform/profiler/profiler.cc
@@ -32,11 +32,39 @@
 #include "paddle/fluid/platform/profiler/mlu/mlu_tracer.h"
 #include "paddle/fluid/platform/profiler/trace_event_collector.h"
 #include "paddle/fluid/platform/profiler/utils.h"
+#ifdef PADDLE_WITH_CUSTOM_DEVICE
+#include "paddle/phi/backends/device_manager.h"
+#endif
 
 namespace paddle {
 namespace platform {
 
-void SynchronizeAllDevice();
+// Issues a device-level synchronize for each backend compiled into this build.
+// Profiler::Start() and Profiler::Stop() call this before touching their
+// tracers. Each backend section is compiled in only when its PADDLE_WITH_*
+// macro is defined; with none defined the function is a no-op.
+void SynchronizeDevice() {
+#if defined(PADDLE_WITH_CUDA)
+  PADDLE_ENFORCE_GPU_SUCCESS(cudaDeviceSynchronize());
+#endif
+#if defined(PADDLE_WITH_HIP)
+  PADDLE_ENFORCE_GPU_SUCCESS(hipDeviceSynchronize());
+#endif
+#if defined(PADDLE_WITH_MLU)
+  PADDLE_ENFORCE_MLU_SUCCESS(cnrtSyncDevice());
+#endif
+#if defined(PADDLE_WITH_CUSTOM_DEVICE)
+  // One synchronization per registered custom device type, on the device id
+  // that DeviceManager::GetDevice reports for that type (presumably the
+  // currently selected device -- TODO confirm against DeviceManager).
+  const auto custom_types = phi::DeviceManager::GetAllCustomDeviceTypes();
+  for (const auto& device_type : custom_types) {
+    const auto device_id = phi::DeviceManager::GetDevice(device_type);
+    auto place = paddle::platform::CustomPlace(device_type, device_id);
+    phi::DeviceManager::SynchronizeDevice(place);
+  }
+#endif
+}
 
 std::atomic Profiler::alive_{false};
 
@@ -99,7 +127,7 @@ void Profiler::Prepare() {
 }
 
 void Profiler::Start() {
-  SynchronizeAllDevice();
+  SynchronizeDevice();
   for (auto& tracer : tracers_) {
     tracer.Get().StartTracing();
   }
@@ -107,7 +135,7 @@ void Profiler::Start() {
 }
 
 std::unique_ptr Profiler::Stop() {
-  SynchronizeAllDevice();
+  SynchronizeDevice();
   TraceEventCollector collector;
   for (auto& tracer : tracers_) {
     tracer.Get().StopTracing();
diff --git a/paddle/fluid/platform/profiler/profiler.h b/paddle/fluid/platform/profiler/profiler.h
index 878f73f2b98021483fa43d943039e1f129fabf0a..a67836845364bb27b6fc9377710bd12b57f0a777 100644
--- a/paddle/fluid/platform/profiler/profiler.h
+++ b/paddle/fluid/platform/profiler/profiler.h
@@ -37,6 +37,10 @@ static constexpr uint32_t kProfileGPUOptionBit = 1;
 static constexpr uint32_t kProfileMLUOptionBit = 2;
 static constexpr uint32_t kProfileCustomDeviceOptionBit = 3;
 
+// Issues a device-level synchronize for each backend compiled into this build
+// (defined in profiler.cc).
+void SynchronizeDevice();
+
 struct ProfilerOptions {
   uint32_t trace_switch = 0;  // bit 0: cpu, bit 1: gpu, bit 2: mlu
   uint32_t trace_level = FLAGS_host_trace_level;