From 7c489c2eef7ae8baa5f312d8889e6c6fddf3d85a Mon Sep 17 00:00:00 2001 From: liutiexing <74819124+liutiexing@users.noreply.github.com> Date: Fri, 28 Jan 2022 16:12:32 +0800 Subject: [PATCH] Host tracer and ProfilerController (#39230) * add align for WorkQueue * add spinlock * merge develop * merge * Add EventsWaiter * Revert "Add EventsWaiter" This reverts commit e206173aa9be7401b83a53581627bfaf557c8fb2. * split template * Add Profiler and HostTracer * update * update * update * updateg * fix cmake Co-authored-by: liutiexing --- .../workqueue/thread_data_registry.h | 126 ++++++++++++++++++ paddle/fluid/platform/CMakeLists.txt | 6 +- paddle/fluid/platform/os_info.cc | 81 +---------- paddle/fluid/platform/profiler.cc | 61 ++++++--- paddle/fluid/platform/profiler/CMakeLists.txt | 4 +- paddle/fluid/platform/profiler/common_event.h | 66 +++++++++ .../fluid/platform/profiler/event_tracing.h | 13 +- .../platform/profiler/host_event_recorder.cc | 33 ----- .../platform/profiler/host_event_recorder.h | 78 ++++------- paddle/fluid/platform/profiler/host_tracer.cc | 71 ++++++++++ paddle/fluid/platform/profiler/host_tracer.h | 63 +++++++++ paddle/fluid/platform/profiler/profiler.cc | 76 +++++++++++ paddle/fluid/platform/profiler/profiler.h | 74 ++++++++++ .../fluid/platform/profiler/profiler_test.cc | 52 ++++++++ paddle/fluid/platform/profiler/trace_event.h | 10 +- .../platform/profiler/trace_event_collector.h | 49 +++---- 16 files changed, 635 insertions(+), 228 deletions(-) create mode 100644 paddle/fluid/framework/new_executor/workqueue/thread_data_registry.h create mode 100644 paddle/fluid/platform/profiler/common_event.h delete mode 100644 paddle/fluid/platform/profiler/host_event_recorder.cc create mode 100644 paddle/fluid/platform/profiler/host_tracer.cc create mode 100644 paddle/fluid/platform/profiler/host_tracer.h create mode 100644 paddle/fluid/platform/profiler/profiler.cc create mode 100644 paddle/fluid/platform/profiler/profiler.h create mode 100644 
paddle/fluid/platform/profiler/profiler_test.cc diff --git a/paddle/fluid/framework/new_executor/workqueue/thread_data_registry.h b/paddle/fluid/framework/new_executor/workqueue/thread_data_registry.h new file mode 100644 index 0000000000..a4a913cdff --- /dev/null +++ b/paddle/fluid/framework/new_executor/workqueue/thread_data_registry.h @@ -0,0 +1,126 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include + +namespace paddle { +namespace framework { + +static uint64_t main_tid = + std::hash()(std::this_thread::get_id()); + +template +class ThreadDataRegistry { + class ThreadDataHolder; + + public: + // Singleton + static ThreadDataRegistry& GetInstance() { + static ThreadDataRegistry instance; + return instance; + } + + T* GetMutableCurrentThreadData() { return &CurrentThreadData(); } + + const T& GetCurrentThreadData() { return CurrentThreadData(); } + + template ::value>> + void SetCurrentThreadData(const T& val) { + std::lock_guard lock(lock_); + CurrentThreadData() = val; + } + + // Returns current snapshot of all threads. Make sure there is no thread + // create/destory when using it. 
+ template ::value>> + std::unordered_map GetAllThreadDataByValue() { + std::unordered_map data_copy; + std::lock_guard lock(lock_); + data_copy.reserve(tid_map_.size()); + for (auto& kv : tid_map_) { + data_copy.emplace(kv.first, kv.second->GetData()); + } + return data_copy; + } + + // Returns current snapshot of all threads. Make sure there is no thread + // create/destory when using it. + std::unordered_map> + GetAllThreadDataByRef() { + std::unordered_map> data_ref; + std::lock_guard lock(lock_); + data_ref.reserve(tid_map_.size()); + for (auto& kv : tid_map_) { + data_ref.emplace(kv.first, std::ref(kv.second->GetData())); + } + return data_ref; + } + + void RegisterData(uint64_t tid, ThreadDataHolder* tls_obj) { + std::lock_guard lock(lock_); + tid_map_[tid] = tls_obj; + } + + void UnregisterData(uint64_t tid) { + if (tid == main_tid) { + return; + } + std::lock_guard lock(lock_); + tid_map_.erase(tid); + } + + private: + class ThreadDataHolder { + public: + ThreadDataHolder() { + tid_ = std::hash()(std::this_thread::get_id()); + ThreadDataRegistry::GetInstance().RegisterData(tid_, this); + } + + ~ThreadDataHolder() { + ThreadDataRegistry::GetInstance().UnregisterData(tid_); + } + + T& GetData() { return data_; } + + private: + uint64_t tid_; + T data_; + }; + + ThreadDataRegistry() = default; + + ThreadDataRegistry(const ThreadDataRegistry&) = delete; + + ThreadDataRegistry& operator=(const ThreadDataRegistry&) = delete; + + T& CurrentThreadData() { + static thread_local ThreadDataHolder thread_data; + return thread_data.GetData(); + } + + std::mutex lock_; + std::unordered_map tid_map_; // not owned +}; + +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt index 7dc07942d4..7f54903e69 100644 --- a/paddle/fluid/platform/CMakeLists.txt +++ b/paddle/fluid/platform/CMakeLists.txt @@ -177,13 +177,13 @@ add_subdirectory(profiler) cc_library(device_tracer SRCS device_tracer.cc 
DEPS boost profiler_proto framework_proto ${GPU_CTX_DEPS}) if(WITH_GPU) - nv_library(profiler SRCS profiler.cc profiler.cu DEPS host_event_recorder os_info device_tracer gpu_info enforce dynload_cuda) + nv_library(profiler SRCS profiler.cc profiler.cu DEPS os_info device_tracer gpu_info enforce dynload_cuda) nv_library(device_memory_aligment SRCS device_memory_aligment.cc DEPS cpu_info gpu_info place) elseif(WITH_ROCM) - hip_library(profiler SRCS profiler.cc profiler.cu DEPS host_event_recorder os_info device_tracer gpu_info enforce) + hip_library(profiler SRCS profiler.cc profiler.cu DEPS os_info device_tracer gpu_info enforce) hip_library(device_memory_aligment SRCS device_memory_aligment.cc DEPS cpu_info gpu_info place) else() - cc_library(profiler SRCS profiler.cc DEPS host_event_recorder os_info device_tracer enforce) + cc_library(profiler SRCS profiler.cc DEPS os_info device_tracer enforce) cc_library(device_memory_aligment SRCS device_memory_aligment.cc DEPS cpu_info place) endif() diff --git a/paddle/fluid/platform/os_info.cc b/paddle/fluid/platform/os_info.cc index 92d218504e..58d37783d0 100644 --- a/paddle/fluid/platform/os_info.cc +++ b/paddle/fluid/platform/os_info.cc @@ -14,7 +14,6 @@ limitations under the License. */ #include "paddle/fluid/platform/os_info.h" #include -#include #include #include #include @@ -27,90 +26,14 @@ limitations under the License. 
*/ #else #include #endif +#include "paddle/fluid/framework/new_executor/workqueue/thread_data_registry.h" #include "paddle/fluid/platform/macros.h" // import DISABLE_COPY_AND_ASSIGN namespace paddle { namespace platform { namespace internal { -static uint64_t main_tid = - std::hash()(std::this_thread::get_id()); - -template -class ThreadDataRegistry { - class ThreadDataHolder; - - public: - // Singleton - static ThreadDataRegistry& GetInstance() { - static ThreadDataRegistry instance; - return instance; - } - - const T& GetCurrentThreadData() { return CurrentThreadData(); } - - void SetCurrentThreadData(const T& val) { - std::lock_guard lock(lock_); - CurrentThreadData() = val; - } - - // Returns current snapshot of all threads. Make sure there is no thread - // create/destory when using it. - template ::value>> - std::unordered_map GetAllThreadDataByValue() { - std::unordered_map data_copy; - std::lock_guard lock(lock_); - data_copy.reserve(tid_map_.size()); - for (auto& kv : tid_map_) { - data_copy.emplace(kv.first, kv.second->GetData()); - } - return std::move(data_copy); - } - - void RegisterData(uint64_t tid, ThreadDataHolder* tls_obj) { - std::lock_guard lock(lock_); - tid_map_[tid] = tls_obj; - } - - void UnregisterData(uint64_t tid) { - if (tid == main_tid) { - return; - } - std::lock_guard lock(lock_); - tid_map_.erase(tid); - } - - private: - class ThreadDataHolder { - public: - ThreadDataHolder() { - tid_ = std::hash()(std::this_thread::get_id()); - ThreadDataRegistry::GetInstance().RegisterData(tid_, this); - } - - ~ThreadDataHolder() { - ThreadDataRegistry::GetInstance().UnregisterData(tid_); - } - - T& GetData() { return data_; } - - private: - uint64_t tid_; - T data_; - }; - - ThreadDataRegistry() = default; - - DISABLE_COPY_AND_ASSIGN(ThreadDataRegistry); - - T& CurrentThreadData() { - static thread_local ThreadDataHolder thread_data; - return thread_data.GetData(); - } - - std::mutex lock_; - std::unordered_map tid_map_; // not owned -}; +using 
framework::ThreadDataRegistry; class InternalThreadId { public: diff --git a/paddle/fluid/platform/profiler.cc b/paddle/fluid/platform/profiler.cc index c4beac93ef..8fecf444dc 100644 --- a/paddle/fluid/platform/profiler.cc +++ b/paddle/fluid/platform/profiler.cc @@ -21,7 +21,9 @@ limitations under the License. */ #include "paddle/fluid/platform/device_tracer.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/profiler.h" +#include "paddle/fluid/platform/profiler/common_event.h" #include "paddle/fluid/platform/profiler/host_event_recorder.h" +#include "paddle/fluid/platform/profiler/host_tracer.h" #include "paddle/fluid/platform/profiler_helper.h" #ifdef PADDLE_WITH_CUDA #include "paddle/fluid/platform/dynload/nvtx.h" @@ -64,7 +66,8 @@ double Event::CudaElapsedMs(const Event &e) const { #endif } -RecordEvent::RecordEvent(const char *name, const EventRole role) { +RecordEvent::RecordEvent(const char *name, const EventRole role, + uint32_t level) { #ifndef _WIN32 #ifdef PADDLE_WITH_CUDA if (g_enable_nvprof_hook) { @@ -73,16 +76,21 @@ RecordEvent::RecordEvent(const char *name, const EventRole role) { } #endif #endif - if (UNLIKELY(FLAGS_enable_host_event_recorder_hook == false)) { + if (FLAGS_enable_host_event_recorder_hook == false) { OriginalConstruct(name, role, "none"); return; } + if (UNLIKELY(HostTraceLevel::GetInstance().NeedTrace(level) == false)) { + return; + } + is_enabled_ = true; shallow_copy_name_ = name; role_ = role; start_ns_ = PosixInNsec(); } -RecordEvent::RecordEvent(const std::string &name, const EventRole role) { +RecordEvent::RecordEvent(const std::string &name, const EventRole role, + uint32_t level) { #ifndef _WIN32 #ifdef PADDLE_WITH_CUDA if (g_enable_nvprof_hook) { @@ -91,17 +99,21 @@ RecordEvent::RecordEvent(const std::string &name, const EventRole role) { } #endif #endif - if (UNLIKELY(FLAGS_enable_host_event_recorder_hook == false)) { + if (FLAGS_enable_host_event_recorder_hook == false) { OriginalConstruct(name, 
role, "none"); return; } + if (UNLIKELY(HostTraceLevel::GetInstance().NeedTrace(level) == false)) { + return; + } + is_enabled_ = true; name_ = new std::string(name); role_ = role; start_ns_ = PosixInNsec(); } RecordEvent::RecordEvent(const std::string &name, const EventRole role, - const std::string &attr) { + const std::string &attr, uint32_t level) { #ifndef _WIN32 #ifdef PADDLE_WITH_CUDA if (g_enable_nvprof_hook) { @@ -110,10 +122,14 @@ RecordEvent::RecordEvent(const std::string &name, const EventRole role, } #endif #endif - if (UNLIKELY(FLAGS_enable_host_event_recorder_hook == false)) { + if (FLAGS_enable_host_event_recorder_hook == false) { OriginalConstruct(name, role, attr); return; } + if (UNLIKELY(HostTraceLevel::GetInstance().NeedTrace(level) == false)) { + return; + } + is_enabled_ = true; name_ = new std::string(name); start_ns_ = PosixInNsec(); attr_ = new std::string(attr); @@ -138,10 +154,6 @@ void RecordEvent::OriginalConstruct(const std::string &name, } void RecordEvent::End() { - if (UNLIKELY(finished_)) { - return; - } - finished_ = true; #ifndef _WIN32 #ifdef PADDLE_WITH_CUDA if (g_enable_nvprof_hook && is_pushed_) { @@ -150,21 +162,25 @@ void RecordEvent::End() { #endif #endif uint64_t end_ns = PosixInNsec(); - if (LIKELY(FLAGS_enable_host_event_recorder_hook)) { + if (LIKELY(FLAGS_enable_host_event_recorder_hook && is_enabled_)) { if (LIKELY(shallow_copy_name_ != nullptr)) { HostEventRecorder::GetInstance().RecordEvent(shallow_copy_name_, - start_ns_, end_ns, role_); + start_ns_, end_ns, role_, + TracerEventType::NumTypes); } else if (name_ != nullptr) { if (attr_ == nullptr) { - HostEventRecorder::GetInstance().RecordEvent(*name_, start_ns_, end_ns, - role_); + HostEventRecorder::GetInstance().RecordEvent( + *name_, start_ns_, end_ns, role_, TracerEventType::NumTypes); } else { - HostEventRecorder::GetInstance().RecordEvent(*name_, start_ns_, end_ns, - role_, *attr_); + HostEventRecorder::GetInstance().RecordEvent( + *name_, start_ns_, 
end_ns, role_, TracerEventType::NumTypes, + *attr_); delete attr_; } delete name_; } + // use this flag to avoid double End(); + is_enabled_ = false; return; } @@ -179,15 +195,18 @@ void RecordEvent::End() { PopEvent(*name_, role_); delete name_; delete attr_; + // use this flag to avoid double End(); + is_enabled_ = false; } -RecordInstantEvent::RecordInstantEvent(const char *name, const EventRole role) { - if (UNLIKELY(FLAGS_enable_host_event_recorder_hook == false)) { +RecordInstantEvent::RecordInstantEvent(const char *name, TracerEventType type, + uint32_t level) { + if (UNLIKELY(HostTraceLevel::GetInstance().NeedTrace(level) == false)) { return; } auto start_end_ns = PosixInNsec(); HostEventRecorder::GetInstance().RecordEvent(name, start_end_ns, start_end_ns, - role); + EventRole::kOrdinary, type); } void MemEvenRecorder::PushMemRecord(const void *ptr, const Place &place, @@ -281,8 +300,8 @@ void PopMemEvent(uint64_t start_ns, uint64_t end_ns, size_t bytes, void Mark(const std::string &name) { if (FLAGS_enable_host_event_recorder_hook) { - HostEventRecorder::GetInstance().RecordEvent(name, 0, 0, - EventRole::kOrdinary); + HostEventRecorder::GetInstance().RecordEvent( + name, 0, 0, EventRole::kOrdinary, TracerEventType::NumTypes); return; } GetEventList().Record(EventType::kMark, name, g_thread_id); diff --git a/paddle/fluid/platform/profiler/CMakeLists.txt b/paddle/fluid/platform/profiler/CMakeLists.txt index de22183df6..e25e4f3f56 100644 --- a/paddle/fluid/platform/profiler/CMakeLists.txt +++ b/paddle/fluid/platform/profiler/CMakeLists.txt @@ -1 +1,3 @@ -cc_library(host_event_recorder SRCS host_event_recorder.cc DEPS os_info) +cc_library(host_tracer SRCS host_tracer.cc DEPS enforce) +cc_library(new_profiler SRCS profiler.cc DEPS host_tracer) +cc_test(new_profiler_test SRCS profiler_test.cc DEPS new_profiler) diff --git a/paddle/fluid/platform/profiler/common_event.h b/paddle/fluid/platform/profiler/common_event.h new file mode 100644 index 
0000000000..cfdc3be110 --- /dev/null +++ b/paddle/fluid/platform/profiler/common_event.h @@ -0,0 +1,66 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include "paddle/fluid/platform/event.h" // import EventRole, TODO(TIEXING): remove later +#include "paddle/fluid/platform/profiler/trace_event.h" + +namespace paddle { +namespace platform { + +struct CommonEvent { + public: + CommonEvent(const char *name, uint64_t start_ns, uint64_t end_ns, + EventRole role, TracerEventType type) + : name(name), + start_ns(start_ns), + end_ns(end_ns), + role(role), + type(type) {} + + CommonEvent(std::function arena_allocator, + const std::string &name_str, uint64_t start_ns, uint64_t end_ns, + EventRole role, TracerEventType type, const std::string &attr_str) + : start_ns(start_ns), end_ns(end_ns), role(role), type(type) { + auto buf = static_cast(arena_allocator(name_str.length() + 1)); + strncpy(buf, name_str.c_str(), name_str.length() + 1); + name = buf; + buf = static_cast(arena_allocator(attr_str.length() + 1)); + strncpy(buf, attr_str.c_str(), attr_str.length() + 1); + attr = buf; + } + + CommonEvent(std::function arena_allocator, + const std::string &name_str, uint64_t start_ns, uint64_t end_ns, + EventRole role, TracerEventType type) + : start_ns(start_ns), end_ns(end_ns), role(role), type(type) { + auto buf = 
static_cast(arena_allocator(name_str.length() + 1)); + strncpy(buf, name_str.c_str(), name_str.length() + 1); + name = buf; + } + + const char *name = nullptr; // not owned, designed for performance + uint64_t start_ns = 0; + uint64_t end_ns = 0; + EventRole role = EventRole::kOrdinary; + TracerEventType type = TracerEventType::NumTypes; + const char *attr = nullptr; // not owned, designed for performance +}; + +} // namespace platform +} // namespace paddle diff --git a/paddle/fluid/platform/profiler/event_tracing.h b/paddle/fluid/platform/profiler/event_tracing.h index f68b4b5162..2532077bcc 100644 --- a/paddle/fluid/platform/profiler/event_tracing.h +++ b/paddle/fluid/platform/profiler/event_tracing.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include "paddle/fluid/platform/event.h" +#include "paddle/fluid/platform/profiler/trace_event.h" namespace paddle { namespace platform { @@ -24,8 +25,8 @@ namespace platform { // associated with it. For example, thread starts working. // Chrome Trace Viewer Format: Instant Event struct RecordInstantEvent { - explicit RecordInstantEvent(const char* name, - const EventRole role = EventRole::kOrdinary); + explicit RecordInstantEvent(const char* name, TracerEventType type, + uint32_t level = 1); }; // CPU event tracing. A trace starts when an object of this clas is created and @@ -34,13 +35,15 @@ struct RecordInstantEvent { class RecordEvent { public: explicit RecordEvent(const std::string& name, - const EventRole role = EventRole::kOrdinary); + const EventRole role = EventRole::kOrdinary, + uint32_t level = 1); explicit RecordEvent(const char* name, - const EventRole role = EventRole::kOrdinary); + const EventRole role = EventRole::kOrdinary, + uint32_t level = 1); RecordEvent(const std::string& name, const EventRole role, - const std::string& attr); + const std::string& attr, uint32_t level = 1); // Stop event tracing explicitly before the object goes out of scope. 
// Sometimes it's inconvenient to use RAII diff --git a/paddle/fluid/platform/profiler/host_event_recorder.cc b/paddle/fluid/platform/profiler/host_event_recorder.cc deleted file mode 100644 index b8495ca45c..0000000000 --- a/paddle/fluid/platform/profiler/host_event_recorder.cc +++ /dev/null @@ -1,33 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/platform/profiler/host_event_recorder.h" -#include "paddle/fluid/platform/os_info.h" - -namespace paddle { -namespace platform { - -ThreadEventRecorder::ThreadEventRecorder() { - thread_id_ = GetCurrentThreadSysId(); - HostEventRecorder::GetInstance().RegisterThreadRecorder(thread_id_, this); -} - -HostEventSection HostEventRecorder::GatherEvents() { - HostEventSection host_sec; - host_sec.thr_sections.reserve(thread_recorders_.size()); - for (auto &kv : thread_recorders_) { - host_sec.thr_sections.emplace_back(std::move(kv.second->GatherEvents())); - } - return host_sec; -} - -} // namespace platform -} // namespace paddle diff --git a/paddle/fluid/platform/profiler/host_event_recorder.h b/paddle/fluid/platform/profiler/host_event_recorder.h index 071f0d65bd..9c810dc184 100644 --- a/paddle/fluid/platform/profiler/host_event_recorder.h +++ b/paddle/fluid/platform/profiler/host_event_recorder.h @@ -14,51 +14,17 @@ limitations under the License. 
*/ #pragma once -#include -#include #include #include -#include #include -#include "paddle/fluid/platform/event.h" +#include "paddle/fluid/framework/new_executor/workqueue/thread_data_registry.h" +#include "paddle/fluid/platform/macros.h" +#include "paddle/fluid/platform/os_info.h" +#include "paddle/fluid/platform/profiler/common_event.h" namespace paddle { namespace platform { -struct CommonEvent { - public: - CommonEvent(const char *name, uint64_t start_ns, uint64_t end_ns, - EventRole role) - : name(name), start_ns(start_ns), end_ns(end_ns), role(role) {} - - CommonEvent(std::function &arena_allocator, - const std::string &name_str, uint64_t start_ns, uint64_t end_ns, - EventRole role, const std::string &attr_str) - : start_ns(start_ns), end_ns(end_ns), role(role) { - auto buf = static_cast(arena_allocator(name_str.length() + 1)); - strncpy(buf, name_str.c_str(), name_str.length() + 1); - name = buf; - buf = static_cast(arena_allocator(attr_str.length() + 1)); - strncpy(buf, attr_str.c_str(), attr_str.length() + 1); - attr = buf; - } - - CommonEvent(const std::function &arena_allocator, - const std::string &name_str, uint64_t start_ns, uint64_t end_ns, - EventRole role) - : start_ns(start_ns), end_ns(end_ns), role(role) { - auto buf = static_cast(arena_allocator(name_str.length() + 1)); - strncpy(buf, name_str.c_str(), name_str.length() + 1); - name = buf; - } - - const char *name = nullptr; // not owned, designed for performance - uint64_t start_ns = 0; - uint64_t end_ns = 0; - EventRole role = EventRole::kOrdinary; - const char *attr = nullptr; // not owned, designed for performance -}; - template struct ContainsStdString : std::conditional_t< @@ -223,7 +189,8 @@ struct ThreadEventSection { class ThreadEventRecorder { public: - ThreadEventRecorder(); + ThreadEventRecorder() { thread_id_ = GetCurrentThreadSysId(); } + DISABLE_COPY_AND_ASSIGN(ThreadEventRecorder); public: @@ -261,34 +228,43 @@ class HostEventRecorder { return instance; } + // thread-safe // If 
your string argument has a longer lifetime than the Event, // use 'const char*'. e.g.: string literal, op name, etc. // Do your best to avoid using 'std::string' as the argument type. // It will cause deep-copy to harm performance. template void RecordEvent(Args &&... args) { - GetThreadLocalRecorder().RecordEvent(std::forward(args)...); + GetThreadLocalRecorder()->RecordEvent(std::forward(args)...); } + // thread-unsafe, make sure make sure there is no running tracing. // Poor performance, call it at the ending - HostEventSection GatherEvents(); - - void RegisterThreadRecorder(uint64_t tid, ThreadEventRecorder *recorder) { - const std::lock_guard guard(thread_recorders_lock_); - thread_recorders_[tid] = recorder; + HostEventSection GatherEvents() { + auto thr_recorders = + ThreadEventRecorderRegistry::GetInstance().GetAllThreadDataByRef(); + HostEventSection host_sec; + host_sec.process_id = GetProcessId(); + host_sec.thr_sections.reserve(thr_recorders.size()); + for (auto &kv : thr_recorders) { + auto &thr_recorder = kv.second.get(); + host_sec.thr_sections.emplace_back( + std::move(thr_recorder.GatherEvents())); + } + return host_sec; } private: + using ThreadEventRecorderRegistry = + framework::ThreadDataRegistry; + HostEventRecorder() = default; DISABLE_COPY_AND_ASSIGN(HostEventRecorder); - ThreadEventRecorder &GetThreadLocalRecorder() { - static thread_local ThreadEventRecorder tls_recorder; - return tls_recorder; + ThreadEventRecorder *GetThreadLocalRecorder() { + return ThreadEventRecorderRegistry::GetInstance() + .GetMutableCurrentThreadData(); } - - std::mutex thread_recorders_lock_; - std::unordered_map thread_recorders_; }; } // namespace platform diff --git a/paddle/fluid/platform/profiler/host_tracer.cc b/paddle/fluid/platform/profiler/host_tracer.cc new file mode 100644 index 0000000000..80f9a5d9af --- /dev/null +++ b/paddle/fluid/platform/profiler/host_tracer.cc @@ -0,0 +1,71 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+ +licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/platform/profiler/host_tracer.h" +#include "glog/logging.h" +#include "paddle/fluid/platform/profiler/common_event.h" +#include "paddle/fluid/platform/profiler/host_event_recorder.h" + +namespace paddle { +namespace platform { + +namespace { + +void ProcessHostEvents(const HostEventSection& host_events, + TraceEventCollector* collector) { + for (const auto& thr_sec : host_events.thr_sections) { + uint64_t tid = thr_sec.thread_id; + for (const auto& evt : thr_sec.events) { + HostTraceEvent event; + event.name = evt.name; + event.type = evt.type; + event.start_ns = evt.start_ns; + event.end_ns = evt.end_ns; + event.process_id = host_events.process_id; + event.thread_id = tid; + collector->AddHostEvent(std::move(event)); + } + } +} + +} // namespace + +void HostTracer::StartTracing() { + PADDLE_ENFORCE_EQ( + state_ == TracerState::READY || state_ == TracerState::STOPED, true, + platform::errors::PreconditionNotMet("TracerState must be READY")); + HostEventRecorder::GetInstance().GatherEvents(); + HostTraceLevel::GetInstance().SetLevel(trace_level_); + state_ = TracerState::STARTED; +} + +void HostTracer::StopTracing() { + PADDLE_ENFORCE_EQ( + state_, TracerState::STARTED, + platform::errors::PreconditionNotMet("TracerState must be STARTED")); + HostTraceLevel::GetInstance().SetLevel(HostTraceLevel::kDisabled); + state_ = TracerState::STOPED; +} + +void HostTracer::CollectTraceData(TraceEventCollector* collector) { + 
PADDLE_ENFORCE_EQ( + state_, TracerState::STOPED, + platform::errors::PreconditionNotMet("TracerState must be STOPED")); + HostEventSection host_events = + HostEventRecorder::GetInstance().GatherEvents(); + ProcessHostEvents(host_events, collector); +} + +} // namespace platform +} // namespace paddle diff --git a/paddle/fluid/platform/profiler/host_tracer.h b/paddle/fluid/platform/profiler/host_tracer.h new file mode 100644 index 0000000000..c73b5eca15 --- /dev/null +++ b/paddle/fluid/platform/profiler/host_tracer.h @@ -0,0 +1,63 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +#include "paddle/fluid/platform/profiler/tracer_base.h" + +namespace paddle { +namespace platform { + +class HostTraceLevel { + public: + static constexpr int64_t kDisabled = -1; + + static HostTraceLevel& GetInstance() { + static HostTraceLevel instance; + return instance; + } + + bool NeedTrace(uint32_t level) { + return trace_level_ >= static_cast(level); + } + + void SetLevel(int64_t trace_level) { trace_level_ = trace_level; } + + private: + // Verbose trace level, works like VLOG(level) + int trace_level_ = kDisabled; +}; + +struct HostTracerOptions { + uint32_t trace_level = 0; +}; + +class HostTracer : public TracerBase { + public: + explicit HostTracer(const HostTracerOptions& options) { + trace_level_ = options.trace_level; + } + + void StartTracing() override; + + void StopTracing() override; + + void CollectTraceData(TraceEventCollector* collector) override; + + private: + uint32_t trace_level_; +}; + +} // namespace platform +} // namespace paddle diff --git a/paddle/fluid/platform/profiler/profiler.cc b/paddle/fluid/platform/profiler/profiler.cc new file mode 100644 index 0000000000..e9f0eb98d5 --- /dev/null +++ b/paddle/fluid/platform/profiler/profiler.cc @@ -0,0 +1,76 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/fluid/platform/profiler/profiler.h" +#include "glog/logging.h" +#ifdef PADDLE_WITH_CUDA +#include +#endif +#ifdef PADDLE_WITH_HIP +#include +#endif +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +#include "paddle/fluid/platform/device/gpu/gpu_info.h" +#endif +#include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/platform/profiler/host_tracer.h" + +namespace paddle { +namespace platform { + +void SynchronizeAllDevice(); + +std::atomic Profiler::alive_{false}; + +std::unique_ptr Profiler::Create(const ProfilerOptions& options) { + if (alive_.exchange(true)) { + return nullptr; + } + return std::unique_ptr(new Profiler(options)); +} + +Profiler::Profiler(const ProfilerOptions& options) { + options_ = options; + HostTracerOptions host_tracer_options; + host_tracer_options.trace_level = options.trace_level; + tracers_.emplace_back(new HostTracer(host_tracer_options), true); +} + +Profiler::~Profiler() { alive_.store(false); } + +void Profiler::Prepare() { + for (auto& tracer : tracers_) { + tracer.Get().PrepareTracing(); + } +} + +void Profiler::Start() { + SynchronizeAllDevice(); + for (auto& tracer : tracers_) { + tracer.Get().StartTracing(); + } +} + +TraceEventCollector Profiler::Stop() { + SynchronizeAllDevice(); + TraceEventCollector collector; + for (auto& tracer : tracers_) { + tracer.Get().StopTracing(); + tracer.Get().CollectTraceData(&collector); + } + return collector; +} + +} // namespace platform +} // namespace paddle diff --git a/paddle/fluid/platform/profiler/profiler.h b/paddle/fluid/platform/profiler/profiler.h new file mode 100644 index 0000000000..1324d81f95 --- /dev/null +++ b/paddle/fluid/platform/profiler/profiler.h @@ -0,0 +1,74 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include +#include +#include +#include "paddle/fluid/platform/macros.h" +#include "paddle/fluid/platform/profiler/trace_event_collector.h" +#include "paddle/fluid/platform/profiler/tracer_base.h" + +namespace paddle { +namespace platform { + +struct ProfilerOptions { + uint32_t trace_level = 0; +}; + +class Profiler { + public: + static std::unique_ptr Create(const ProfilerOptions& options); + + void Prepare(); + + void Start(); + + TraceEventCollector Stop(); + + ~Profiler(); + + private: + class TracerHolder { + public: + TracerHolder(TracerBase* tracer, bool owned) + : tracer(tracer), owned(owned) {} + ~TracerHolder() { + if (owned) { + delete tracer; + } + } + + TracerBase& Get() { return *tracer; } + + private: + TracerBase* tracer; + bool owned; + }; + + explicit Profiler(const ProfilerOptions& options); + + DISABLE_COPY_AND_ASSIGN(Profiler); + + static std::atomic alive_; + ProfilerOptions options_; + uint64_t start_ns_ = UINT64_MAX; + std::list tracers_; +}; + +} // namespace platform +} // namespace paddle diff --git a/paddle/fluid/platform/profiler/profiler_test.cc b/paddle/fluid/platform/profiler/profiler_test.cc new file mode 100644 index 0000000000..414987d2f1 --- /dev/null +++ b/paddle/fluid/platform/profiler/profiler_test.cc @@ -0,0 +1,52 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include +#include "glog/logging.h" +#include "gtest/gtest.h" +#ifdef PADDLE_WITH_CUDA +#include +#endif +#ifdef PADDLE_WITH_HIP +#include +#endif +#include "paddle/fluid/platform/profiler/event_tracing.h" +#include "paddle/fluid/platform/profiler/profiler.h" + +TEST(ProfilerTest, TestHostTracer) { + using paddle::platform::ProfilerOptions; + using paddle::platform::Profiler; + using paddle::platform::RecordInstantEvent; + using paddle::platform::TracerEventType; + ProfilerOptions options; + options.trace_level = 2; + auto profiler = Profiler::Create(options); + EXPECT_TRUE(profiler); + profiler->Prepare(); + profiler->Start(); + { + RecordInstantEvent("TestTraceLevel_record1", TracerEventType::UserDefined, + 2); + RecordInstantEvent("TestTraceLevel_record2", TracerEventType::UserDefined, + 3); + } + auto collector = profiler->Stop(); + std::set host_events; + for (const auto evt : collector.HostEvents()) { + host_events.insert(evt.name); + } + EXPECT_EQ(host_events.count("TestTraceLevel_record1"), 1u); + EXPECT_EQ(host_events.count("TestTraceLevel_record2"), 0u); +} diff --git a/paddle/fluid/platform/profiler/trace_event.h b/paddle/fluid/platform/profiler/trace_event.h index e676942c45..1f146adf4f 100644 --- a/paddle/fluid/platform/profiler/trace_event.h +++ b/paddle/fluid/platform/profiler/trace_event.h @@ -76,26 +76,28 @@ struct KernelEventInfo { uint64_t completed; }; +static constexpr size_t kMemKindMaxLen = 50; + struct MemcpyEventInfo { // The number of bytes transferred by the memory copy. uint64_t num_bytes; // The kind of the memory copy. 
// Each kind represents the source and destination targets of a memory copy.
   // Targets are host, device, and array. Refer to CUpti_ActivityMemcpyKind
-  std::string copy_kind;
+  // NOTE(review): copy_kind is commented out instead of being converted like
+  // src_kind/dst_kind below, so the description above no longer documents a
+  // live member. Confirm whether it should return as a fixed-size buffer.
+  // std::string copy_kind;
   // The source memory kind read by the memory copy.
   // Each kind represents the type of the memory accessed by a memory
   // operation/copy. Refer to CUpti_ActivityMemoryKind
-  std::string src_kind;
+  // Fixed-size buffer of kMemKindMaxLen chars replacing the former
+  // std::string (same for dst_kind and MemsetEventInfo::memory_kind).
+  // NOTE(review): presumably holds a NUL-terminated name; confirm that
+  // writers truncate to kMemKindMaxLen - 1 characters.
+  char src_kind[kMemKindMaxLen];
   // The destination memory kind read by the memory copy.
-  std::string dst_kind;
+  char dst_kind[kMemKindMaxLen];
 };
 
 struct MemsetEventInfo {
   // The number of bytes being set by the memory set.
   uint64_t num_bytes;
   // The memory kind of the memory set. Refer to CUpti_ActivityMemoryKind
-  std::string memory_kind;
+  char memory_kind[kMemKindMaxLen];
   // the value being assigned to memory by the memory set.
   uint32_t value;
 };
diff --git a/paddle/fluid/platform/profiler/trace_event_collector.h b/paddle/fluid/platform/profiler/trace_event_collector.h
index eabafb7354..30b32220d9 100644
--- a/paddle/fluid/platform/profiler/trace_event_collector.h
+++ b/paddle/fluid/platform/profiler/trace_event_collector.h
@@ -15,50 +15,37 @@ limitations under the License.
*/
 
 #pragma once
 
 #include <list>
+#include <utility>
+#include "paddle/fluid/platform/profiler/trace_event.h"
 
 namespace paddle {
 namespace platform {
 
-struct HostRecord {
-  std::string name;
-  uint64_t start_ns;
-  uint64_t end_ns;
-  uint64_t process_id;
-  uint64_t thread_id;
-};
+// Accumulates host, runtime and device trace events in three separate lists
+// and exposes each list read-only.
+class TraceEventCollector {
+ public:
+  // Each Add*Event takes its argument by rvalue reference and moves it into
+  // the matching list. A named rvalue reference is an lvalue, so without
+  // std::move the event would be copied instead.
+  void AddHostEvent(HostTraceEvent&& event) {
+    host_events_.push_back(std::move(event));
+  }
 
-struct RuntimeRecord {
-  std::string name;
-  uint64_t start_ns;
-  uint64_t end_ns;
-  uint64_t process_id;
-  uint64_t thread_id;
-  uint32_t correlation_id;
-};
+  void AddRuntimeEvent(RuntimeTraceEvent&& event) {
+    runtime_events_.push_back(std::move(event));
+  }
 
-struct DeviceRecord {
-  std::string name;
-  uint64_t start_ns;
-  uint64_t end_ns;
-  uint32_t correlation_id;
-};
+  void AddDeviceEvent(DeviceTraceEvent&& event) {
+    device_events_.push_back(std::move(event));
+  }
 
-class TraceEventCollector {
- public:
-  void AddHostRecord(HostRecord&& record) { host_records_.push_back(record); }
+  // Read-only views of the events added so far, in insertion order.
+  const std::list<HostTraceEvent>& HostEvents() const { return host_events_; }
 
-  void AddRuntimeRecord(RuntimeRecord&& record) {
-    runtime_records_.push_back(record);
+  const std::list<RuntimeTraceEvent>& RuntimeEvents() const {
+    return runtime_events_;
   }
 
-  void AddDeviceRecord(DeviceRecord&& record) {
-    device_records_.push_back(record);
+  const std::list<DeviceTraceEvent>& DeviceEvents() const {
+    return device_events_;
   }
 
  private:
-  std::list<HostRecord> host_records_;
-  std::list<RuntimeRecord> runtime_records_;
-  std::list<DeviceRecord> device_records_;
+  std::list<HostTraceEvent> host_events_;
+  std::list<RuntimeTraceEvent> runtime_events_;
+  std::list<DeviceTraceEvent> device_events_;
 };
 
 }  // namespace platform
-- 
GitLab