未验证 提交 7c489c2e 编写于 作者: L liutiexing 提交者: GitHub

Host tracer and ProfilerController (#39230)

* add align for WorkQueue

* add spinlock

* merge develop

* merge

* Add EventsWaiter

* Revert "Add EventsWaiter"

This reverts commit e206173aa9be7401b83a53581627bfaf557c8fb2.

* split template

* Add Profiler and HostTracer

* update

* update

* update

* updateg

* fix cmake
Co-authored-by: Nliutiexing <liutiexing@google.com>
上级 44af74b8
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <functional>
#include <mutex>
#include <thread>
#include <type_traits>
#include <unordered_map>
namespace paddle {
namespace framework {
// Hash of the id of the thread that runs this translation unit's dynamic
// initialization (presumably the main thread -- TODO confirm for code loaded
// from a non-main thread). Declared `static` in a header, so every including
// translation unit gets its own copy.
static uint64_t main_tid =
    std::hash<std::thread::id>()(std::this_thread::get_id());

// Process-wide registry holding one `T` per thread.
//
// Each thread's `T` lives in a thread-local ThreadDataHolder that registers
// itself here on construction and unregisters on destruction. The registry
// only stores non-owning pointers keyed by the hashed thread id.
template <typename T>
class ThreadDataRegistry {
  class ThreadDataHolder;

 public:
  // Returns the singleton instance for this `T`.
  static ThreadDataRegistry& GetInstance() {
    static ThreadDataRegistry instance;
    return instance;
  }

  // Returns a pointer to the calling thread's data, creating and registering
  // it on first use.
  T* GetMutableCurrentThreadData() { return &CurrentThreadData(); }

  // Returns a read-only reference to the calling thread's data, creating and
  // registering it on first use.
  const T& GetCurrentThreadData() { return CurrentThreadData(); }

  // Copy-assigns `val` into the calling thread's data while holding the
  // registry lock. Only available when `T` is copy-assignable.
  template <typename Alias = T,
            typename = std::enable_if_t<std::is_copy_assignable<Alias>::value>>
  void SetCurrentThreadData(const T& val) {
    // Materialise the thread-local slot BEFORE taking lock_: its first access
    // runs ThreadDataHolder's constructor, which calls RegisterData() and
    // locks lock_ itself. Doing this under the lock would self-deadlock on
    // the non-recursive std::mutex.
    T& current = CurrentThreadData();
    std::lock_guard<std::mutex> guard(lock_);
    current = val;
  }

  // Returns a by-value snapshot of the data of all registered threads, keyed
  // by hashed thread id. Make sure no thread is created or destroyed while
  // using it. Only available when `T` is copy-constructible.
  template <typename Alias = T, typename = std::enable_if_t<
                                    std::is_copy_constructible<Alias>::value>>
  std::unordered_map<uint64_t, T> GetAllThreadDataByValue() {
    std::unordered_map<uint64_t, T> snapshot;
    std::lock_guard<std::mutex> guard(lock_);
    snapshot.reserve(tid_map_.size());
    for (auto& entry : tid_map_) {
      snapshot.emplace(entry.first, entry.second->GetData());
    }
    return snapshot;
  }

  // Returns references to the data of all registered threads, keyed by hashed
  // thread id. Make sure no thread is created or destroyed while using it:
  // the references point into thread-local storage.
  std::unordered_map<uint64_t, std::reference_wrapper<T>>
  GetAllThreadDataByRef() {
    std::unordered_map<uint64_t, std::reference_wrapper<T>> refs;
    std::lock_guard<std::mutex> guard(lock_);
    refs.reserve(tid_map_.size());
    for (auto& entry : tid_map_) {
      refs.emplace(entry.first, std::ref(entry.second->GetData()));
    }
    return refs;
  }

  // Records (or overwrites) the holder registered for `tid`.
  void RegisterData(uint64_t tid, ThreadDataHolder* tls_obj) {
    std::lock_guard<std::mutex> guard(lock_);
    tid_map_[tid] = tls_obj;
  }

  // Removes the holder registered for `tid`. The entry for `main_tid` is
  // never erased (presumably to avoid touching the registry during process
  // teardown -- TODO confirm).
  void UnregisterData(uint64_t tid) {
    if (tid == main_tid) {
      return;
    }
    std::lock_guard<std::mutex> guard(lock_);
    tid_map_.erase(tid);
  }

 private:
  // Thread-local wrapper around `T` that registers itself with the registry
  // for the lifetime of the owning thread.
  class ThreadDataHolder {
   public:
    ThreadDataHolder()
        : tid_(std::hash<std::thread::id>()(std::this_thread::get_id())) {
      ThreadDataRegistry::GetInstance().RegisterData(tid_, this);
    }

    ~ThreadDataHolder() {
      ThreadDataRegistry::GetInstance().UnregisterData(tid_);
    }

    T& GetData() { return data_; }

   private:
    uint64_t tid_;
    T data_;
  };

  ThreadDataRegistry() = default;
  ThreadDataRegistry(const ThreadDataRegistry&) = delete;
  ThreadDataRegistry& operator=(const ThreadDataRegistry&) = delete;

  // Returns the calling thread's data; the first call on a thread constructs
  // the holder, which registers itself (and therefore takes lock_).
  T& CurrentThreadData() {
    static thread_local ThreadDataHolder thread_data;
    return thread_data.GetData();
  }

  std::mutex lock_;  // guards tid_map_
  std::unordered_map<uint64_t, ThreadDataHolder*> tid_map_;  // not owned
};
} // namespace framework
} // namespace paddle
...@@ -177,13 +177,13 @@ add_subdirectory(profiler) ...@@ -177,13 +177,13 @@ add_subdirectory(profiler)
cc_library(device_tracer SRCS device_tracer.cc DEPS boost profiler_proto framework_proto ${GPU_CTX_DEPS}) cc_library(device_tracer SRCS device_tracer.cc DEPS boost profiler_proto framework_proto ${GPU_CTX_DEPS})
if(WITH_GPU) if(WITH_GPU)
nv_library(profiler SRCS profiler.cc profiler.cu DEPS host_event_recorder os_info device_tracer gpu_info enforce dynload_cuda) nv_library(profiler SRCS profiler.cc profiler.cu DEPS os_info device_tracer gpu_info enforce dynload_cuda)
nv_library(device_memory_aligment SRCS device_memory_aligment.cc DEPS cpu_info gpu_info place) nv_library(device_memory_aligment SRCS device_memory_aligment.cc DEPS cpu_info gpu_info place)
elseif(WITH_ROCM) elseif(WITH_ROCM)
hip_library(profiler SRCS profiler.cc profiler.cu DEPS host_event_recorder os_info device_tracer gpu_info enforce) hip_library(profiler SRCS profiler.cc profiler.cu DEPS os_info device_tracer gpu_info enforce)
hip_library(device_memory_aligment SRCS device_memory_aligment.cc DEPS cpu_info gpu_info place) hip_library(device_memory_aligment SRCS device_memory_aligment.cc DEPS cpu_info gpu_info place)
else() else()
cc_library(profiler SRCS profiler.cc DEPS host_event_recorder os_info device_tracer enforce) cc_library(profiler SRCS profiler.cc DEPS os_info device_tracer enforce)
cc_library(device_memory_aligment SRCS device_memory_aligment.cc DEPS cpu_info place) cc_library(device_memory_aligment SRCS device_memory_aligment.cc DEPS cpu_info place)
endif() endif()
......
...@@ -14,7 +14,6 @@ limitations under the License. */ ...@@ -14,7 +14,6 @@ limitations under the License. */
#include "paddle/fluid/platform/os_info.h" #include "paddle/fluid/platform/os_info.h"
#include <functional> #include <functional>
#include <mutex>
#include <sstream> #include <sstream>
#include <thread> #include <thread>
#include <vector> #include <vector>
...@@ -27,90 +26,14 @@ limitations under the License. */ ...@@ -27,90 +26,14 @@ limitations under the License. */
#else #else
#include <unistd.h> #include <unistd.h>
#endif #endif
#include "paddle/fluid/framework/new_executor/workqueue/thread_data_registry.h"
#include "paddle/fluid/platform/macros.h" // import DISABLE_COPY_AND_ASSIGN #include "paddle/fluid/platform/macros.h" // import DISABLE_COPY_AND_ASSIGN
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace internal { namespace internal {
static uint64_t main_tid = using framework::ThreadDataRegistry;
std::hash<std::thread::id>()(std::this_thread::get_id());
template <typename T>
class ThreadDataRegistry {
class ThreadDataHolder;
public:
// Singleton
static ThreadDataRegistry& GetInstance() {
static ThreadDataRegistry instance;
return instance;
}
const T& GetCurrentThreadData() { return CurrentThreadData(); }
void SetCurrentThreadData(const T& val) {
std::lock_guard<std::mutex> lock(lock_);
CurrentThreadData() = val;
}
// Returns current snapshot of all threads. Make sure there is no thread
// create/destory when using it.
template <typename = std::enable_if_t<std::is_copy_constructible<T>::value>>
std::unordered_map<uint64_t, T> GetAllThreadDataByValue() {
std::unordered_map<uint64_t, T> data_copy;
std::lock_guard<std::mutex> lock(lock_);
data_copy.reserve(tid_map_.size());
for (auto& kv : tid_map_) {
data_copy.emplace(kv.first, kv.second->GetData());
}
return std::move(data_copy);
}
void RegisterData(uint64_t tid, ThreadDataHolder* tls_obj) {
std::lock_guard<std::mutex> lock(lock_);
tid_map_[tid] = tls_obj;
}
void UnregisterData(uint64_t tid) {
if (tid == main_tid) {
return;
}
std::lock_guard<std::mutex> lock(lock_);
tid_map_.erase(tid);
}
private:
class ThreadDataHolder {
public:
ThreadDataHolder() {
tid_ = std::hash<std::thread::id>()(std::this_thread::get_id());
ThreadDataRegistry::GetInstance().RegisterData(tid_, this);
}
~ThreadDataHolder() {
ThreadDataRegistry::GetInstance().UnregisterData(tid_);
}
T& GetData() { return data_; }
private:
uint64_t tid_;
T data_;
};
ThreadDataRegistry() = default;
DISABLE_COPY_AND_ASSIGN(ThreadDataRegistry);
T& CurrentThreadData() {
static thread_local ThreadDataHolder thread_data;
return thread_data.GetData();
}
std::mutex lock_;
std::unordered_map<uint64_t, ThreadDataHolder*> tid_map_; // not owned
};
class InternalThreadId { class InternalThreadId {
public: public:
......
...@@ -21,7 +21,9 @@ limitations under the License. */ ...@@ -21,7 +21,9 @@ limitations under the License. */
#include "paddle/fluid/platform/device_tracer.h" #include "paddle/fluid/platform/device_tracer.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler/common_event.h"
#include "paddle/fluid/platform/profiler/host_event_recorder.h" #include "paddle/fluid/platform/profiler/host_event_recorder.h"
#include "paddle/fluid/platform/profiler/host_tracer.h"
#include "paddle/fluid/platform/profiler_helper.h" #include "paddle/fluid/platform/profiler_helper.h"
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/dynload/nvtx.h" #include "paddle/fluid/platform/dynload/nvtx.h"
...@@ -64,7 +66,8 @@ double Event::CudaElapsedMs(const Event &e) const { ...@@ -64,7 +66,8 @@ double Event::CudaElapsedMs(const Event &e) const {
#endif #endif
} }
RecordEvent::RecordEvent(const char *name, const EventRole role) { RecordEvent::RecordEvent(const char *name, const EventRole role,
uint32_t level) {
#ifndef _WIN32 #ifndef _WIN32
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
if (g_enable_nvprof_hook) { if (g_enable_nvprof_hook) {
...@@ -73,16 +76,21 @@ RecordEvent::RecordEvent(const char *name, const EventRole role) { ...@@ -73,16 +76,21 @@ RecordEvent::RecordEvent(const char *name, const EventRole role) {
} }
#endif #endif
#endif #endif
if (UNLIKELY(FLAGS_enable_host_event_recorder_hook == false)) { if (FLAGS_enable_host_event_recorder_hook == false) {
OriginalConstruct(name, role, "none"); OriginalConstruct(name, role, "none");
return; return;
} }
if (UNLIKELY(HostTraceLevel::GetInstance().NeedTrace(level) == false)) {
return;
}
is_enabled_ = true;
shallow_copy_name_ = name; shallow_copy_name_ = name;
role_ = role; role_ = role;
start_ns_ = PosixInNsec(); start_ns_ = PosixInNsec();
} }
RecordEvent::RecordEvent(const std::string &name, const EventRole role) { RecordEvent::RecordEvent(const std::string &name, const EventRole role,
uint32_t level) {
#ifndef _WIN32 #ifndef _WIN32
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
if (g_enable_nvprof_hook) { if (g_enable_nvprof_hook) {
...@@ -91,17 +99,21 @@ RecordEvent::RecordEvent(const std::string &name, const EventRole role) { ...@@ -91,17 +99,21 @@ RecordEvent::RecordEvent(const std::string &name, const EventRole role) {
} }
#endif #endif
#endif #endif
if (UNLIKELY(FLAGS_enable_host_event_recorder_hook == false)) { if (FLAGS_enable_host_event_recorder_hook == false) {
OriginalConstruct(name, role, "none"); OriginalConstruct(name, role, "none");
return; return;
} }
if (UNLIKELY(HostTraceLevel::GetInstance().NeedTrace(level) == false)) {
return;
}
is_enabled_ = true;
name_ = new std::string(name); name_ = new std::string(name);
role_ = role; role_ = role;
start_ns_ = PosixInNsec(); start_ns_ = PosixInNsec();
} }
RecordEvent::RecordEvent(const std::string &name, const EventRole role, RecordEvent::RecordEvent(const std::string &name, const EventRole role,
const std::string &attr) { const std::string &attr, uint32_t level) {
#ifndef _WIN32 #ifndef _WIN32
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
if (g_enable_nvprof_hook) { if (g_enable_nvprof_hook) {
...@@ -110,10 +122,14 @@ RecordEvent::RecordEvent(const std::string &name, const EventRole role, ...@@ -110,10 +122,14 @@ RecordEvent::RecordEvent(const std::string &name, const EventRole role,
} }
#endif #endif
#endif #endif
if (UNLIKELY(FLAGS_enable_host_event_recorder_hook == false)) { if (FLAGS_enable_host_event_recorder_hook == false) {
OriginalConstruct(name, role, attr); OriginalConstruct(name, role, attr);
return; return;
} }
if (UNLIKELY(HostTraceLevel::GetInstance().NeedTrace(level) == false)) {
return;
}
is_enabled_ = true;
name_ = new std::string(name); name_ = new std::string(name);
start_ns_ = PosixInNsec(); start_ns_ = PosixInNsec();
attr_ = new std::string(attr); attr_ = new std::string(attr);
...@@ -138,10 +154,6 @@ void RecordEvent::OriginalConstruct(const std::string &name, ...@@ -138,10 +154,6 @@ void RecordEvent::OriginalConstruct(const std::string &name,
} }
void RecordEvent::End() { void RecordEvent::End() {
if (UNLIKELY(finished_)) {
return;
}
finished_ = true;
#ifndef _WIN32 #ifndef _WIN32
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
if (g_enable_nvprof_hook && is_pushed_) { if (g_enable_nvprof_hook && is_pushed_) {
...@@ -150,21 +162,25 @@ void RecordEvent::End() { ...@@ -150,21 +162,25 @@ void RecordEvent::End() {
#endif #endif
#endif #endif
uint64_t end_ns = PosixInNsec(); uint64_t end_ns = PosixInNsec();
if (LIKELY(FLAGS_enable_host_event_recorder_hook)) { if (LIKELY(FLAGS_enable_host_event_recorder_hook && is_enabled_)) {
if (LIKELY(shallow_copy_name_ != nullptr)) { if (LIKELY(shallow_copy_name_ != nullptr)) {
HostEventRecorder::GetInstance().RecordEvent(shallow_copy_name_, HostEventRecorder::GetInstance().RecordEvent(shallow_copy_name_,
start_ns_, end_ns, role_); start_ns_, end_ns, role_,
TracerEventType::NumTypes);
} else if (name_ != nullptr) { } else if (name_ != nullptr) {
if (attr_ == nullptr) { if (attr_ == nullptr) {
HostEventRecorder::GetInstance().RecordEvent(*name_, start_ns_, end_ns, HostEventRecorder::GetInstance().RecordEvent(
role_); *name_, start_ns_, end_ns, role_, TracerEventType::NumTypes);
} else { } else {
HostEventRecorder::GetInstance().RecordEvent(*name_, start_ns_, end_ns, HostEventRecorder::GetInstance().RecordEvent(
role_, *attr_); *name_, start_ns_, end_ns, role_, TracerEventType::NumTypes,
*attr_);
delete attr_; delete attr_;
} }
delete name_; delete name_;
} }
// use this flag to avoid double End();
is_enabled_ = false;
return; return;
} }
...@@ -179,15 +195,18 @@ void RecordEvent::End() { ...@@ -179,15 +195,18 @@ void RecordEvent::End() {
PopEvent(*name_, role_); PopEvent(*name_, role_);
delete name_; delete name_;
delete attr_; delete attr_;
// use this flag to avoid double End();
is_enabled_ = false;
} }
RecordInstantEvent::RecordInstantEvent(const char *name, const EventRole role) { RecordInstantEvent::RecordInstantEvent(const char *name, TracerEventType type,
if (UNLIKELY(FLAGS_enable_host_event_recorder_hook == false)) { uint32_t level) {
if (UNLIKELY(HostTraceLevel::GetInstance().NeedTrace(level) == false)) {
return; return;
} }
auto start_end_ns = PosixInNsec(); auto start_end_ns = PosixInNsec();
HostEventRecorder::GetInstance().RecordEvent(name, start_end_ns, start_end_ns, HostEventRecorder::GetInstance().RecordEvent(name, start_end_ns, start_end_ns,
role); EventRole::kOrdinary, type);
} }
void MemEvenRecorder::PushMemRecord(const void *ptr, const Place &place, void MemEvenRecorder::PushMemRecord(const void *ptr, const Place &place,
...@@ -281,8 +300,8 @@ void PopMemEvent(uint64_t start_ns, uint64_t end_ns, size_t bytes, ...@@ -281,8 +300,8 @@ void PopMemEvent(uint64_t start_ns, uint64_t end_ns, size_t bytes,
void Mark(const std::string &name) { void Mark(const std::string &name) {
if (FLAGS_enable_host_event_recorder_hook) { if (FLAGS_enable_host_event_recorder_hook) {
HostEventRecorder::GetInstance().RecordEvent(name, 0, 0, HostEventRecorder::GetInstance().RecordEvent(
EventRole::kOrdinary); name, 0, 0, EventRole::kOrdinary, TracerEventType::NumTypes);
return; return;
} }
GetEventList().Record(EventType::kMark, name, g_thread_id); GetEventList().Record(EventType::kMark, name, g_thread_id);
......
cc_library(host_event_recorder SRCS host_event_recorder.cc DEPS os_info) cc_library(host_tracer SRCS host_tracer.cc DEPS enforce)
cc_library(new_profiler SRCS profiler.cc DEPS host_tracer)
cc_test(new_profiler_test SRCS profiler_test.cc DEPS new_profiler)
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cstring>
#include <functional>
#include <string>
#include "paddle/fluid/platform/event.h" // import EventRole, TODO(TIEXING): remove later
#include "paddle/fluid/platform/profiler/trace_event.h"
namespace paddle {
namespace platform {
// A single recorded host event: a name, a [start_ns, end_ns] interval, a role,
// a tracer event type and an optional attribute string. The event never owns
// its strings: they are either borrowed from the caller or copied into memory
// obtained from a caller-supplied allocator.
struct CommonEvent {
 public:
  // Borrowing constructor: stores the `name` pointer as-is, without copying.
  CommonEvent(const char *name, uint64_t start_ns, uint64_t end_ns,
              EventRole role, TracerEventType type)
      : name(name),
        start_ns(start_ns),
        end_ns(end_ns),
        role(role),
        type(type) {}

  // Copying constructor with attribute: duplicates `name_str` and then
  // `attr_str` into buffers returned by `arena_allocator`.
  CommonEvent(std::function<void *(size_t)> arena_allocator,
              const std::string &name_str, uint64_t start_ns, uint64_t end_ns,
              EventRole role, TracerEventType type, const std::string &attr_str)
      : start_ns(start_ns), end_ns(end_ns), role(role), type(type) {
    name = DuplicateInto(arena_allocator, name_str);
    attr = DuplicateInto(arena_allocator, attr_str);
  }

  // Copying constructor without attribute: duplicates `name_str` into a
  // buffer returned by `arena_allocator`; `attr` stays nullptr.
  CommonEvent(std::function<void *(size_t)> arena_allocator,
              const std::string &name_str, uint64_t start_ns, uint64_t end_ns,
              EventRole role, TracerEventType type)
      : start_ns(start_ns), end_ns(end_ns), role(role), type(type) {
    name = DuplicateInto(arena_allocator, name_str);
  }

  const char *name = nullptr;  // not owned, designed for performance
  uint64_t start_ns = 0;
  uint64_t end_ns = 0;
  EventRole role = EventRole::kOrdinary;
  TracerEventType type = TracerEventType::NumTypes;
  const char *attr = nullptr;  // not owned, designed for performance

 private:
  // Requests `text.length() + 1` bytes from `allocate` and copies `text`,
  // including its terminating NUL, into them.
  static const char *DuplicateInto(
      const std::function<void *(size_t)> &allocate, const std::string &text) {
    const size_t bytes = text.length() + 1;
    char *dst = static_cast<char *>(allocate(bytes));
    strncpy(dst, text.c_str(), bytes);
    return dst;
  }
};
} // namespace platform
} // namespace paddle
...@@ -16,6 +16,7 @@ limitations under the License. */ ...@@ -16,6 +16,7 @@ limitations under the License. */
#include <string> #include <string>
#include "paddle/fluid/platform/event.h" #include "paddle/fluid/platform/event.h"
#include "paddle/fluid/platform/profiler/trace_event.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
...@@ -24,8 +25,8 @@ namespace platform { ...@@ -24,8 +25,8 @@ namespace platform {
// associated with it. For example, thread starts working. // associated with it. For example, thread starts working.
// Chrome Trace Viewer Format: Instant Event // Chrome Trace Viewer Format: Instant Event
struct RecordInstantEvent { struct RecordInstantEvent {
explicit RecordInstantEvent(const char* name, explicit RecordInstantEvent(const char* name, TracerEventType type,
const EventRole role = EventRole::kOrdinary); uint32_t level = 1);
}; };
// CPU event tracing. A trace starts when an object of this class is created and // CPU event tracing. A trace starts when an object of this class is created and
...@@ -34,13 +35,15 @@ struct RecordInstantEvent { ...@@ -34,13 +35,15 @@ struct RecordInstantEvent {
class RecordEvent { class RecordEvent {
public: public:
explicit RecordEvent(const std::string& name, explicit RecordEvent(const std::string& name,
const EventRole role = EventRole::kOrdinary); const EventRole role = EventRole::kOrdinary,
uint32_t level = 1);
explicit RecordEvent(const char* name, explicit RecordEvent(const char* name,
const EventRole role = EventRole::kOrdinary); const EventRole role = EventRole::kOrdinary,
uint32_t level = 1);
RecordEvent(const std::string& name, const EventRole role, RecordEvent(const std::string& name, const EventRole role,
const std::string& attr); const std::string& attr, uint32_t level = 1);
// Stop event tracing explicitly before the object goes out of scope. // Stop event tracing explicitly before the object goes out of scope.
// Sometimes it's inconvenient to use RAII // Sometimes it's inconvenient to use RAII
......
...@@ -14,51 +14,17 @@ limitations under the License. */ ...@@ -14,51 +14,17 @@ limitations under the License. */
#pragma once #pragma once
#include <cstring>
#include <mutex>
#include <string> #include <string>
#include <type_traits> #include <type_traits>
#include <unordered_map>
#include <vector> #include <vector>
#include "paddle/fluid/platform/event.h" #include "paddle/fluid/framework/new_executor/workqueue/thread_data_registry.h"
#include "paddle/fluid/platform/macros.h"
#include "paddle/fluid/platform/os_info.h"
#include "paddle/fluid/platform/profiler/common_event.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
struct CommonEvent {
public:
CommonEvent(const char *name, uint64_t start_ns, uint64_t end_ns,
EventRole role)
: name(name), start_ns(start_ns), end_ns(end_ns), role(role) {}
CommonEvent(std::function<void *(size_t)> &arena_allocator,
const std::string &name_str, uint64_t start_ns, uint64_t end_ns,
EventRole role, const std::string &attr_str)
: start_ns(start_ns), end_ns(end_ns), role(role) {
auto buf = static_cast<char *>(arena_allocator(name_str.length() + 1));
strncpy(buf, name_str.c_str(), name_str.length() + 1);
name = buf;
buf = static_cast<char *>(arena_allocator(attr_str.length() + 1));
strncpy(buf, attr_str.c_str(), attr_str.length() + 1);
attr = buf;
}
CommonEvent(const std::function<void *(size_t)> &arena_allocator,
const std::string &name_str, uint64_t start_ns, uint64_t end_ns,
EventRole role)
: start_ns(start_ns), end_ns(end_ns), role(role) {
auto buf = static_cast<char *>(arena_allocator(name_str.length() + 1));
strncpy(buf, name_str.c_str(), name_str.length() + 1);
name = buf;
}
const char *name = nullptr; // not owned, designed for performance
uint64_t start_ns = 0;
uint64_t end_ns = 0;
EventRole role = EventRole::kOrdinary;
const char *attr = nullptr; // not owned, designed for performance
};
template <typename HeadType, typename... RestTypes> template <typename HeadType, typename... RestTypes>
struct ContainsStdString struct ContainsStdString
: std::conditional_t< : std::conditional_t<
...@@ -223,7 +189,8 @@ struct ThreadEventSection { ...@@ -223,7 +189,8 @@ struct ThreadEventSection {
class ThreadEventRecorder { class ThreadEventRecorder {
public: public:
ThreadEventRecorder(); ThreadEventRecorder() { thread_id_ = GetCurrentThreadSysId(); }
DISABLE_COPY_AND_ASSIGN(ThreadEventRecorder); DISABLE_COPY_AND_ASSIGN(ThreadEventRecorder);
public: public:
...@@ -261,34 +228,43 @@ class HostEventRecorder { ...@@ -261,34 +228,43 @@ class HostEventRecorder {
return instance; return instance;
} }
// thread-safe
// If your string argument has a longer lifetime than the Event, // If your string argument has a longer lifetime than the Event,
// use 'const char*'. e.g.: string literal, op name, etc. // use 'const char*'. e.g.: string literal, op name, etc.
// Do your best to avoid using 'std::string' as the argument type. // Do your best to avoid using 'std::string' as the argument type.
// It will cause deep-copy to harm performance. // It will cause deep-copy to harm performance.
template <typename... Args> template <typename... Args>
void RecordEvent(Args &&... args) { void RecordEvent(Args &&... args) {
GetThreadLocalRecorder().RecordEvent(std::forward<Args>(args)...); GetThreadLocalRecorder()->RecordEvent(std::forward<Args>(args)...);
} }
// thread-unsafe, make sure there is no running tracing.
// Poor performance, call it at the ending // Poor performance, call it at the ending
HostEventSection GatherEvents(); HostEventSection GatherEvents() {
auto thr_recorders =
void RegisterThreadRecorder(uint64_t tid, ThreadEventRecorder *recorder) { ThreadEventRecorderRegistry::GetInstance().GetAllThreadDataByRef();
const std::lock_guard<std::mutex> guard(thread_recorders_lock_); HostEventSection host_sec;
thread_recorders_[tid] = recorder; host_sec.process_id = GetProcessId();
host_sec.thr_sections.reserve(thr_recorders.size());
for (auto &kv : thr_recorders) {
auto &thr_recorder = kv.second.get();
host_sec.thr_sections.emplace_back(
std::move(thr_recorder.GatherEvents()));
}
return host_sec;
} }
private: private:
using ThreadEventRecorderRegistry =
framework::ThreadDataRegistry<ThreadEventRecorder>;
HostEventRecorder() = default; HostEventRecorder() = default;
DISABLE_COPY_AND_ASSIGN(HostEventRecorder); DISABLE_COPY_AND_ASSIGN(HostEventRecorder);
ThreadEventRecorder &GetThreadLocalRecorder() { ThreadEventRecorder *GetThreadLocalRecorder() {
static thread_local ThreadEventRecorder tls_recorder; return ThreadEventRecorderRegistry::GetInstance()
return tls_recorder; .GetMutableCurrentThreadData();
} }
std::mutex thread_recorders_lock_;
std::unordered_map<uint64_t, ThreadEventRecorder *> thread_recorders_;
}; };
} // namespace platform } // namespace platform
......
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/profiler/host_tracer.h"
#include "glog/logging.h"
#include "paddle/fluid/platform/profiler/common_event.h"
#include "paddle/fluid/platform/profiler/host_event_recorder.h"
namespace paddle {
namespace platform {
namespace {
void ProcessHostEvents(const HostEventSection& host_events,
TraceEventCollector* collector) {
for (const auto& thr_sec : host_events.thr_sections) {
uint64_t tid = thr_sec.thread_id;
for (const auto& evt : thr_sec.events) {
HostTraceEvent event;
event.name = evt.name;
event.type = evt.type;
event.start_ns = evt.start_ns;
event.end_ns = evt.end_ns;
event.process_id = host_events.process_id;
event.thread_id = tid;
collector->AddHostEvent(std::move(event));
}
}
}
} // namespace
// Starts host tracing at the level configured for this tracer.
// Precondition: the tracer is in state READY or STOPED; otherwise a
// PreconditionNotMet error is raised through PADDLE_ENFORCE_EQ.
void HostTracer::StartTracing() {
  PADDLE_ENFORCE_EQ(
      state_ == TracerState::READY || state_ == TracerState::STOPED, true,
      platform::errors::PreconditionNotMet(
          "TracerState must be READY or STOPED"));
  // The gathered section is deliberately discarded. NOTE(review): presumably
  // this drains events recorded before this session -- confirm that
  // GatherEvents() clears the per-thread buffers.
  HostEventRecorder::GetInstance().GatherEvents();
  HostTraceLevel::GetInstance().SetLevel(trace_level_);
  state_ = TracerState::STARTED;
}
// Stops host tracing: sets the global host trace level to kDisabled and moves
// this tracer to state STOPED.
// Precondition: the tracer is in state STARTED; otherwise a
// PreconditionNotMet error is raised through PADDLE_ENFORCE_EQ.
void HostTracer::StopTracing() {
PADDLE_ENFORCE_EQ(
state_, TracerState::STARTED,
platform::errors::PreconditionNotMet("TracerState must be STARTED"));
// With the level at kDisabled, NeedTrace() returns false for every level.
HostTraceLevel::GetInstance().SetLevel(HostTraceLevel::kDisabled);
state_ = TracerState::STOPED;
}
// Gathers the host events recorded so far and forwards them, converted by
// ProcessHostEvents, to `collector`.
// Precondition: the tracer is in state STOPED; otherwise a
// PreconditionNotMet error is raised through PADDLE_ENFORCE_EQ.
void HostTracer::CollectTraceData(TraceEventCollector* collector) {
  PADDLE_ENFORCE_EQ(
      state_, TracerState::STOPED,
      platform::errors::PreconditionNotMet("TracerState must be STOPED"));
  // The temporary section lives until the end of the full expression, i.e.
  // for the whole conversion.
  ProcessHostEvents(HostEventRecorder::GetInstance().GatherEvents(),
                    collector);
}
} // namespace platform
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. /* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
licensed under the Apache License, Version 2.0 (the "License"); licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.
You may obtain a copy of the License at You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/platform/profiler/host_event_recorder.h" #pragma once
#include "paddle/fluid/platform/os_info.h"
#include "paddle/fluid/platform/profiler/tracer_base.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
ThreadEventRecorder::ThreadEventRecorder() { class HostTraceLevel {
thread_id_ = GetCurrentThreadSysId(); public:
HostEventRecorder::GetInstance().RegisterThreadRecorder(thread_id_, this); static constexpr int64_t kDisabled = -1;
}
HostEventSection HostEventRecorder::GatherEvents() { static HostTraceLevel& GetInstance() {
HostEventSection host_sec; static HostTraceLevel instance;
host_sec.thr_sections.reserve(thread_recorders_.size()); return instance;
for (auto &kv : thread_recorders_) {
host_sec.thr_sections.emplace_back(std::move(kv.second->GatherEvents()));
} }
return host_sec;
} bool NeedTrace(uint32_t level) {
return trace_level_ >= static_cast<int64_t>(level);
}
void SetLevel(int64_t trace_level) { trace_level_ = trace_level; }
private:
// Verbose trace level, works like VLOG(level)
int trace_level_ = kDisabled;
};
struct HostTracerOptions {
uint32_t trace_level = 0;
};
class HostTracer : public TracerBase {
public:
explicit HostTracer(const HostTracerOptions& options) {
trace_level_ = options.trace_level;
}
void StartTracing() override;
void StopTracing() override;
void CollectTraceData(TraceEventCollector* collector) override;
private:
uint32_t trace_level_;
};
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/profiler/profiler.h"
#include "glog/logging.h"
#ifdef PADDLE_WITH_CUDA
#include <cuda.h>
#endif
#ifdef PADDLE_WITH_HIP
#include <hip/hip_runtime.h>
#endif
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#endif
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/profiler/host_tracer.h"
namespace paddle {
namespace platform {
void SynchronizeAllDevice();
std::atomic<bool> Profiler::alive_{false};
// Creates the profiler if none is currently alive. Returns nullptr when
// another Profiler instance already exists; otherwise returns a new Profiler
// built from `options`.
std::unique_ptr<Profiler> Profiler::Create(const ProfilerOptions& options) {
  // Atomically claim the "alive" flag; the previous value tells us whether
  // somebody else already holds it.
  const bool already_alive = alive_.exchange(true);
  if (already_alive) {
    return nullptr;
  }
  std::unique_ptr<Profiler> created(new Profiler(options));
  return created;
}
// Stores `options` and installs an owned HostTracer configured with the same
// trace level.
Profiler::Profiler(const ProfilerOptions& options) : options_(options) {
  HostTracerOptions host_options;
  host_options.trace_level = options.trace_level;
  // `true`: the holder owns the tracer and deletes it on destruction.
  tracers_.emplace_back(new HostTracer(host_options), true);
}
// Clears the alive flag so that a later Create() can succeed again.
Profiler::~Profiler() { alive_.store(false); }
void Profiler::Prepare() {
for (auto& tracer : tracers_) {
tracer.Get().PrepareTracing();
}
}
void Profiler::Start() {
SynchronizeAllDevice();
for (auto& tracer : tracers_) {
tracer.Get().StartTracing();
}
}
// Synchronizes all devices, then stops each installed tracer and collects its
// trace data into a single collector, which is returned.
TraceEventCollector Profiler::Stop() {
  SynchronizeAllDevice();
  TraceEventCollector collected;
  for (TracerHolder& holder : tracers_) {
    TracerBase& tracer = holder.Get();
    // Each tracer is stopped and drained before moving on to the next one.
    tracer.StopTracing();
    tracer.CollectTraceData(&collected);
  }
  return collected;
}
} // namespace platform
} // namespace paddle
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <atomic>
#include <cstdint>
#include <functional>
#include <list>
#include <memory>
#include "paddle/fluid/platform/macros.h"
#include "paddle/fluid/platform/profiler/trace_event_collector.h"
#include "paddle/fluid/platform/profiler/tracer_base.h"
namespace paddle {
namespace platform {
// Options accepted by Profiler::Create().
struct ProfilerOptions {
  // Copied into the host tracer's trace_level by the Profiler constructor.
  // Defaults to 0.
  uint32_t trace_level{0};
};
// Drives a list of tracers through a Prepare() / Start() / Stop() cycle.
// Instances can only be obtained through Create(); the class is neither
// copyable nor assignable.
class Profiler {
 public:
  // Returns a new Profiler configured by `options`, or nullptr when the
  // shared `alive_` flag shows that another Profiler has not been destroyed.
  static std::unique_ptr<Profiler> Create(const ProfilerOptions& options);

  // Calls PrepareTracing() on every tracer.
  void Prepare();

  // Synchronizes all devices, then calls StartTracing() on every tracer.
  void Start();

  // Synchronizes all devices, stops every tracer and returns the data they
  // collected.
  TraceEventCollector Stop();

  // Clears `alive_` so that Create() can succeed again.
  ~Profiler();

 private:
  // Pairs a tracer pointer with a flag saying whether this holder must delete
  // it on destruction.
  class TracerHolder {
   public:
    TracerHolder(TracerBase* tracer, bool owned)
        : tracer(tracer), owned(owned) {}

    // Copying an owning holder would make two holders delete the same
    // tracer, so copies are forbidden. std::list::emplace_back constructs the
    // holder in place and needs neither copy nor move.
    TracerHolder(const TracerHolder&) = delete;
    TracerHolder& operator=(const TracerHolder&) = delete;

    // NOTE(review): deletion goes through TracerBase*, which assumes
    // TracerBase has a virtual destructor - confirm in tracer_base.h.
    ~TracerHolder() {
      if (owned) {
        delete tracer;
      }
    }

    TracerBase& Get() { return *tracer; }

   private:
    TracerBase* tracer;
    bool owned;
  };

  explicit Profiler(const ProfilerOptions& options);

  DISABLE_COPY_AND_ASSIGN(Profiler);

  // True while a Profiler created by Create() exists.
  static std::atomic<bool> alive_;
  ProfilerOptions options_;
  uint64_t start_ns_ = UINT64_MAX;
  std::list<TracerHolder> tracers_;
};
} // namespace platform
} // namespace paddle
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <set>
#include <string>
#include "glog/logging.h"
#include "gtest/gtest.h"
#ifdef PADDLE_WITH_CUDA
#include <cuda.h>
#endif
#ifdef PADDLE_WITH_HIP
#include <hip/hip_runtime.h>
#endif
#include "paddle/fluid/platform/profiler/event_tracing.h"
#include "paddle/fluid/platform/profiler/profiler.h"
// Checks that a Profiler created with trace_level 2 collects the instant
// event recorded with third argument 2 and drops the one recorded with 3
// (the third argument is presumably the event's level - confirm against
// event_tracing.h).
TEST(ProfilerTest, TestHostTracer) {
  using paddle::platform::ProfilerOptions;
  using paddle::platform::Profiler;
  using paddle::platform::RecordInstantEvent;
  using paddle::platform::TracerEventType;
  ProfilerOptions options;
  options.trace_level = 2;
  auto profiler = Profiler::Create(options);
  // Fatal assertion: the calls below dereference `profiler`, so the test
  // must stop here instead of crashing when Create() returns nullptr.
  ASSERT_TRUE(profiler != nullptr);
  profiler->Prepare();
  profiler->Start();
  {
    RecordInstantEvent("TestTraceLevel_record1", TracerEventType::UserDefined,
                       2);
    RecordInstantEvent("TestTraceLevel_record2", TracerEventType::UserDefined,
                       3);
  }
  auto collector = profiler->Stop();
  std::set<std::string> host_events;
  // Bind by reference: only the name is needed, so avoid copying each event.
  for (const auto& evt : collector.HostEvents()) {
    host_events.insert(evt.name);
  }
  EXPECT_EQ(host_events.count("TestTraceLevel_record1"), 1u);
  EXPECT_EQ(host_events.count("TestTraceLevel_record2"), 0u);
}
...@@ -76,26 +76,28 @@ struct KernelEventInfo { ...@@ -76,26 +76,28 @@ struct KernelEventInfo {
uint64_t completed; uint64_t completed;
}; };
static constexpr size_t kMemKindMaxLen = 50;
struct MemcpyEventInfo { struct MemcpyEventInfo {
// The number of bytes transferred by the memory copy. // The number of bytes transferred by the memory copy.
uint64_t num_bytes; uint64_t num_bytes;
// The kind of the memory copy. // The kind of the memory copy.
// Each kind represents the source and destination targets of a memory copy. // Each kind represents the source and destination targets of a memory copy.
// Targets are host, device, and array. Refer to CUpti_ActivityMemcpyKind // Targets are host, device, and array. Refer to CUpti_ActivityMemcpyKind
std::string copy_kind; // std::string copy_kind;
// The source memory kind read by the memory copy. // The source memory kind read by the memory copy.
// Each kind represents the type of the memory accessed by a memory // Each kind represents the type of the memory accessed by a memory
// operation/copy. Refer to CUpti_ActivityMemoryKind // operation/copy. Refer to CUpti_ActivityMemoryKind
std::string src_kind; char src_kind[kMemKindMaxLen];
// The destination memory kind read by the memory copy. // The destination memory kind read by the memory copy.
std::string dst_kind; char dst_kind[kMemKindMaxLen];
}; };
struct MemsetEventInfo { struct MemsetEventInfo {
// The number of bytes being set by the memory set. // The number of bytes being set by the memory set.
uint64_t num_bytes; uint64_t num_bytes;
// The memory kind of the memory set. Refer to CUpti_ActivityMemoryKind // The memory kind of the memory set. Refer to CUpti_ActivityMemoryKind
std::string memory_kind; char memory_kind[kMemKindMaxLen];
// the value being assigned to memory by the memory set. // the value being assigned to memory by the memory set.
uint32_t value; uint32_t value;
}; };
......
...@@ -15,50 +15,37 @@ limitations under the License. */ ...@@ -15,50 +15,37 @@ limitations under the License. */
#pragma once #pragma once
#include <list> #include <list>
#include "paddle/fluid/platform/profiler/trace_event.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
struct HostRecord { class TraceEventCollector {
std::string name; public:
uint64_t start_ns; void AddHostEvent(HostTraceEvent&& event) { host_events_.push_back(event); }
uint64_t end_ns;
uint64_t process_id;
uint64_t thread_id;
};
struct RuntimeRecord { void AddRuntimeEvent(RuntimeTraceEvent&& event) {
std::string name; runtime_events_.push_back(event);
uint64_t start_ns; }
uint64_t end_ns;
uint64_t process_id;
uint64_t thread_id;
uint32_t correlation_id;
};
struct DeviceRecord { void AddDeviceEvent(DeviceTraceEvent&& event) {
std::string name; device_events_.push_back(event);
uint64_t start_ns; }
uint64_t end_ns;
uint32_t correlation_id;
};
class TraceEventCollector { const std::list<HostTraceEvent>& HostEvents() const { return host_events_; }
public:
void AddHostRecord(HostRecord&& record) { host_records_.push_back(record); }
void AddRuntimeRecord(RuntimeRecord&& record) { const std::list<RuntimeTraceEvent>& RuntimeEvents() const {
runtime_records_.push_back(record); return runtime_events_;
} }
void AddDeviceRecord(DeviceRecord&& record) { const std::list<DeviceTraceEvent>& DeviceEvents() const {
device_records_.push_back(record); return device_events_;
} }
private: private:
std::list<HostRecord> host_records_; std::list<HostTraceEvent> host_events_;
std::list<RuntimeRecord> runtime_records_; std::list<RuntimeTraceEvent> runtime_events_;
std::list<DeviceRecord> device_records_; std::list<DeviceTraceEvent> device_events_;
}; };
} // namespace platform } // namespace platform
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册