未验证 提交 5b6be4d7 编写于 作者: L liutiexing 提交者: GitHub

Adapt host event recorder to profiler (#37766)

* add align for WorkQueue

* add spinlock

* merge develop

* merge

* Add EventsWaiter

* Revert "Add EventsWaiter"

This reverts commit e206173aa9be7401b83a53581627bfaf557c8fb2.

* add os_info

* update

* update

* update

* update

* update

* update for bugfix

* update

* update

* update
Co-authored-by: Nliutiexing <liutiexing@google.com>
上级 dd3afc9d
......@@ -45,6 +45,7 @@ IF(WITH_XBYAK)
ENDIF()
cc_library(cpu_info SRCS cpu_info.cc DEPS ${CPU_INFO_DEPS})
cc_test(cpu_info_test SRCS cpu_info_test.cc DEPS cpu_info)
cc_library(os_info SRCS os_info.cc DEPS enforce device_tracer)
IF(WITH_GPU)
nv_library(cuda_graph_with_memory_pool SRCS cuda_graph_with_memory_pool.cc DEPS device_context allocator_facade cuda_graph)
......@@ -165,13 +166,13 @@ cc_test(lodtensor_printer_test SRCS lodtensor_printer_test.cc DEPS lodtensor_pri
cc_library(device_tracer SRCS device_tracer.cc DEPS boost profiler_proto framework_proto ${GPU_CTX_DEPS})
if(WITH_GPU)
nv_library(profiler SRCS profiler.cc profiler.cu DEPS device_tracer gpu_info enforce dynload_cuda)
nv_library(profiler SRCS profiler.cc profiler.cu DEPS os_info device_tracer gpu_info enforce dynload_cuda)
nv_library(device_memory_aligment SRCS device_memory_aligment.cc DEPS cpu_info gpu_info place)
elseif(WITH_ROCM)
hip_library(profiler SRCS profiler.cc profiler.cu DEPS device_tracer gpu_info enforce)
hip_library(profiler SRCS profiler.cc profiler.cu DEPS os_info device_tracer gpu_info enforce)
hip_library(device_memory_aligment SRCS device_memory_aligment.cc DEPS cpu_info gpu_info place)
else()
cc_library(profiler SRCS profiler.cc DEPS device_tracer enforce)
cc_library(profiler SRCS profiler.cc DEPS os_info device_tracer enforce)
cc_library(device_memory_aligment SRCS device_memory_aligment.cc DEPS cpu_info place)
endif()
......
......@@ -22,8 +22,14 @@ limitations under the License. */
#include "glog/logging.h"
#include "paddle/fluid/platform/device_tracer.h"
DECLARE_bool(enable_host_event_recorder_hook);
namespace paddle {
namespace platform {
// Used only by DeviceTracer
uint64_t GetThreadIdFromSystemThreadId(uint32_t id);
namespace {
// Tracking the nested block stacks of each thread.
#ifdef PADDLE_WITH_SW
......@@ -40,7 +46,8 @@ thread_local std::deque<Event *> annotation_stack;
static std::deque<Event *> main_thread_annotation_stack{};
static std::deque<std::string> main_thread_annotation_stack_name{};
std::map<uint32_t, int32_t> system_thread_id_map;
std::map<uint32_t, uint64_t> system_thread_id_map;
std::mutex system_thread_id_map_mutex;
std::once_flag tracer_once_flag;
DeviceTracer *tracer = nullptr;
......@@ -299,6 +306,47 @@ class DeviceTracerImpl : public DeviceTracer {
local_correlations_pairs->push_front(std::make_pair(id, event));
}
// Correlates every record in active_kind_records_ with the host event that
// encloses it and registers the pair through AddAnnotation().
//
// thr_events maps a thread id to that thread's events, keyed by the event's
// end time; each value carries the Event pointer and the event's start time.
// A record is matched to the event on the same thread with the smallest end
// time that is >= the record's end time, provided that event also started no
// later than the record. Records without such an event are only logged.
void AddAnnotations(const std::map<uint64_t, ThreadEvents> &thr_events) {
  for (auto &tmp : active_kind_records_) {
    for (const ActiveKindRecord &r : tmp) {
      auto iter = thr_events.find(r.thread_id);
      if (iter == thr_events.end()) {
        VLOG(10) << __func__ << " " << r.name
                 << " Missing tid: " << r.thread_id;
        continue;
      }
      const ThreadEvents &evts = iter->second;
      // lower_bound yields the first event whose end time is >= r.end_ns.
      // Using upper_bound here would skip an event that ends exactly at
      // r.end_ns whenever it is the last entry of the map.
      auto evt_iter = evts.lower_bound(r.end_ns);
      if (evt_iter == evts.end()) {
        VLOG(10) << __func__ << " Missing Record " << r.name
                 << " tid: " << r.thread_id << " end_ns: " << r.end_ns;
        continue;
      }
      // Debug aid: when there was no exact match, report the closest event
      // that ended before the record did.
      if (evt_iter != evts.begin() && evt_iter->first != r.end_ns) {
        VLOG(10) << __func__ << " prev end_ns " << std::prev(evt_iter)->first
                 << " end_ns: " << r.end_ns;
      }
      Event *evt = evt_iter->second.first;
      uint64_t start_ns = evt_iter->second.second;
      // The candidate must have started before (or with) the record,
      // otherwise it does not enclose it.
      if (start_ns > r.start_ns) {
        VLOG(10) << __func__ << " Mismatch Record " << r.name
                 << " tid: " << r.thread_id << " start_ns: " << r.start_ns
                 << " end_ns: " << r.end_ns << ", event " << evt->name()
                 << " start_ns: " << start_ns;
        continue;
      }
      VLOG(10) << __func__ << " tid: " << r.thread_id << " Add correlation "
               << r.correlation_id << "<->" << evt->name();
      AddAnnotation(r.correlation_id, evt);
    }
  }
}
void AddCPURecords(const std::string &anno, uint64_t start_ns,
uint64_t end_ns, int64_t device_id, int64_t thread_id) {
if (anno.empty()) {
......@@ -357,7 +405,7 @@ class DeviceTracerImpl : public DeviceTracer {
void AddActiveKindRecords(const std::string &anno, uint64_t start_ns,
uint64_t end_ns, int64_t device_id,
int64_t thread_id, uint32_t correlation_id) {
uint64_t thread_id, uint32_t correlation_id) {
if (anno.empty()) {
VLOG(1) << "Empty timeline annotation.";
return;
......@@ -524,7 +572,7 @@ class DeviceTracerImpl : public DeviceTracer {
event->set_detail_info(c->second->attr());
find++;
} else {
VLOG(10) << "Missing Kernel Event: " + r.name;
VLOG(10) << __func__ << " Missing Kernel Event: " + r.name;
miss++;
event->set_name(r.name);
}
......@@ -533,7 +581,8 @@ class DeviceTracerImpl : public DeviceTracer {
event->set_sub_device_id(r.stream_id);
event->set_device_id(r.device_id);
}
VLOG(1) << "KernelRecord event miss: " << miss << " find: " << find;
VLOG(1) << __func__ << " KernelRecord event miss: " << miss
<< " find: " << find;
for (auto &tmp : cpu_records_) {
for (const CPURecord &r : tmp) {
......@@ -583,7 +632,8 @@ class DeviceTracerImpl : public DeviceTracer {
event->set_device_id(r.device_id);
event->mutable_memcopy()->set_bytes(r.bytes);
}
VLOG(1) << "MemRecord event miss: " << miss << " find: " << find;
VLOG(1) << __func__ << " MemRecord event miss: " << miss
<< " find: " << find;
for (auto &tmp : mem_info_record_) {
for (const auto &r : tmp) {
......@@ -633,6 +683,9 @@ class DeviceTracerImpl : public DeviceTracer {
#ifdef PADDLE_WITH_CUPTI
static void CUPTIAPI ApiCallback(void *userdata, CUpti_CallbackDomain domain,
CUpti_CallbackId cbid, const void *cbdata) {
if (LIKELY(FLAGS_enable_host_event_recorder_hook)) {
return;
}
auto *cbInfo = reinterpret_cast<const CUpti_CallbackData *>(cbdata);
DeviceTracerImpl *tracer = reinterpret_cast<DeviceTracerImpl *>(userdata);
if (cbInfo->callbackSite == CUPTI_API_ENTER) {
......@@ -712,6 +765,7 @@ Event *CurAnnotation() {
if (annotation_stack.empty()) return nullptr;
return annotation_stack.back();
}
std::string CurAnnotationName() {
if (annotation_stack.empty()) return "Unknown";
return annotation_stack.back()->name();
......@@ -730,13 +784,13 @@ uint32_t GetCurSystemThreadId() {
return id;
}
void RecoreCurThreadId(int32_t id) {
// Associates the calling thread's system thread id with `id` in
// system_thread_id_map. The map is updated while holding
// system_thread_id_map_mutex.
void RecoreCurThreadId(uint64_t id) {
  std::lock_guard<std::mutex> guard(system_thread_id_map_mutex);
  const auto system_tid = GetCurSystemThreadId();
  VLOG(1) << "RecoreCurThreadId: " << system_tid << " -> " << id;
  system_thread_id_map[system_tid] = id;
}
int32_t GetThreadIdFromSystemThreadId(uint32_t id) {
uint64_t GetThreadIdFromSystemThreadId(uint32_t id) {
auto it = system_thread_id_map.find(id);
if (it != system_thread_id_map.end()) return it->second;
// return origin id if no event is recorded in this thread.
......
......@@ -18,8 +18,8 @@ limitations under the License. */
#include "paddle/fluid/platform/dynload/cupti.h"
#include "paddle/fluid/platform/event.h"
#include "paddle/fluid/platform/os_info.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/platform/profiler.pb.h"
namespace paddle {
......@@ -30,12 +30,6 @@ namespace platform {
//////////////////////
class Event;
// Returns the current wall-clock time from gettimeofday(), converted from
// microsecond resolution to nanoseconds.
inline uint64_t PosixInNsec() {
  struct timeval now;
  gettimeofday(&now, nullptr);
  const uint64_t whole_seconds_in_us =
      static_cast<uint64_t>(now.tv_sec) * 1000000;
  const uint64_t total_us = whole_seconds_in_us + now.tv_usec;
  return 1000 * total_us;
}
// DeviceTracer performs the following tasks:
// 1. Register cuda callbacks for various events: kernel, memcpy, etc.
// 2. Collect cuda statistics: start/end ts, memory, etc.
......@@ -84,7 +78,7 @@ class DeviceTracer {
uint64_t start_ns;
uint64_t end_ns;
int64_t device_id;
int64_t thread_id;
uint64_t thread_id;
uint32_t correlation_id;
};
......@@ -101,6 +95,9 @@ class DeviceTracer {
// human-readable annotations.
virtual void AddAnnotation(uint32_t id, Event* event) = 0;
virtual void AddAnnotations(
const std::map<uint64_t, ThreadEvents>& thr_events) = 0;
virtual void AddMemRecords(const std::string& name, uint64_t start_ns,
uint64_t end_ns, int64_t device_id,
int64_t stream_id, uint32_t correlation_id,
......@@ -111,7 +108,7 @@ class DeviceTracer {
int64_t thread_id) = 0;
virtual void AddActiveKindRecords(const std::string& anno, uint64_t start_ns,
uint64_t end_ns, int64_t device_id,
int64_t thread_id,
uint64_t thread_id,
uint32_t correlation_id) = 0;
virtual void AddMemInfoRecord(uint64_t start_ns, uint64_t end_ns,
......@@ -154,7 +151,6 @@ void ClearCurBlock();
int BlockDepth();
// Set current thread id, so we can map the system thread id to thread id.
void RecoreCurThreadId(int32_t id);
int32_t GetThreadIdFromSystemThreadId(uint32_t id);
void RecoreCurThreadId(uint64_t id);
} // namespace platform
} // namespace paddle
......@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once
#include <map>
#include <string>
#include <utility>
#ifdef PADDLE_WITH_CUDA
#include <cuda_runtime.h>
#endif
......@@ -48,7 +50,7 @@ class Event {
void set_parent(Event* parent) { parent_ = parent; }
std::string name() const { return name_; }
EventRole role() const { return role_; }
uint32_t thread_id() const { return thread_id_; }
uint64_t thread_id() const { return thread_id_; }
void set_name(std::string name) { name_ = name; }
void set_role(EventRole role) { role_ = role; }
std::string attr() const { return attr_; }
......@@ -66,7 +68,7 @@ class Event {
EventType type_;
std::string name_{};
Event* parent_{nullptr};
uint32_t thread_id_;
uint64_t thread_id_;
EventRole role_{};
int64_t cpu_ns_;
bool visited_status_{false};
......@@ -88,6 +90,9 @@ class Event {
#endif
};
// An Event pointer paired with a uint64_t; users of this alias store the
// event's start time (in nanoseconds) in the second member.
using EventWithStartNs = std::pair<Event*, uint64_t>;
// Ordered map of one thread's events. Users of this alias key it by the
// event's end time (in nanoseconds), so an event can be located from an end
// timestamp with an ordered lookup.
using ThreadEvents = std::map<uint64_t, EventWithStartNs>;
class MemEvent {
public:
MemEvent(EventType type, uint64_t start_ns, uint64_t end_ns, size_t bytes,
......@@ -105,7 +110,7 @@ class MemEvent {
uint64_t end_ns() const { return end_ns_; }
size_t bytes() const { return bytes_; }
Place place() const { return place_; }
int64_t thread_id() const { return thread_id_; }
uint64_t thread_id() const { return thread_id_; }
const std::string& annotation() const { return annotation_; }
private:
......@@ -114,7 +119,7 @@ class MemEvent {
uint64_t end_ns_ = 0;
size_t bytes_;
Place place_;
int64_t thread_id_;
uint64_t thread_id_;
std::string annotation_;
};
......
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/os_info.h"
#include <sstream>
#include "paddle/fluid/platform/device_tracer.h"
namespace paddle {
namespace platform {
// Captures the identifiers of the calling thread:
//  - std_tid_:   std::hash of std::this_thread::get_id()
//  - cupti_tid_: the streamed text of std::this_thread::get_id(), parsed as
//                an integer and truncated to 32 bits
// and then reports MainTid() through RecoreCurThreadId().
ThreadId::ThreadId() {
  const std::thread::id self = std::this_thread::get_id();
  std_tid_ = std::hash<std::thread::id>()(self);

  std::stringstream id_text;
  id_text << self;
  const auto parsed_id = std::stoull(id_text.str());
  cupti_tid_ = static_cast<uint32_t>(parsed_id);

  RecoreCurThreadId(MainTid());  // For DeviceTracer
}
// Deletes every ThreadId stored in id_map_ while holding lock_.
ThreadIdRegistry::~ThreadIdRegistry() {
  std::lock_guard<std::mutex> guard(lock_);
  for (auto entry = id_map_.begin(); entry != id_map_.end(); ++entry) {
    delete entry->second;
  }
}
} // namespace platform
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <mutex>
#include <thread>
#include <unordered_map>
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/macros.h"
#include "paddle/fluid/platform/port.h"
#ifdef _POSIX_C_SOURCE
#include <time.h>
#endif
namespace paddle {
namespace platform {
// Returns the current wall-clock time in nanoseconds.
//
// When _POSIX_C_SOURCE is defined the value comes from
// clock_gettime(CLOCK_REALTIME); otherwise it falls back to gettimeofday(),
// which only has microsecond resolution.
inline uint64_t PosixInNsec() {
#ifdef _POSIX_C_SOURCE
  struct timespec tp;
  clock_gettime(CLOCK_REALTIME, &tp);
  // Widen to uint64_t before scaling: tv_sec is a time_t, and multiplying it
  // by 1e9 in its own (signed, possibly 32-bit) type can overflow.
  return static_cast<uint64_t>(tp.tv_sec) * 1000 * 1000 * 1000 +
         static_cast<uint64_t>(tp.tv_nsec);
#else
  struct timeval tv;
  gettimeofday(&tv, nullptr);
  return 1000 * (static_cast<uint64_t>(tv.tv_sec) * 1000000 + tv.tv_usec);
#endif
}
// Bundle of the different identifiers kept for one OS thread.
// The out-of-line constructor is expected to fill in the ids for the thread
// that constructs the object.
class ThreadId {
public:
ThreadId();
// Id used as the primary thread key; currently the same value as StdTid().
uint64_t MainTid() const { return std_tid_; }
// Hash-based id derived from std::thread::id.
uint64_t StdTid() const { return std_tid_; }
// 32-bit thread id in the form used by Nvidia CUPTI.
uint32_t CuptiTid() const { return cupti_tid_; }
// OS-specific thread id.
// NOTE(review): no code shown with this class assigns sys_tid_, so this
// appears to always return the default 0 - confirm.
uint64_t SysTid() const { return sys_tid_; }
private:
uint64_t std_tid_ = 0; // std::hash<std::thread::id>
uint32_t cupti_tid_ = 0; // thread_id used by Nvidia CUPTI
uint64_t sys_tid_ = 0; // OS-specific, Linux: gettid
};
// Singleton registry of ThreadId objects, keyed by ThreadId::StdTid().
// A ThreadId is created the first time a thread calls CurrentThreadId() and
// is stored in id_map_; access to id_map_ is guarded by lock_.
class ThreadIdRegistry {
 public:
  // singleton
  static ThreadIdRegistry& GetInstance() {
    static ThreadIdRegistry instance;
    return instance;
  }

  // Returns the ThreadId registered under std_id, or nullptr if no thread
  // with that id has registered itself.
  const ThreadId* GetThreadId(uint64_t std_id) {
    std::lock_guard<std::mutex> lock(lock_);
    // One lookup only: reuse the iterator instead of find() + operator[].
    const auto it = id_map_.find(std_id);
    if (LIKELY(it != id_map_.end())) {
      return it->second;
    }
    return nullptr;
  }

  // Returns the ThreadId of the calling thread, creating and registering it
  // on the first call from that thread. Later calls from the same thread take
  // the cached thread_local pointer and do not touch lock_.
  const ThreadId& CurrentThreadId() {
    static thread_local ThreadId* tid_ = nullptr;
    if (LIKELY(tid_ != nullptr)) {
      return *tid_;
    }
    // The object is stored in id_map_ as a raw pointer; it is not freed here.
    tid_ = new ThreadId;
    std::lock_guard<std::mutex> lock(lock_);
    id_map_[tid_->StdTid()] = tid_;
    return *tid_;
  }

 private:
  ThreadIdRegistry() = default;
  DISABLE_COPY_AND_ASSIGN(ThreadIdRegistry);
  ~ThreadIdRegistry();

  std::mutex lock_;                                  // guards id_map_
  std::unordered_map<uint64_t, ThreadId*> id_map_;   // StdTid -> ThreadId
};
} // namespace platform
} // namespace paddle
......@@ -25,10 +25,14 @@ limitations under the License. */
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/dynload/nvtx.h"
#endif
#include "paddle/fluid/platform/os_info.h"
PADDLE_DEFINE_EXPORTED_bool(enable_rpc_profiler, false,
"Enable rpc profiler or not.");
DEFINE_bool(enable_host_event_recorder_hook, false,
"enable HostEventRecorder, hook Profiler");
namespace paddle {
namespace platform {
......@@ -298,12 +302,8 @@ class HostEventRecorder {
std::unordered_map<uint64_t, ThreadEventRecorder *> thread_recorders_;
};
// Returns std::hash applied to the calling thread's std::thread::id.
static uint64_t GetThreadId() {
  const std::thread::id self = std::this_thread::get_id();
  const std::hash<std::thread::id> hasher{};
  return hasher(self);
}
// Determines the id of the constructing thread and registers this recorder
// with the HostEventRecorder singleton under that id.
ThreadEventRecorder::ThreadEventRecorder() {
// NOTE(review): thread_id_ is assigned twice in a row; only the value taken
// from ThreadIdRegistry survives, so the GetThreadId() assignment looks
// redundant - confirm and drop it together with GetThreadId() if unused.
thread_id_ = GetThreadId();
thread_id_ = ThreadIdRegistry::GetInstance().CurrentThreadId().MainTid();
HostEventRecorder::GetInstance().RegisterThreadRecorder(thread_id_, this);
}
......@@ -352,7 +352,7 @@ RecordEvent::RecordEvent(const char *name, const EventRole role) {
}
#endif
#endif
if (UNLIKELY(g_enable_host_event_recorder_hook == false)) {
if (UNLIKELY(FLAGS_enable_host_event_recorder_hook == false)) {
OriginalConstruct(name, role, "none");
return;
}
......@@ -370,7 +370,7 @@ RecordEvent::RecordEvent(const std::string &name, const EventRole role) {
}
#endif
#endif
if (UNLIKELY(g_enable_host_event_recorder_hook == false)) {
if (UNLIKELY(FLAGS_enable_host_event_recorder_hook == false)) {
OriginalConstruct(name, role, "none");
return;
}
......@@ -389,7 +389,7 @@ RecordEvent::RecordEvent(const std::string &name, const EventRole role,
}
#endif
#endif
if (UNLIKELY(g_enable_host_event_recorder_hook == false)) {
if (UNLIKELY(FLAGS_enable_host_event_recorder_hook == false)) {
OriginalConstruct(name, role, attr);
return;
}
......@@ -425,7 +425,7 @@ RecordEvent::~RecordEvent() {
#endif
#endif
uint64_t end_ns = PosixInNsec();
if (LIKELY(g_enable_host_event_recorder_hook)) {
if (LIKELY(FLAGS_enable_host_event_recorder_hook)) {
if (LIKELY(shallow_copy_name_ != nullptr)) {
HostEventRecorder::GetInstance().RecordEvent(shallow_copy_name_,
start_ns_, end_ns, role_);
......@@ -546,6 +546,11 @@ void PopMemEvent(uint64_t start_ns, uint64_t end_ns, size_t bytes,
}
// Records a mark named `name`.
// With FLAGS_enable_host_event_recorder_hook set, the mark is handed to
// HostEventRecorder as an ordinary event with start and end both 0;
// otherwise a kMark event is appended to the current event list.
void Mark(const std::string &name) {
  if (!FLAGS_enable_host_event_recorder_hook) {
    GetEventList().Record(EventType::kMark, name, g_thread_id);
  } else {
    HostEventRecorder::GetInstance().RecordEvent(name, 0, 0,
                                                 EventRole::kOrdinary);
  }
}
......@@ -598,9 +603,14 @@ void ResetProfiler() {
}
}
static std::map<uint64_t, ThreadEvents> DockHostEventRecorderHostPart();
static void DockHostEventRecorderDevicePart(
const std::map<uint64_t, ThreadEvents> &thr_events);
void DisableProfiler(EventSortingKey sorted_key,
const std::string &profile_path) {
SynchronizeAllDevice();
auto thr_events = DockHostEventRecorderHostPart();
MemEvenRecorder::Instance().Flush();
std::lock_guard<std::mutex> l(profiler_mu);
......@@ -612,6 +622,7 @@ void DisableProfiler(EventSortingKey sorted_key,
DeviceTracer *tracer = GetDeviceTracer();
if (tracer->IsEnabled()) {
tracer->Disable();
DockHostEventRecorderDevicePart(thr_events);
tracer->GenEventKernelCudaElapsedTime();
tracer->GenProfile(profile_path);
}
......@@ -634,6 +645,7 @@ void CompleteProfilerEvents(proto::Profile *tracer_profile,
std::vector<std::vector<Event>> *time_events,
std::vector<std::vector<MemEvent>> *mem_events) {
SynchronizeAllDevice();
auto thr_events = DockHostEventRecorderHostPart();
MemEvenRecorder::Instance().Flush();
std::lock_guard<std::mutex> l(profiler_mu);
......@@ -645,6 +657,7 @@ void CompleteProfilerEvents(proto::Profile *tracer_profile,
DeviceTracer *tracer = GetDeviceTracer();
if (tracer->IsEnabled() && tracer_profile != nullptr) {
tracer->Disable();
DockHostEventRecorderDevicePart(thr_events);
tracer->GenEventKernelCudaElapsedTime();
*tracer_profile = tracer->GetProfile();
}
......@@ -719,7 +732,7 @@ void NvprofEnableRecordEvent() {
// Clears the g_enable_nvprof_hook flag.
void NvprofDisableRecordEvent() {
  g_enable_nvprof_hook = false;
}
void EnableHostEventRecorder() { g_enable_host_event_recorder_hook = true; }
// Sets FLAGS_enable_host_event_recorder_hook to true.
void EnableHostEventRecorder() {
  FLAGS_enable_host_event_recorder_hook = true;
}
std::string PrintHostEvents() {
std::ostringstream oss;
......@@ -734,5 +747,95 @@ std::string PrintHostEvents() {
return oss.str();
}
// Replays the events gathered in host_sec as kPushRange/kPopRange pairs in a
// fresh EventList per thread section, and fills *out with, per thread id, the
// pushed Event keyed by its end_ns together with its start_ns.
//
// Event names are prefixed with the names of the events that enclose them
// ("outer/inner/leaf"); the enclosing events are reconstructed from the
// start_ns/end_ns timestamps.
static void EmulateEventPushAndPop(const HostEventSection &host_sec,
std::map<uint64_t, ThreadEvents> *out) {
for (const auto &thr_sec : host_sec.thr_sections) {
uint64_t tid = thr_sec.thread_id;
// Each thread section gets its own event list, registered globally.
auto cur_thr_list = std::make_shared<EventList<Event>>();
g_all_event_lists.emplace_front(cur_thr_list);
// for nesting events
// evt_stk holds the indices of the events currently treated as enclosing;
// prefix_stk holds the matching accumulated name prefixes.
std::stack<size_t> evt_stk;
std::stack<std::string> prefix_stk;
// Index of each event by start time, so events can be visited in start
// order. Events sharing a start_ns overwrite each other here.
std::map<uint64_t, size_t> start2evt;
for (size_t i = 0; i < thr_sec.events.size(); ++i) {
const auto &evt = thr_sec.events[i];
start2evt[evt.start_ns] = i;
}
// Cursor over start2evt; it only moves forward across the loop below.
auto iter = start2evt.begin();
// loop events
for (size_t i = 0; i < thr_sec.events.size(); ++i) {
const auto &thr_evts = thr_sec.events;
const auto &evt = thr_evts[i];
// For nesting events
// Drop stacked events that end no later than the current one: they cannot
// enclose it.
while (!evt_stk.empty() && thr_evts[evt_stk.top()].end_ns <= evt.end_ns) {
evt_stk.pop();
prefix_stk.pop();
}
// Consume every event that started strictly before the current one; those
// still running at the current start time become enclosing events.
while (iter != start2evt.end() &&
thr_evts[iter->second].start_ns < evt.start_ns) {
if (thr_evts[iter->second].end_ns > evt.start_ns) {
evt_stk.push(iter->second);
std::string prefix = thr_evts[iter->second].name;
if (!prefix_stk.empty()) {
prefix = prefix_stk.top() + "/" + prefix;
}
prefix_stk.push(prefix);
}
++iter;
}
// Record orig event pair
std::string name =
prefix_stk.empty() ? evt.name : prefix_stk.top() + "/" + evt.name;
// A missing attribute is recorded as the literal string "none".
const char *attr = (evt.attr == nullptr ? "none" : evt.attr);
Event *orig_evt = cur_thr_list->Record(EventType::kPushRange, name, tid,
evt.role, attr);
// Keyed by end time; the stored start time travels with the Event pointer.
(*out)[tid][evt.end_ns] = std::make_pair(orig_evt, evt.start_ns);
cur_thr_list->Record(EventType::kPopRange, name, tid, evt.role, attr);
}
}
}
static void EmulateCPURecordsAdd(const HostEventSection &host_sec) {
DeviceTracer *tracer = GetDeviceTracer();
if (tracer == nullptr) {
return;
}
for (const auto &thr_sec : host_sec.thr_sections) {
uint64_t tid = thr_sec.thread_id;
for (const auto &evt : thr_sec.events) {
tracer->AddCPURecords(evt.name, evt.start_ns, evt.end_ns, BlockDepth(),
tid);
}
}
}
static void EmulateCorrelation(
const std::map<uint64_t, ThreadEvents> &thr_events) {
DeviceTracer *tracer = GetDeviceTracer();
if (tracer == nullptr) {
return;
}
tracer->AddAnnotations(thr_events);
}
// Gathers the events buffered by HostEventRecorder and replays them into the
// profiler's event lists and the device tracer's CPU records.
//
// Returns, per thread id, the replayed events keyed by end time. The result
// is empty when FLAGS_enable_host_event_recorder_hook is false.
static std::map<uint64_t, ThreadEvents> DockHostEventRecorderHostPart() {
  std::map<uint64_t, ThreadEvents> thr_events;
  if (FLAGS_enable_host_event_recorder_hook == false) {
    return thr_events;
  }
  auto host_evt_sec = HostEventRecorder::GetInstance().GatherEvents();
  EmulateEventPushAndPop(host_evt_sec, &thr_events);
  EmulateCPURecordsAdd(host_evt_sec);
  // Return the local by name: wrapping it in std::move() would block copy
  // elision (NRVO) without gaining anything.
  return thr_events;
}
// Runs the correlation step on thr_events, but only when
// FLAGS_enable_host_event_recorder_hook is set.
static void DockHostEventRecorderDevicePart(
    const std::map<uint64_t, ThreadEvents> &thr_events) {
  if (FLAGS_enable_host_event_recorder_hook) {
    EmulateCorrelation(thr_events);
  }
}
} // namespace platform
} // namespace paddle
......@@ -47,11 +47,9 @@ static TracerOption g_tracer_option = TracerOption::kDefault;
static ProfilerState g_state = ProfilerState::kDisabled;
// Hook for RecordEvent: when set, its events are also sent to the nvtx timeline
static bool g_enable_nvprof_hook = false;
// To hook RecordEvent, use HostEventRecorder
static bool g_enable_host_event_recorder_hook = false;
// The thread-local event list can only be accessed by the thread that owns it
// The thread index of each thread
static thread_local int32_t g_thread_id;
static thread_local uint64_t g_thread_id;
// The g_next_thread_id is a global counter for threads, by the g_thread_id and
// g_next_thread_id, we can know how many threads have created EventList.
static uint32_t g_next_thread_id = 0;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册