profiler.cc 19.6 KB
Newer Older
1
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
D
dangqingqing 已提交
2 3 4 5

licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
6

D
dangqingqing 已提交
7 8 9 10 11 12 13 14
    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

15
#include <mutex>  // NOLINT
16
#include <random>
L
liutiexing 已提交
17
#include <sstream>
18
#include <string>
L
liutiexing 已提交
19
#include <type_traits>
Y
Yancey1989 已提交
20

21
#include "paddle/fluid/platform/device_tracer.h"
W
wangchaochaohu 已提交
22 23
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/profiler.h"
24
#include "paddle/fluid/platform/profiler/common_event.h"
L
liutiexing 已提交
25
#include "paddle/fluid/platform/profiler/host_event_recorder.h"
26
#include "paddle/fluid/platform/profiler/host_tracer.h"
C
chenjian 已提交
27
#include "paddle/fluid/platform/profiler/profiler.h"
W
wangchaochaohu 已提交
28
#include "paddle/fluid/platform/profiler_helper.h"
29 30 31
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/dynload/nvtx.h"
#endif
32
#include "paddle/fluid/platform/os_info.h"
D
dangqingqing 已提交
33

Z
Zeng Jinle 已提交
34 35
// Boolean flag `enable_rpc_profiler`, default false. Within this file it is
// only referenced from the commented-out RecordRPCEvent constructor.
PADDLE_DEFINE_EXPORTED_bool(enable_rpc_profiler, false,
                            "Enable rpc profiler or not.");
G
gongweibao 已提交
36

37 38 39
// Boolean flag, default false, selecting the HostEventRecorder code path.
// RecordEvent, Mark() and the DockHostEventRecorder*Part helpers branch on
// FLAGS_enable_host_event_recorder_hook; EnableHostEventRecorder() and
// DisableHostEventRecorder() toggle it at runtime.
DEFINE_bool(enable_host_event_recorder_hook, false,
            "enable HostEventRecorder, hook Profiler");

D
dangqingqing 已提交
40 41 42
namespace paddle {
namespace platform {

W
wangchaochaohu 已提交
43
// Out-of-class definition of the static MemEvenRecorder::recorder member.
// NOTE(review): presumably the object returned by MemEvenRecorder::Instance()
// — confirm against the header.
MemEvenRecorder MemEvenRecorder::recorder;
D
dangqingqing 已提交
44

45
// Builds an event and stamps it with the current CPU time.
//
// type       kind of event being recorded.
// name       event name; taken by value and moved into the member.
// thread_id  id of the recording thread.
// role       role attached to the event.
// attr       free-form attribute string; taken by value and moved.
Event::Event(EventType type, std::string name, uint32_t thread_id,
             EventRole role, std::string attr)
    : type_(type),
      // `name` and `attr` are by-value sink parameters: move them rather than
      // paying for a second copy of each string.
      name_(std::move(name)),
      thread_id_(thread_id),
      role_(role),
      attr_(std::move(attr)) {
  cpu_ns_ = GetTimeInNsec();
}

C
chengduo 已提交
55
// Accessor for the EventType this event was constructed with.
const EventType &Event::type() const {
  return type_;
}
D
dangqingqing 已提交
56

C
chengduo 已提交
57
double Event::CpuElapsedMs(const Event &e) const {
58
  return (e.cpu_ns_ - cpu_ns_) / (1000000.0);
D
dangqingqing 已提交
59 60
}

C
chengduo 已提交
61
double Event::CudaElapsedMs(const Event &e) const {
62 63
#ifdef PADDLE_WITH_CUPTI
  return gpu_ns_ / 1000000.0;
D
Dun Liang 已提交
64
#else
D
Dun Liang 已提交
65 66
  LOG_FIRST_N(WARNING, 1) << "CUDA CUPTI is not enabled";
  return 0;
D
dangqingqing 已提交
67 68 69
#endif
}

C
chenjian 已提交
70 71
// Starts a record for `name` without copying the string on the
// HostEventRecorder path: only the pointer is stored, so `name` must stay
// valid until End() runs.
//
// name   event name (C string).
// type   tracer event type.
// level  trace level; the event is skipped when HostTraceLevel says this
//        level does not need tracing.
// role   role attached to the event.
RecordEvent::RecordEvent(const char *name, const TracerEventType type,
                         uint32_t level, const EventRole role) {
#ifndef _WIN32
#ifdef PADDLE_WITH_CUDA
  // The NVTX range is pushed regardless of the trace level check below.
  if (g_enable_nvprof_hook) {
    dynload::nvtxRangePushA(name);
    is_pushed_ = true;
  }
#endif
#endif
  if (UNLIKELY(!HostTraceLevel::GetInstance().NeedTrace(level))) {
    return;
  }

  if (FLAGS_enable_host_event_recorder_hook) {
    // HostEventRecorder path: remember what End() needs to emit the event.
    is_enabled_ = true;
    shallow_copy_name_ = name;
    role_ = role;
    type_ = type;
    start_ns_ = PosixInNsec();
    return;
  }

  // Legacy path. g_state is tested here first so no temporary std::string is
  // built from `name` while the profiler is disabled.
  if (g_state != ProfilerState::kDisabled &&
      (type == TracerEventType::Operator ||
       type == TracerEventType::OperatorInner ||
       type == TracerEventType::UserDefined)) {
    OriginalConstruct(name, role, "none");
  }
}

C
chenjian 已提交
102 103
// Starts a record for `name`, keeping a heap-allocated copy of the string
// until End().
//
// name   event name.
// type   tracer event type.
// level  trace level; the event is skipped when HostTraceLevel says this
//        level does not need tracing.
// role   role attached to the event.
RecordEvent::RecordEvent(const std::string &name, const TracerEventType type,
                         uint32_t level, const EventRole role) {
#ifndef _WIN32
#ifdef PADDLE_WITH_CUDA
  // The NVTX range is pushed regardless of the trace level check below.
  if (g_enable_nvprof_hook) {
    dynload::nvtxRangePushA(name.c_str());
    is_pushed_ = true;
  }
#endif
#endif
  if (UNLIKELY(!HostTraceLevel::GetInstance().NeedTrace(level))) {
    return;
  }

  if (FLAGS_enable_host_event_recorder_hook) {
    // HostEventRecorder path: remember what End() needs to emit the event.
    is_enabled_ = true;
    name_ = new std::string(name);
    role_ = role;
    type_ = type;
    start_ns_ = PosixInNsec();
    return;
  }

  // Legacy path: only these three event types are forwarded.
  const bool forwarded_type = type == TracerEventType::Operator ||
                              type == TracerEventType::OperatorInner ||
                              type == TracerEventType::UserDefined;
  if (forwarded_type) {
    OriginalConstruct(name, role, "none");
  }
}

C
chenjian 已提交
132 133 134
// Starts a record for `name` carrying an extra attribute string. Heap copies
// of both strings are kept until End().
//
// name   event name.
// attr   attribute string attached to the event.
// type   tracer event type.
// level  trace level; the event is skipped when HostTraceLevel says this
//        level does not need tracing.
// role   role attached to the event.
RecordEvent::RecordEvent(const std::string &name, const std::string &attr,
                         const TracerEventType type, uint32_t level,
                         const EventRole role) {
#ifndef _WIN32
#ifdef PADDLE_WITH_CUDA
  // The NVTX range is pushed regardless of the trace level check below.
  if (g_enable_nvprof_hook) {
    dynload::nvtxRangePushA(name.c_str());
    is_pushed_ = true;
  }
#endif
#endif

  if (UNLIKELY(HostTraceLevel::GetInstance().NeedTrace(level) == false)) {
    return;
  }

  if (FLAGS_enable_host_event_recorder_hook == false) {
    // Legacy path: only these three event types are forwarded.
    if (type == TracerEventType::Operator ||
        type == TracerEventType::OperatorInner ||
        type == TracerEventType::UserDefined) {
      OriginalConstruct(name, role, attr);
    }
    return;
  }

  is_enabled_ = true;
  type_ = type;
  // Store the caller's role, as the other two constructors do; End() passes
  // role_ to HostEventRecorder, so leaving it unset would report a role the
  // caller did not ask for.
  role_ = role;
  name_ = new std::string(name);
  start_ns_ = PosixInNsec();
  attr_ = new std::string(attr);
}
L
liutiexing 已提交
163

L
liutiexing 已提交
164 165 166
void RecordEvent::OriginalConstruct(const std::string &name,
                                    const EventRole role,
                                    const std::string &attr) {
167
  if (g_state == ProfilerState::kDisabled || name.empty()) return;
168 169

  // do some initialization
L
liutiexing 已提交
170
  name_ = new std::string(name);
171 172
  start_ns_ = PosixInNsec();
  role_ = role;
L
liutiexing 已提交
173
  attr_ = new std::string(attr);
X
Xin Pan 已提交
174
  is_enabled_ = true;
175
  // lock is not needed, the code below is thread-safe
176
  // Maybe need the same push/pop behavior.
Y
Yuang Liu 已提交
177
  Event *e = PushEvent(name, role, attr);
178
  SetCurAnnotation(e);
L
liutiexing 已提交
179
  *name_ = e->name();
D
dangqingqing 已提交
180 181
}

L
liutiexing 已提交
182
void RecordEvent::End() {
183 184
#ifndef _WIN32
#ifdef PADDLE_WITH_CUDA
185
  if (g_enable_nvprof_hook && is_pushed_) {
186
    dynload::nvtxRangePop();
187
    is_pushed_ = false;
188 189 190
  }
#endif
#endif
191
  if (LIKELY(FLAGS_enable_host_event_recorder_hook && is_enabled_)) {
L
liutiexing 已提交
192
    uint64_t end_ns = PosixInNsec();
L
liutiexing 已提交
193
    if (LIKELY(shallow_copy_name_ != nullptr)) {
C
chenjian 已提交
194 195
      HostEventRecorder::GetInstance().RecordEvent(
          shallow_copy_name_, start_ns_, end_ns, role_, type_);
L
liutiexing 已提交
196 197
    } else if (name_ != nullptr) {
      if (attr_ == nullptr) {
C
chenjian 已提交
198 199
        HostEventRecorder::GetInstance().RecordEvent(*name_, start_ns_, end_ns,
                                                     role_, type_);
L
liutiexing 已提交
200
      } else {
C
chenjian 已提交
201 202
        HostEventRecorder::GetInstance().RecordEvent(*name_, start_ns_, end_ns,
                                                     role_, type_, *attr_);
L
liutiexing 已提交
203
        delete attr_;
L
liutiexing 已提交
204
      }
L
liutiexing 已提交
205
      delete name_;
L
liutiexing 已提交
206
    }
207 208
    // use this flag to avoid double End();
    is_enabled_ = false;
L
liutiexing 已提交
209 210 211
    return;
  }

X
Xin Pan 已提交
212
  if (g_state == ProfilerState::kDisabled || !is_enabled_) return;
213
  // lock is not needed, the code below is thread-safe
C
chengduo 已提交
214
  DeviceTracer *tracer = GetDeviceTracer();
X
Xin Pan 已提交
215
  if (tracer) {
L
liutiexing 已提交
216
    uint64_t end_ns = PosixInNsec();
L
liutiexing 已提交
217 218
    tracer->AddCPURecords(CurAnnotationName(), start_ns_, end_ns, BlockDepth(),
                          g_thread_id);
X
Xin Pan 已提交
219
  }
Y
Yibing Liu 已提交
220
  ClearCurAnnotation();
L
liutiexing 已提交
221 222 223
  PopEvent(*name_, role_);
  delete name_;
  delete attr_;
224 225
  // use this flag to avoid double End();
  is_enabled_ = false;
D
dangqingqing 已提交
226
}
D
dangqingqing 已提交
227

228 229 230
// Records a zero-length event (start == end == now) with role kOrdinary in
// HostEventRecorder, unless HostTraceLevel says `level` is not traced.
RecordInstantEvent::RecordInstantEvent(const char *name, TracerEventType type,
                                       uint32_t level) {
  if (UNLIKELY(!HostTraceLevel::GetInstance().NeedTrace(level))) {
    return;
  }
  const auto now_ns = PosixInNsec();
  HostEventRecorder::GetInstance().RecordEvent(name, now_ns, now_ns,
                                               EventRole::kOrdinary, type);
}

C
chengduo 已提交
238 239 240 241 242
void MemEvenRecorder::PushMemRecord(const void *ptr, const Place &place,
                                    size_t size) {
  if (g_state == ProfilerState::kDisabled) return;
  std::lock_guard<std::mutex> guard(mtx_);
  auto &events = address_memevent_[place];
G
GaoWei8 已提交
243 244 245
  PADDLE_ENFORCE_EQ(events.count(ptr), 0,
                    platform::errors::InvalidArgument(
                        "The Place can't exist in the stage of PushMemRecord"));
C
chengduo 已提交
246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286
  events.emplace(ptr, std::unique_ptr<RecordMemEvent>(
                          new MemEvenRecorder::RecordMemEvent(place, size)));
}

// Stops tracking the block at `ptr` on `place`, destroying its stored
// RecordMemEvent. No-op while the profiler is disabled or when `ptr` is not
// tracked.
void MemEvenRecorder::PopMemRecord(const void *ptr, const Place &place) {
  if (g_state == ProfilerState::kDisabled) {
    return;
  }
  std::lock_guard<std::mutex> lock(mtx_);
  auto &tracked = address_memevent_[place];
  auto found = tracked.find(ptr);
  // The ptr maybe not in address_memevent
  if (found == tracked.end()) {
    return;
  }
  tracked.erase(found);
}

// Drops every tracked memory record, under the recorder's mutex.
void MemEvenRecorder::Flush() {
  std::lock_guard<std::mutex> lock(mtx_);
  address_memevent_.clear();
}

// Captures an allocation: its place, size, the current time and the current
// annotation name, then records a push-range memory event for it.
MemEvenRecorder::RecordMemEvent::RecordMemEvent(const Place &place,
                                                size_t bytes)
    : place_(place),
      bytes_(bytes),
      start_ns_(PosixInNsec()),
      alloc_in_(CurAnnotationName()) {
  // end_ns_ is not set by the initialiser list and is otherwise only assigned
  // in the destructor, so give it a defined value before it is read below.
  // NOTE(review): start_ns_ is used as the "not freed yet" placeholder —
  // confirm no consumer of push-range mem events expects another sentinel.
  end_ns_ = start_ns_;
  PushMemEvent(start_ns_, end_ns_, bytes_, place_, alloc_in_);
}

// Closes the allocation record: stamps the end time, reports the full
// lifetime to the device tracer when one exists, and records a pop-range
// memory event tagged with the annotation current at release time.
MemEvenRecorder::RecordMemEvent::~RecordMemEvent() {
  DeviceTracer *device_tracer = GetDeviceTracer();
  end_ns_ = PosixInNsec();

  auto free_site = CurAnnotationName();
  if (device_tracer != nullptr) {
    device_tracer->AddMemInfoRecord(start_ns_, end_ns_, bytes_, place_,
                                    alloc_in_, free_site, g_mem_thread_id);
  }
  PopMemEvent(start_ns_, end_ns_, bytes_, place_, free_site);
}

L
liutiexing 已提交
287
/*RecordRPCEvent::RecordRPCEvent(const std::string &name) {
G
gongweibao 已提交
288
  if (FLAGS_enable_rpc_profiler) {
289
    event_.reset(new platform::RecordEvent(name));
G
gongweibao 已提交
290
  }
L
liutiexing 已提交
291
}*/
G
gongweibao 已提交
292

X
Xin Pan 已提交
293 294
// Marks `block_id` as the current block and names the record "block_<id>".
// Stays inactive when the profiler is disabled at construction time.
RecordBlock::RecordBlock(int block_id)
    : is_enabled_(false), start_ns_(PosixInNsec()) {
  // lock is not needed, the code below is thread-safe
  if (g_state == ProfilerState::kDisabled) {
    return;
  }
  is_enabled_ = true;
  SetCurBlock(block_id);
  name_ = string::Sprintf("block_%d", block_id);
}

// Reports the block's CPU span to the device tracer (when one exists) and
// clears the current block. Skipped when the record is inactive or the
// profiler is disabled.
RecordBlock::~RecordBlock() {
  // lock is not needed, the code below is thread-safe
  if (!is_enabled_ || g_state == ProfilerState::kDisabled) {
    return;
  }
  DeviceTracer *device_tracer = GetDeviceTracer();
  if (device_tracer != nullptr) {
    // We try to put all blocks at the same nested depth in the
    // same timeline lane. and distinguish the using thread_id.
    device_tracer->AddCPURecords(name_, start_ns_, PosixInNsec(), BlockDepth(),
                                 g_thread_id);
  }
  ClearCurBlock();
}

W
wangchaochaohu 已提交
315 316 317 318 319 320 321 322 323 324 325 326 327
// Appends a kPushRange memory event to the memory event list obtained from
// GetMemEventList(), tagged with g_mem_thread_id.
void PushMemEvent(uint64_t start_ns, uint64_t end_ns, size_t bytes,
                  const Place &place, const std::string &annotation) {
  auto &&mem_events = GetMemEventList();
  mem_events.Record(EventType::kPushRange, start_ns, end_ns, bytes, place,
                    g_mem_thread_id, annotation);
}

// Appends a kPopRange memory event to the memory event list obtained from
// GetMemEventList(), tagged with g_mem_thread_id.
void PopMemEvent(uint64_t start_ns, uint64_t end_ns, size_t bytes,
                 const Place &place, const std::string &annotation) {
  auto &&mem_events = GetMemEventList();
  mem_events.Record(EventType::kPopRange, start_ns, end_ns, bytes, place,
                    g_mem_thread_id, annotation);
}

void Mark(const std::string &name) {
328
  if (FLAGS_enable_host_event_recorder_hook) {
329
    HostEventRecorder::GetInstance().RecordEvent(
C
chenjian 已提交
330
        name, 0, 0, EventRole::kOrdinary, TracerEventType::UserDefined);
331 332
    return;
  }
W
wangchaochaohu 已提交
333 334 335
  GetEventList().Record(EventType::kMark, name, g_thread_id);
}

Y
Yuang Liu 已提交
336 337 338 339
// Records a kPushRange event in the event list and returns the pointer that
// Record() hands back.
Event *PushEvent(const std::string &name, const EventRole role,
                 std::string attr) {
  auto &&events = GetEventList();
  Event *pushed =
      events.Record(EventType::kPushRange, name, g_thread_id, role, attr);
  return pushed;
}

Y
Yuang Liu 已提交
342 343
// Records a kPopRange event in the event list.
void PopEvent(const std::string &name, const EventRole role, std::string attr) {
  auto &&events = GetEventList();
  events.Record(EventType::kPopRange, name, g_thread_id, role, attr);
}
D
dangqingqing 已提交
345
// Switches the profiler into `state`.
//
// `state` must not be ProfilerState::kDisabled (enforced). All devices are
// synchronised first; then, under profiler_mu, the call returns early if the
// profiler is already in `state`. Otherwise it stores the state, applies the
// default ProfilerOptions trace level, enables the device tracer and records
// the "_start_profiler_" marker.
void EnableProfiler(ProfilerState state) {
  // The two literals are concatenated by the compiler, so the first one must
  // end with a space for the message to read correctly.
  PADDLE_ENFORCE_NE(state, ProfilerState::kDisabled,
                    platform::errors::InvalidArgument(
                        "Can't enable profiling, since the input state is "
                        "ProfilerState::kDisabled"));
  SynchronizeAllDevice();
  std::lock_guard<std::mutex> l(profiler_mu);
  if (state == g_state) {
    return;
  }
  g_state = state;
  ProfilerOptions option;
  HostTraceLevel::GetInstance().SetLevel(option.trace_level);
  should_send_profile_state = true;
  GetDeviceTracer()->Enable();
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  if (g_state == ProfilerState::kCUDA || g_state == ProfilerState::kAll ||
      g_state == ProfilerState::kCPU) {
    // Generate some dummy events first to reduce the startup overhead.
    DummyKernelAndEvent();
    GetDeviceTracer()->Reset();
  }
#endif
  // Mark the profiling start.
  Mark("_start_profiler_");
}

372
void ResetProfiler() {
373 374
  SynchronizeAllDevice();
  GetDeviceTracer()->Reset();
C
chengduo 已提交
375
  MemEvenRecorder::Instance().Flush();
D
dangqingqing 已提交
376
  std::lock_guard<std::mutex> guard(g_all_event_lists_mutex);
377 378 379 380
  for (auto it = g_all_event_lists.begin(); it != g_all_event_lists.end();
       ++it) {
    (*it)->Clear();
  }
C
chengduo 已提交
381 382 383 384
  for (auto it = g_all_mem_event_lists.begin();
       it != g_all_mem_event_lists.end(); ++it) {
    (*it)->Clear();
  }
385 386
}

387 388 389 390
// Forward declarations of the file-local helpers defined near the end of this
// file, so DisableProfiler() and CompleteProfilerEvents() can call them.
static std::map<uint64_t, ThreadEvents> DockHostEventRecorderHostPart();
static void DockHostEventRecorderDevicePart(
    const std::map<uint64_t, ThreadEvents> &thr_events);

391
void DisableProfiler(EventSortingKey sorted_key,
C
chengduo 已提交
392
                     const std::string &profile_path) {
393
  SynchronizeAllDevice();
394
  auto thr_events = DockHostEventRecorderHostPart();
C
chengduo 已提交
395 396
  MemEvenRecorder::Instance().Flush();

X
Xin Pan 已提交
397
  std::lock_guard<std::mutex> l(profiler_mu);
398
  if (g_state == ProfilerState::kDisabled) return;
399
  // Mark the profiling stop.
400
  Mark("_stop_profiler_");
401
  DealWithShowName();
402

C
chengduo 已提交
403
  DeviceTracer *tracer = GetDeviceTracer();
404
  if (tracer->IsEnabled()) {
405
    tracer->Disable();
406
    DockHostEventRecorderDevicePart(thr_events);
407
    tracer->GenEventKernelCudaElapsedTime();
408
    tracer->GenProfile(profile_path);
409
  }
410 411

  std::vector<std::vector<Event>> all_events = GetAllEvents();
412

413 414
  ParseEvents(all_events, true, sorted_key);
  ParseEvents(all_events, false, sorted_key);
H
Huihuang Zheng 已提交
415 416 417 418 419 420 421 422 423 424 425 426 427 428

  std::vector<std::vector<MemEvent>> all_mem_events = GetMemEvents();
  ParseMemEvents(all_mem_events);

  ResetProfiler();
  g_state = ProfilerState::kDisabled;
  g_tracer_option = TracerOption::kDefault;
  should_send_profile_state = true;
}

// Stops profiling and hands the raw results to the caller instead of parsing
// them.
//
// tracer_profile  when non-null and the tracer is enabled, receives the
//                 tracer's profile (the tracer is disabled first).
// time_events     when non-null, receives GetAllEvents().
// mem_events      when non-null, receives GetMemEvents().
//
// Returns early, after gathering host events and flushing the memory
// recorder, if the profiler is already disabled. Otherwise all state is reset
// to disabled/default at the end.
void CompleteProfilerEvents(proto::Profile *tracer_profile,
                            std::vector<std::vector<Event>> *time_events,
                            std::vector<std::vector<MemEvent>> *mem_events) {
  SynchronizeAllDevice();
  auto host_thread_events = DockHostEventRecorderHostPart();
  MemEvenRecorder::Instance().Flush();

  std::lock_guard<std::mutex> state_lock(profiler_mu);
  if (g_state == ProfilerState::kDisabled) {
    return;
  }

  // Mark the profiling stop.
  Mark("_stop_profiler_");

  DeviceTracer *device_tracer = GetDeviceTracer();
  if (device_tracer->IsEnabled() && tracer_profile != nullptr) {
    device_tracer->Disable();
    DockHostEventRecorderDevicePart(host_thread_events);
    device_tracer->GenEventKernelCudaElapsedTime();
    *tracer_profile = device_tracer->GetProfile();
  }

  if (time_events != nullptr) {
    *time_events = GetAllEvents();
  }
  if (mem_events != nullptr) {
    *mem_events = GetMemEvents();
  }

  ResetProfiler();
  g_state = ProfilerState::kDisabled;
  g_tracer_option = TracerOption::kDefault;
  should_send_profile_state = true;
}

W
wangchaochaohu 已提交
459 460 461 462 463 464 465 466 467 468
std::vector<std::vector<Event>> GetAllEvents() {
  std::lock_guard<std::mutex> guard(g_all_event_lists_mutex);
  std::vector<std::vector<Event>> result;
  for (auto it = g_all_event_lists.begin(); it != g_all_event_lists.end();
       ++it) {
    result.emplace_back((*it)->Reduce());
  }
  return result;
}

469 470
// True whenever the profiler state is anything other than kDisabled.
bool IsProfileEnabled() {
  return g_state != ProfilerState::kDisabled;
}

W
wangchaochaohu 已提交
471
// Returns the should_send_profile_state flag, which EnableProfiler(),
// DisableProfiler() and CompleteProfilerEvents() set to true.
bool ShouldSendProfileState() {
  return should_send_profile_state;
}
472

473 474
// Builds the detailed op label "<type_name>%<first variable name>%".
//
// name_map   map whose first non-empty value contributes its first element.
// type_name  operator type used as the label prefix.
//
// Returns "" unless the tracer option is kAllOpDetail and profiling is
// enabled. When every value in `name_map` is empty the result is
// "<type_name>%%".
std::string OpName(const framework::VariableNameMap &name_map,
                   const std::string &type_name) {
  if (platform::GetTracerOption() != platform::TracerOption::kAllOpDetail ||
      !IsProfileEnabled())
    return "";

  std::string ret = type_name + "%";
  for (const auto &entry : name_map) {
    // Bind by reference: copying the whole value container just to read its
    // first element is wasted work.
    const auto &var_names = entry.second;
    if (!var_names.empty()) {
      ret += var_names[0];
      break;
    }
  }
  ret += "%";

  return ret;
}

// Stores `option` as the global tracer option, under profiler_mu.
void SetTracerOption(TracerOption option) {
  std::lock_guard<std::mutex> state_lock(profiler_mu);
  g_tracer_option = option;
}

// Returns the global tracer option (read without taking profiler_mu).
platform::TracerOption GetTracerOption() {
  return g_tracer_option;
}
W
wangchaochaohu 已提交
498 499 500 501 502 503 504 505 506 507 508

// Assigns profiler_lister_id a random value drawn uniformly from
// [1, INT_MAX], using a Mersenne Twister seeded from std::random_device.
void SetProfileListener() {
  std::random_device seed_source;
  std::mt19937 rng;
  rng.seed(seed_source());
  std::uniform_int_distribution<std::mt19937::result_type> id_dist(
      1, std::numeric_limits<int>::max());
  profiler_lister_id = id_dist(rng);
}

// Returns the id most recently chosen by SetProfileListener().
int64_t ListenerId() {
  return profiler_lister_id;
}

509 510 511 512 513 514 515
// Synchronises all devices, then turns on the nvprof hook that makes
// RecordEvent push/pop NVTX ranges.
void NvprofEnableRecordEvent() {
  SynchronizeAllDevice();
  g_enable_nvprof_hook = true;
}

// Turns off the nvprof hook checked by RecordEvent.
void NvprofDisableRecordEvent() {
  g_enable_nvprof_hook = false;
}

516
// Routes subsequent RecordEvent / Mark calls to HostEventRecorder.
void EnableHostEventRecorder() {
  FLAGS_enable_host_event_recorder_hook = true;
}
L
liutiexing 已提交
517

C
chenjian 已提交
518 519 520 521
// Routes subsequent RecordEvent / Mark calls back to the legacy event lists.
void DisableHostEventRecorder() { FLAGS_enable_host_event_recorder_hook = false; }

L
liutiexing 已提交
522 523 524 525 526 527
// Gathers the events held by HostEventRecorder and renders them as text: one
// line with the thread id per thread section, followed by one
// "{ name | start | end | duration }" line per event (duration in us).
std::string PrintHostEvents() {
  std::ostringstream text;
  auto gathered = HostEventRecorder::GetInstance().GatherEvents();
  for (const auto &section : gathered.thr_sections) {
    text << section.thread_id << '\n';
    for (const auto &event : section.events) {
      const auto duration_us = (event.end_ns - event.start_ns) / 1000.000;
      text << "{ " << event.name << " | " << event.start_ns << "ns | "
           << event.end_ns << "ns | " << duration_us << "us }" << '\n';
    }
  }
  return text.str();
}

536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625
static void EmulateEventPushAndPop(const HostEventSection &host_sec,
                                   std::map<uint64_t, ThreadEvents> *out) {
  for (const auto &thr_sec : host_sec.thr_sections) {
    uint64_t tid = thr_sec.thread_id;
    auto cur_thr_list = std::make_shared<EventList<Event>>();
    g_all_event_lists.emplace_front(cur_thr_list);
    // for nesting events
    std::stack<size_t> evt_stk;
    std::stack<std::string> prefix_stk;
    std::map<uint64_t, size_t> start2evt;
    for (size_t i = 0; i < thr_sec.events.size(); ++i) {
      const auto &evt = thr_sec.events[i];
      start2evt[evt.start_ns] = i;
    }
    auto iter = start2evt.begin();
    // loop events
    for (size_t i = 0; i < thr_sec.events.size(); ++i) {
      const auto &thr_evts = thr_sec.events;
      const auto &evt = thr_evts[i];
      // For nesting events
      while (!evt_stk.empty() && thr_evts[evt_stk.top()].end_ns <= evt.end_ns) {
        evt_stk.pop();
        prefix_stk.pop();
      }
      while (iter != start2evt.end() &&
             thr_evts[iter->second].start_ns < evt.start_ns) {
        if (thr_evts[iter->second].end_ns > evt.start_ns) {
          evt_stk.push(iter->second);
          std::string prefix = thr_evts[iter->second].name;
          if (!prefix_stk.empty()) {
            prefix = prefix_stk.top() + "/" + prefix;
          }
          prefix_stk.push(prefix);
        }
        ++iter;
      }
      // Record orig event pair
      std::string name =
          prefix_stk.empty() ? evt.name : prefix_stk.top() + "/" + evt.name;
      const char *attr = (evt.attr == nullptr ? "none" : evt.attr);
      Event *orig_evt = cur_thr_list->Record(EventType::kPushRange, name, tid,
                                             evt.role, attr);
      (*out)[tid][evt.end_ns] = std::make_pair(orig_evt, evt.start_ns);
      cur_thr_list->Record(EventType::kPopRange, name, tid, evt.role, attr);
    }
  }
}

static void EmulateCPURecordsAdd(const HostEventSection &host_sec) {
  DeviceTracer *tracer = GetDeviceTracer();
  if (tracer == nullptr) {
    return;
  }
  for (const auto &thr_sec : host_sec.thr_sections) {
    uint64_t tid = thr_sec.thread_id;
    for (const auto &evt : thr_sec.events) {
      tracer->AddCPURecords(evt.name, evt.start_ns, evt.end_ns, BlockDepth(),
                            tid);
    }
  }
}

static void EmulateCorrelation(
    const std::map<uint64_t, ThreadEvents> &thr_events) {
  DeviceTracer *tracer = GetDeviceTracer();
  if (tracer == nullptr) {
    return;
  }
  tracer->AddAnnotations(thr_events);
}

// Host-side half of bridging HostEventRecorder into the legacy profiler:
// gathers the recorded host events, replays them as push/pop pairs and CPU
// records, and returns the per-thread event map built along the way.
// Returns an empty map when the HostEventRecorder hook is off.
static std::map<uint64_t, ThreadEvents> DockHostEventRecorderHostPart() {
  std::map<uint64_t, ThreadEvents> thr_events;
  if (FLAGS_enable_host_event_recorder_hook == false) {
    return thr_events;
  }
  auto host_evt_sec = HostEventRecorder::GetInstance().GatherEvents();
  EmulateEventPushAndPop(host_evt_sec, &thr_events);
  EmulateCPURecordsAdd(host_evt_sec);
  // Return the local by name: wrapping it in std::move would prevent copy
  // elision, and a returned local is treated as an rvalue anyway.
  return thr_events;
}

// Device-side half of the bridge: forwards the per-thread event map to
// EmulateCorrelation() when the HostEventRecorder hook is on.
static void DockHostEventRecorderDevicePart(
    const std::map<uint64_t, ThreadEvents> &thr_events) {
  if (FLAGS_enable_host_event_recorder_hook) {
    EmulateCorrelation(thr_events);
  }
}

D
dangqingqing 已提交
626 627
}  // namespace platform
}  // namespace paddle