host_tracer.cc 7.0 KB
Newer Older
L
liutiexing 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
#include "paddle/fluid/platform/profiler/host_tracer.h"
15

16 17
#include <sstream>

18
#include "glog/logging.h"
19
#include "paddle/fluid/framework/op_proto_maker.h"
20 21
#include "paddle/fluid/platform/profiler/common_event.h"
#include "paddle/fluid/platform/profiler/host_event_recorder.h"
22
#include "paddle/phi/core/flags.h"
23

L
liutiexing 已提交
24 25
// Used to filter events, works like glog VLOG(level).
// RecordEvent will works if host_trace_level >= level.
26 27
PADDLE_DEFINE_EXPORTED_int64(host_trace_level,
                             1,
L
liutiexing 已提交
28 29 30
                             "RecordEvent will works "
                             "if host_trace_level >= level.");

31 32 33 34 35
namespace paddle {
namespace platform {

namespace {

L
liutiexing 已提交
36
void ProcessHostEvents(const HostEventSection<CommonEvent>& host_events,
37 38 39
                       TraceEventCollector* collector) {
  for (const auto& thr_sec : host_events.thr_sections) {
    uint64_t tid = thr_sec.thread_id;
L
liutiexing 已提交
40 41 42
    if (thr_sec.thread_name != kDefaultThreadName) {
      collector->AddThreadName(tid, thr_sec.thread_name);
    }
43 44 45 46 47 48 49 50 51 52 53 54 55
    for (const auto& evt : thr_sec.events) {
      HostTraceEvent event;
      event.name = evt.name;
      event.type = evt.type;
      event.start_ns = evt.start_ns;
      event.end_ns = evt.end_ns;
      event.process_id = host_events.process_id;
      event.thread_id = tid;
      collector->AddHostEvent(std::move(event));
    }
  }
}

56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
void ProcessHostMemEvents(
    const HostEventSection<CommonMemEvent>& host_mem_events,
    TraceEventCollector* collector) {
  for (const auto& thr_sec : host_mem_events.thr_sections) {
    uint64_t tid = thr_sec.thread_id;
    if (thr_sec.thread_name != kDefaultThreadName) {
      collector->AddThreadName(tid, thr_sec.thread_name);
    }
    for (const auto& evt : thr_sec.events) {
      MemTraceEvent event;
      event.timestamp_ns = evt.timestamp_ns;
      event.addr = evt.addr;
      event.type = evt.type;
      event.increase_bytes = evt.increase_bytes;
      event.place = evt.place.DebugString();
      event.current_allocated = evt.current_allocated;
      event.current_reserved = evt.current_reserved;
      event.peak_allocated = evt.peak_allocated;
      event.peak_reserved = evt.peak_reserved;
      event.process_id = host_mem_events.process_id;
      event.thread_id = tid;
      collector->AddMemEvent(std::move(event));
    }
  }
}

void ProcessOperatorSupplementEvents(
    const HostEventSection<OperatorSupplementOriginEvent>& op_supplement_events,
    TraceEventCollector* collector) {
  for (const auto& thr_sec : op_supplement_events.thr_sections) {
    uint64_t tid = thr_sec.thread_id;
    if (thr_sec.thread_name != kDefaultThreadName) {
      collector->AddThreadName(tid, thr_sec.thread_name);
    }
    for (const auto& evt : thr_sec.events) {
91 92 93 94 95 96 97 98 99 100 101 102 103 104 105
      // get callstack from event
      std::vector<std::string> callstacks;
      const std::vector<std::string>* callstack_ptr = nullptr;
      auto iter = evt.attributes.find(
          framework::OpProtoAndCheckerMaker::OpCreationCallstackAttrName());
      if (iter != evt.attributes.end()) {
        callstack_ptr =
            &PADDLE_GET_CONST(std::vector<std::string>, iter->second);
        callstacks = *callstack_ptr;
      }
      std::ostringstream result_string;
      for (auto it = callstacks.begin(); it != callstacks.end(); it++) {
        result_string << (*it) << std::endl;
      }

106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132
      OperatorSupplementEvent event;
      event.timestamp_ns = evt.timestamp_ns;
      event.op_type = evt.op_type;
      std::map<std::string, std::vector<std::vector<int64_t>>> input_shapes;
      std::map<std::string, std::vector<std::string>> dtypes;
      std::string callstack;
      for (auto it = evt.input_shapes.begin(); it != evt.input_shapes.end();
           it++) {
        for (auto idx = 0lu; idx < it->second.size(); idx++) {
          input_shapes[it->first].push_back(std::vector<int64_t>());
          for (auto dim_idx = 0; dim_idx < it->second.at(idx).size();
               dim_idx++) {
            input_shapes[it->first][idx].push_back(
                it->second.at(idx).at(dim_idx));
          }
        }
      }
      for (auto it = evt.dtypes.begin(); it != evt.dtypes.end(); it++) {
        for (auto idx = 0lu; idx < it->second.size(); idx++) {
          dtypes[it->first].push_back(
              framework::proto::VarType::Type_Name(it->second.at(idx)));
        }
      }

      event.input_shapes = input_shapes;
      event.dtypes = dtypes;
      event.callstack = result_string.str();
133 134
      event.attributes = evt.attributes;
      event.op_id = evt.op_id;
135 136 137 138 139 140 141
      event.process_id = op_supplement_events.process_id;
      event.thread_id = tid;
      collector->AddOperatorSupplementEvent(std::move(event));
    }
  }
}

142 143
}  // namespace

L
liutiexing 已提交
144 145 146 147 148 149
// Applies the configured trace level to the HostTraceLevel singleton as a
// warm-up step, then marks this tracer as READY.
void HostTracer::PrepareTracing() {
  auto& host_trace_level = HostTraceLevel::GetInstance();
  host_trace_level.SetLevel(options_.trace_level);  // warm up
  state_ = TracerState::READY;
}

150 151
void HostTracer::StartTracing() {
  PADDLE_ENFORCE_EQ(
152 153
      state_ == TracerState::READY || state_ == TracerState::STOPED,
      true,
154
      platform::errors::PreconditionNotMet("TracerState must be READY"));
L
liutiexing 已提交
155
  HostEventRecorder<CommonEvent>::GetInstance().GatherEvents();
156 157 158
  HostEventRecorder<CommonMemEvent>::GetInstance().GatherEvents();
  HostEventRecorder<OperatorSupplementOriginEvent>::GetInstance()
      .GatherEvents();
L
liutiexing 已提交
159
  HostTraceLevel::GetInstance().SetLevel(options_.trace_level);
160 161 162 163 164
  state_ = TracerState::STARTED;
}

void HostTracer::StopTracing() {
  PADDLE_ENFORCE_EQ(
165 166
      state_,
      TracerState::STARTED,
167 168 169 170 171 172 173
      platform::errors::PreconditionNotMet("TracerState must be STARTED"));
  HostTraceLevel::GetInstance().SetLevel(HostTraceLevel::kDisabled);
  state_ = TracerState::STOPED;
}

// Gathers the host events, host memory events and operator supplement events
// from their recorders and feeds the converted trace events into `collector`.
//
// Precondition: the tracer is STOPED; otherwise a PreconditionNotMet error
// is raised. Each event kind is gathered and processed before the next one.
void HostTracer::CollectTraceData(TraceEventCollector* collector) {
  PADDLE_ENFORCE_EQ(
      state_,
      TracerState::STOPED,
      platform::errors::PreconditionNotMet("TracerState must be STOPED"));
  HostEventSection<CommonEvent> host_events =
      HostEventRecorder<CommonEvent>::GetInstance().GatherEvents();
  ProcessHostEvents(host_events, collector);
  HostEventSection<CommonMemEvent> host_mem_events =
      HostEventRecorder<CommonMemEvent>::GetInstance().GatherEvents();
  ProcessHostMemEvents(host_mem_events, collector);
  HostEventSection<OperatorSupplementOriginEvent> op_supplement_events =
      HostEventRecorder<OperatorSupplementOriginEvent>::GetInstance()
          .GatherEvents();
  ProcessOperatorSupplementEvents(op_supplement_events, collector);
}

}  // namespace platform
}  // namespace paddle