profiler.cc 4.5 KB
Newer Older
L
liutiexing 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14 15

#include "paddle/fluid/platform/profiler/profiler.h"
16

17 18 19 20 21 22 23 24 25 26 27
#include "glog/logging.h"
#ifdef PADDLE_WITH_CUDA
#include <cuda.h>
#endif
#ifdef PADDLE_WITH_HIP
#include <hip/hip_runtime.h>
#endif
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#endif
#include "paddle/fluid/platform/enforce.h"
L
liutiexing 已提交
28
#include "paddle/fluid/platform/profiler/cuda_tracer.h"
29
#include "paddle/fluid/platform/profiler/custom_device/custom_tracer.h"
C
chenjian 已提交
30
#include "paddle/fluid/platform/profiler/extra_info.h"
31
#include "paddle/fluid/platform/profiler/host_tracer.h"
F
fwenguang 已提交
32
#include "paddle/fluid/platform/profiler/mlu/mlu_tracer.h"
33
#include "paddle/fluid/platform/profiler/trace_event_collector.h"
C
chenjian 已提交
34
#include "paddle/fluid/platform/profiler/utils.h"
35 36 37 38 39 40 41 42

namespace paddle {
namespace platform {

// Forward declaration; the definition lives elsewhere. Called at the top of
// Profiler::Start() and Profiler::Stop(), before the tracers are touched.
void SynchronizeAllDevice();

// Single-instance flag: Profiler::Create() atomically sets it to true (and
// returns nullptr if it already was true); ~Profiler() sets it back to false.
std::atomic<bool> Profiler::alive_{false};

43 44 45
std::unique_ptr<Profiler> Profiler::Create(
    const ProfilerOptions& options,
    const std::vector<std::string>& custom_device_types) {
46 47 48
  if (alive_.exchange(true)) {
    return nullptr;
  }
49
  return std::unique_ptr<Profiler>(new Profiler(options, custom_device_types));
50 51
}

C
chenjian 已提交
52 53 54 55 56 57 58 59
// Returns true iff this translation unit was compiled with PADDLE_WITH_CUPTI
// defined.
bool Profiler::IsCuptiSupported() {
#ifdef PADDLE_WITH_CUPTI
  return true;
#else
  return false;
#endif
}

F
fwenguang 已提交
60 61 62 63 64 65 66 67
// Returns true iff this translation unit was compiled with PADDLE_WITH_MLU
// defined.
bool Profiler::IsCnpapiSupported() {
#ifdef PADDLE_WITH_MLU
  return true;
#else
  return false;
#endif
}

68 69
// Stores `options` and fills `tracers_` according to the bits set in
// `options.trace_switch`:
//   - kProfileCPUOptionBit:          a newly allocated HostTracer configured
//                                    with options.trace_level (flag: true)
//   - kProfileGPUOptionBit:          CudaTracer::GetInstance() (flag: false)
//   - kProfileMLUOptionBit:          MluTracer::GetInstance() (flag: false)
//   - kProfileCustomDeviceOptionBit: CustomTracer::GetInstance(type) for each
//                                    entry of `custom_device_types`
//                                    (flag: false)
// NOTE(review): the boolean passed alongside each tracer presumably marks
// whether `tracers_` owns the pointer -- confirm against the holder type.
Profiler::Profiler(const ProfilerOptions& options,
                   const std::vector<std::string>& custom_device_types) {
  options_ = options;

  const std::bitset<32> enabled_bits(options_.trace_switch);

  if (enabled_bits.test(kProfileCPUOptionBit)) {
    HostTracerOptions host_options;
    host_options.trace_level = options_.trace_level;
    tracers_.emplace_back(new HostTracer(host_options), true);
  }

  if (enabled_bits.test(kProfileGPUOptionBit)) {
    tracers_.emplace_back(&CudaTracer::GetInstance(), false);
  }

  if (enabled_bits.test(kProfileMLUOptionBit)) {
    tracers_.emplace_back(&MluTracer::GetInstance(), false);
  }

  // Custom devices come last; nothing more to do when their bit is clear.
  if (!enabled_bits.test(kProfileCustomDeviceOptionBit)) {
    return;
  }
  for (const auto& device_type : custom_device_types) {
    tracers_.emplace_back(&CustomTracer::GetInstance(device_type), false);
  }
}

// Clears the single-instance flag so that a later Profiler::Create() call can
// succeed again.
Profiler::~Profiler() {
  alive_.store(false);
}

void Profiler::Prepare() {
  for (auto& tracer : tracers_) {
    tracer.Get().PrepareTracing();
  }
}

void Profiler::Start() {
  SynchronizeAllDevice();
  for (auto& tracer : tracers_) {
    tracer.Get().StartTracing();
  }
C
chenjian 已提交
103
  cpu_utilization_.RecordBeginTimeInfo();
104 105
}

C
chenjian 已提交
106
std::unique_ptr<ProfilerResult> Profiler::Stop() {
107 108 109 110 111 112
  SynchronizeAllDevice();
  TraceEventCollector collector;
  for (auto& tracer : tracers_) {
    tracer.Get().StopTracing();
    tracer.Get().CollectTraceData(&collector);
  }
C
chenjian 已提交
113
  std::unique_ptr<NodeTrees> tree(
114 115 116 117
      new NodeTrees(collector.HostEvents(),
                    collector.RuntimeEvents(),
                    collector.DeviceEvents(),
                    collector.MemEvents(),
C
chenjian 已提交
118
                    collector.OperatorSupplementEvents()));
C
chenjian 已提交
119 120 121 122 123 124 125 126 127 128 129 130
  cpu_utilization_.RecordEndTimeInfo();
  ExtraInfo extrainfo;
  extrainfo.AddExtraInfo(std::string("System Cpu Utilization"),
                         std::string("%f"),
                         cpu_utilization_.GetCpuUtilization());
  extrainfo.AddExtraInfo(std::string("Process Cpu Utilization"),
                         std::string("%f"),
                         cpu_utilization_.GetCpuCurProcessUtilization());
  const std::unordered_map<uint64_t, std::string> thread_names =
      collector.ThreadNames();
  for (const auto& kv : thread_names) {
    extrainfo.AddExtraInfo(string_format(std::string("%llu"), kv.first),
131 132
                           std::string("%s"),
                           kv.second.c_str());
C
chenjian 已提交
133 134 135
  }
  return std::unique_ptr<ProfilerResult>(
      new platform::ProfilerResult(std::move(tree), extrainfo));
136 137 138 139
}

}  // namespace platform
}  // namespace paddle