profiler.cc 5.9 KB
Newer Older
L
liutiexing 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14 15

#include "paddle/fluid/platform/profiler/profiler.h"
16

17 18 19 20 21 22 23 24 25 26 27
#include "glog/logging.h"
#ifdef PADDLE_WITH_CUDA
#include <cuda.h>
#endif
#ifdef PADDLE_WITH_HIP
#include <hip/hip_runtime.h>
#endif
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#endif
#include "paddle/fluid/platform/enforce.h"
L
liutiexing 已提交
28
#include "paddle/fluid/platform/profiler/cuda_tracer.h"
29
#include "paddle/fluid/platform/profiler/custom_device/custom_tracer.h"
C
chenjian 已提交
30
#include "paddle/fluid/platform/profiler/extra_info.h"
31
#include "paddle/fluid/platform/profiler/host_tracer.h"
F
fwenguang 已提交
32
#include "paddle/fluid/platform/profiler/mlu/mlu_tracer.h"
33
#include "paddle/fluid/platform/profiler/trace_event_collector.h"
C
chenjian 已提交
34
#include "paddle/fluid/platform/profiler/utils.h"
35 36 37
#ifdef PADDLE_WITH_CUSTOM_DEVICE
#include "paddle/phi/backends/device_manager.h"
#endif
38 39 40 41

namespace paddle {
namespace platform {

42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
// Issues a device-wide synchronize call for every accelerator backend that
// was enabled at compile time (CUDA, HIP, MLU, custom devices). When none of
// the backend macros is defined the function body is empty.
void SynchronizeDevice() {
#if defined(PADDLE_WITH_CUDA)
  PADDLE_ENFORCE_GPU_SUCCESS(cudaDeviceSynchronize());
#endif
#if defined(PADDLE_WITH_HIP)
  PADDLE_ENFORCE_GPU_SUCCESS(hipDeviceSynchronize());
#endif
#if defined(PADDLE_WITH_MLU)
  PADDLE_ENFORCE_MLU_SUCCESS(cnrtSyncDevice());
#endif
#if defined(PADDLE_WITH_CUSTOM_DEVICE)
  // One synchronize per registered custom device type, targeting the device
  // id that DeviceManager::GetDevice reports for that type.
  for (const auto& device_type :
       phi::DeviceManager::GetAllCustomDeviceTypes()) {
    auto device_id = phi::DeviceManager::GetDevice(device_type);
    auto target = paddle::platform::CustomPlace(device_type, device_id);
    phi::DeviceManager::SynchronizeDevice(target);
  }
#endif
}
61 62 63

// Flag marking that a Profiler currently exists: Create() sets it with
// exchange(true) and returns nullptr if it was already set; ~Profiler()
// stores false again.
std::atomic<bool> Profiler::alive_{false};

64 65 66
// Index written into each ProfilerResult by Stop(), which then increments it.
uint32_t Profiler::span_indx = 0;
// Version string written into each ProfilerResult by Stop().
const char* Profiler::version = "1.0.2";

67 68 69
std::unique_ptr<Profiler> Profiler::Create(
    const ProfilerOptions& options,
    const std::vector<std::string>& custom_device_types) {
70 71 72
  if (alive_.exchange(true)) {
    return nullptr;
  }
73
  return std::unique_ptr<Profiler>(new Profiler(options, custom_device_types));
74 75
}

C
chenjian 已提交
76 77 78 79 80 81 82 83
// Reports whether this binary was compiled with PADDLE_WITH_CUPTI defined.
bool Profiler::IsCuptiSupported() {
#ifdef PADDLE_WITH_CUPTI
  return true;
#else
  return false;
#endif
}

F
fwenguang 已提交
84 85 86 87 88 89 90 91
// Reports whether this binary was compiled with PADDLE_WITH_MLU defined.
bool Profiler::IsCnpapiSupported() {
#ifdef PADDLE_WITH_MLU
  return true;
#else
  return false;
#endif
}

92 93
// Stores the options and registers one tracer per target enabled in
// options.trace_switch (interpreted as a 32-bit mask).
//
// @param options              Profiling options; trace_switch selects the
//                             tracers, trace_level is passed to HostTracer.
// @param custom_device_types  Device type names for which a CustomTracer is
//                             registered when the custom-device bit is set.
//
// The boolean passed alongside each tracer is true only for the HostTracer
// allocated here and false for the GetInstance() singletons.
// NOTE(review): presumably this is an ownership flag - confirm against the
// tracer holder type declared in profiler.h.
Profiler::Profiler(const ProfilerOptions& options,
                   const std::vector<std::string>& custom_device_types) {
  options_ = options;
  const std::bitset<32> enabled_bits(options_.trace_switch);
  const auto is_enabled = [&enabled_bits](size_t bit) {
    return enabled_bits.test(bit);
  };

  if (is_enabled(kProfileCPUOptionBit)) {
    HostTracerOptions host_options;
    host_options.trace_level = options_.trace_level;
    tracers_.emplace_back(new HostTracer(host_options), true);
  }

  if (is_enabled(kProfileGPUOptionBit)) {
    tracers_.emplace_back(&CudaTracer::GetInstance(), false);
  }

  if (is_enabled(kProfileMLUOptionBit)) {
    tracers_.emplace_back(&MluTracer::GetInstance(), false);
  }

  if (!is_enabled(kProfileCustomDeviceOptionBit)) {
    return;
  }
  for (const auto& device_type : custom_device_types) {
    tracers_.emplace_back(&CustomTracer::GetInstance(device_type), false);
  }
}

// Clears the alive flag so that a subsequent Create() call can succeed.
Profiler::~Profiler() {
  alive_ = false;
}

void Profiler::Prepare() {
  for (auto& tracer : tracers_) {
    tracer.Get().PrepareTracing();
  }
}

void Profiler::Start() {
123
  SynchronizeDevice();
124 125 126
  for (auto& tracer : tracers_) {
    tracer.Get().StartTracing();
  }
C
chenjian 已提交
127
  cpu_utilization_.RecordBeginTimeInfo();
128 129
}

C
chenjian 已提交
130
std::unique_ptr<ProfilerResult> Profiler::Stop() {
131
  SynchronizeDevice();
132 133 134 135 136
  TraceEventCollector collector;
  for (auto& tracer : tracers_) {
    tracer.Get().StopTracing();
    tracer.Get().CollectTraceData(&collector);
  }
C
chenjian 已提交
137
  std::unique_ptr<NodeTrees> tree(
138 139 140 141
      new NodeTrees(collector.HostEvents(),
                    collector.RuntimeEvents(),
                    collector.DeviceEvents(),
                    collector.MemEvents(),
C
chenjian 已提交
142
                    collector.OperatorSupplementEvents()));
C
chenjian 已提交
143 144 145 146 147 148 149 150 151 152 153 154
  cpu_utilization_.RecordEndTimeInfo();
  ExtraInfo extrainfo;
  extrainfo.AddExtraInfo(std::string("System Cpu Utilization"),
                         std::string("%f"),
                         cpu_utilization_.GetCpuUtilization());
  extrainfo.AddExtraInfo(std::string("Process Cpu Utilization"),
                         std::string("%f"),
                         cpu_utilization_.GetCpuCurProcessUtilization());
  const std::unordered_map<uint64_t, std::string> thread_names =
      collector.ThreadNames();
  for (const auto& kv : thread_names) {
    extrainfo.AddExtraInfo(string_format(std::string("%llu"), kv.first),
155 156
                           std::string("%s"),
                           kv.second.c_str());
C
chenjian 已提交
157
  }
158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  std::map<uint32_t, gpuDeviceProp> device_property_map;
  std::vector<int32_t> device_ids = GetSelectedDevices();
  for (auto index = 0u; index < device_ids.size(); index++) {
    const gpuDeviceProp& device_property =
        GetDeviceProperties(device_ids[index]);
    device_property_map[device_ids[index]] = device_property;
  }
  ProfilerResult* profiler_result_ptr = new platform::ProfilerResult(
      std::move(tree), extrainfo, device_property_map);
#else
  ProfilerResult* profiler_result_ptr =
      new platform::ProfilerResult(std::move(tree), extrainfo);
#endif
  profiler_result_ptr->SetVersion(std::string(version));
  profiler_result_ptr->SetSpanIndx(span_indx);
  span_indx += 1;
  return std::unique_ptr<ProfilerResult>(profiler_result_ptr);
176 177 178 179
}

}  // namespace platform
}  // namespace paddle