profiler.cc 5.6 KB
Newer Older
L
liutiexing 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14 15

#include "paddle/fluid/platform/profiler/profiler.h"
16

17 18 19 20 21 22 23 24 25 26 27
#include "glog/logging.h"
#ifdef PADDLE_WITH_CUDA
#include <cuda.h>
#endif
#ifdef PADDLE_WITH_HIP
#include <hip/hip_runtime.h>
#endif
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#endif
#include "paddle/fluid/platform/enforce.h"
L
liutiexing 已提交
28
#include "paddle/fluid/platform/profiler/cuda_tracer.h"
29
#include "paddle/fluid/platform/profiler/custom_device/custom_tracer.h"
C
chenjian 已提交
30
#include "paddle/fluid/platform/profiler/extra_info.h"
31
#include "paddle/fluid/platform/profiler/host_tracer.h"
32
#include "paddle/fluid/platform/profiler/trace_event_collector.h"
C
chenjian 已提交
33
#include "paddle/fluid/platform/profiler/utils.h"
34 35 36
#ifdef PADDLE_WITH_CUSTOM_DEVICE
#include "paddle/phi/backends/device_manager.h"
#endif
37 38 39 40

namespace paddle {
namespace platform {

41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
// Invokes the device-wide synchronize call of every backend that was compiled
// in (CUDA, HIP, custom devices). When none of the build flags is defined the
// function body is empty.
void SynchronizeDevice() {
#ifdef PADDLE_WITH_CUDA
  PADDLE_ENFORCE_GPU_SUCCESS(cudaDeviceSynchronize());
#endif
#ifdef PADDLE_WITH_HIP
  PADDLE_ENFORCE_GPU_SUCCESS(hipDeviceSynchronize());
#endif
#ifdef PADDLE_WITH_CUSTOM_DEVICE
  // For each registered custom device type, synchronize the device id that
  // DeviceManager::GetDevice reports for that type.
  for (const auto& custom_type :
       phi::DeviceManager::GetAllCustomDeviceTypes()) {
    auto device_id = phi::DeviceManager::GetDevice(custom_type);
    auto target_place = paddle::platform::CustomPlace(custom_type, device_id);
    phi::DeviceManager::SynchronizeDevice(target_place);
  }
#endif
}
57 58 59

// Set to true by Create() through exchange() and reset to false by
// ~Profiler(); Create() returns nullptr while the flag is already true.
std::atomic<bool> Profiler::alive_{false};

60 61 62
// Index handed to each ProfilerResult by Stop() (via SetSpanIndx); Stop()
// increments it after every call.
uint32_t Profiler::span_indx = 0;
// Version string handed to each ProfilerResult by Stop() (via SetVersion).
const char* Profiler::version = "1.0.2";

63 64 65
std::unique_ptr<Profiler> Profiler::Create(
    const ProfilerOptions& options,
    const std::vector<std::string>& custom_device_types) {
66 67 68
  if (alive_.exchange(true)) {
    return nullptr;
  }
69
  return std::unique_ptr<Profiler>(new Profiler(options, custom_device_types));
70 71
}

C
chenjian 已提交
72 73 74 75 76 77 78 79
// Reports whether this build was compiled with PADDLE_WITH_CUPTI defined.
bool Profiler::IsCuptiSupported() {
#ifdef PADDLE_WITH_CUPTI
  return true;
#else
  return false;
#endif
}

F
fwenguang 已提交
80 81 82 83 84
// Always reports false: no code path in this file enables CNPAPI support.
bool Profiler::IsCnpapiSupported() {
  return false;
}

85 86
// Stores the options and registers one tracer per enabled bit of
// options.trace_switch:
//   - kProfileCPUOptionBit:          a newly allocated HostTracer configured
//                                    with options.trace_level;
//   - kProfileGPUOptionBit:          the CudaTracer singleton;
//   - kProfileCustomDeviceOptionBit: the CustomTracer singleton of every entry
//                                    in custom_device_types.
// NOTE(review): the bool passed alongside each tracer presumably tells the
// holder whether it owns the pointer (true only for the HostTracer allocated
// here) - confirm against the holder type declared in profiler.h.
Profiler::Profiler(const ProfilerOptions& options,
                   const std::vector<std::string>& custom_device_types)
    : options_(options) {
  const std::bitset<32> enabled_bits(options_.trace_switch);

  if (enabled_bits.test(kProfileCPUOptionBit)) {
    HostTracerOptions host_opts;
    host_opts.trace_level = options_.trace_level;
    tracers_.emplace_back(new HostTracer(host_opts), true);
  }

  if (enabled_bits.test(kProfileGPUOptionBit)) {
    tracers_.emplace_back(&CudaTracer::GetInstance(), false);
  }

  if (!enabled_bits.test(kProfileCustomDeviceOptionBit)) {
    return;
  }
  for (const auto& device_type : custom_device_types) {
    tracers_.emplace_back(&CustomTracer::GetInstance(device_type), false);
  }
}

// Clears the alive flag so that a later Create() call can succeed.
Profiler::~Profiler() {
  alive_ = false;
}

void Profiler::Prepare() {
  for (auto& tracer : tracers_) {
    tracer.Get().PrepareTracing();
  }
}

void Profiler::Start() {
113
  SynchronizeDevice();
114 115 116
  for (auto& tracer : tracers_) {
    tracer.Get().StartTracing();
  }
C
chenjian 已提交
117
  cpu_utilization_.RecordBeginTimeInfo();
118 119
}

C
chenjian 已提交
120
std::unique_ptr<ProfilerResult> Profiler::Stop() {
121
  SynchronizeDevice();
122 123 124 125 126
  TraceEventCollector collector;
  for (auto& tracer : tracers_) {
    tracer.Get().StopTracing();
    tracer.Get().CollectTraceData(&collector);
  }
C
chenjian 已提交
127
  std::unique_ptr<NodeTrees> tree(
128 129 130 131
      new NodeTrees(collector.HostEvents(),
                    collector.RuntimeEvents(),
                    collector.DeviceEvents(),
                    collector.MemEvents(),
C
chenjian 已提交
132
                    collector.OperatorSupplementEvents()));
C
chenjian 已提交
133 134 135 136 137 138 139 140 141 142 143 144
  cpu_utilization_.RecordEndTimeInfo();
  ExtraInfo extrainfo;
  extrainfo.AddExtraInfo(std::string("System Cpu Utilization"),
                         std::string("%f"),
                         cpu_utilization_.GetCpuUtilization());
  extrainfo.AddExtraInfo(std::string("Process Cpu Utilization"),
                         std::string("%f"),
                         cpu_utilization_.GetCpuCurProcessUtilization());
  const std::unordered_map<uint64_t, std::string> thread_names =
      collector.ThreadNames();
  for (const auto& kv : thread_names) {
    extrainfo.AddExtraInfo(string_format(std::string("%llu"), kv.first),
145 146
                           std::string("%s"),
                           kv.second.c_str());
C
chenjian 已提交
147
  }
148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  std::map<uint32_t, gpuDeviceProp> device_property_map;
  std::vector<int32_t> device_ids = GetSelectedDevices();
  for (auto index = 0u; index < device_ids.size(); index++) {
    const gpuDeviceProp& device_property =
        GetDeviceProperties(device_ids[index]);
    device_property_map[device_ids[index]] = device_property;
  }
  ProfilerResult* profiler_result_ptr = new platform::ProfilerResult(
      std::move(tree), extrainfo, device_property_map);
#else
  ProfilerResult* profiler_result_ptr =
      new platform::ProfilerResult(std::move(tree), extrainfo);
#endif
  profiler_result_ptr->SetVersion(std::string(version));
  profiler_result_ptr->SetSpanIndx(span_indx);
  span_indx += 1;
  return std::unique_ptr<ProfilerResult>(profiler_result_ptr);
166 167 168 169
}

}  // namespace platform
}  // namespace paddle