profiler.cc 4.0 KB
Newer Older
L
liutiexing 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14 15 16 17 18 19 20 21 22 23 24 25 26

#include "paddle/fluid/platform/profiler/profiler.h"
#include "glog/logging.h"
#ifdef PADDLE_WITH_CUDA
#include <cuda.h>
#endif
#ifdef PADDLE_WITH_HIP
#include <hip/hip_runtime.h>
#endif
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#endif
#include "paddle/fluid/platform/enforce.h"
L
liutiexing 已提交
27
#include "paddle/fluid/platform/profiler/cuda_tracer.h"
C
chenjian 已提交
28
#include "paddle/fluid/platform/profiler/extra_info.h"
29
#include "paddle/fluid/platform/profiler/host_tracer.h"
30
#include "paddle/fluid/platform/profiler/mlu/mlu_tracer.h"
31
#include "paddle/fluid/platform/profiler/trace_event_collector.h"
C
chenjian 已提交
32
#include "paddle/fluid/platform/profiler/utils.h"
33 34 35 36 37 38 39 40 41 42 43 44 45 46 47

namespace paddle {
namespace platform {

// Forward declaration only; no definition appears in this file. It is invoked
// at the top of Profiler::Start() and Profiler::Stop(), before the tracers are
// started or stopped.
void SynchronizeAllDevice();

// Process-wide flag guarding Profiler creation: Profiler::Create() atomically
// sets it and refuses to build a second instance while it is true;
// ~Profiler() clears it again.
std::atomic<bool> Profiler::alive_{false};

// Factory for the single live Profiler instance.
//
// Atomically claims the `alive_` flag; if it was already set (another
// Profiler exists and has not been destroyed yet) no object is built and
// nullptr is returned. Otherwise a new Profiler configured with `options` is
// returned; its destructor releases the flag.
//
// If constructing the Profiler throws, the flag is released before the
// exception is rethrown. Without this, ~Profiler() would never run for the
// half-built object and every later Create() call would return nullptr.
std::unique_ptr<Profiler> Profiler::Create(const ProfilerOptions& options) {
  if (alive_.exchange(true)) {
    return nullptr;
  }
  try {
    return std::unique_ptr<Profiler>(new Profiler(options));
  } catch (...) {
    // Undo the claim made by exchange(true) above, then propagate.
    alive_.store(false);
    throw;
  }
}

C
chenjian 已提交
48 49 50 51 52 53 54 55
// Compile-time capability probe: returns true exactly when this translation
// unit was built with PADDLE_WITH_CUPTI defined, false otherwise.
bool Profiler::IsCuptiSupported() {
#ifdef PADDLE_WITH_CUPTI
  return true;
#else
  return false;
#endif
}

56 57 58 59 60 61 62 63
// Compile-time capability probe: returns true exactly when this translation
// unit was built with PADDLE_WITH_MLU defined, false otherwise.
bool Profiler::IsCnpapiSupported() {
#ifdef PADDLE_WITH_MLU
  return true;
#else
  return false;
#endif
}

64 65
// Builds a profiler from `options`.
//
// The options are stored in `options_`, then `options_.trace_switch` is read
// as a 32-bit mask and one tracer is appended to `tracers_` for each of the
// CPU / GPU / MLU option bits that is set, in that order.
//
// NOTE(review): the boolean passed alongside each tracer pointer is
// presumably an "owned by the holder" marker (true for the heap-allocated
// HostTracer, false for the GetInstance() singletons) -- confirm against the
// element type of `tracers_`.
Profiler::Profiler(const ProfilerOptions& options) {
  options_ = options;
  const std::bitset<32> enabled_targets(options_.trace_switch);

  // Host tracer: created fresh for this profiler with the requested level.
  if (enabled_targets.test(kProfileCPUOptionBit)) {
    HostTracerOptions host_options;
    host_options.trace_level = options_.trace_level;
    tracers_.emplace_back(new HostTracer(host_options), true);
  }

  // GPU tracer: shared instance obtained from CudaTracer::GetInstance().
  if (enabled_targets.test(kProfileGPUOptionBit)) {
    tracers_.emplace_back(&CudaTracer::GetInstance(), false);
  }

  // MLU tracer: shared instance obtained from MluTracer::GetInstance().
  if (enabled_targets.test(kProfileMLUOptionBit)) {
    tracers_.emplace_back(&MluTracer::GetInstance(), false);
  }
}

// Clears the `alive_` flag so that a subsequent Profiler::Create() call is
// allowed to construct a new instance.
Profiler::~Profiler() {
  alive_ = false;
}

void Profiler::Prepare() {
  for (auto& tracer : tracers_) {
    tracer.Get().PrepareTracing();
  }
}

void Profiler::Start() {
  SynchronizeAllDevice();
  for (auto& tracer : tracers_) {
    tracer.Get().StartTracing();
  }
C
chenjian 已提交
93
  cpu_utilization_.RecordBeginTimeInfo();
94 95
}

C
chenjian 已提交
96
std::unique_ptr<ProfilerResult> Profiler::Stop() {
97 98 99 100 101 102
  SynchronizeAllDevice();
  TraceEventCollector collector;
  for (auto& tracer : tracers_) {
    tracer.Get().StopTracing();
    tracer.Get().CollectTraceData(&collector);
  }
103 104 105
  std::unique_ptr<NodeTrees> tree(new NodeTrees(collector.HostEvents(),
                                                collector.RuntimeEvents(),
                                                collector.DeviceEvents()));
C
chenjian 已提交
106 107 108 109 110 111 112 113 114 115 116 117
  cpu_utilization_.RecordEndTimeInfo();
  ExtraInfo extrainfo;
  extrainfo.AddExtraInfo(std::string("System Cpu Utilization"),
                         std::string("%f"),
                         cpu_utilization_.GetCpuUtilization());
  extrainfo.AddExtraInfo(std::string("Process Cpu Utilization"),
                         std::string("%f"),
                         cpu_utilization_.GetCpuCurProcessUtilization());
  const std::unordered_map<uint64_t, std::string> thread_names =
      collector.ThreadNames();
  for (const auto& kv : thread_names) {
    extrainfo.AddExtraInfo(string_format(std::string("%llu"), kv.first),
C
chenjian 已提交
118
                           std::string("%s"), kv.second.c_str());
C
chenjian 已提交
119 120 121
  }
  return std::unique_ptr<ProfilerResult>(
      new platform::ProfilerResult(std::move(tree), extrainfo));
122 123 124 125
}

}  // namespace platform
}  // namespace paddle