profiler.h 3.5 KB
Newer Older
D
dangqingqing 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include <cstddef>
#include <cstdint>
#include <forward_list>
#include <iterator>
#include <list>
#include <mutex>
#include <string>
#include <utility>
#include <vector>
#include "paddle/platform/device_context.h"

namespace paddle {
namespace platform {

// Kind tag stored in every Event.
// NOTE(review): the names suggest a standalone marker plus the opening and
// closing ends of a range; confirm against the profiler implementation.
enum EventKind {
  kMark,
  kPushRange,
  kPopRange
};

class Event {
 public:
D
dangqingqing 已提交
29 30
  // The DeviceContext is used to get the cuda stream.
  // If CPU profiling mode, can pass nullptr.
D
dangqingqing 已提交
31
  Event(EventKind kind, std::string name, uint32_t thread_id,
D
dangqingqing 已提交
32
        DeviceContext* dev_ctx);
D
dangqingqing 已提交
33

D
dangqingqing 已提交
34
  std::string kind() const;
D
dangqingqing 已提交
35 36 37 38 39 40 41 42
  std::string name() const { return name_; }
  bool has_cuda() const { return has_cuda_; }

#ifdef PADDLE_WITH_CUDA
  cudaEvent_t event() const { return event_; }
  int device() const { return device_; }
#endif

D
dangqingqing 已提交
43 44
  double CpuElapsedUs(const Event& e) const;
  double CudaElapsedUs(const Event& e) const;
D
dangqingqing 已提交
45 46 47 48 49 50 51 52 53 54 55 56 57 58

 private:
  EventKind kind_;
  std::string name_;
  uint32_t thread_id_;
  int64_t cpu_ns_;
  bool has_cuda_;
#ifdef PADDLE_WITH_CUDA
  cudaEvent_t event_ = nullptr;
  int device_ = -1;
#endif
};

struct EventList {
D
dangqingqing 已提交
59 60 61 62 63
  constexpr static size_t kMB = 1024 * 1024;
  constexpr static size_t kEventBlockSize = 16 * kMB;
  constexpr static size_t kEventSize = sizeof(Event);
  constexpr static size_t kEventAlign = alignof(Event);
  constexpr static size_t kNumBlock =
D
dangqingqing 已提交
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89
      kEventBlockSize /
      ((kEventSize + kEventAlign - 1) / kEventAlign * kEventAlign);

  template <typename... Args>
  void Record(Args&&... args) {
    if (event_blocks.empty() || event_blocks.front().size() == kNumBlock) {
      event_blocks.emplace_front();
      event_blocks.front().reserve(kNumBlock);
    }
    event_blocks.front().emplace_back(std::forward<Args>(args)...);
  }

  std::vector<Event> Reduce() {
    std::vector<Event> result;
    for (auto& block : event_blocks) {
      result.insert(result.begin(), std::make_move_iterator(block.begin()),
                    std::make_move_iterator(block.end()));
    }
    event_blocks.clear();
    return result;
  }

  std::forward_list<std::vector<Event>> event_blocks;
};

// Profiler modes; EnableProfiler() takes one of these values.
enum ProfilerState {
  kDisabled,  // profiling is switched off
  kCPU,       // profiling on the CPU
  kCUDA,      // profiling on the GPU
};

D
dangqingqing 已提交
95
// Declaration only; the definition is not in this header.
// NOTE(review): presumably records a kMark event called `name`, with
// `dev_ctx` passed through for the cuda stream -- confirm in the definition.
void Mark(const std::string& name, DeviceContext* dev_ctx);
D
dangqingqing 已提交
96

Y
Yibing Liu 已提交
97
// Declaration only; the definition is not in this header.
// NOTE(review): presumably records a kPushRange event called `name` (the
// start of a range) -- confirm in the definition.
void PushEvent(const std::string& name, DeviceContext* dev_ctx);
98

Y
Yibing Liu 已提交
99
// Declaration only; the definition is not in this header.
// NOTE(review): presumably records a kPopRange event called `name` (the end
// of a range opened by PushEvent) -- confirm in the definition.
void PopEvent(const std::string& name, DeviceContext* dev_ctx);
100

D
dangqingqing 已提交
101
struct RecordEvent {
D
dangqingqing 已提交
102
  explicit RecordEvent(const std::string& name, DeviceContext* dev_ctx);
D
dangqingqing 已提交
103

D
dangqingqing 已提交
104 105 106 107
  ~RecordEvent();

  // The device context is used by Event to get the current cuda stream.
  DeviceContext* dev_ctx_;
Y
Yibing Liu 已提交
108
  // Event name
109
  std::string name_;
D
dangqingqing 已提交
110 111
};

D
dangqingqing 已提交
112
// Enable the profiling function.
D
dangqingqing 已提交
113
// `state` is one of the ProfilerState values (kDisabled, kCPU, kCUDA).
// Declaration only; the definition is not in this header.
void EnableProfiler(ProfilerState state);
D
dangqingqing 已提交
114 115 116

// Return the event list of all threads. Assuming the returned value is called
// event_lists, event_lists[i][j] represents the j-th Event of the i-th thread.
D
dangqingqing 已提交
117 118
// The result is returned by value: an outer vector whose elements are
// vectors of Event. Declaration only; the definition is not in this header.
std::vector<std::vector<Event>> DisableProfiler();

Y
Yibing Liu 已提交
119
// Takes nested event lists of the same type DisableProfiler() returns.
// NOTE(review): the parameter is a non-const reference, so the definition may
// modify the lists; what is done with the parsed result is not visible here.
void ParseEvents(std::vector<std::vector<Event>>&);
120

D
dangqingqing 已提交
121 122
}  // namespace platform
}  // namespace paddle