/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include <cstddef>
#include <cstdint>
#include <forward_list>
#include <iterator>
#include <list>
#include <mutex>
#include <string>
#include <utility>
#include <vector>
#include "paddle/platform/device_context.h"

namespace paddle {
namespace platform {

// Kind tag stored in every Event (see Event::kind_).
// NOTE(review): the per-enumerator meanings below are inferred from the names
// and from the Mark()/RecordEvent declarations in this header - confirm
// against the implementation.
enum EventKind {
  kMark,       // presumably a single point-in-time marker
  kPushRange,  // presumably the start of a profiled range
  kPopRange,   // presumably the end of a profiled range
};

class Event {
 public:
D
dangqingqing 已提交
29 30
  // The DeviceContext is used to get the cuda stream.
  // If CPU profiling mode, can pass nullptr.
D
dangqingqing 已提交
31
  Event(EventKind kind, std::string name, uint32_t thread_id,
D
dangqingqing 已提交
32
        DeviceContext* dev_ctx);
D
dangqingqing 已提交
33

D
dangqingqing 已提交
34
  std::string kind() const;
D
dangqingqing 已提交
35 36 37 38 39 40 41 42
  std::string name() const { return name_; }
  bool has_cuda() const { return has_cuda_; }

#ifdef PADDLE_WITH_CUDA
  cudaEvent_t event() const { return event_; }
  int device() const { return device_; }
#endif

D
dangqingqing 已提交
43 44
  double CpuElapsedUs(const Event& e) const;
  double CudaElapsedUs(const Event& e) const;
D
dangqingqing 已提交
45 46 47 48 49 50 51 52 53 54 55 56 57 58

 private:
  EventKind kind_;
  std::string name_;
  uint32_t thread_id_;
  int64_t cpu_ns_;
  bool has_cuda_;
#ifdef PADDLE_WITH_CUDA
  cudaEvent_t event_ = nullptr;
  int device_ = -1;
#endif
};

struct EventList {
D
dangqingqing 已提交
59 60 61 62 63
  constexpr static size_t kMB = 1024 * 1024;
  constexpr static size_t kEventBlockSize = 16 * kMB;
  constexpr static size_t kEventSize = sizeof(Event);
  constexpr static size_t kEventAlign = alignof(Event);
  constexpr static size_t kNumBlock =
D
dangqingqing 已提交
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89
      kEventBlockSize /
      ((kEventSize + kEventAlign - 1) / kEventAlign * kEventAlign);

  template <typename... Args>
  void Record(Args&&... args) {
    if (event_blocks.empty() || event_blocks.front().size() == kNumBlock) {
      event_blocks.emplace_front();
      event_blocks.front().reserve(kNumBlock);
    }
    event_blocks.front().emplace_back(std::forward<Args>(args)...);
  }

  std::vector<Event> Reduce() {
    std::vector<Event> result;
    for (auto& block : event_blocks) {
      result.insert(result.begin(), std::make_move_iterator(block.begin()),
                    std::make_move_iterator(block.end()));
    }
    event_blocks.clear();
    return result;
  }

  std::forward_list<std::vector<Event>> event_blocks;
};

// Profiling mode, passed to EnableProfiler().
enum ProfilerState {
  kDisabled,  // disabled state
  kCPU,       // CPU profiling state
  kCUDA,      // GPU profiling state
};

D
dangqingqing 已提交
95
void Mark(const std::string& name, DeviceContext* dev_ctx);
D
dangqingqing 已提交
96 97

struct RecordEvent {
D
dangqingqing 已提交
98
  explicit RecordEvent(const std::string& name, DeviceContext* dev_ctx);
D
dangqingqing 已提交
99

D
dangqingqing 已提交
100 101 102 103
  ~RecordEvent();

  // The device context is used by Event to get the current cuda stream.
  DeviceContext* dev_ctx_;
D
dangqingqing 已提交
104 105
};

D
dangqingqing 已提交
106
// Enable the profiling function.
D
dangqingqing 已提交
107
void EnableProfiler(ProfilerState state);
D
dangqingqing 已提交
108 109 110

// Return the event list of all threads. Asummed the returned value calls
// event_lists, event_lists[i][j] represents the j-th Event of i-th thread.
D
dangqingqing 已提交
111 112 113 114
std::vector<std::vector<Event>> DisableProfiler();

}  // namespace platform
}  // namespace paddle