/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include <forward_list>
#include <list>
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/platform/device_context.h"

namespace paddle {
namespace platform {

enum EventType { kMark, kPushRange, kPopRange };

class Event {
 public:
  // The DeviceContext is used to get the cuda stream.
  // If CPU profiling mode, can pass nullptr.
  Event(EventType type, std::string name, uint32_t thread_id);

  const EventType& type() const;
  std::string name() const { return name_; }
  uint32_t thread_id() const { return thread_id_; }

#ifdef PADDLE_WITH_CUDA
#ifndef PADDLE_WITH_CUPTI
  cudaEvent_t event() const { return event_; }
  int device() const { return device_; }
#endif
#endif

  double CpuElapsedMs(const Event& e) const;
  double CudaElapsedMs(const Event& e) const;
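  // Note (assumption, not stated in this header): e1.CpuElapsedMs(e2) is
  // expected to return the time from e1 to e2 in milliseconds; CudaElapsedMs
  // returns the same interval measured with CUDA timing under
  // PADDLE_WITH_CUDA.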

 private:
  EventType type_;
  std::string name_;
  uint32_t thread_id_;
  int64_t cpu_ns_;
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUPTI
  int64_t gpu_ns_ = 0;

 public:
  void AddCudaElapsedTime(int64_t start_ns, int64_t end_ns) {
    gpu_ns_ += end_ns - start_ns;
  }

 private:
#else
  cudaEvent_t event_ = nullptr;
  int device_ = -1;
#endif
#endif
};

enum ProfilerState {
  kDisabled,  // disabled state
  kCPU,       // CPU profiling state
  kCUDA,      // GPU profiling state
  kAll,       // Profile both CPU and GPU. (Currently experimental).
};

void Mark(const std::string& name);

Event* PushEvent(const std::string& name);

void PopEvent(const std::string& name);
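
// Example (illustrative sketch only; "fc_op" is a made-up event name):
// PushEvent and PopEvent are expected to be used as a matched pair around a
// timed region, e.g.
//
//   platform::PushEvent("fc_op");
//   /* ... code to be timed ... */
//   platform::PopEvent("fc_op");
//
// RecordEvent below wraps this pairing in an RAII guard.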

struct RecordEvent {
  explicit RecordEvent(const std::string& name);

  ~RecordEvent();

  bool is_enabled_;
  uint64_t start_ns_;
  // Event name
  std::string name_;
  // The name needs to be distinguished by op type, block_id, program_id, and
  // perhaps different kernel invocations within an op.
  std::string full_name_;
};
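
// Example (illustrative sketch only; the op name is made up): RecordEvent is
// an RAII guard, so the lifetime of the object delimits the profiled region:
//
//   {
//     platform::RecordEvent record_event("elementwise_add");
//     /* ... run the op; the event ends when record_event is destroyed ... */
//   }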

class RecordRPCEvent {
 public:
  explicit RecordRPCEvent(const std::string& name);
  ~RecordRPCEvent() {}

 private:
  std::unique_ptr<RecordEvent> event_;
};

struct RecordBlock {
  explicit RecordBlock(int block_id);
  ~RecordBlock();

 private:
  bool is_enabled_;
  std::string name_;
  uint64_t start_ns_;
};

// Return the event lists of all threads. Assuming the returned value is named
// event_lists, event_lists[i][j] represents the j-th Event of the i-th thread.
std::vector<std::vector<Event>> GetAllEvents();
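
// Example (illustrative sketch only) of consuming the result, where the outer
// index is the thread and the inner index is the event:
//
//   auto event_lists = platform::GetAllEvents();
//   for (size_t i = 0; i < event_lists.size(); ++i) {
//     for (const Event& e : event_lists[i]) {
//       /* e was recorded by the i-th thread */
//     }
//   }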

// Candidate keys to sort the profiling report
enum EventSortingKey {
  kDefault,
  kCalls,
  kTotal,
  kMin,
  kMax,
  kAve,
  kCPUTime,
  kGPUTime
};

// Enable the profiling function.
void EnableProfiler(ProfilerState state);

// Clear g_all_event_lists, which holds the event lists of all threads.
void ResetProfiler();

void DisableProfiler(EventSortingKey sorted_key,
                     const std::string& profile_path);
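
// Example (illustrative sketch only; the output path is made up): a typical
// profiling session brackets the code of interest with EnableProfiler /
// DisableProfiler:
//
//   platform::EnableProfiler(platform::ProfilerState::kCPU);
//   /* ... run the code to be profiled ... */
//   platform::DisableProfiler(platform::EventSortingKey::kTotal,
//                             "/tmp/profile");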

const int kEnableProfiler = 1;
const int kDisableProfiler = 2;
// Test if the profiler is currently enabled.
bool IsProfileEnabled();
// Whether the trainer should send profiling state to PS.
bool ShouldSendProfileState();
// Mark the current process as PS by assigning a listener id.
void SetProfileListener();
int64_t ListenerId();

#ifdef PADDLE_WITH_CUDA
void DummyKernelAndEvent();
#endif

}  // namespace platform
}  // namespace paddle