Commit 0358fd01 authored by dangqingqing

Refine profiler code.

Parent 05a733b0
@@ -120,8 +120,7 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
     VLOG(3) << op->DebugStringEx(local_scope);
     platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
-    auto dev_ctx = const_cast<platform::DeviceContext*>(pool.Get(place_));
-    platform::RecordEvent record_event(op->Type(), dev_ctx);
+    platform::RecordEvent record_event(op->Type(), pool.Get(place_));
     op->Run(*local_scope, place_);
     if (FLAGS_do_memory_benchmark) {
...
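In the executor hunk above, the `const_cast` becomes unnecessary once `RecordEvent` accepts a `const DeviceContext*`, so the pointer returned by `pool.Get(place_)` can be passed straight through. `RecordEvent` itself is a scope guard: constructing it starts a profiling range, and its destructor closes the range when `op->Run` returns and the guard leaves scope. A minimal, self-contained sketch of that RAII timing idiom (a hypothetical stand-in, not Paddle's actual class):

```cpp
#include <chrono>
#include <iostream>
#include <string>

// Minimal stand-in for the RecordEvent idiom: the constructor notes the start
// time and the destructor reports the elapsed time when the guard goes out of
// scope, so one local object is enough to time a region.
class ScopedTimer {
 public:
  explicit ScopedTimer(std::string name)
      : name_(std::move(name)), start_(std::chrono::steady_clock::now()) {}
  ~ScopedTimer() {
    auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
                  std::chrono::steady_clock::now() - start_)
                  .count();
    std::cout << name_ << " took " << ns << " ns\n";
  }

 private:
  std::string name_;
  std::chrono::steady_clock::time_point start_;
};

int main() {
  {
    ScopedTimer record_event("op_run");  // plays the role of platform::RecordEvent
    // ... op->Run(*local_scope, place_) would execute here ...
  }  // elapsed time is recorded here, when the guard is destroyed
  return 0;
}
```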
@@ -47,16 +47,16 @@ inline uint64_t GetTimeInNsec() {
 }
 
 Event::Event(EventKind kind, std::string name, uint32_t thread_id,
-             DeviceContext* dev_ctx)
+             const DeviceContext* dev_ctx)
     : kind_(kind), name_(name), thread_id_(thread_id), has_cuda_(false) {
 #ifdef PADDLE_WITH_CUDA
-  auto* cuda_dev_ctx = static_cast<const CUDADeviceContext*>(dev_ctx);
-  if (cuda_dev_ctx) {
+  has_cuda_ = dev_ctx ? platform::is_gpu_place(dev_ctx->GetPlace()) : false;
+  if (has_cuda_) {
+    auto* cuda_dev_ctx = static_cast<const CUDADeviceContext*>(dev_ctx);
     PADDLE_ENFORCE(cudaGetDevice(&device_));
     PADDLE_ENFORCE(cudaEventCreate(&event_));
     auto stream = cuda_dev_ctx->stream();
     PADDLE_ENFORCE(cudaEventRecord(event_, stream));
-    has_cuda_ = true;
   }
 #endif
   cpu_ns_ = GetTimeInNsec();
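The constructor change above also fixes the CUDA detection: a `static_cast` of a non-null base pointer can never be null, so the old `if (cuda_dev_ctx)` branch was taken even for CPU contexts; the new code asks the context for its place instead. A self-contained sketch of that place-based check, using hypothetical stand-in types rather than Paddle's real ones:

```cpp
#include <iostream>

// Hypothetical stand-ins for Place/DeviceContext, only to illustrate the
// place-based check: ask the context where it lives instead of testing the
// result of a static_cast, which is never null for a non-null input.
struct Place {
  bool is_gpu;
};

struct DeviceContext {
  virtual ~DeviceContext() = default;
  virtual Place GetPlace() const = 0;
};

struct CPUDeviceContext : DeviceContext {
  Place GetPlace() const override { return Place{false}; }
};

struct CUDADeviceContext : DeviceContext {
  Place GetPlace() const override { return Place{true}; }
};

bool is_gpu_place(const Place& p) { return p.is_gpu; }

// Mirrors the fixed constructor logic: has_cuda_ comes from the place,
// and a null context (CPU-only profiling) simply means "no CUDA".
bool HasCuda(const DeviceContext* dev_ctx) {
  return dev_ctx ? is_gpu_place(dev_ctx->GetPlace()) : false;
}

int main() {
  CPUDeviceContext cpu_ctx;
  CUDADeviceContext gpu_ctx;
  std::cout << std::boolalpha;
  std::cout << HasCuda(&cpu_ctx) << "\n";  // false
  std::cout << HasCuda(&gpu_ctx) << "\n";  // true
  std::cout << HasCuda(nullptr) << "\n";   // false
  return 0;
}
```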
@@ -114,19 +114,20 @@ inline EventList& GetEventList() {
   return *g_event_list;
 }
 
-void Mark(const std::string& name, DeviceContext* dev_ctx) {
+void Mark(const std::string& name, const DeviceContext* dev_ctx) {
   GetEventList().Record(EventKind::kMark, name, g_thread_id, dev_ctx);
 }
 
-void PushEvent(const std::string& name, DeviceContext* dev_ctx) {
+void PushEvent(const std::string& name, const DeviceContext* dev_ctx) {
   GetEventList().Record(EventKind::kPushRange, name, g_thread_id, dev_ctx);
 }
 
-void PopEvent(const std::string& name, DeviceContext* dev_ctx) {
+void PopEvent(const std::string& name, const DeviceContext* dev_ctx) {
   GetEventList().Record(EventKind::kPopRange, name, g_thread_id, dev_ctx);
 }
 
-RecordEvent::RecordEvent(const std::string& name, DeviceContext* dev_ctx) {
+RecordEvent::RecordEvent(const std::string& name,
+                         const DeviceContext* dev_ctx) {
   if (g_state == ProfilerState::kDisabled) return;
   dev_ctx_ = dev_ctx;
   name_ = name;
@@ -155,6 +156,7 @@ void EnableProfiler(ProfilerState state) {
       DeviceContext* dev_ctx = new CUDADeviceContext(CUDAPlace(d));
       Mark("_cuda_startup_", dev_ctx);
       dev_ctx->Wait();
+      delete dev_ctx;
     });
   }
 }
...
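The `delete dev_ctx;` added above fixes a leak: `EnableProfiler` builds a throwaway `CUDADeviceContext` per GPU just to record the `_cuda_startup_` mark and previously never released it. As a hedged alternative (not what the commit does), the temporary could be owned by a smart pointer so the cleanup is automatic; a self-contained sketch with a placeholder context type:

```cpp
#include <iostream>
#include <memory>

// Placeholder for the per-device warm-up context; the real code constructs a
// CUDADeviceContext, records a "_cuda_startup_" mark, waits, and must then
// release it.
struct FakeDeviceContext {
  explicit FakeDeviceContext(int device) : device_(device) {
    std::cout << "create context for device " << device_ << "\n";
  }
  ~FakeDeviceContext() {
    std::cout << "destroy context for device " << device_ << "\n";
  }
  void Wait() const {}  // stands in for the real synchronization
  int device_;
};

int main() {
  for (int d = 0; d < 2; ++d) {
    // unique_ptr frees the context at the end of each iteration, so no
    // explicit delete is needed, even if an exception were thrown in between.
    auto dev_ctx = std::make_unique<FakeDeviceContext>(d);
    // ... Mark("_cuda_startup_", dev_ctx.get()) would go here ...
    dev_ctx->Wait();
  }
  return 0;
}
```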
@@ -29,7 +29,7 @@ class Event {
   // The DeviceContext is used to get the cuda stream.
   // If CPU profiling mode, can pass nullptr.
   Event(EventKind kind, std::string name, uint32_t thread_id,
-        DeviceContext* dev_ctx);
+        const DeviceContext* dev_ctx);
 
   std::string kind() const;
   std::string name() const { return name_; }
@@ -95,19 +95,19 @@ enum ProfilerState {
   kCUDA,  // GPU profiling state
 };
 
-void Mark(const std::string& name, DeviceContext* dev_ctx);
+void Mark(const std::string& name, const DeviceContext* dev_ctx);
 
-void PushEvent(const std::string& name, DeviceContext* dev_ctx);
+void PushEvent(const std::string& name, const DeviceContext* dev_ctx);
 
-void PopEvent(const std::string& name, DeviceContext* dev_ctx);
+void PopEvent(const std::string& name, const DeviceContext* dev_ctx);
 
 struct RecordEvent {
-  explicit RecordEvent(const std::string& name, DeviceContext* dev_ctx);
+  explicit RecordEvent(const std::string& name, const DeviceContext* dev_ctx);
 
   ~RecordEvent();
 
   // The device context is used by Event to get the current cuda stream.
-  DeviceContext* dev_ctx_;
+  const DeviceContext* dev_ctx_;
 
   // Event name
   std::string name_;
 };
...
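With the declarations above now taking `const DeviceContext*`, a usage sketch of the profiler API follows. It is hedged: it assumes the `paddle::platform` namespace, a `ProfilerState::kCPU` enumerator, and the header path, none of which appear verbatim in this diff, and it only compiles inside the project. Passing `nullptr` as the context keeps events CPU-timed, as the comment on `Event`'s constructor allows:

```cpp
// Usage sketch only; the namespace, ProfilerState::kCPU, and the include path
// are assumptions based on the surrounding context, not shown in this diff.
#include "paddle/platform/profiler.h"  // assumed header path

using namespace paddle::platform;  // assumed namespace

void ProfileSomething() {
  EnableProfiler(ProfilerState::kCPU);

  Mark("start", nullptr);                // a single instantaneous marker

  PushEvent("explicit_range", nullptr);  // manually paired range
  // ... work to be timed ...
  PopEvent("explicit_range", nullptr);

  {
    RecordEvent guard("scoped_range", nullptr);  // RAII push/pop pair
    // ... work to be timed ...
  }  // the range ends here, in ~RecordEvent
}
```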
@@ -81,10 +81,11 @@ def profiler(state, sorted_key=None):
     to add more records.
 
     Args:
-        state (string) : The profiling state, It should be 'CPU' or 'GPU'.
-            Although users may define CPUPlace or CUDAPlace when using Fluid,
-            the profiler doesn't get the state based on this Place. Since the
-            implementation is an independent part from the Fluid.
+        state (string) : The profiling state, which should be 'CPU' or 'GPU',
+            telling the profiler to use the CPU timer or GPU timer for profiling.
+            Although users may have already specified the execution place
+            (CPUPlace/CUDAPlace) in the beginning, for flexibility the profiler
+            does not inherit this place.
         sorted_key (string) : If None, the profiling results will be printed
             in the order of first end time of events. Otherwise, the profiling
             results will be sorted by this flag. This flag should be one
...
@@ -41,8 +41,8 @@ class TestProfiler(unittest.TestCase):
                 exe.run(fluid.default_main_program(), feed={'data': input})
         os.remove(output_file)
 
-    def profiler(self, state):
-        if state == 'GPU' and core.is_compile_gpu():
+    def net_profiler(self, state):
+        if state == 'GPU' and not core.is_compile_gpu():
             return
         startup_program = fluid.Program()
         main_program = fluid.Program()
@@ -79,11 +79,11 @@ class TestProfiler(unittest.TestCase):
         acc = np.array(outs[1])
         pass_acc = accuracy.eval(exe)
 
-    def not_test_cpu_profiler(self):
-        self.profiler('CPU')
+    def test_cpu_profiler(self):
+        self.net_profiler('CPU')
 
-    def not_test_cuda_profiler(self):
-        self.profiler('GPU')
+    def test_cuda_profiler(self):
+        self.net_profiler('GPU')
 
 if __name__ == '__main__':
...