diff --git a/paddle/platform/profiler.cc b/paddle/platform/profiler.cc index ea16e56076761d63246a9ef86f03604bc3f7085b..abb1e6a89cf9eab69d06a05c70489b6bcef92c8a 100644 --- a/paddle/platform/profiler.cc +++ b/paddle/platform/profiler.cc @@ -15,12 +15,16 @@ limitations under the License. */ #include "paddle/platform/profiler.h" #include #include +#include "gflags/gflags.h" +#include "glog/logging.h" namespace paddle { namespace platform { // The profiler state, the initial value is ProfilerState::kDisabled static ProfilerState g_state = ProfilerState::kDisabled; +// To record which timer the profiler used, CUDA or CPU. +static std::string g_profiler_place = ""; // The thread local event list only can be accessed by the specific thread // The thread index of each thread static thread_local int32_t g_thread_id; @@ -45,10 +49,7 @@ inline uint64_t GetTimeInNsec() { Event::Event(EventKind kind, std::string name, uint32_t thread_id, DeviceContext* dev_ctx) - : kind_(kind), - name_(std::move(name)), - thread_id_(thread_id), - has_cuda_(false) { + : kind_(kind), name_(name), thread_id_(thread_id), has_cuda_(false) { #ifdef PADDLE_WITH_CUDA auto* cuda_dev_ctx = static_cast(dev_ctx); if (cuda_dev_ctx) { @@ -115,22 +116,27 @@ inline EventList& GetEventList() { } void Mark(const std::string& name, DeviceContext* dev_ctx) { - GetEventList().Record(EventKind::kMark, std::move(name), g_thread_id, - dev_ctx); + GetEventList().Record(EventKind::kMark, name, g_thread_id, dev_ctx); +} + +void PushEvent(const std::string& name, DeviceContext* dev_ctx) { + GetEventList().Record(EventKind::kPushRange, name, g_thread_id, dev_ctx); +} + +void PopEvent(const std::string& name, DeviceContext* dev_ctx) { + GetEventList().Record(EventKind::kPopRange, name, g_thread_id, dev_ctx); } RecordEvent::RecordEvent(const std::string& name, DeviceContext* dev_ctx) { if (g_state == ProfilerState::kDisabled) return; dev_ctx_ = dev_ctx; name_ = name; - GetEventList().Record(EventKind::kPushRange, std::move(name), g_thread_id, - dev_ctx_); + PushEvent(name_, dev_ctx_); } RecordEvent::~RecordEvent() { if (g_state == ProfilerState::kDisabled) return; - GetEventList().Record(EventKind::kPopRange, std::move(name_), g_thread_id, - dev_ctx_); + PopEvent(name_, dev_ctx_); } void EnableProfiler(ProfilerState state) { @@ -141,6 +147,7 @@ void EnableProfiler(ProfilerState state) { "The profiling state should be disabled when calling ", "EnableProfiler."); g_state = state; + g_profiler_place = (g_state == ProfilerState::kCUDA) ? "CUDA" : "CPU"; #ifdef PADDLE_WITH_CUDA if (g_state == ProfilerState::kCUDA) { // Generate some dummy evenets first to reduce the startup overhead. @@ -172,56 +179,8 @@ std::vector> DisableProfiler() { return result; } -void PushEvent(const std::string& name, DeviceContext* dev_ctx) { - GetEventList().Record(EventKind::kPushRange, std::move(name), g_thread_id, - dev_ctx); -} - -void PopEvent(const std::string& name, DeviceContext* dev_ctx) { - GetEventList().Record(EventKind::kPopRange, std::move(name), g_thread_id, - dev_ctx); -} - void ParseEvents(std::vector>& events, EventSortingKey sorted_by) { - // Output header information - std::cout << "------------------------->" - << " Profiling Report " - << "<-------------------------" - << "\n\n"; -#ifdef PADDLE_WITH_CUDA - std::cout << "Place: GPU" << std::endl; -#else - std::cout << "Place: CPU" << std::endl; -#endif - std::cout << "Time unit: ms" << std::endl; - std::string sort_domain = "event end time"; - switch (sorted_by) { - case EventSortingKey::kCalls: - sort_domain = "number of calls"; - break; - case EventSortingKey::kTotal: - sort_domain = "total time"; - break; - case EventSortingKey::kMin: - sort_domain = "minimum time"; - break; - case EventSortingKey::kMax: - sort_domain = "maximum time"; - break; - case EventSortingKey::kAve: - sort_domain = "average time"; - break; - default: - if (sorted_by != EventSortingKey::kDefault) { - std::cout << "Warning: unkown sorting key. "; - sorted_by = EventSortingKey::kDefault; - } - } - std::cout << "Sorted by " << sort_domain - << " in descending order in the same thread\n\n"; - - // Parse events std::vector> events_table; size_t max_name_width = 0; for (size_t i = 0; i < events.size(); i++) { @@ -234,19 +193,19 @@ void ParseEvents(std::vector>& events, pushed_events.push_back(events[i][j]); } else if (events[i][j].kind() == "pop") { std::list::reverse_iterator rit = pushed_events.rbegin(); - while (rit->name() != events[i][j].name() && - rit != pushed_events.rend()) { + while (rit != pushed_events.rend() && + rit->name() != events[i][j].name()) { ++rit; } + if (rit != pushed_events.rend()) { -#ifdef PADDLE_WITH_CUDA - double event_time = rit->CudaElapsedMs(events[i][j]); -#else - double event_time = rit->CpuElapsedMs(events[i][j]); -#endif + double event_time = (g_state == ProfilerState::kCUDA) + ? rit->CudaElapsedMs(events[i][j]) + : rit->CpuElapsedMs(events[i][j]); std::string event_name = "thread" + std::to_string(rit->thread_id()) + "::" + rit->name(); max_name_width = std::max(max_name_width, event_name.size()); + if (event_idx.find(event_name) == event_idx.end()) { event_idx[event_name] = event_items.size(); EventItem event_item = {event_name, 1, event_time, @@ -264,11 +223,13 @@ void ParseEvents(std::vector>& events, event_items[index].max_time = std::max(event_time, event_items[index].max_time); } + // remove the start marker from the list pushed_events.erase((++rit).base()); } else { - std::cout << "Warning: can not find the start marker of event " - << events[i][j].name(); + LOG(WARNING) << "Cannot find the push marker of event \'" + << events[i][j].name() + << "\', which will be ignored in profiling report."; } } } @@ -294,19 +255,65 @@ void ParseEvents(std::vector>& events, } }); } + events_table.push_back(event_items); + // To check whether there are events with `push` but without `pop` + std::list::reverse_iterator rit = pushed_events.rbegin(); + while (rit != pushed_events.rend()) { + if (rit->kind() == "push") { + LOG(WARNING) << "Cannot find the pop marker of event \'" << rit->name() + << "\', which will be ignored in profiling report."; + } + ++rit; + } } - // output events table + + // Print report + PrintProfilingReport(events_table, sorted_by, max_name_width + 4, 12); +} + +void PrintProfilingReport(std::vector>& events_table, + EventSortingKey sorted_by, const size_t name_width, + const size_t data_width) { + if (g_profiler_place == "") return; + // Output header information + std::cout << "\n------------------------->" + << " Profiling Report " + << "<-------------------------\n\n"; + std::cout << "Place: " << g_profiler_place << std::endl; + std::cout << "Time unit: ms" << std::endl; + std::string sort_domain = "event end time"; + switch (sorted_by) { + case EventSortingKey::kCalls: + sort_domain = "number of calls"; + break; + case EventSortingKey::kTotal: + sort_domain = "total time"; + break; + case EventSortingKey::kMin: + sort_domain = "minimum time"; + break; + case EventSortingKey::kMax: + sort_domain = "maximum time"; + break; + case EventSortingKey::kAve: + sort_domain = "average time"; + break; + default: + break; + } + std::cout << "Sorted by " << sort_domain + << " in descending order in the same thread\n\n"; + // Output events table std::cout.setf(std::ios::left); - const int data_width = 12; - std::cout << std::setw(max_name_width + 4) << "Event" << std::setw(data_width) + std::cout << std::setw(name_width) << "Event" << std::setw(data_width) << "Calls" << std::setw(data_width) << "Total" << std::setw(data_width) << "Min." << std::setw(data_width) << "Max." << std::setw(data_width) << "Ave." << std::endl; for (size_t i = 0; i < events_table.size(); ++i) { for (size_t j = 0; j < events_table[i].size(); ++j) { EventItem& event_item = events_table[i][j]; - std::cout << std::setw(max_name_width + 4) << event_item.name + std::cout << std::setw(name_width) << event_item.name << std::setw(data_width) << event_item.calls << std::setw(data_width) << event_item.total_time << std::setw(data_width) << event_item.min_time @@ -314,6 +321,7 @@ void ParseEvents(std::vector>& events, << std::setw(data_width) << event_item.ave_time << std::endl; } } + std::cout << std::endl; } } // namespace platform diff --git a/paddle/platform/profiler.h b/paddle/platform/profiler.h index d51ec6443217c6fda882e22dd07f80c12f745d9d..f97a586787780aeea2aeeb970174a8b34544ac95 100644 --- a/paddle/platform/profiler.h +++ b/paddle/platform/profiler.h @@ -134,5 +134,9 @@ enum EventSortingKey { kDefault, kCalls, kTotal, kMin, kMax, kAve }; void ParseEvents(std::vector>&, EventSortingKey sorted_by = EventSortingKey::kDefault); +// Print results +void PrintProfilingReport(std::vector>& events_table, + EventSortingKey sorted_by, const size_t name_width, + const size_t data_width); } // namespace platform } // namespace paddle diff --git a/paddle/platform/profiler_test.cc b/paddle/platform/profiler_test.cc index 3800f3914f18881f336c5d9920d797fbafab232d..13dea713c71e147ed5dd8d090e92d86c96256c09 100644 --- a/paddle/platform/profiler_test.cc +++ b/paddle/platform/profiler_test.cc @@ -99,8 +99,12 @@ TEST(RecordEvent, RecordEvent) { int counter = 1; while (counter != i * 1000) counter++; } + + // Bad Usage: + PushEvent("event_without_pop", dev_ctx); + PopEvent("event_without_push", dev_ctx); std::vector> events = paddle::platform::DisableProfiler(); - // Will remove from test before merging + // Will remove parsing-related code from test later ParseEvents(events, EventSortingKey::kTotal); int cuda_startup_count = 0;