提交 df3b250c 编写于 作者: Y Yibing Liu

Fix bad_alloc bug & refine code in profiler

上级 367a5c9e
...@@ -15,12 +15,16 @@ limitations under the License. */ ...@@ -15,12 +15,16 @@ limitations under the License. */
#include "paddle/platform/profiler.h" #include "paddle/platform/profiler.h"
#include <iomanip> #include <iomanip>
#include <map> #include <map>
#include "gflags/gflags.h"
#include "glog/logging.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
// The profiler state, the initial value is ProfilerState::kDisabled // The profiler state, the initial value is ProfilerState::kDisabled
static ProfilerState g_state = ProfilerState::kDisabled; static ProfilerState g_state = ProfilerState::kDisabled;
// To record which timer the profiler used, CUDA or CPU.
static std::string g_profiler_place = "";
// The thread local event list only can be accessed by the specific thread // The thread local event list only can be accessed by the specific thread
// The thread index of each thread // The thread index of each thread
static thread_local int32_t g_thread_id; static thread_local int32_t g_thread_id;
...@@ -45,10 +49,7 @@ inline uint64_t GetTimeInNsec() { ...@@ -45,10 +49,7 @@ inline uint64_t GetTimeInNsec() {
Event::Event(EventKind kind, std::string name, uint32_t thread_id, Event::Event(EventKind kind, std::string name, uint32_t thread_id,
DeviceContext* dev_ctx) DeviceContext* dev_ctx)
: kind_(kind), : kind_(kind), name_(name), thread_id_(thread_id), has_cuda_(false) {
name_(std::move(name)),
thread_id_(thread_id),
has_cuda_(false) {
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
auto* cuda_dev_ctx = static_cast<const CUDADeviceContext*>(dev_ctx); auto* cuda_dev_ctx = static_cast<const CUDADeviceContext*>(dev_ctx);
if (cuda_dev_ctx) { if (cuda_dev_ctx) {
...@@ -115,22 +116,27 @@ inline EventList& GetEventList() { ...@@ -115,22 +116,27 @@ inline EventList& GetEventList() {
} }
void Mark(const std::string& name, DeviceContext* dev_ctx) { void Mark(const std::string& name, DeviceContext* dev_ctx) {
GetEventList().Record(EventKind::kMark, std::move(name), g_thread_id, GetEventList().Record(EventKind::kMark, name, g_thread_id, dev_ctx);
dev_ctx); }
void PushEvent(const std::string& name, DeviceContext* dev_ctx) {
GetEventList().Record(EventKind::kPushRange, name, g_thread_id, dev_ctx);
}
void PopEvent(const std::string& name, DeviceContext* dev_ctx) {
GetEventList().Record(EventKind::kPopRange, name, g_thread_id, dev_ctx);
} }
RecordEvent::RecordEvent(const std::string& name, DeviceContext* dev_ctx) { RecordEvent::RecordEvent(const std::string& name, DeviceContext* dev_ctx) {
if (g_state == ProfilerState::kDisabled) return; if (g_state == ProfilerState::kDisabled) return;
dev_ctx_ = dev_ctx; dev_ctx_ = dev_ctx;
name_ = name; name_ = name;
GetEventList().Record(EventKind::kPushRange, std::move(name), g_thread_id, PushEvent(name_, dev_ctx_);
dev_ctx_);
} }
RecordEvent::~RecordEvent() { RecordEvent::~RecordEvent() {
if (g_state == ProfilerState::kDisabled) return; if (g_state == ProfilerState::kDisabled) return;
GetEventList().Record(EventKind::kPopRange, std::move(name_), g_thread_id, PopEvent(name_, dev_ctx_);
dev_ctx_);
} }
void EnableProfiler(ProfilerState state) { void EnableProfiler(ProfilerState state) {
...@@ -141,6 +147,7 @@ void EnableProfiler(ProfilerState state) { ...@@ -141,6 +147,7 @@ void EnableProfiler(ProfilerState state) {
"The profiling state should be disabled when calling ", "The profiling state should be disabled when calling ",
"EnableProfiler."); "EnableProfiler.");
g_state = state; g_state = state;
g_profiler_place = (g_state == ProfilerState::kCUDA) ? "CUDA" : "CPU";
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
if (g_state == ProfilerState::kCUDA) { if (g_state == ProfilerState::kCUDA) {
// Generate some dummy evenets first to reduce the startup overhead. // Generate some dummy evenets first to reduce the startup overhead.
...@@ -172,56 +179,8 @@ std::vector<std::vector<Event>> DisableProfiler() { ...@@ -172,56 +179,8 @@ std::vector<std::vector<Event>> DisableProfiler() {
return result; return result;
} }
void PushEvent(const std::string& name, DeviceContext* dev_ctx) {
GetEventList().Record(EventKind::kPushRange, std::move(name), g_thread_id,
dev_ctx);
}
void PopEvent(const std::string& name, DeviceContext* dev_ctx) {
GetEventList().Record(EventKind::kPopRange, std::move(name), g_thread_id,
dev_ctx);
}
void ParseEvents(std::vector<std::vector<Event>>& events, void ParseEvents(std::vector<std::vector<Event>>& events,
EventSortingKey sorted_by) { EventSortingKey sorted_by) {
// Output header information
std::cout << "------------------------->"
<< " Profiling Report "
<< "<-------------------------"
<< "\n\n";
#ifdef PADDLE_WITH_CUDA
std::cout << "Place: GPU" << std::endl;
#else
std::cout << "Place: CPU" << std::endl;
#endif
std::cout << "Time unit: ms" << std::endl;
std::string sort_domain = "event end time";
switch (sorted_by) {
case EventSortingKey::kCalls:
sort_domain = "number of calls";
break;
case EventSortingKey::kTotal:
sort_domain = "total time";
break;
case EventSortingKey::kMin:
sort_domain = "minimum time";
break;
case EventSortingKey::kMax:
sort_domain = "maximum time";
break;
case EventSortingKey::kAve:
sort_domain = "average time";
break;
default:
if (sorted_by != EventSortingKey::kDefault) {
std::cout << "Warning: unkown sorting key. ";
sorted_by = EventSortingKey::kDefault;
}
}
std::cout << "Sorted by " << sort_domain
<< " in descending order in the same thread\n\n";
// Parse events
std::vector<std::vector<EventItem>> events_table; std::vector<std::vector<EventItem>> events_table;
size_t max_name_width = 0; size_t max_name_width = 0;
for (size_t i = 0; i < events.size(); i++) { for (size_t i = 0; i < events.size(); i++) {
...@@ -234,19 +193,19 @@ void ParseEvents(std::vector<std::vector<Event>>& events, ...@@ -234,19 +193,19 @@ void ParseEvents(std::vector<std::vector<Event>>& events,
pushed_events.push_back(events[i][j]); pushed_events.push_back(events[i][j]);
} else if (events[i][j].kind() == "pop") { } else if (events[i][j].kind() == "pop") {
std::list<Event>::reverse_iterator rit = pushed_events.rbegin(); std::list<Event>::reverse_iterator rit = pushed_events.rbegin();
while (rit->name() != events[i][j].name() && while (rit != pushed_events.rend() &&
rit != pushed_events.rend()) { rit->name() != events[i][j].name()) {
++rit; ++rit;
} }
if (rit != pushed_events.rend()) { if (rit != pushed_events.rend()) {
#ifdef PADDLE_WITH_CUDA double event_time = (g_state == ProfilerState::kCUDA)
double event_time = rit->CudaElapsedMs(events[i][j]); ? rit->CudaElapsedMs(events[i][j])
#else : rit->CpuElapsedMs(events[i][j]);
double event_time = rit->CpuElapsedMs(events[i][j]);
#endif
std::string event_name = std::string event_name =
"thread" + std::to_string(rit->thread_id()) + "::" + rit->name(); "thread" + std::to_string(rit->thread_id()) + "::" + rit->name();
max_name_width = std::max(max_name_width, event_name.size()); max_name_width = std::max(max_name_width, event_name.size());
if (event_idx.find(event_name) == event_idx.end()) { if (event_idx.find(event_name) == event_idx.end()) {
event_idx[event_name] = event_items.size(); event_idx[event_name] = event_items.size();
EventItem event_item = {event_name, 1, event_time, EventItem event_item = {event_name, 1, event_time,
...@@ -264,11 +223,13 @@ void ParseEvents(std::vector<std::vector<Event>>& events, ...@@ -264,11 +223,13 @@ void ParseEvents(std::vector<std::vector<Event>>& events,
event_items[index].max_time = event_items[index].max_time =
std::max(event_time, event_items[index].max_time); std::max(event_time, event_items[index].max_time);
} }
// remove the start marker from the list // remove the start marker from the list
pushed_events.erase((++rit).base()); pushed_events.erase((++rit).base());
} else { } else {
std::cout << "Warning: can not find the start marker of event " LOG(WARNING) << "Cannot find the push marker of event \'"
<< events[i][j].name(); << events[i][j].name()
<< "\', which will be ignored in profiling report.";
} }
} }
} }
...@@ -294,19 +255,65 @@ void ParseEvents(std::vector<std::vector<Event>>& events, ...@@ -294,19 +255,65 @@ void ParseEvents(std::vector<std::vector<Event>>& events,
} }
}); });
} }
events_table.push_back(event_items); events_table.push_back(event_items);
// To check whether there are events with `push` but without `pop`
std::list<Event>::reverse_iterator rit = pushed_events.rbegin();
while (rit != pushed_events.rend()) {
if (rit->kind() == "push") {
LOG(WARNING) << "Cannot find the pop marker of event \'" << rit->name()
<< "\', which will be ignored in profiling report.";
}
++rit;
}
} }
// output events table
// Print report
PrintProfilingReport(events_table, sorted_by, max_name_width + 4, 12);
}
void PrintProfilingReport(std::vector<std::vector<EventItem>>& events_table,
EventSortingKey sorted_by, const size_t name_width,
const size_t data_width) {
if (g_profiler_place == "") return;
// Output header information
std::cout << "\n------------------------->"
<< " Profiling Report "
<< "<-------------------------\n\n";
std::cout << "Place: " << g_profiler_place << std::endl;
std::cout << "Time unit: ms" << std::endl;
std::string sort_domain = "event end time";
switch (sorted_by) {
case EventSortingKey::kCalls:
sort_domain = "number of calls";
break;
case EventSortingKey::kTotal:
sort_domain = "total time";
break;
case EventSortingKey::kMin:
sort_domain = "minimum time";
break;
case EventSortingKey::kMax:
sort_domain = "maximum time";
break;
case EventSortingKey::kAve:
sort_domain = "average time";
break;
default:
break;
}
std::cout << "Sorted by " << sort_domain
<< " in descending order in the same thread\n\n";
// Output events table
std::cout.setf(std::ios::left); std::cout.setf(std::ios::left);
const int data_width = 12; std::cout << std::setw(name_width) << "Event" << std::setw(data_width)
std::cout << std::setw(max_name_width + 4) << "Event" << std::setw(data_width)
<< "Calls" << std::setw(data_width) << "Total" << "Calls" << std::setw(data_width) << "Total"
<< std::setw(data_width) << "Min." << std::setw(data_width) << std::setw(data_width) << "Min." << std::setw(data_width)
<< "Max." << std::setw(data_width) << "Ave." << std::endl; << "Max." << std::setw(data_width) << "Ave." << std::endl;
for (size_t i = 0; i < events_table.size(); ++i) { for (size_t i = 0; i < events_table.size(); ++i) {
for (size_t j = 0; j < events_table[i].size(); ++j) { for (size_t j = 0; j < events_table[i].size(); ++j) {
EventItem& event_item = events_table[i][j]; EventItem& event_item = events_table[i][j];
std::cout << std::setw(max_name_width + 4) << event_item.name std::cout << std::setw(name_width) << event_item.name
<< std::setw(data_width) << event_item.calls << std::setw(data_width) << event_item.calls
<< std::setw(data_width) << event_item.total_time << std::setw(data_width) << event_item.total_time
<< std::setw(data_width) << event_item.min_time << std::setw(data_width) << event_item.min_time
...@@ -314,6 +321,7 @@ void ParseEvents(std::vector<std::vector<Event>>& events, ...@@ -314,6 +321,7 @@ void ParseEvents(std::vector<std::vector<Event>>& events,
<< std::setw(data_width) << event_item.ave_time << std::endl; << std::setw(data_width) << event_item.ave_time << std::endl;
} }
} }
std::cout << std::endl;
} }
} // namespace platform } // namespace platform
......
...@@ -134,5 +134,9 @@ enum EventSortingKey { kDefault, kCalls, kTotal, kMin, kMax, kAve }; ...@@ -134,5 +134,9 @@ enum EventSortingKey { kDefault, kCalls, kTotal, kMin, kMax, kAve };
void ParseEvents(std::vector<std::vector<Event>>&, void ParseEvents(std::vector<std::vector<Event>>&,
EventSortingKey sorted_by = EventSortingKey::kDefault); EventSortingKey sorted_by = EventSortingKey::kDefault);
// Print results
void PrintProfilingReport(std::vector<std::vector<EventItem>>& events_table,
EventSortingKey sorted_by, const size_t name_width,
const size_t data_width);
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
...@@ -99,8 +99,12 @@ TEST(RecordEvent, RecordEvent) { ...@@ -99,8 +99,12 @@ TEST(RecordEvent, RecordEvent) {
int counter = 1; int counter = 1;
while (counter != i * 1000) counter++; while (counter != i * 1000) counter++;
} }
// Bad Usage:
PushEvent("event_without_pop", dev_ctx);
PopEvent("event_without_push", dev_ctx);
std::vector<std::vector<Event>> events = paddle::platform::DisableProfiler(); std::vector<std::vector<Event>> events = paddle::platform::DisableProfiler();
// Will remove from test before merging // Will remove parsing-related code from test later
ParseEvents(events, EventSortingKey::kTotal); ParseEvents(events, EventSortingKey::kTotal);
int cuda_startup_count = 0; int cuda_startup_count = 0;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册