提交 0aa03a82 编写于 作者: Y Yibing Liu

Enable sorting the profiling result by different keys

上级 2d94eca8
...@@ -182,47 +182,88 @@ void PopEvent(const std::string& name, DeviceContext* dev_ctx) { ...@@ -182,47 +182,88 @@ void PopEvent(const std::string& name, DeviceContext* dev_ctx) {
dev_ctx); dev_ctx);
} }
void ParseEvents(std::vector<std::vector<Event>>& events) { void ParseEvents(std::vector<std::vector<Event>>& events,
// Event name :: counts :: ave :: min :: max :: total EventSortingKey sorted_by) {
std::map<std::string, std::tuple<int, double, double, double, double>> // Output header information
events_table; std::cout << "------------------------->"
<< " Profiling Report "
<< "<-------------------------"
<< "\n\n";
#ifdef PADDLE_WITH_CUDA
std::cout << "Place: GPU" << std::endl;
#else
std::cout << "Place: CPU" << std::endl;
#endif
std::cout << "Time unit: ms" << std::endl;
std::string sort_domain = "event end time";
switch (sorted_by) {
case EventSortingKey::kCalls:
sort_domain = "number of calls";
break;
case EventSortingKey::kTotal:
sort_domain = "total time";
break;
case EventSortingKey::kMin:
sort_domain = "minimum time";
break;
case EventSortingKey::kMax:
sort_domain = "maximum time";
break;
case EventSortingKey::kAve:
sort_domain = "average time";
break;
default:
if (sorted_by != EventSortingKey::kDefault) {
std::cout << "Warning: unkown sorting key. ";
sorted_by = EventSortingKey::kDefault;
}
}
std::cout << "Sorted by " << sort_domain
<< " in descending order in the same thread\n\n";
// Parse events
std::vector<std::vector<EventItem>> events_table;
size_t max_name_width = 0; size_t max_name_width = 0;
for (size_t i = 0; i < events.size(); i++) { for (size_t i = 0; i < events.size(); i++) {
std::list<Event> pushed_events; std::list<Event> pushed_events;
std::vector<EventItem> event_items;
std::unordered_map<std::string, int> event_idx;
for (size_t j = 0; j < events[i].size(); j++) { for (size_t j = 0; j < events[i].size(); j++) {
if (events[i][j].kind() == "push") { if (events[i][j].kind() == "push") {
pushed_events.push_back(events[i][j]); pushed_events.push_back(events[i][j]);
} } else if (events[i][j].kind() == "pop") {
if (events[i][j].kind() == "pop") {
std::list<Event>::reverse_iterator rit = pushed_events.rbegin(); std::list<Event>::reverse_iterator rit = pushed_events.rbegin();
while (rit->name() != events[i][j].name() && while (rit->name() != events[i][j].name() &&
rit != pushed_events.rend()) { rit != pushed_events.rend()) {
++rit; ++rit;
} }
if (rit != pushed_events.rend()) { if (rit != pushed_events.rend()) {
// get event time in ms
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
double event_time = rit->CudaElapsedUs(events[i][j]); double event_time = rit->CudaElapsedUs(events[i][j]) / 1000.0;
#else #else
double event_time = rit->CpuElapsedUs(events[i][j]); double event_time = rit->CpuElapsedUs(events[i][j]) / 1000.0;
#endif #endif
std::string event_name = std::string event_name =
"thread" + std::to_string(rit->thread_id()) + "::" + rit->name(); "thread" + std::to_string(rit->thread_id()) + "::" + rit->name();
max_name_width = std::max(max_name_width, event_name.size()); max_name_width = std::max(max_name_width, event_name.size());
if (events_table.find(event_name) == events_table.end()) { if (event_idx.find(event_name) == event_idx.end()) {
events_table[event_name] = event_idx[event_name] = event_items.size();
std::make_tuple(1, event_time, event_time, event_time, 0); EventItem event_item = {event_name, 1, event_time,
event_time, event_time, event_time};
event_items.push_back(event_item);
} else { } else {
std::get<0>(events_table[event_name]) += 1; int index = event_idx[event_name];
event_items[index].calls += 1;
// total time // total time
std::get<1>(events_table[event_name]) += event_time; event_items[index].total_time += event_time;
// min time // min time
if (std::get<2>(events_table[event_name]) > event_time) { event_items[index].min_time =
std::get<2>(events_table[event_name]) = event_time; std::min(event_time, event_items[index].min_time);
}
// max time // max time
if (std::get<3>(events_table[event_name]) < event_time) { event_items[index].max_time =
std::get<3>(events_table[event_name]) = event_time; std::max(event_time, event_items[index].max_time);
}
} }
// remove the start marker from the list // remove the start marker from the list
pushed_events.erase((++rit).base()); pushed_events.erase((++rit).base());
...@@ -232,6 +273,29 @@ void ParseEvents(std::vector<std::vector<Event>>& events) { ...@@ -232,6 +273,29 @@ void ParseEvents(std::vector<std::vector<Event>>& events) {
} }
} }
} }
// average time
for (auto& item : event_items) {
item.ave_time = item.total_time / item.calls;
}
// sort
if (sorted_by != EventSortingKey::kDefault) {
std::sort(event_items.begin(), event_items.end(),
[&](EventItem& a, EventItem& b) {
switch (sorted_by) {
case EventSortingKey::kCalls:
return a.calls > b.calls;
case EventSortingKey::kTotal:
return a.total_time > b.total_time;
case EventSortingKey::kMin:
return a.min_time > b.min_time;
case EventSortingKey::kMax:
return a.max_time > b.max_time;
default:
return a.ave_time > b.ave_time;
}
});
}
events_table.push_back(event_items);
} }
// output events table // output events table
std::cout.setf(std::ios::left); std::cout.setf(std::ios::left);
...@@ -240,18 +304,16 @@ void ParseEvents(std::vector<std::vector<Event>>& events) { ...@@ -240,18 +304,16 @@ void ParseEvents(std::vector<std::vector<Event>>& events) {
<< "Calls" << std::setw(data_width) << "Total" << "Calls" << std::setw(data_width) << "Total"
<< std::setw(data_width) << "Min." << std::setw(data_width) << std::setw(data_width) << "Min." << std::setw(data_width)
<< "Max." << std::setw(data_width) << "Ave." << std::endl; << "Max." << std::setw(data_width) << "Ave." << std::endl;
for (std::map<std::string, for (size_t i = 0; i < events_table.size(); ++i) {
std::tuple<int, double, double, double, double>>::iterator it = for (size_t j = 0; j < events_table[i].size(); ++j) {
events_table.begin(); EventItem& event_item = events_table[i][j];
it != events_table.end(); ++it) { std::cout << std::setw(max_name_width + 4) << event_item.name
// average time << std::setw(data_width) << event_item.calls
std::get<4>(it->second) = std::get<1>(it->second) / std::get<0>(it->second); << std::setw(data_width) << event_item.total_time
std::cout << std::setw(max_name_width + 4) << it->first << std::setw(data_width) << event_item.min_time
<< std::setw(data_width) << std::get<0>(it->second) << std::setw(data_width) << event_item.max_time
<< std::setw(data_width) << std::get<1>(it->second) << std::setw(data_width) << event_item.ave_time << std::endl;
<< std::setw(data_width) << std::get<2>(it->second) }
<< std::setw(data_width) << std::get<3>(it->second)
<< std::setw(data_width) << std::get<4>(it->second) << std::endl;
} }
} }
......
...@@ -117,7 +117,22 @@ void EnableProfiler(ProfilerState state); ...@@ -117,7 +117,22 @@ void EnableProfiler(ProfilerState state);
// event_lists, event_lists[i][j] represents the j-th Event of i-th thread. // event_lists, event_lists[i][j] represents the j-th Event of i-th thread.
std::vector<std::vector<Event>> DisableProfiler(); std::vector<std::vector<Event>> DisableProfiler();
void ParseEvents(std::vector<std::vector<Event>>&); // The information of each event given in the profiling report
struct EventItem {
std::string name;
int calls;
double total_time;
double min_time;
double max_time;
double ave_time;
};
// Candidate keys to sort the profiling report
enum EventSortingKey { kDefault, kCalls, kTotal, kMin, kMax, kAve };
// Parse the event list and output the profiling report
void ParseEvents(std::vector<std::vector<Event>>&,
EventSortingKey sorted_by = EventSortingKey::kDefault);
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
...@@ -55,6 +55,7 @@ TEST(RecordEvent, RecordEvent) { ...@@ -55,6 +55,7 @@ TEST(RecordEvent, RecordEvent) {
using paddle::platform::EventKind; using paddle::platform::EventKind;
using paddle::platform::RecordEvent; using paddle::platform::RecordEvent;
using paddle::platform::ProfilerState; using paddle::platform::ProfilerState;
using paddle::platform::EventSortingKey;
ProfilerState state = ProfilerState::kCPU; ProfilerState state = ProfilerState::kCPU;
DeviceContext* dev_ctx = nullptr; DeviceContext* dev_ctx = nullptr;
...@@ -70,10 +71,11 @@ TEST(RecordEvent, RecordEvent) { ...@@ -70,10 +71,11 @@ TEST(RecordEvent, RecordEvent) {
/* Usage 1: /* Usage 1:
* PushEvent(evt_name, dev_ctx); * PushEvent(evt_name, dev_ctx);
* ... * ...
* code to time * code to analyze
* ... * ...
* PopEvent(evt_name, dev_ctx); * PopEvent(evt_name, dev_ctx);
*/ */
for (int loop = 0; loop < 3; ++loop) {
for (int i = 1; i < 5; ++i) { for (int i = 1; i < 5; ++i) {
std::string name = "op_" + std::to_string(i); std::string name = "op_" + std::to_string(i);
PushEvent(name, dev_ctx); PushEvent(name, dev_ctx);
...@@ -81,11 +83,14 @@ TEST(RecordEvent, RecordEvent) { ...@@ -81,11 +83,14 @@ TEST(RecordEvent, RecordEvent) {
while (counter != i * 1000) counter++; while (counter != i * 1000) counter++;
PopEvent(name, dev_ctx); PopEvent(name, dev_ctx);
} }
}
/* Usage 2: /* Usage 2:
* { * {
* RecordEvent record_event(name, dev_ctx); * RecordEvent record_event(name, dev_ctx);
* ... * ...
* code to analyze
* ...
* } * }
*/ */
for (int i = 1; i < 5; ++i) { for (int i = 1; i < 5; ++i) {
...@@ -94,19 +99,13 @@ TEST(RecordEvent, RecordEvent) { ...@@ -94,19 +99,13 @@ TEST(RecordEvent, RecordEvent) {
int counter = 1; int counter = 1;
while (counter != i * 1000) counter++; while (counter != i * 1000) counter++;
} }
for (int i = 1; i < 5; ++i) {
std::string name = "evs_op_" + std::to_string(i);
RecordEvent record_event(name, dev_ctx);
int counter = 1;
while (counter != i * 1000) counter++;
}
std::vector<std::vector<Event>> events = paddle::platform::DisableProfiler(); std::vector<std::vector<Event>> events = paddle::platform::DisableProfiler();
// Will remove from test before merging
ParseEvents(events, EventSortingKey::kTotal);
int cuda_startup_count = 0; int cuda_startup_count = 0;
int start_profiler_count = 0; int start_profiler_count = 0;
int stop_profiler_count = 0; int stop_profiler_count = 0;
ParseEvents(events);
for (size_t i = 0; i < events.size(); ++i) { for (size_t i = 0; i < events.size(); ++i) {
for (size_t j = 0; j < events[i].size(); ++j) { for (size_t j = 0; j < events[i].size(); ++j) {
if (events[i][j].name() == "_cuda_startup_") ++cuda_startup_count; if (events[i][j].name() == "_cuda_startup_") ++cuda_startup_count;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册