未验证 提交 5428cb99 编写于 作者: Q Qiao Longfei 提交者: GitHub

Profiler support merge data of all thread (#13811)

* profiler infor merge thread statistic information

* update profiler

* fix bug

* add merge thread msg to report

* optimize report

* statistic the time of ops in each thread but not all

* optimize report format

* optimize profile report

* optimize profile report
test=develop
上级 f99ea99e
...@@ -276,7 +276,7 @@ struct EventItem { ...@@ -276,7 +276,7 @@ struct EventItem {
// Print results // Print results
void PrintProfiler(const std::vector<std::vector<EventItem>>& events_table, void PrintProfiler(const std::vector<std::vector<EventItem>>& events_table,
const std::string& sorted_domain, const size_t name_width, const std::string& sorted_domain, const size_t name_width,
const size_t data_width, double total) { const size_t data_width, bool merge_thread) {
// Output header information // Output header information
std::cout << "\n------------------------->" std::cout << "\n------------------------->"
<< " Profiling Report " << " Profiling Report "
...@@ -292,6 +292,10 @@ void PrintProfiler(const std::vector<std::vector<EventItem>>& events_table, ...@@ -292,6 +292,10 @@ void PrintProfiler(const std::vector<std::vector<EventItem>>& events_table,
PADDLE_THROW("Invalid profiler state", g_state); PADDLE_THROW("Invalid profiler state", g_state);
} }
if (merge_thread) {
std::cout << "Note! This Report merge all thread info into one."
<< std::endl;
}
std::cout << "Place: " << place << std::endl; std::cout << "Place: " << place << std::endl;
std::cout << "Time unit: ms" << std::endl; std::cout << "Time unit: ms" << std::endl;
std::cout << "Sorted by " << sorted_domain std::cout << "Sorted by " << sorted_domain
...@@ -312,8 +316,7 @@ void PrintProfiler(const std::vector<std::vector<EventItem>>& events_table, ...@@ -312,8 +316,7 @@ void PrintProfiler(const std::vector<std::vector<EventItem>>& events_table,
<< std::setw(data_width) << event_item.min_time << std::setw(data_width) << event_item.min_time
<< std::setw(data_width) << event_item.max_time << std::setw(data_width) << event_item.max_time
<< std::setw(data_width) << event_item.ave_time << std::setw(data_width) << event_item.ave_time
<< std::setw(data_width) << event_item.total_time / total << std::setw(data_width) << event_item.ratio << std::endl;
<< std::endl;
} }
} }
std::cout << std::endl; std::cout << std::endl;
...@@ -321,8 +324,10 @@ void PrintProfiler(const std::vector<std::vector<EventItem>>& events_table, ...@@ -321,8 +324,10 @@ void PrintProfiler(const std::vector<std::vector<EventItem>>& events_table,
// Parse the event list and output the profiling report // Parse the event list and output the profiling report
void ParseEvents(const std::vector<std::vector<Event>>& events, void ParseEvents(const std::vector<std::vector<Event>>& events,
bool merge_thread,
EventSortingKey sorted_by = EventSortingKey::kDefault) { EventSortingKey sorted_by = EventSortingKey::kDefault) {
if (g_state == ProfilerState::kDisabled) return; if (g_state == ProfilerState::kDisabled) return;
if (merge_thread && events.size() < 2) return;
std::string sorted_domain; std::string sorted_domain;
std::function<bool(const EventItem&, const EventItem&)> sorted_func; std::function<bool(const EventItem&, const EventItem&)> sorted_func;
...@@ -361,34 +366,55 @@ void ParseEvents(const std::vector<std::vector<Event>>& events, ...@@ -361,34 +366,55 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
sorted_domain = "event first end time"; sorted_domain = "event first end time";
} }
const std::vector<std::vector<Event>>* analyze_events;
std::vector<std::vector<Event>> merged_events_list;
if (merge_thread) {
std::vector<Event> merged_events;
for (int i = 0; i < events.size(); ++i) {
for (int j = 0; j < events[i].size(); ++j) {
merged_events.push_back(events[i][j]);
}
}
merged_events_list.push_back(merged_events);
analyze_events = &merged_events_list;
} else {
analyze_events = &events;
}
std::vector<std::vector<EventItem>> events_table; std::vector<std::vector<EventItem>> events_table;
size_t max_name_width = 0; size_t max_name_width = 0;
double total = 0.; // the total time for (size_t i = 0; i < (*analyze_events).size(); i++) {
for (size_t i = 0; i < events.size(); i++) { double total = 0.; // the total time in one thread
std::list<Event> pushed_events; std::list<Event> pushed_events;
std::vector<EventItem> event_items; std::vector<EventItem> event_items;
std::unordered_map<std::string, int> event_idx; std::unordered_map<std::string, int> event_idx;
for (size_t j = 0; j < events[i].size(); j++) { for (size_t j = 0; j < (*analyze_events)[i].size(); j++) {
if (events[i][j].type() == EventType::kPushRange) { if ((*analyze_events)[i][j].type() == EventType::kPushRange) {
pushed_events.push_back(events[i][j]); pushed_events.push_back((*analyze_events)[i][j]);
} else if (events[i][j].type() == EventType::kPopRange) { } else if ((*analyze_events)[i][j].type() == EventType::kPopRange) {
std::list<Event>::reverse_iterator rit = pushed_events.rbegin(); std::list<Event>::reverse_iterator rit = pushed_events.rbegin();
while (rit != pushed_events.rend() && while (rit != pushed_events.rend() &&
rit->name() != events[i][j].name()) { rit->name() != (*analyze_events)[i][j].name()) {
++rit; ++rit;
} }
if (rit != pushed_events.rend()) { if (rit != pushed_events.rend()) {
double event_time = (g_state == ProfilerState::kCUDA || double event_time = (g_state == ProfilerState::kCUDA ||
g_state == ProfilerState::kAll) g_state == ProfilerState::kAll)
? rit->CudaElapsedMs(events[i][j]) ? rit->CudaElapsedMs((*analyze_events)[i][j])
: rit->CpuElapsedMs(events[i][j]); : rit->CpuElapsedMs((*analyze_events)[i][j]);
total += event_time; total += event_time;
std::string event_name = std::string event_name;
"thread" + std::to_string(rit->thread_id()) + "::" + rit->name(); if (merge_thread) {
max_name_width = std::max(max_name_width, event_name.size()); event_name = rit->name();
max_name_width = std::max(max_name_width, event_name.size());
} else {
event_name = "thread" + std::to_string(rit->thread_id()) + "::" +
rit->name();
max_name_width = std::max(max_name_width, event_name.size());
}
if (event_idx.find(event_name) == event_idx.end()) { if (event_idx.find(event_name) == event_idx.end()) {
event_idx[event_name] = event_items.size(); event_idx[event_name] = event_items.size();
...@@ -413,7 +439,7 @@ void ParseEvents(const std::vector<std::vector<Event>>& events, ...@@ -413,7 +439,7 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
pushed_events.erase((++rit).base()); pushed_events.erase((++rit).base());
} else { } else {
LOG(WARNING) << "Cannot find the push marker of event \'" LOG(WARNING) << "Cannot find the push marker of event \'"
<< events[i][j].name() << (*analyze_events)[i][j].name()
<< "\', which will be ignored in profiling report."; << "\', which will be ignored in profiling report.";
} }
} }
...@@ -421,6 +447,7 @@ void ParseEvents(const std::vector<std::vector<Event>>& events, ...@@ -421,6 +447,7 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
// average time // average time
for (auto& item : event_items) { for (auto& item : event_items) {
item.ave_time = item.total_time / item.calls; item.ave_time = item.total_time / item.calls;
item.ratio = item.total_time / total;
} }
// sort // sort
if (sorted_by != EventSortingKey::kDefault) { if (sorted_by != EventSortingKey::kDefault) {
...@@ -438,7 +465,8 @@ void ParseEvents(const std::vector<std::vector<Event>>& events, ...@@ -438,7 +465,8 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
} }
// Print report // Print report
PrintProfiler(events_table, sorted_domain, max_name_width + 4, 12, total); PrintProfiler(events_table, sorted_domain, max_name_width + 4, 12,
merge_thread);
} }
void DisableProfiler(EventSortingKey sorted_key, void DisableProfiler(EventSortingKey sorted_key,
...@@ -449,7 +477,8 @@ void DisableProfiler(EventSortingKey sorted_key, ...@@ -449,7 +477,8 @@ void DisableProfiler(EventSortingKey sorted_key,
Mark("_stop_profiler_", nullptr); Mark("_stop_profiler_", nullptr);
std::vector<std::vector<Event>> all_events = GetAllEvents(); std::vector<std::vector<Event>> all_events = GetAllEvents();
ParseEvents(all_events, sorted_key); ParseEvents(all_events, true, sorted_key);
ParseEvents(all_events, false, sorted_key);
ResetProfiler(); ResetProfiler();
DeviceTracer* tracer = GetDeviceTracer(); DeviceTracer* tracer = GetDeviceTracer();
if (tracer->IsEnabled()) { if (tracer->IsEnabled()) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册