Unverified · Commit 8456c3f4 · authored by wangchaochaohu, committed by GitHub

polish the profiler_help code (#22811)

Parent 2fd1ec1e
@@ -394,6 +394,38 @@ void GetChildMap(const std::multimap<std::string, EventItem> &sub_child_map,
   }
 }
+
+void PrintOverHead(const OverHead &overhead, const size_t data_width) {
+  double compute_time = overhead.total_time * overhead.compute_ratio;
+  double framework_time = overhead.total_time * overhead.framework_ratio;
+  std::cout.setf(std::ios::left);
+  std::cout << "Total time: " << overhead.total_time << std::endl;
+  std::cout << std::setw(25) << " Computation time"
+            << "Total: " << std::setw(data_width) << compute_time
+            << "Ratio: " << overhead.compute_ratio * 100 << "%" << std::endl;
+  std::cout << std::setw(25) << " Framework overhead"
+            << "Total: " << std::setw(data_width) << framework_time
+            << "Ratio: " << overhead.framework_ratio * 100 << "%" << std::endl;
+  std::cout << "\n-------------------------"
+            << " GpuMemCpy Summary "
+            << "-------------------------\n\n";
+  std::cout << std::setw(25) << "GpuMemcpy"
+            << "Calls: " << std::setw(data_width) << overhead.memcpy_item.calls
+            << "Total: " << std::setw(data_width)
+            << overhead.memcpy_item.total_time
+            << "Ratio: " << overhead.memcpy_item.ratio * 100 << "%"
+            << std::endl;
+  for (size_t i = 0; i < overhead.sub_memcpy_items.size(); ++i) {
+    EventItem item = overhead.sub_memcpy_items[i];
+    if (item.calls != 0) {
+      std::cout << std::setw(25) << " " + item.name
+                << "Calls: " << std::setw(data_width) << item.calls
+                << "Total: " << std::setw(data_width) << item.total_time
+                << "Ratio: " << item.ratio * 100 << "%" << std::endl;
+    }
+  }
+}
+
 // Print results
 void PrintProfiler(const std::vector<std::vector<EventItem>> &events_table,
                    const std::multimap<std::string, EventItem> &child_map,
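As an aside, the new `PrintOverHead` helper lines up its `Calls`/`Total`/`Ratio` columns purely with left-justified `std::setw` fields. Below is a minimal standalone sketch of that formatting pattern; the concrete values and the `data_width` of 12 are made up for illustration and are not the profiler's real numbers.

```cpp
#include <iomanip>
#include <iostream>

int main() {
  const size_t data_width = 12;    // illustrative width, not the profiler's actual value
  std::cout.setf(std::ios::left);  // left-justify every padded field, as PrintOverHead does
  // Pad the row label to 25 characters and each number to data_width so that
  // successive rows line up into columns without manual spacing.
  std::cout << std::setw(25) << "GpuMemcpy"
            << "Calls: " << std::setw(data_width) << 360
            << "Total: " << std::setw(data_width) << 1.25
            << "Ratio: " << 0.75 * 100 << "%" << std::endl;
  return 0;
}
```

Because every field is padded to a fixed width, each summary row stays aligned with the ones printed before it.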
@@ -428,38 +460,7 @@ void PrintProfiler(const std::vector<std::vector<EventItem>> &events_table,
             << " in descending order in the same thread\n\n";
   if (overhead.print) {
-    double compute_time = overhead.total_time * overhead.compute_ratio;
-    double framework_time = overhead.total_time * overhead.framework_ratio;
-    std::cout.setf(std::ios::left);
-    std::cout << "Total time: " << overhead.total_time << std::endl;
-    std::cout << std::setw(25) << " Computation time"
-              << "Total: " << std::setw(data_width) << compute_time
-              << "Ratio: " << overhead.compute_ratio * 100 << "%"
-              << std::endl;
-    std::cout << std::setw(25) << " Framework overhead"
-              << "Total: " << std::setw(data_width) << framework_time
-              << "Ratio: " << overhead.framework_ratio * 100 << "%"
-              << std::endl;
-    std::cout << "\n-------------------------"
-              << " GpuMemCpy Summary "
-              << "-------------------------\n\n";
-    std::cout << std::setw(25) << "GpuMemcpy"
-              << "Calls: " << std::setw(data_width)
-              << overhead.memcpy_item.calls
-              << "Total: " << std::setw(data_width)
-              << overhead.memcpy_item.total_time
-              << "Ratio: " << overhead.memcpy_item.ratio * 100 << "%"
-              << std::endl;
-    for (size_t i = 0; i < overhead.sub_memcpy_items.size(); ++i) {
-      EventItem item = overhead.sub_memcpy_items[i];
-      if (item.calls != 0) {
-        std::cout << std::setw(25) << " " + item.name
-                  << "Calls: " << std::setw(data_width) << item.calls
-                  << "Total: " << std::setw(data_width) << item.total_time
-                  << "Ratio: " << item.ratio * 100 << "%" << std::endl;
-      }
-    }
+    PrintOverHead(overhead, data_width);
   }
   std::cout << "\n-------------------------"
             << " Event Summary "
@@ -522,37 +523,13 @@ void PrintProfiler(const std::vector<std::vector<EventItem>> &events_table,
   }
 }
-// Parse the event list and output the profiling report
-void ParseEvents(const std::vector<std::vector<Event>> &events,
-                 bool merge_thread,
-                 EventSortingKey sorted_by = EventSortingKey::kDefault) {
-  if (g_state == ProfilerState::kDisabled) return;
-  if (merge_thread && events.size() < 2) return;
-
-  std::string sorted_domain;
-  std::function<bool(const EventItem &, const EventItem &)> sorted_func;
-  sorted_func = SetSortedFunc(sorted_by, &sorted_domain);
-
-  const std::vector<std::vector<Event>> *analyze_events;
-  std::vector<std::vector<Event>> merged_events_list;
-  if (merge_thread) {
-    std::vector<Event> merged_events;
-    for (size_t i = 0; i < events.size(); ++i) {
-      for (size_t j = 0; j < events[i].size(); ++j) {
-        merged_events.push_back(events[i][j]);
-      }
-    }
-    merged_events_list.push_back(merged_events);
-    analyze_events = &merged_events_list;
-  } else {
-    analyze_events = &events;
-  }
-  std::vector<std::vector<EventItem>> events_table;
-  std::multimap<std::string, EventItem> child_map;
-  size_t max_name_width = 0;
-  OverHead overhead;
+
+void AnalyzeEvent(
+    const std::vector<std::vector<Event>> *analyze_events,
+    std::vector<std::vector<EventItem>> *events_table,
+    std::multimap<std::string, EventItem> *child_map,
+    std::function<bool(const EventItem &, const EventItem &)> sorted_func,
+    EventSortingKey sorted_by, size_t *max_name_width, OverHead *overhead,
+    bool merge_thread) {
   for (size_t i = 0; i < (*analyze_events).size(); i++) {
     double total = 0.;  // the total time in one thread
     std::list<Event> pushed_events;
@@ -563,7 +540,7 @@ void ParseEvents(const std::vector<std::vector<Event>> &events,
     for (size_t j = 0; j < (*analyze_events)[i].size(); j++) {
       Event analyze_event = (*analyze_events)[i][j];
-      SetEvent(merge_thread, analyze_event, &max_name_width, &pushed_events,
+      SetEvent(merge_thread, analyze_event, max_name_width, &pushed_events,
               &event_items, &event_idx);
     }
@@ -593,7 +570,6 @@ void ParseEvents(const std::vector<std::vector<Event>> &events,
         total += event_items[j].total_time;
       }
     }
-
     // average time
     for (auto &item : main_event_items) {
       item.ave_time = item.total_time / item.calls;
@@ -603,20 +579,18 @@ void ParseEvents(const std::vector<std::vector<Event>> &events,
       it->second.ratio = it->second.total_time / total;
       it->second.ave_time = it->second.total_time / it->second.calls;
     }
     // When multi-threaded, overhead are printed only if merge_thread is true
     if ((*analyze_events).size() == 1) {
-      overhead.total_time = total;
-      overhead.print = true;
-      ComputeOverhead(sub_child_map, &overhead);
+      overhead->total_time = total;
+      overhead->print = true;
+      ComputeOverhead(sub_child_map, overhead);
     }
     // sort
     if (sorted_by != EventSortingKey::kDefault) {
       std::sort(main_event_items.begin(), main_event_items.end(), sorted_func);
     }
-    events_table.push_back(main_event_items);
+    events_table->push_back(main_event_items);
     // log warning if there are events with `push` but without `pop`
     std::list<Event>::reverse_iterator rit = pushed_events.rbegin();
     while (rit != pushed_events.rend()) {
@@ -625,8 +599,42 @@ void ParseEvents(const std::vector<std::vector<Event>> &events,
       ++rit;
     }
-    GetChildMap(sub_child_map, &child_map);
+    GetChildMap(sub_child_map, child_map);
   }
+}
+
+// Parse the event list and output the profiling report
+void ParseEvents(const std::vector<std::vector<Event>> &events,
+                 bool merge_thread,
+                 EventSortingKey sorted_by = EventSortingKey::kDefault) {
+  if (g_state == ProfilerState::kDisabled) return;
+  if (merge_thread && events.size() < 2) return;
+
+  std::string sorted_domain;
+  std::function<bool(const EventItem &, const EventItem &)> sorted_func;
+  sorted_func = SetSortedFunc(sorted_by, &sorted_domain);
+
+  const std::vector<std::vector<Event>> *analyze_events;
+  std::vector<std::vector<Event>> merged_events_list;
+  if (merge_thread) {
+    std::vector<Event> merged_events;
+    for (size_t i = 0; i < events.size(); ++i) {
+      for (size_t j = 0; j < events[i].size(); ++j) {
+        merged_events.push_back(events[i][j]);
+      }
+    }
+    merged_events_list.push_back(merged_events);
+    analyze_events = &merged_events_list;
+  } else {
+    analyze_events = &events;
+  }
+  std::vector<std::vector<EventItem>> events_table;
+  std::multimap<std::string, EventItem> child_map;
+  size_t max_name_width = 0;
+  OverHead overhead;
+  AnalyzeEvent(analyze_events, &events_table, &child_map, sorted_func,
+               sorted_by, &max_name_width, &overhead, merge_thread);
+
   // Print report
   PrintProfiler(events_table, child_map, overhead, sorted_domain,
...
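Taken together, the change factors the old monolithic `ParseEvents` into smaller pieces: `AnalyzeEvent` fills the per-thread `events_table`, `child_map`, and `OverHead` outputs, `PrintProfiler` prints the report, and `PrintOverHead` handles the overhead/GpuMemCpy summary when `overhead.print` is set. Below is a rough, self-contained sketch of that call shape; every type here is a simplified stand-in rather than the real profiler definition, and the bodies are reduced to bare control flow.

```cpp
#include <iostream>
#include <vector>

// Simplified stand-ins for the profiler's Event/EventItem/OverHead types.
struct Event { double ms = 0.0; };
struct EventItem { double total_time = 0.0; };
struct OverHead { bool print = false; double total_time = 0.0; };

// Analysis only fills the output structures; it does no printing.
void AnalyzeEvent(const std::vector<std::vector<Event>> *analyze_events,
                  std::vector<std::vector<EventItem>> *events_table,
                  OverHead *overhead) {
  for (const auto &thread_events : *analyze_events) {
    EventItem item;
    for (const auto &e : thread_events) item.total_time += e.ms;
    events_table->push_back({item});
    overhead->total_time += item.total_time;
  }
  // Overhead is only reported for the single-thread (or merged) case.
  overhead->print = analyze_events->size() == 1;
}

void PrintOverHead(const OverHead &overhead) {
  std::cout << "Total time: " << overhead.total_time << "\n";
}

// Printing consumes the analysis results and delegates the overhead block.
void PrintProfiler(const std::vector<std::vector<EventItem>> &events_table,
                   const OverHead &overhead) {
  if (overhead.print) PrintOverHead(overhead);
  std::cout << "threads reported: " << events_table.size() << "\n";
}

void ParseEvents(const std::vector<std::vector<Event>> &events) {
  const auto *analyze_events = &events;  // optional thread merging elided
  std::vector<std::vector<EventItem>> events_table;
  OverHead overhead;
  AnalyzeEvent(analyze_events, &events_table, &overhead);
  PrintProfiler(events_table, overhead);
}

int main() {
  std::vector<std::vector<Event>> events(1);
  events[0].push_back(Event{1.5});
  events[0].push_back(Event{2.0});
  ParseEvents(events);
  return 0;
}
```

As far as the diff shows, the point of the split is that the analysis step only fills data structures while the two Print* helpers only format them.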