Unverified commit 8456c3f4 authored by W wangchaochaohu, committed by GitHub

polish the profiler_help code (#22811)

Parent 2fd1ec1e
@@ -394,6 +394,38 @@ void GetChildMap(const std::multimap<std::string, EventItem> &sub_child_map,
}
}
void PrintOverHead(const OverHead &overhead, const size_t data_width) {
double compute_time = overhead.total_time * overhead.compute_ratio;
double framework_time = overhead.total_time * overhead.framework_ratio;
std::cout.setf(std::ios::left);
std::cout << "Total time: " << overhead.total_time << std::endl;
std::cout << std::setw(25) << " Computation time"
<< "Total: " << std::setw(data_width) << compute_time
<< "Ratio: " << overhead.compute_ratio * 100 << "%" << std::endl;
std::cout << std::setw(25) << " Framework overhead"
<< "Total: " << std::setw(data_width) << framework_time
<< "Ratio: " << overhead.framework_ratio * 100 << "%" << std::endl;
std::cout << "\n-------------------------"
<< " GpuMemCpy Summary "
<< "-------------------------\n\n";
std::cout << std::setw(25) << "GpuMemcpy"
<< "Calls: " << std::setw(data_width) << overhead.memcpy_item.calls
<< "Total: " << std::setw(data_width)
<< overhead.memcpy_item.total_time
<< "Ratio: " << overhead.memcpy_item.ratio * 100 << "%"
<< std::endl;
for (size_t i = 0; i < overhead.sub_memcpy_items.size(); ++i) {
EventItem item = overhead.sub_memcpy_items[i];
if (item.calls != 0) {
std::cout << std::setw(25) << " " + item.name
<< "Calls: " << std::setw(data_width) << item.calls
<< "Total: " << std::setw(data_width) << item.total_time
<< "Ratio: " << item.ratio * 100 << "%" << std::endl;
}
}
}
// Print results
void PrintProfiler(const std::vector<std::vector<EventItem>> &events_table,
const std::multimap<std::string, EventItem> &child_map,
@@ -428,38 +460,7 @@ void PrintProfiler(const std::vector<std::vector<EventItem>> &events_table,
<< " in descending order in the same thread\n\n";
if (overhead.print) {
double compute_time = overhead.total_time * overhead.compute_ratio;
double framework_time = overhead.total_time * overhead.framework_ratio;
std::cout.setf(std::ios::left);
std::cout << "Total time: " << overhead.total_time << std::endl;
std::cout << std::setw(25) << " Computation time"
<< "Total: " << std::setw(data_width) << compute_time
<< "Ratio: " << overhead.compute_ratio * 100 << "%"
<< std::endl;
std::cout << std::setw(25) << " Framework overhead"
<< "Total: " << std::setw(data_width) << framework_time
<< "Ratio: " << overhead.framework_ratio * 100 << "%"
<< std::endl;
std::cout << "\n-------------------------"
<< " GpuMemCpy Summary "
<< "-------------------------\n\n";
std::cout << std::setw(25) << "GpuMemcpy"
<< "Calls: " << std::setw(data_width)
<< overhead.memcpy_item.calls
<< "Total: " << std::setw(data_width)
<< overhead.memcpy_item.total_time
<< "Ratio: " << overhead.memcpy_item.ratio * 100 << "%"
<< std::endl;
for (size_t i = 0; i < overhead.sub_memcpy_items.size(); ++i) {
EventItem item = overhead.sub_memcpy_items[i];
if (item.calls != 0) {
std::cout << std::setw(25) << " " + item.name
<< "Calls: " << std::setw(data_width) << item.calls
<< "Total: " << std::setw(data_width) << item.total_time
<< "Ratio: " << item.ratio * 100 << "%" << std::endl;
}
}
PrintOverHead(overhead, data_width);
}
std::cout << "\n-------------------------"
<< " Event Summary "
@@ -522,37 +523,13 @@ void PrintProfiler(const std::vector<std::vector<EventItem>> &events_table,
}
}
// Parse the event list and output the profiling report
void ParseEvents(const std::vector<std::vector<Event>> &events,
bool merge_thread,
EventSortingKey sorted_by = EventSortingKey::kDefault) {
if (g_state == ProfilerState::kDisabled) return;
if (merge_thread && events.size() < 2) return;
std::string sorted_domain;
std::function<bool(const EventItem &, const EventItem &)> sorted_func;
sorted_func = SetSortedFunc(sorted_by, &sorted_domain);
const std::vector<std::vector<Event>> *analyze_events;
std::vector<std::vector<Event>> merged_events_list;
if (merge_thread) {
std::vector<Event> merged_events;
for (size_t i = 0; i < events.size(); ++i) {
for (size_t j = 0; j < events[i].size(); ++j) {
merged_events.push_back(events[i][j]);
}
}
merged_events_list.push_back(merged_events);
analyze_events = &merged_events_list;
} else {
analyze_events = &events;
}
std::vector<std::vector<EventItem>> events_table;
std::multimap<std::string, EventItem> child_map;
size_t max_name_width = 0;
OverHead overhead;
void AnalyzeEvent(
const std::vector<std::vector<Event>> *analyze_events,
std::vector<std::vector<EventItem>> *events_table,
std::multimap<std::string, EventItem> *child_map,
std::function<bool(const EventItem &, const EventItem &)> sorted_func,
EventSortingKey sorted_by, size_t *max_name_width, OverHead *overhead,
bool merge_thread) {
for (size_t i = 0; i < (*analyze_events).size(); i++) {
double total = 0.; // the total time in one thread
std::list<Event> pushed_events;
@@ -563,7 +540,7 @@ void ParseEvents(const std::vector<std::vector<Event>> &events,
for (size_t j = 0; j < (*analyze_events)[i].size(); j++) {
Event analyze_event = (*analyze_events)[i][j];
SetEvent(merge_thread, analyze_event, &max_name_width, &pushed_events,
SetEvent(merge_thread, analyze_event, max_name_width, &pushed_events,
&event_items, &event_idx);
}
@@ -593,7 +570,6 @@ void ParseEvents(const std::vector<std::vector<Event>> &events,
total += event_items[j].total_time;
}
}
// average time
for (auto &item : main_event_items) {
item.ave_time = item.total_time / item.calls;
@@ -603,20 +579,18 @@ void ParseEvents(const std::vector<std::vector<Event>> &events,
it->second.ratio = it->second.total_time / total;
it->second.ave_time = it->second.total_time / it->second.calls;
}
// When multi-threaded, the overhead is printed only if merge_thread is true
if ((*analyze_events).size() == 1) {
overhead.total_time = total;
overhead.print = true;
ComputeOverhead(sub_child_map, &overhead);
overhead->total_time = total;
overhead->print = true;
ComputeOverhead(sub_child_map, overhead);
}
// sort
if (sorted_by != EventSortingKey::kDefault) {
std::sort(main_event_items.begin(), main_event_items.end(), sorted_func);
}
events_table.push_back(main_event_items);
events_table->push_back(main_event_items);
// log warning if there are events with `push` but without `pop`
std::list<Event>::reverse_iterator rit = pushed_events.rbegin();
while (rit != pushed_events.rend()) {
@@ -625,8 +599,42 @@ void ParseEvents(const std::vector<std::vector<Event>> &events,
++rit;
}
GetChildMap(sub_child_map, &child_map);
GetChildMap(sub_child_map, child_map);
}
}
// Parse the event list and output the profiling report
void ParseEvents(const std::vector<std::vector<Event>> &events,
bool merge_thread,
EventSortingKey sorted_by = EventSortingKey::kDefault) {
if (g_state == ProfilerState::kDisabled) return;
if (merge_thread && events.size() < 2) return;
std::string sorted_domain;
std::function<bool(const EventItem &, const EventItem &)> sorted_func;
sorted_func = SetSortedFunc(sorted_by, &sorted_domain);
const std::vector<std::vector<Event>> *analyze_events;
std::vector<std::vector<Event>> merged_events_list;
if (merge_thread) {
std::vector<Event> merged_events;
for (size_t i = 0; i < events.size(); ++i) {
for (size_t j = 0; j < events[i].size(); ++j) {
merged_events.push_back(events[i][j]);
}
}
merged_events_list.push_back(merged_events);
analyze_events = &merged_events_list;
} else {
analyze_events = &events;
}
std::vector<std::vector<EventItem>> events_table;
std::multimap<std::string, EventItem> child_map;
size_t max_name_width = 0;
OverHead overhead;
AnalyzeEvent(analyze_events, &events_table, &child_map, sorted_func,
sorted_by, &max_name_width, &overhead, merge_thread);
// Print report
PrintProfiler(events_table, child_map, overhead, sorted_domain,
......
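
Note: after this change, ParseEvents delegates per-thread analysis to the new AnalyzeEvent helper, and PrintProfiler delegates the overhead / GpuMemCpy summary to the extracted PrintOverHead helper. For readers unfamiliar with the iostream formatting idiom that PrintOverHead relies on, here is a minimal, self-contained sketch (not part of this commit; the numbers and data_width are made up) showing how std::cout.setf(std::ios::left) together with std::setw left-justifies each field so the "Total:" and "Ratio:" columns line up:

#include <iomanip>
#include <iostream>

int main() {
  // Hypothetical values; in the profiler these come from the OverHead struct.
  const int data_width = 12;
  const double total_time = 250.0;      // milliseconds
  const double compute_ratio = 0.8;     // 80% spent in computation
  const double framework_ratio = 0.2;   // 20% framework overhead

  std::cout.setf(std::ios::left);       // left-justify every std::setw field
  std::cout << "Total time: " << total_time << std::endl;
  std::cout << std::setw(25) << "  Computation time"
            << "Total: " << std::setw(data_width) << total_time * compute_ratio
            << "Ratio: " << compute_ratio * 100 << "%" << std::endl;
  std::cout << std::setw(25) << "  Framework overhead"
            << "Total: " << std::setw(data_width) << total_time * framework_ratio
            << "Ratio: " << framework_ratio * 100 << "%" << std::endl;
  return 0;
}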