未验证 提交 5428cb99 编写于 作者: Q Qiao Longfei 提交者: GitHub

Profiler support merge data of all thread (#13811)

* profiler infor merge thread statistic information

* update profiler

* fix bug

* add merge thread msg to report

* optimize report

* statistic the time of ops in each thread but not all

* optimize report format

* optimize profile report

* optimize profile report
test=develop
上级 f99ea99e
......@@ -276,7 +276,7 @@ struct EventItem {
// Print results
void PrintProfiler(const std::vector<std::vector<EventItem>>& events_table,
const std::string& sorted_domain, const size_t name_width,
const size_t data_width, double total) {
const size_t data_width, bool merge_thread) {
// Output header information
std::cout << "\n------------------------->"
<< " Profiling Report "
......@@ -292,6 +292,10 @@ void PrintProfiler(const std::vector<std::vector<EventItem>>& events_table,
PADDLE_THROW("Invalid profiler state", g_state);
}
if (merge_thread) {
std::cout << "Note! This Report merge all thread info into one."
<< std::endl;
}
std::cout << "Place: " << place << std::endl;
std::cout << "Time unit: ms" << std::endl;
std::cout << "Sorted by " << sorted_domain
......@@ -312,8 +316,7 @@ void PrintProfiler(const std::vector<std::vector<EventItem>>& events_table,
<< std::setw(data_width) << event_item.min_time
<< std::setw(data_width) << event_item.max_time
<< std::setw(data_width) << event_item.ave_time
<< std::setw(data_width) << event_item.total_time / total
<< std::endl;
<< std::setw(data_width) << event_item.ratio << std::endl;
}
}
std::cout << std::endl;
......@@ -321,8 +324,10 @@ void PrintProfiler(const std::vector<std::vector<EventItem>>& events_table,
// Parse the event list and output the profiling report
void ParseEvents(const std::vector<std::vector<Event>>& events,
bool merge_thread,
EventSortingKey sorted_by = EventSortingKey::kDefault) {
if (g_state == ProfilerState::kDisabled) return;
if (merge_thread && events.size() < 2) return;
std::string sorted_domain;
std::function<bool(const EventItem&, const EventItem&)> sorted_func;
......@@ -361,34 +366,55 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
sorted_domain = "event first end time";
}
const std::vector<std::vector<Event>>* analyze_events;
std::vector<std::vector<Event>> merged_events_list;
if (merge_thread) {
std::vector<Event> merged_events;
for (int i = 0; i < events.size(); ++i) {
for (int j = 0; j < events[i].size(); ++j) {
merged_events.push_back(events[i][j]);
}
}
merged_events_list.push_back(merged_events);
analyze_events = &merged_events_list;
} else {
analyze_events = &events;
}
std::vector<std::vector<EventItem>> events_table;
size_t max_name_width = 0;
double total = 0.; // the total time
for (size_t i = 0; i < events.size(); i++) {
for (size_t i = 0; i < (*analyze_events).size(); i++) {
double total = 0.; // the total time in one thread
std::list<Event> pushed_events;
std::vector<EventItem> event_items;
std::unordered_map<std::string, int> event_idx;
for (size_t j = 0; j < events[i].size(); j++) {
if (events[i][j].type() == EventType::kPushRange) {
pushed_events.push_back(events[i][j]);
} else if (events[i][j].type() == EventType::kPopRange) {
for (size_t j = 0; j < (*analyze_events)[i].size(); j++) {
if ((*analyze_events)[i][j].type() == EventType::kPushRange) {
pushed_events.push_back((*analyze_events)[i][j]);
} else if ((*analyze_events)[i][j].type() == EventType::kPopRange) {
std::list<Event>::reverse_iterator rit = pushed_events.rbegin();
while (rit != pushed_events.rend() &&
rit->name() != events[i][j].name()) {
rit->name() != (*analyze_events)[i][j].name()) {
++rit;
}
if (rit != pushed_events.rend()) {
double event_time = (g_state == ProfilerState::kCUDA ||
g_state == ProfilerState::kAll)
? rit->CudaElapsedMs(events[i][j])
: rit->CpuElapsedMs(events[i][j]);
? rit->CudaElapsedMs((*analyze_events)[i][j])
: rit->CpuElapsedMs((*analyze_events)[i][j]);
total += event_time;
std::string event_name =
"thread" + std::to_string(rit->thread_id()) + "::" + rit->name();
max_name_width = std::max(max_name_width, event_name.size());
std::string event_name;
if (merge_thread) {
event_name = rit->name();
max_name_width = std::max(max_name_width, event_name.size());
} else {
event_name = "thread" + std::to_string(rit->thread_id()) + "::" +
rit->name();
max_name_width = std::max(max_name_width, event_name.size());
}
if (event_idx.find(event_name) == event_idx.end()) {
event_idx[event_name] = event_items.size();
......@@ -413,7 +439,7 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
pushed_events.erase((++rit).base());
} else {
LOG(WARNING) << "Cannot find the push marker of event \'"
<< events[i][j].name()
<< (*analyze_events)[i][j].name()
<< "\', which will be ignored in profiling report.";
}
}
......@@ -421,6 +447,7 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
// average time
for (auto& item : event_items) {
item.ave_time = item.total_time / item.calls;
item.ratio = item.total_time / total;
}
// sort
if (sorted_by != EventSortingKey::kDefault) {
......@@ -438,7 +465,8 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
}
// Print report
PrintProfiler(events_table, sorted_domain, max_name_width + 4, 12, total);
PrintProfiler(events_table, sorted_domain, max_name_width + 4, 12,
merge_thread);
}
void DisableProfiler(EventSortingKey sorted_key,
......@@ -449,7 +477,8 @@ void DisableProfiler(EventSortingKey sorted_key,
Mark("_stop_profiler_", nullptr);
std::vector<std::vector<Event>> all_events = GetAllEvents();
ParseEvents(all_events, sorted_key);
ParseEvents(all_events, true, sorted_key);
ParseEvents(all_events, false, sorted_key);
ResetProfiler();
DeviceTracer* tracer = GetDeviceTracer();
if (tracer->IsEnabled()) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册