未验证 提交 ca9c8b41 编写于 作者: Z Zhang Ting 提交者: GitHub

fix compute ratio of profile, test=develop (#22872)

上级 dbb0b9b3
...@@ -355,25 +355,37 @@ void SetEvent(bool merge_thread, const Event &analyze_event, ...@@ -355,25 +355,37 @@ void SetEvent(bool merge_thread, const Event &analyze_event,
} }
} }
// NOTE(review): this is a side-by-side diff rendering. The line below fuses
// the old-side text (the start of the previous ComputeOverhead signature)
// with the new-side text (the start of UpdateGpuMemcpy). The comments in this
// block describe the new-side function UpdateGpuMemcpy only.
//
// UpdateGpuMemcpy folds one event item into the running GPU memcpy totals.
//   item         - event item whose name decides which accumulator is updated.
//   memcpy_async - accumulator updated when item.name contains
//                  "GpuMemcpyAsync"; dereferenced without a null check.
//   memcpy_sync  - accumulator updated when item.name contains
//                  "GpuMemcpySync"; dereferenced without a null check.
// Items whose name contains neither substring leave both accumulators
// untouched.
void ComputeOverhead(const std::multimap<std::string, EventItem> &sub_child_map, void UpdateGpuMemcpy(const EventItem &item, EventItem *memcpy_async,
EventItem *memcpy_sync) {
// Substring match, so the marker may appear anywhere in the name. The async
// marker is tested first; because of the else-if, a name containing both
// markers is counted as async only.
if (item.name.find("GpuMemcpyAsync") != std::string::npos) {
memcpy_async->calls += item.calls;
memcpy_async->total_time += item.total_time;
memcpy_async->ratio += item.ratio;
} else if (item.name.find("GpuMemcpySync") != std::string::npos) {
// Same three fields (calls, total_time, ratio) are accumulated for sync.
memcpy_sync->calls += item.calls;
memcpy_sync->total_time += item.total_time;
memcpy_sync->ratio += item.ratio;
}
}
void ComputeOverhead(const std::vector<EventItem> &main_event_items,
const std::multimap<std::string, EventItem> &sub_child_map,
OverHead *overhead) { OverHead *overhead) {
EventItem memcpy_async = { EventItem memcpy_async = {
"GpuMemcpyAsync", 0, 0., 0., 0., 0., 0., 0., 0.0f, EventRole::kOrdinary}; "GpuMemcpyAsync", 0, 0., 0., 0., 0., 0., 0., 0.0f, EventRole::kOrdinary};
EventItem memcpy_sync = {"GpuMemcpySync", 0, 0., 0., 0., 0., 0., 0., 0.0f, EventItem memcpy_sync = {"GpuMemcpySync", 0, 0., 0., 0., 0., 0., 0., 0.0f,
EventRole::kOrdinary}; EventRole::kOrdinary};
// GpuMemcpy may be in main_event_items
for (auto &item : main_event_items) {
UpdateGpuMemcpy(item, &memcpy_async, &memcpy_sync);
}
for (auto it = sub_child_map.begin(); it != sub_child_map.end(); it++) { for (auto it = sub_child_map.begin(); it != sub_child_map.end(); it++) {
if (it->second.name.find("compute") != std::string::npos) { if (it->second.name.find("compute") != std::string::npos &&
it->second.name.find("compute/") == std::string::npos) {
overhead->compute_ratio += it->second.ratio; overhead->compute_ratio += it->second.ratio;
} }
if (it->second.name.find("GpuMemcpyAsync") != std::string::npos) { UpdateGpuMemcpy(it->second, &memcpy_async, &memcpy_sync);
memcpy_async.calls += it->second.calls;
memcpy_async.total_time += it->second.total_time;
memcpy_async.ratio += it->second.ratio;
} else if (it->second.name.find("GpuMemcpySync") != std::string::npos) {
memcpy_sync.calls += it->second.calls;
memcpy_sync.total_time += it->second.total_time;
memcpy_sync.ratio += it->second.ratio;
}
} }
overhead->framework_ratio = 1.0f - overhead->compute_ratio; overhead->framework_ratio = 1.0f - overhead->compute_ratio;
overhead->memcpy_item.calls = memcpy_async.calls + memcpy_sync.calls; overhead->memcpy_item.calls = memcpy_async.calls + memcpy_sync.calls;
...@@ -637,7 +649,7 @@ void AnalyzeEvent( ...@@ -637,7 +649,7 @@ void AnalyzeEvent(
if ((*analyze_events).size() == 1) { if ((*analyze_events).size() == 1) {
overhead->total_time = total; overhead->total_time = total;
overhead->print = true; overhead->print = true;
ComputeOverhead(sub_child_map, overhead); ComputeOverhead(main_event_items, sub_child_map, overhead);
} }
// sort // sort
if (sorted_by != EventSortingKey::kDefault) { if (sorted_by != EventSortingKey::kDefault) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册