未验证 提交 ca9c8b41 编写于 作者: Z Zhang Ting 提交者: GitHub

fix compute ratio of profile, test=develop (#22872)

上级 dbb0b9b3
......@@ -355,25 +355,37 @@ void SetEvent(bool merge_thread, const Event &analyze_event,
}
}
void ComputeOverhead(const std::multimap<std::string, EventItem> &sub_child_map,
void UpdateGpuMemcpy(const EventItem &item, EventItem *memcpy_async,
EventItem *memcpy_sync) {
if (item.name.find("GpuMemcpyAsync") != std::string::npos) {
memcpy_async->calls += item.calls;
memcpy_async->total_time += item.total_time;
memcpy_async->ratio += item.ratio;
} else if (item.name.find("GpuMemcpySync") != std::string::npos) {
memcpy_sync->calls += item.calls;
memcpy_sync->total_time += item.total_time;
memcpy_sync->ratio += item.ratio;
}
}
void ComputeOverhead(const std::vector<EventItem> &main_event_items,
const std::multimap<std::string, EventItem> &sub_child_map,
OverHead *overhead) {
EventItem memcpy_async = {
"GpuMemcpyAsync", 0, 0., 0., 0., 0., 0., 0., 0.0f, EventRole::kOrdinary};
EventItem memcpy_sync = {"GpuMemcpySync", 0, 0., 0., 0., 0., 0., 0., 0.0f,
EventRole::kOrdinary};
// GpuMemcpy may be in main_event_items
for (auto &item : main_event_items) {
UpdateGpuMemcpy(item, &memcpy_async, &memcpy_sync);
}
for (auto it = sub_child_map.begin(); it != sub_child_map.end(); it++) {
if (it->second.name.find("compute") != std::string::npos) {
if (it->second.name.find("compute") != std::string::npos &&
it->second.name.find("compute/") == std::string::npos) {
overhead->compute_ratio += it->second.ratio;
}
if (it->second.name.find("GpuMemcpyAsync") != std::string::npos) {
memcpy_async.calls += it->second.calls;
memcpy_async.total_time += it->second.total_time;
memcpy_async.ratio += it->second.ratio;
} else if (it->second.name.find("GpuMemcpySync") != std::string::npos) {
memcpy_sync.calls += it->second.calls;
memcpy_sync.total_time += it->second.total_time;
memcpy_sync.ratio += it->second.ratio;
}
UpdateGpuMemcpy(it->second, &memcpy_async, &memcpy_sync);
}
overhead->framework_ratio = 1.0f - overhead->compute_ratio;
overhead->memcpy_item.calls = memcpy_async.calls + memcpy_sync.calls;
......@@ -637,7 +649,7 @@ void AnalyzeEvent(
if ((*analyze_events).size() == 1) {
overhead->total_time = total;
overhead->print = true;
ComputeOverhead(sub_child_map, overhead);
ComputeOverhead(main_event_items, sub_child_map, overhead);
}
// sort
if (sorted_by != EventSortingKey::kDefault) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册