fix compute ratio of profile, test=develop (#22872)

ca9c8b41 · Zhang Ting · GitHub · dbb0b9b3 · ca9c8b41
显示空白变更内容
内联并排

Showing with 24 additions and 12 deletions

paddle/fluid/platform/profiler_helper.h paddle/fluid/platform/profiler_helper.h +24 -12

未找到文件。
--- a/paddle/fluid/platform/profiler_helper.h
+++ b/paddle/fluid/platform/profiler_helper.h
@@ -355,25 +355,37 @@ void SetEvent(bool merge_thread, const Event &analyze_event,
  }
 }
-void ComputeOverhead(const std::multimap<std::string, EventItem> &sub_child_map,
+void UpdateGpuMemcpy(const EventItem &item, EventItem *memcpy_async,  // Adds `item`'s counters to one of two accumulators picked by item.name.
+                     EventItem *memcpy_sync) {  // Both pointers are dereferenced without a null check.
+  if (item.name.find("GpuMemcpyAsync") != std::string::npos) {  // Substring match, tested first, so it wins if both names occur.
+    memcpy_async->calls += item.calls;
+    memcpy_async->total_time += item.total_time;
+    memcpy_async->ratio += item.ratio;
+  } else if (item.name.find("GpuMemcpySync") != std::string::npos) {  // Only reached when the async substring is absent.
+    memcpy_sync->calls += item.calls;
+    memcpy_sync->total_time += item.total_time;
+    memcpy_sync->ratio += item.ratio;
+  }  // Items whose name contains neither substring leave both accumulators untouched.
+}
+void ComputeOverhead(const std::vector<EventItem> &main_event_items,
+                     const std::multimap<std::string, EventItem> &sub_child_map,
                     OverHead *overhead) {
  EventItem memcpy_async = {
      "GpuMemcpyAsync", 0, 0., 0., 0., 0., 0., 0., 0.0f, EventRole::kOrdinary};
  EventItem memcpy_sync = {"GpuMemcpySync",     0, 0., 0., 0., 0., 0., 0., 0.0f,
                           EventRole::kOrdinary};
+  // GpuMemcpy may be in main_event_items
+  for (auto &item : main_event_items) {
+    UpdateGpuMemcpy(item, &memcpy_async, &memcpy_sync);
+  }
  for (auto it = sub_child_map.begin(); it != sub_child_map.end(); it++) {
-    if (it->second.name.find("compute") != std::string::npos) {
+    if (it->second.name.find("compute") != std::string::npos &&
+        it->second.name.find("compute/") == std::string::npos) {
      overhead->compute_ratio += it->second.ratio;
    }
-    if (it->second.name.find("GpuMemcpyAsync") != std::string::npos) {
+    UpdateGpuMemcpy(it->second, &memcpy_async, &memcpy_sync);
-      memcpy_async.calls += it->second.calls;
-      memcpy_async.total_time += it->second.total_time;
-      memcpy_async.ratio += it->second.ratio;
-    } else if (it->second.name.find("GpuMemcpySync") != std::string::npos) {
-      memcpy_sync.calls += it->second.calls;
-      memcpy_sync.total_time += it->second.total_time;
-      memcpy_sync.ratio += it->second.ratio;
-    }
  }
  overhead->framework_ratio = 1.0f - overhead->compute_ratio;
  overhead->memcpy_item.calls = memcpy_async.calls + memcpy_sync.calls;
@@ -637,7 +649,7 @@ void AnalyzeEvent(
    if ((*analyze_events).size() == 1) {
      overhead->total_time = total;
      overhead->print = true;
-      ComputeOverhead(sub_child_map, overhead);
+      ComputeOverhead(main_event_items, sub_child_map, overhead);
    }
    // sort
    if (sorted_by != EventSortingKey::kDefault) {