Confirm the contents in profiling report

0f441075 · Yibing Liu · 770aff2c · 0f441075 · 0f441075 · 0f441075
Showing 3 changed files with 43 additions and 16 deletions

paddle/platform/profiler.cc paddle/platform/profiler.cc +36 -16

paddle/platform/profiler.h paddle/platform/profiler.h +1 -0

paddle/platform/profiler_test.cc paddle/platform/profiler_test.cc +6 -0

File not found.
--- a/paddle/platform/profiler.cc
+++ b/paddle/platform/profiler.cc
@@ -3,7 +3,7 @@
 licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
-`
+
    http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/platform/profiler.h"
+#include <iomanip>
 #include <map>

 namespace paddle {
@@ -183,7 +184,8 @@ void PopEvent(const std::string& name, DeviceContext* dev_ctx) {

 void ParseEvents(std::vector<std::vector<Event>>& events) {
  // Event name :: counts :: total :: min :: max :: ave
-  std::map<std::string, std::tuple<int, double, double>> events_table;
+  std::map<std::string, std::tuple<int, double, double, double, double>>
+      events_table;
  for (size_t i = 0; i < events.size(); i++) {
    std::list<Event> pushed_events;
    for (size_t j = 0; j < events[i].size(); j++) {
@@ -197,18 +199,28 @@ void ParseEvents(std::vector<std::vector<Event>>& events) {
          ++rit;
        }
        if (rit != pushed_events.rend()) {
-          Event pushed_event = *rit;
-          double cpu_time = rit->CpuElapsedUs(events[i][j]);
-          double cuda_time = 0;
 #ifdef PADDLE_WITH_CUDA
-          cuda_time = rit->CudaElapsedUs(events[i][j]);
+          double event_time = rit->CudaElapsedUs(events[i][j]);
+#else
+          double event_time = rit->CpuElapsedUs(events[i][j]);
 #endif
-          if (events_table.find(rit->name()) == events_table.end()) {
-            events_table[rit->name()] = std::make_tuple(1, cpu_time, cuda_time);
+          std::string event_name =
+              "thread" + std::to_string(rit->thread_id()) + "::" + rit->name();
+          if (events_table.find(event_name) == events_table.end()) {
+            events_table[event_name] =
+                std::make_tuple(1, event_time, event_time, event_time, 0);
          } else {
-            std::get<0>(events_table[rit->name()]) += 1;
-            std::get<1>(events_table[rit->name()]) += cpu_time;
-            std::get<2>(events_table[rit->name()]) += cuda_time;
+            std::get<0>(events_table[event_name]) += 1;
+            // total time
+            std::get<1>(events_table[event_name]) += event_time;
+            // min time
+            if (std::get<2>(events_table[event_name]) > event_time) {
+              std::get<2>(events_table[event_name]) = event_time;
+            }
+            // max time
+            if (std::get<3>(events_table[event_name]) < event_time) {
+              std::get<3>(events_table[event_name]) = event_time;
+            }
          }
          // remove the start marker from the list
          pushed_events.erase((++rit).base());
@@ -220,13 +232,21 @@ void ParseEvents(std::vector<std::vector<Event>>& events) {
    }
  }
  // output events table
-  std::cout << "\nEvents\t\tCalls\t\tTotal CPU time\t\tTotal GPU time\n";
-  for (std::map<std::string, std::tuple<int, double, double>>::iterator it =
+  std::cout << std::setw(20) << "Events" << std::setw(10) << "Calls"
+            << std::setw(10) << "Total" << std::setw(10) << "Min"
+            << std::setw(10) << "Max" << std::setw(10) << "Ave" << std::endl;
+  for (std::map<std::string,
+                std::tuple<int, double, double, double, double>>::iterator it =
           events_table.begin();
       it != events_table.end(); ++it) {
-    std::cout << it->first << "\t\t" << std::get<0>(it->second) << "\t\t"
-              << std::get<1>(it->second) << "\t\t" << std::get<2>(it->second)
-              << std::endl;
+    // average time
+    std::get<4>(it->second) = std::get<1>(it->second) / std::get<0>(it->second);
+    std::cout << std::setw(20) << it->first << std::setw(10)
+              << std::get<0>(it->second) << std::setw(10)
+              << std::get<1>(it->second) << std::setw(10)
+              << std::get<2>(it->second) << std::setw(10)
+              << std::get<3>(it->second) << std::setw(10)
+              << std::get<4>(it->second) << std::endl;
  }
 }


--- a/paddle/platform/profiler.h
+++ b/paddle/platform/profiler.h
@@ -33,6 +33,7 @@ class Event {

  std::string kind() const;
  std::string name() const { return name_; }
+  uint32_t thread_id() const { return thread_id_; }
  bool has_cuda() const { return has_cuda_; }

 #ifdef PADDLE_WITH_CUDA

--- a/paddle/platform/profiler_test.cc
+++ b/paddle/platform/profiler_test.cc
@@ -94,6 +94,12 @@ TEST(RecordEvent, RecordEvent) {
    int counter = 1;
    while (counter != i * 1000) counter++;
  }
+  for (int i = 1; i < 5; ++i) {
+    std::string name = "evs_op_" + std::to_string(i);
+    RecordEvent record_event(name, dev_ctx);
+    int counter = 1;
+    while (counter != i * 1000) counter++;
+  }
  std::vector<std::vector<Event>> events = paddle::platform::DisableProfiler();
  int cuda_startup_count = 0;
  int start_profiler_count = 0;