diff --git a/paddle/platform/profiler.cc b/paddle/platform/profiler.cc index 40b34b732c9d37a34ec3508370bd01d33f592140..3d95097048c8cc7db720fc4b8a00dab6256513e5 100644 --- a/paddle/platform/profiler.cc +++ b/paddle/platform/profiler.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/platform/profiler.h" +#include namespace paddle { namespace platform { @@ -70,5 +71,63 @@ std::vector> DisableProfiler() { return result; } +void PushEvent(const std::string name, const platform::DeviceContext* dev_ctx) { + GetEventList().Record(EventKind::kPushRange, std::move(name), kThreadId, + dev_ctx); +} + +void PopEvent(const std::string name, const platform::DeviceContext* dev_ctx) { + GetEventList().Record(EventKind::kPopRange, std::move(name), kThreadId, + dev_ctx); +} + +void ParseEvents(std::vector> events) { + std::map> events_table; + for (size_t i = 0; i < events.size(); i++) { + std::list pushed_events; + for (size_t j = 0; j < events[i].size(); j++) { + if (events[i][j].kind() == "push") { + pushed_events.push_back(events[i][j]); + } + if (events[i][j].kind() == "pop") { + std::list::reverse_iterator rit = pushed_events.rbegin(); + while (rit->name() != events[i][j].name() && + rit != pushed_events.rend()) { + ++rit; + } + if (rit != pushed_events.rend()) { + Event pushed_event = *rit; + double cpu_time = rit->CpuElapsedUs(events[i][j]); + double cuda_time = 0; +#ifdef PADDLE_WITH_CUDA + cuda_time = rit->CudaElapsedUs(events[i][j]); +#endif + if (events_table.find(rit->name()) == events_table.end()) { + events_table[rit->name()] = std::make_tuple(1, cpu_time, cuda_time); + } else { + std::get<0>(events_table[rit->name()]) += 1; + std::get<1>(events_table[rit->name()]) += cpu_time; + std::get<2>(events_table[rit->name()]) += cuda_time; + } + // remove the start marker from the list + pushed_events.erase((++rit).base()); + } else { + std::cout << "Warning: can not find the start marker of event " + << events[i][j].name(); + } + } + } + } + // output events table + std::cout << "\nEvents\t\tCalls\t\tTotal CPU time\t\tTotal GPU time\n"; + for (std::map>::iterator it = + events_table.begin(); + it != events_table.end(); ++it) { + std::cout << it->first << "\t\t" << std::get<0>(it->second) << "\t\t" + << std::get<1>(it->second) << "\t\t" << std::get<2>(it->second) + << std::endl; + } +} + } // namespace platform } // namespace paddle diff --git a/paddle/platform/profiler.h b/paddle/platform/profiler.h index 2242635024ca6a31100973017de24ab570ee5b14..5f21ff8c1c39323b5477607fc74e08b43420de23 100644 --- a/paddle/platform/profiler.h +++ b/paddle/platform/profiler.h @@ -173,25 +173,35 @@ inline void Mark(const std::string name, GetEventList().Record(EventKind::kMark, std::move(name), kThreadId, dev_ctx); } +void PushEvent(const std::string name, + const platform::DeviceContext* dev_ctx = nullptr); + +void PopEvent(const std::string name, + const platform::DeviceContext* dev_ctx = nullptr); + struct RecordEvent { explicit RecordEvent(const std::string name, platform::DeviceContext* dev_ctx = nullptr) { if (kState == ProfilerState::kDisabled) return; dev_ctx_ = dev_ctx; + name_ = name; GetEventList().Record(EventKind::kPushRange, std::move(name), kThreadId, dev_ctx_); } ~RecordEvent() { if (kState == ProfilerState::kDisabled) return; - GetEventList().Record(EventKind::kPopRange, std::string(), kThreadId, + GetEventList().Record(EventKind::kPopRange, std::move(name_), kThreadId, dev_ctx_); } platform::DeviceContext* dev_ctx_; + std::string name_; }; void EnableProfiler(ProfilerState state); std::vector> DisableProfiler(); +void ParseEvents(std::vector>); + } // namespace platform } // namespace paddle diff --git a/paddle/platform/profiler_test.cc b/paddle/platform/profiler_test.cc index 5bd0a9d8599306e7437fecefa9fcaa6a00574ad6..b2f1dea46596cd9f302dea8586931016bd56f75d 100644 --- a/paddle/platform/profiler_test.cc +++ b/paddle/platform/profiler_test.cc @@ -67,8 +67,29 @@ TEST(RecordEvent, RecordEvent) { #endif EnableProfiler(state); + /* Usage 1: + * PushEvent(evt_name, dev_ctx); + * ... + * code to time + * ... + * PopEvent(evt_name, dev_ctx); + */ for (int i = 1; i < 5; ++i) { std::string name = "op_" + std::to_string(i); + PushEvent(name, dev_ctx); + int counter = 1; + while (counter != i * 1000) counter++; + PopEvent(name, dev_ctx); + } + + /* Usage 2: + * { + * RecordEvent record_event(name, dev_ctx); + * ... + * } + */ + for (int i = 1; i < 5; ++i) { + std::string name = "evs_op_" + std::to_string(i); RecordEvent record_event(name, dev_ctx); int counter = 1; while (counter != i * 1000) counter++; @@ -77,6 +98,7 @@ TEST(RecordEvent, RecordEvent) { int cuda_startup_count = 0; int start_profiler_count = 0; int stop_profiler_count = 0; + ParseEvents(events); for (size_t i = 0; i < events.size(); ++i) { for (size_t j = 0; j < events[i].size(); ++j) { if (events[i][j].name() == "_cuda_startup_") ++cuda_startup_count;