提交 0f441075 编写于 作者: Y Yibing Liu

Confirm the contents in profiling report

上级 770aff2c
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
licensed under the Apache License, Version 2.0 (the "License"); licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.
You may obtain a copy of the License at You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0 http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software Unless required by applicable law or agreed to in writing, software
...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/platform/profiler.h" #include "paddle/platform/profiler.h"
#include <iomanip>
#include <map> #include <map>
namespace paddle { namespace paddle {
...@@ -183,7 +184,8 @@ void PopEvent(const std::string& name, DeviceContext* dev_ctx) { ...@@ -183,7 +184,8 @@ void PopEvent(const std::string& name, DeviceContext* dev_ctx) {
void ParseEvents(std::vector<std::vector<Event>>& events) { void ParseEvents(std::vector<std::vector<Event>>& events) {
// Event name :: counts :: total :: min :: max :: ave // Event name :: counts :: total :: min :: max :: ave
std::map<std::string, std::tuple<int, double, double>> events_table; std::map<std::string, std::tuple<int, double, double, double, double>>
events_table;
for (size_t i = 0; i < events.size(); i++) { for (size_t i = 0; i < events.size(); i++) {
std::list<Event> pushed_events; std::list<Event> pushed_events;
for (size_t j = 0; j < events[i].size(); j++) { for (size_t j = 0; j < events[i].size(); j++) {
...@@ -197,18 +199,28 @@ void ParseEvents(std::vector<std::vector<Event>>& events) { ...@@ -197,18 +199,28 @@ void ParseEvents(std::vector<std::vector<Event>>& events) {
++rit; ++rit;
} }
if (rit != pushed_events.rend()) { if (rit != pushed_events.rend()) {
Event pushed_event = *rit;
double cpu_time = rit->CpuElapsedUs(events[i][j]);
double cuda_time = 0;
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
cuda_time = rit->CudaElapsedUs(events[i][j]); double event_time = rit->CudaElapsedUs(events[i][j]);
#else
double event_time = rit->CpuElapsedUs(events[i][j]);
#endif #endif
if (events_table.find(rit->name()) == events_table.end()) { std::string event_name =
events_table[rit->name()] = std::make_tuple(1, cpu_time, cuda_time); "thread" + std::to_string(rit->thread_id()) + "::" + rit->name();
if (events_table.find(event_name) == events_table.end()) {
events_table[event_name] =
std::make_tuple(1, event_time, event_time, event_time, 0);
} else { } else {
std::get<0>(events_table[rit->name()]) += 1; std::get<0>(events_table[event_name]) += 1;
std::get<1>(events_table[rit->name()]) += cpu_time; // total time
std::get<2>(events_table[rit->name()]) += cuda_time; std::get<1>(events_table[event_name]) += event_time;
// min time
if (std::get<2>(events_table[event_name]) > event_time) {
std::get<2>(events_table[event_name]) = event_time;
}
// max time
if (std::get<3>(events_table[event_name]) < event_time) {
std::get<3>(events_table[event_name]) = event_time;
}
} }
// remove the start marker from the list // remove the start marker from the list
pushed_events.erase((++rit).base()); pushed_events.erase((++rit).base());
...@@ -220,13 +232,21 @@ void ParseEvents(std::vector<std::vector<Event>>& events) { ...@@ -220,13 +232,21 @@ void ParseEvents(std::vector<std::vector<Event>>& events) {
} }
} }
// output events table // output events table
std::cout << "\nEvents\t\tCalls\t\tTotal CPU time\t\tTotal GPU time\n"; std::cout << std::setw(20) << "Events" << std::setw(10) << "Calls"
for (std::map<std::string, std::tuple<int, double, double>>::iterator it = << std::setw(10) << "Total" << std::setw(10) << "Min"
<< std::setw(10) << "Max" << std::setw(10) << "Ave" << std::endl;
for (std::map<std::string,
std::tuple<int, double, double, double, double>>::iterator it =
events_table.begin(); events_table.begin();
it != events_table.end(); ++it) { it != events_table.end(); ++it) {
std::cout << it->first << "\t\t" << std::get<0>(it->second) << "\t\t" // average time
<< std::get<1>(it->second) << "\t\t" << std::get<2>(it->second) std::get<4>(it->second) = std::get<1>(it->second) / std::get<0>(it->second);
<< std::endl; std::cout << std::setw(20) << it->first << std::setw(10)
<< std::get<0>(it->second) << std::setw(10)
<< std::get<1>(it->second) << std::setw(10)
<< std::get<2>(it->second) << std::setw(10)
<< std::get<3>(it->second) << std::setw(10)
<< std::get<4>(it->second) << std::endl;
} }
} }
......
...@@ -33,6 +33,7 @@ class Event { ...@@ -33,6 +33,7 @@ class Event {
std::string kind() const; std::string kind() const;
std::string name() const { return name_; } std::string name() const { return name_; }
uint32_t thread_id() const { return thread_id_; }
bool has_cuda() const { return has_cuda_; } bool has_cuda() const { return has_cuda_; }
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
......
...@@ -94,6 +94,12 @@ TEST(RecordEvent, RecordEvent) { ...@@ -94,6 +94,12 @@ TEST(RecordEvent, RecordEvent) {
int counter = 1; int counter = 1;
while (counter != i * 1000) counter++; while (counter != i * 1000) counter++;
} }
for (int i = 1; i < 5; ++i) {
std::string name = "evs_op_" + std::to_string(i);
RecordEvent record_event(name, dev_ctx);
int counter = 1;
while (counter != i * 1000) counter++;
}
std::vector<std::vector<Event>> events = paddle::platform::DisableProfiler(); std::vector<std::vector<Event>> events = paddle::platform::DisableProfiler();
int cuda_startup_count = 0; int cuda_startup_count = 0;
int start_profiler_count = 0; int start_profiler_count = 0;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册