提交 0f441075 编写于 作者: Y Yibing Liu

Confirm the contents in profiling report

上级 770aff2c
......@@ -3,7 +3,7 @@
licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/platform/profiler.h"
#include <iomanip>
#include <map>
namespace paddle {
......@@ -183,7 +184,8 @@ void PopEvent(const std::string& name, DeviceContext* dev_ctx) {
void ParseEvents(std::vector<std::vector<Event>>& events) {
// Event name :: counts :: total :: min :: max :: ave
std::map<std::string, std::tuple<int, double, double>> events_table;
std::map<std::string, std::tuple<int, double, double, double, double>>
events_table;
for (size_t i = 0; i < events.size(); i++) {
std::list<Event> pushed_events;
for (size_t j = 0; j < events[i].size(); j++) {
......@@ -197,18 +199,28 @@ void ParseEvents(std::vector<std::vector<Event>>& events) {
++rit;
}
if (rit != pushed_events.rend()) {
Event pushed_event = *rit;
double cpu_time = rit->CpuElapsedUs(events[i][j]);
double cuda_time = 0;
#ifdef PADDLE_WITH_CUDA
cuda_time = rit->CudaElapsedUs(events[i][j]);
double event_time = rit->CudaElapsedUs(events[i][j]);
#else
double event_time = rit->CpuElapsedUs(events[i][j]);
#endif
if (events_table.find(rit->name()) == events_table.end()) {
events_table[rit->name()] = std::make_tuple(1, cpu_time, cuda_time);
std::string event_name =
"thread" + std::to_string(rit->thread_id()) + "::" + rit->name();
if (events_table.find(event_name) == events_table.end()) {
events_table[event_name] =
std::make_tuple(1, event_time, event_time, event_time, 0);
} else {
std::get<0>(events_table[rit->name()]) += 1;
std::get<1>(events_table[rit->name()]) += cpu_time;
std::get<2>(events_table[rit->name()]) += cuda_time;
std::get<0>(events_table[event_name]) += 1;
// total time
std::get<1>(events_table[event_name]) += event_time;
// min time
if (std::get<2>(events_table[event_name]) > event_time) {
std::get<2>(events_table[event_name]) = event_time;
}
// max time
if (std::get<3>(events_table[event_name]) < event_time) {
std::get<3>(events_table[event_name]) = event_time;
}
}
// remove the start marker from the list
pushed_events.erase((++rit).base());
......@@ -220,13 +232,21 @@ void ParseEvents(std::vector<std::vector<Event>>& events) {
}
}
// output events table
std::cout << "\nEvents\t\tCalls\t\tTotal CPU time\t\tTotal GPU time\n";
for (std::map<std::string, std::tuple<int, double, double>>::iterator it =
std::cout << std::setw(20) << "Events" << std::setw(10) << "Calls"
<< std::setw(10) << "Total" << std::setw(10) << "Min"
<< std::setw(10) << "Max" << std::setw(10) << "Ave" << std::endl;
for (std::map<std::string,
std::tuple<int, double, double, double, double>>::iterator it =
events_table.begin();
it != events_table.end(); ++it) {
std::cout << it->first << "\t\t" << std::get<0>(it->second) << "\t\t"
<< std::get<1>(it->second) << "\t\t" << std::get<2>(it->second)
<< std::endl;
// average time
std::get<4>(it->second) = std::get<1>(it->second) / std::get<0>(it->second);
std::cout << std::setw(20) << it->first << std::setw(10)
<< std::get<0>(it->second) << std::setw(10)
<< std::get<1>(it->second) << std::setw(10)
<< std::get<2>(it->second) << std::setw(10)
<< std::get<3>(it->second) << std::setw(10)
<< std::get<4>(it->second) << std::endl;
}
}
......
......@@ -33,6 +33,7 @@ class Event {
std::string kind() const;
std::string name() const { return name_; }
uint32_t thread_id() const { return thread_id_; }
bool has_cuda() const { return has_cuda_; }
#ifdef PADDLE_WITH_CUDA
......
......@@ -94,6 +94,12 @@ TEST(RecordEvent, RecordEvent) {
int counter = 1;
while (counter != i * 1000) counter++;
}
for (int i = 1; i < 5; ++i) {
std::string name = "evs_op_" + std::to_string(i);
RecordEvent record_event(name, dev_ctx);
int counter = 1;
while (counter != i * 1000) counter++;
}
std::vector<std::vector<Event>> events = paddle::platform::DisableProfiler();
int cuda_startup_count = 0;
int start_profiler_count = 0;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册