提交 41826a31 编写于 作者: 石晓伟 提交者: GitHub

update profiler, test=develop (#2607)

* update profiler, test=develop

* warm up times of profiler, test=develop
上级 b65a6dc9
...@@ -72,10 +72,6 @@ void Run(const std::vector<std::vector<int64_t>>& input_shapes, ...@@ -72,10 +72,6 @@ void Run(const std::vector<std::vector<int64_t>>& input_shapes,
const int thread_num, const int thread_num,
const int repeat, const int repeat,
const int warmup_times = 0) { const int warmup_times = 0) {
#ifdef LITE_WITH_PROFILE
lite::profile::BasicProfiler<lite::profile::BasicTimer>::Global().SetWarmup(
warmup_times);
#endif
lite_api::MobileConfig config; lite_api::MobileConfig config;
config.set_model_dir(model_dir); config.set_model_dir(model_dir);
config.set_power_mode(power_mode); config.set_power_mode(power_mode);
......
...@@ -21,6 +21,13 @@ namespace paddle { ...@@ -21,6 +21,13 @@ namespace paddle {
namespace lite { namespace lite {
namespace profile { namespace profile {
namespace {
auto op_comp = [](const OpCharacter& c1, const OpCharacter& c2) {
return (c1.target < c2.target) || (c1.op_type < c2.op_type) ||
(c1.kernel_name < c2.kernel_name) || (c1.remark < c2.remark);
};
}
int Profiler::NewTimer(const OpCharacter& ch) { int Profiler::NewTimer(const OpCharacter& ch) {
StatisUnit unit; StatisUnit unit;
unit.character = ch; unit.character = ch;
...@@ -50,61 +57,66 @@ float Profiler::StopTiming(const int index, KernelContext* ctx) { ...@@ -50,61 +57,66 @@ float Profiler::StopTiming(const int index, KernelContext* ctx) {
return units_[index].timer->Stop(ctx); return units_[index].timer->Stop(ctx);
} }
std::string Profiler::Summary(bool concise) { std::string Profiler::Summary(bool concise, size_t w) {
using std::setw;
using std::left;
using std::fixed;
STL::stringstream ss; STL::stringstream ss;
auto cout_title = [&ss](const std::string& title, const std::string& name) { std::string title;
// clang-format off // Title.
ss << "===== " << title << ": " << name << " =====" << std::endl; if (concise) {
ss << std::setw(25) << std::left << "Operator Type" \ ss << "Timing cycle = " << units_.front().timer->LapTimes().Size()
<< std::setw(40) << std::left << "Kernel Name" \
<< std::setw(10) << std::left << "Remark" \
<< std::setw(10) << std::left << "Avg (ms)" \
<< std::setw(10) << std::left << "Min (ms)" \
<< std::setw(10) << std::left << "Max (ms)" \
<< std::endl; << std::endl;
// clang-format on ss << "===== Concise Profiler Summary: " << name_ << ", Exclude " << w
}; << " warm-ups =====" << std::endl;
} else {
ss << "===== Detailed Profiler Summary: " << name_ << ", Exclude " << w
<< " warm-ups =====" << std::endl;
}
ss << setw(25) << left << "Operator Type"
<< " " << setw(40) << left << "Kernel Name"
<< " " << setw(12) << left << "Remark"
<< " " << setw(12) << left << "Avg (ms)"
<< " " << setw(12) << left << "Min (ms)"
<< " " << setw(12) << left << "Max (ms)"
<< " " << setw(12) << left << "Last (ms)" << std::endl;
// Profile information.
if (concise) { if (concise) {
auto op_comp = [](const OpCharacter& c1, const OpCharacter& c2) {
return (c1.target < c2.target) || (c1.op_type < c2.op_type) ||
(c1.kernel_name < c2.kernel_name) || (c1.remark < c2.remark);
};
std::map<OpCharacter, TimeInfo, decltype(op_comp)> summary(op_comp); std::map<OpCharacter, TimeInfo, decltype(op_comp)> summary(op_comp);
for (auto& unit : units_) { for (auto& unit : units_) {
auto ch = summary.find(unit.character); auto ch = summary.find(unit.character);
if (ch != summary.end()) { if (ch != summary.end()) {
ch->second.avg += unit.timer->LapTimes().Avg(); ch->second.avg += unit.timer->LapTimes().Avg(w);
ch->second.min += unit.timer->LapTimes().Min(); ch->second.min += unit.timer->LapTimes().Min(w);
ch->second.max += unit.timer->LapTimes().Max(); ch->second.max += unit.timer->LapTimes().Max(w);
} else { } else {
TimeInfo info({unit.timer->LapTimes().Avg(), TimeInfo info({unit.timer->LapTimes().Avg(w),
unit.timer->LapTimes().Min(), unit.timer->LapTimes().Min(w),
unit.timer->LapTimes().Max()}); unit.timer->LapTimes().Max(w)});
summary.insert({unit.character, info}); summary.insert({unit.character, info});
} }
} }
cout_title("Concise Profiler Summary", name_);
for (const auto& item : summary) { for (const auto& item : summary) {
// clang-format off // clang-format off
ss << std::setw(25) << std::left << item.first.op_type \ ss << setw(25) << left << fixed << item.first.op_type \
<< std::setw(40) << std::left << item.first.kernel_name \ << " " << setw(40) << left << fixed << item.first.kernel_name \
<< std::setw(10) << std::left << item.first.remark \ << " " << setw(12) << left << fixed << item.first.remark \
<< std::setw(10) << std::left << item.second.avg \ << " " << setw(12) << left << fixed << item.second.avg \
<< std::setw(10) << std::left << item.second.min \ << " " << setw(12) << left << fixed << item.second.min \
<< std::setw(10) << std::left << item.second.max \ << " " << setw(12) << left << fixed << item.second.max \
<< std::endl; << " " << std::endl;
// clang-format on // clang-format on
} }
} else { } else {
cout_title("Detailed Profiler Summary", name_);
for (auto& unit : units_) { for (auto& unit : units_) {
// clang-format off // clang-format off
ss << std::setw(25) << std::left << unit.character.op_type \ ss << setw(25) << left << fixed << unit.character.op_type \
<< std::setw(40) << std::left << unit.character.kernel_name \ << " " << setw(40) << left << fixed << unit.character.kernel_name \
<< std::setw(10) << std::left << unit.character.remark \ << " " << setw(12) << left << fixed << unit.character.remark \
<< std::setw(10) << std::left << unit.timer->LapTimes().Avg() \ << " " << setw(12) << left << fixed << unit.timer->LapTimes().Avg(w) \
<< std::setw(10) << std::left << unit.timer->LapTimes().Min() \ << " " << setw(12) << left << fixed << unit.timer->LapTimes().Min(w) \
<< std::setw(10) << std::left << unit.timer->LapTimes().Max() \ << " " << setw(12) << left << fixed << unit.timer->LapTimes().Max(w) \
<< " " << setw(12) << left << fixed << unit.timer->LapTimes().Last(w) \
<< std::endl; << std::endl;
// clang-format on // clang-format on
} }
......
...@@ -47,7 +47,7 @@ class Profiler final { ...@@ -47,7 +47,7 @@ class Profiler final {
int NewTimer(const OpCharacter& ch); int NewTimer(const OpCharacter& ch);
void StartTiming(const int index, KernelContext* ctx); void StartTiming(const int index, KernelContext* ctx);
float StopTiming(const int index, KernelContext* ctx); float StopTiming(const int index, KernelContext* ctx);
std::string Summary(bool concise = true); std::string Summary(bool concise = true, size_t warm_up = 10);
private: private:
std::string name_{std::string("N/A")}; std::string name_{std::string("N/A")};
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
#pragma once #pragma once
#include <algorithm> #include <algorithm>
#include <chrono> // NOLINT #include <chrono> // NOLINT
#include <list> #include <vector>
#ifdef LITE_WITH_CUDA #ifdef LITE_WITH_CUDA
#include "lite/backends/cuda/cuda_utils.h" #include "lite/backends/cuda/cuda_utils.h"
#endif #endif
...@@ -30,20 +30,44 @@ class TimeList { ...@@ -30,20 +30,44 @@ class TimeList {
public: public:
void Clear() { laps_t_.clear(); } void Clear() { laps_t_.clear(); }
void Add(T t) { laps_t_.push_back(t); } void Add(T t) { laps_t_.push_back(t); }
T Max() const { return *std::max_element(laps_t_.begin(), laps_t_.end()); } T Last(size_t offset = 0) const {
T Min() const { return *std::min_element(laps_t_.begin(), laps_t_.end()); } if (!Size(offset)) {
T Sum() const { return std::accumulate(laps_t_.begin(), laps_t_.end(), 0.0); }
size_t Size() const { return laps_t_.size(); }
T Avg() const {
if (!Size()) {
return 0; return 0;
} }
return Sum() / Size(); return laps_t_.back();
} }
const std::list<T>& Raw() const { return laps_t_; } T Max(size_t offset = 0) const {
if (!Size(offset)) {
return 0;
}
return *std::max_element((laps_t_.begin() + offset), laps_t_.end());
}
T Min(size_t offset = 0) const {
if (!Size(offset)) {
return 0;
}
return *std::min_element((laps_t_.begin() + offset), laps_t_.end());
}
T Sum(size_t offset = 0) const {
if (!Size(offset)) {
return 0;
}
return std::accumulate((laps_t_.begin() + offset), laps_t_.end(), 0.0);
}
size_t Size(size_t offset = 0) const {
size_t size = (laps_t_.size() <= offset) ? 0 : (laps_t_.size() - offset);
return size;
}
T Avg(size_t offset = 0) const {
if (!Size(offset)) {
return 0;
}
return Sum(offset) / Size(offset);
}
const std::vector<T>& Raw() const { return laps_t_; }
private: private:
std::list<T> laps_t_; std::vector<T> laps_t_;
}; };
class Timer { class Timer {
...@@ -69,8 +93,10 @@ class Timer { ...@@ -69,8 +93,10 @@ class Timer {
const TimeList<float>& LapTimes() const { return laps_t_; } const TimeList<float>& LapTimes() const { return laps_t_; }
protected: protected:
std::chrono::time_point<std::chrono::system_clock> t_start_, t_stop_;
TimeList<float> laps_t_; TimeList<float> laps_t_;
private:
std::chrono::time_point<std::chrono::system_clock> t_start_, t_stop_;
}; };
template <TargetType Target> template <TargetType Target>
......
...@@ -147,7 +147,7 @@ void RuntimeProgram::Run() { ...@@ -147,7 +147,7 @@ void RuntimeProgram::Run() {
#endif // LITE_WITH_PROFILE #endif // LITE_WITH_PROFILE
} }
#ifdef LITE_WITH_PROFILE #ifdef LITE_WITH_PROFILE
LOG(INFO) << "\n" << profiler_.Summary(); LOG(INFO) << "\n" << profiler_.Summary(false, 0);
#endif // LITE_WITH_PROFILE #endif // LITE_WITH_PROFILE
} }
......
...@@ -141,6 +141,11 @@ class LITE_API RuntimeProgram { ...@@ -141,6 +141,11 @@ class LITE_API RuntimeProgram {
set_profiler(); set_profiler();
#endif #endif
} }
~RuntimeProgram() {
#ifdef LITE_WITH_PROFILE
LOG(INFO) << "\n" << profiler_.Summary();
#endif // LITE_WITH_PROFILE
}
void Run(); void Run();
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册