未验证 提交 aa6623b8 编写于 作者: S sangoly 提交者: GitHub

[Profile] add kernel runtime profile && add op runtime summary test=develop (#2136)

上级 a96d013c
...@@ -92,7 +92,9 @@ add_custom_target(kernel_list_h DEPENDS kernels.h) ...@@ -92,7 +92,9 @@ add_custom_target(kernel_list_h DEPENDS kernels.h)
add_custom_target(all_kernel_faked_cc DEPENDS all_kernel_faked.cc) add_custom_target(all_kernel_faked_cc DEPENDS all_kernel_faked.cc)
#----------------------------------------------- NOT CHANGE ----------------------------------------------- #----------------------------------------------- NOT CHANGE -----------------------------------------------
lite_cc_library(kernel SRCS kernel.cc DEPS context type_system target_wrapper any op_params tensor lite_cc_library(kernel SRCS kernel.cc
DEPS context type_system target_wrapper any op_params tensor
PROFILE_DEPS basic_profiler
) )
lite_cc_library(op SRCS op_lite.cc DEPS scope op_registry target_wrapper kernel lite_cc_library(op SRCS op_lite.cc DEPS scope op_registry target_wrapper kernel
cpp_op_desc tensor cpp_op_desc tensor
......
...@@ -30,6 +30,10 @@ ...@@ -30,6 +30,10 @@
#include "lite/utils/all.h" #include "lite/utils/all.h"
#include "lite/utils/replace_stl/stream.h" #include "lite/utils/replace_stl/stream.h"
#ifdef LITE_WITH_PROFILE
#include "lite/core/profile/basic_profiler.h"
#endif // LITE_WITH_PROFILE
namespace paddle { namespace paddle {
namespace lite { namespace lite {
...@@ -53,6 +57,10 @@ class KernelBase { ...@@ -53,6 +57,10 @@ class KernelBase {
/// Run the kernel. Before Run, both the param_ and context_ should be valid. /// Run the kernel. Before Run, both the param_ and context_ should be valid.
virtual void Run() = 0; virtual void Run() = 0;
#ifdef LITE_WITH_PROFILE
void SetProfileID(uint32_t id) { profile_id_ = id; }
#endif
void Launch() { void Launch() {
/// First run, init kernel, do weights transform once /// First run, init kernel, do weights transform once
if (is_first_epoch_) { if (is_first_epoch_) {
...@@ -72,6 +80,11 @@ class KernelBase { ...@@ -72,6 +80,11 @@ class KernelBase {
#if defined(LITE_WITH_CUDA) #if defined(LITE_WITH_CUDA)
WorkSpace::Global_CUDA().AllocReset(); WorkSpace::Global_CUDA().AllocReset();
#endif #endif
#ifdef LITE_WITH_PROFILE
CHECK_GE(profile_id_, 0) << "Must set profile id first";
profile::ProfileBlock x(profile_id_, "kernel");
#endif
Run(); Run();
} }
...@@ -157,6 +170,10 @@ class KernelBase { ...@@ -157,6 +170,10 @@ class KernelBase {
// is the unique ID for the kernel. // is the unique ID for the kernel.
std::string alias_{}; std::string alias_{};
bool is_first_epoch_{true}; bool is_first_epoch_{true};
#ifdef LITE_WITH_PROFILE
int profile_id_{-1};
#endif
}; };
// Light-weight kernel implementation. // Light-weight kernel implementation.
......
...@@ -80,6 +80,8 @@ class OpLite : public Registry { ...@@ -80,6 +80,8 @@ class OpLite : public Registry {
// Human-readable information. // Human-readable information.
virtual std::string DebugString() const = 0; virtual std::string DebugString() const = 0;
virtual std::string SerializedOpInfo() const { return "N/A"; }
const Place &kernel_place() const { return kernel_place_; } const Place &kernel_place() const { return kernel_place_; }
// Create all the kernels for the valid targets. // Create all the kernels for the valid targets.
......
...@@ -2,7 +2,7 @@ if (NOT LITE_WITH_PROFILE) ...@@ -2,7 +2,7 @@ if (NOT LITE_WITH_PROFILE)
return() return()
endif() endif()
lite_cc_library(basic_profiler SRCS basic_profiler.cc) lite_cc_library(basic_profiler SRCS basic_profiler.cc DEPS gflags)
lite_cc_test(test_basic_profiler SRCS basic_profiler_test.cc DEPS basic_profiler) lite_cc_test(test_basic_profiler SRCS basic_profiler_test.cc DEPS basic_profiler)
...@@ -14,6 +14,14 @@ ...@@ -14,6 +14,14 @@
#include "lite/core/profile/basic_profiler.h" #include "lite/core/profile/basic_profiler.h"
DEFINE_string(time_profile_file,
"time_profile.txt",
"Lite time profile information dump file");
DEFINE_string(time_profile_summary_file,
"time_profile_summary.txt",
"Lite time profile summary information dump file");
namespace paddle { namespace paddle {
namespace lite { namespace lite {
namespace profile { namespace profile {
...@@ -21,6 +29,33 @@ namespace profile { ...@@ -21,6 +29,33 @@ namespace profile {
const int BasicTimer::data_w = 10; const int BasicTimer::data_w = 10;
const int BasicTimer::name_w = 15; const int BasicTimer::name_w = 15;
template class BasicProfiler<BasicTimer>;
template <typename TimerT>
BasicProfiler<TimerT>::~BasicProfiler() {
LOG(INFO) << "Basic Profile dumps:";
auto b_repr = TimerT::basic_repr_header() + "\n" + basic_repr();
LOG(INFO) << "\n" + b_repr;
// Dump to file
std::ofstream basic_ostream(FLAGS_time_profile_file);
CHECK(basic_ostream.is_open()) << "Open " << FLAGS_time_profile_file
<< " failed";
basic_ostream.write(b_repr.c_str(), b_repr.size());
basic_ostream.close();
LOG(INFO) << "Summary Profile dumps:";
auto s_repr = summary_repr_header() + "\n" + summary_repr();
LOG(INFO) << "\n" + s_repr;
// Dump to file
std::ofstream summary_ostream(FLAGS_time_profile_summary_file);
CHECK(summary_ostream.is_open()) << "Open " << FLAGS_time_profile_summary_file
<< " failed";
summary_ostream.write(s_repr.c_str(), s_repr.size());
summary_ostream.close();
}
} // namespace profile } // namespace profile
} // namespace lite } // namespace lite
} // namespace paddle } // namespace paddle
...@@ -18,10 +18,13 @@ ...@@ -18,10 +18,13 @@
* of each kernel. * of each kernel.
*/ */
#pragma once #pragma once
#include <gflags/gflags.h>
#include <time.h> #include <time.h>
#include <algorithm> #include <algorithm>
#include <chrono> // NOLINT #include <chrono> // NOLINT
#include <fstream>
#include <limits> #include <limits>
#include <map>
#include <memory> #include <memory>
#include <string> #include <string>
#include <vector> #include <vector>
...@@ -33,35 +36,48 @@ namespace paddle { ...@@ -33,35 +36,48 @@ namespace paddle {
namespace lite { namespace lite {
namespace profile { namespace profile {
struct TimerInfo {
uint64_t total_{};
uint64_t count_{};
uint32_t max_{std::numeric_limits<uint32_t>::min()};
uint32_t min_{std::numeric_limits<uint32_t>::max()};
uint64_t timer_{};
double ave() const { return total_ * 1. / count_; }
double max() const { return max_; }
double min() const { return min_; }
uint64_t total() const { return total_; }
uint64_t count() const { return count_; }
};
/* Base class of all the profile records */ /* Base class of all the profile records */
template <typename ChildT> template <typename ChildT>
class TimerBase { class TimerBase {
public: public:
void Start() { self()->Start(); } void Start(const std::string& key) { self()->Start(key); }
void Stop() { self()->Stop(); } void Stop(const std::string& key) { self()->Stop(key); }
void Log(uint32_t x) { return self()->Log(x); } void Log(TimerInfo* timer_info, uint32_t x) {
return self()->Log(timer_info, x);
}
std::string basic_repr() const { return const_self()->basic_repr(); } std::string basic_repr() const { return const_self()->basic_repr(); }
void SetId(int id) { self()->SetId(id); } void SetId(int id) { self()->SetId(id); }
void SetKey(const std::string &key) { self()->SetKey(key); } void SetKey(const std::string& key) { self()->SetKey(key); }
int id() const { return const_self()->id(); } int id() const { return const_self()->id(); }
protected: protected:
ChildT *self() { return reinterpret_cast<ChildT *>(this); } ChildT* self() { return reinterpret_cast<ChildT*>(this); }
const ChildT *const_self() const { const ChildT* const_self() const {
return reinterpret_cast<const ChildT *>(this); return reinterpret_cast<const ChildT*>(this);
} }
}; };
class BasicTimer : TimerBase<BasicTimer> { class BasicTimer : TimerBase<BasicTimer> {
uint64_t total_{};
uint64_t count_{};
uint32_t max_{std::numeric_limits<uint32_t>::min()};
uint32_t min_{std::numeric_limits<uint32_t>::max()};
int id_{-1}; int id_{-1};
std::string key_; std::string key_;
uint64_t timer_{}; std::map<std::string, TimerInfo> timer_infos_;
std::map<std::string, std::string> custom_infos_;
// TODO(Superjomn) make static // TODO(Superjomn) make static
static const int name_w; static const int name_w;
...@@ -69,68 +85,109 @@ class BasicTimer : TimerBase<BasicTimer> { ...@@ -69,68 +85,109 @@ class BasicTimer : TimerBase<BasicTimer> {
public: public:
BasicTimer() = default; BasicTimer() = default;
BasicTimer(int id, const std::string &key) : id_(id), key_(key) {} BasicTimer(int id, const std::string& key) : id_(id), key_(key) {}
void SetId(int id) { id_ = id; } void SetId(int id) { id_ = id; }
void SetKey(const std::string &key) { key_ = key; } void SetKey(const std::string& key) { key_ = key; }
void Start() { void Start(const std::string& timer_key) {
timer_ = static_cast<uint64_t>( TimerInfo& timer_info = timer_infos_[timer_key];
timer_info.timer_ = static_cast<uint64_t>(
std::chrono::duration_cast<std::chrono::microseconds>( std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::system_clock::now().time_since_epoch()) std::chrono::system_clock::now().time_since_epoch())
.count()); .count());
} }
void Stop() { void Stop(const std::string& timer_key) {
if (timer_infos_.find(timer_key) == timer_infos_.end()) {
LOG(FATAL) << "Error: Can't found timer key [" << timer_key << "] for "
<< key_;
}
TimerInfo& timer_info = timer_infos_[timer_key];
auto duration = static_cast< auto duration = static_cast<
uint64_t>( // timer unit: microsecond, 1second = 1e6 microsecond uint64_t>( // timer unit: microsecond, 1second = 1e6 microsecond
std::chrono::duration_cast<std::chrono::microseconds>( std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::system_clock::now().time_since_epoch()) std::chrono::system_clock::now().time_since_epoch())
.count() - .count() -
timer_); timer_info.timer_);
Log(duration); Log(&timer_info, duration);
}
void SetCustomInfo(const std::string& key, const std::string& value) {
if (custom_infos_.find(key) != custom_infos_.end()) {
LOG(FATAL) << "Error: Custom Info for key [" << key
<< "] can't be overwritten";
}
custom_infos_[key] = value;
}
std::string GetCustomInfo(const std::string& key) const {
auto iter = custom_infos_.find(key);
if (iter == custom_infos_.end()) {
LOG(FATAL) << "Error: Custom Info for key [" << key << "] can't be found";
}
return iter->second;
} }
int count() const { return count_; } const TimerInfo& GetTimerInfo(const std::string& key) const {
auto iter = timer_infos_.find(key);
if (iter == timer_infos_.end()) {
LOG(FATAL) << "Error: Timer Info for key [" << key << "] can't be found";
}
return iter->second;
}
void Log(uint32_t timespan) { void Log(TimerInfo* timer_info, uint32_t timespan) {
total_ += timespan; CHECK(timer_info);
max_ = std::max(max_, timespan); timer_info->total_ += timespan;
min_ = std::min(min_, timespan); timer_info->max_ = std::max(timer_info->max_, timespan);
count_++; timer_info->min_ = std::min(timer_info->min_, timespan);
timer_info->count_++;
} }
static std::string basic_repr_header() { static std::string basic_repr_header() {
STL::stringstream ss; STL::stringstream ss;
ss << std::setw(name_w) << "kernel" // // clang-format off
<< std::setw(data_w) << "average" // ss << "op" << "\t"
<< std::setw(data_w) << "min" // << "kernel" << "\t"
<< std::setw(data_w) << "max" // << "k_average" << "\t"
<< std::setw(data_w) << "count"; << "k_min" << "\t"
<< "k_max" << "\t"
<< "i_average" << "\t"
<< "i_min" << "\t"
<< "i_max" << "\t"
<< "count" << "\t"
<< "op_info";
// clang-format on
return ss.str(); return ss.str();
} }
std::string basic_repr() const { std::string basic_repr() const {
auto& kernel_timer_info = GetTimerInfo("kernel");
auto& inst_timer_info = GetTimerInfo("instruction");
STL::stringstream ss; STL::stringstream ss;
ss << std::setw(name_w) << key() // // clang-format off
<< std::setw(data_w) << ave() // ss << GetCustomInfo("op_type") << "\t"
<< std::setw(data_w) << min() // << key() << "\t"
<< std::setw(data_w) << max() // << kernel_timer_info.ave() << "\t"
<< std::setw(data_w) << count_; << kernel_timer_info.min() << "\t"
<< kernel_timer_info.max() << "\t"
<< inst_timer_info.ave() << "\t"
<< inst_timer_info.min() << "\t"
<< inst_timer_info.max() << "\t"
<< inst_timer_info.count() << "\t"
<< GetCustomInfo("op_info");
// clang-format on
return ss.str(); return ss.str();
} }
const std::string &key() const { return key_; } const std::string& key() const { return key_; }
int id() const { int id() const {
CHECK_GE(id_, 0) << "id is not inited"; CHECK_GE(id_, 0) << "id is not inited";
return id_; return id_;
} }
double ave() const { return total_ * 1. / count_; }
double max() const { return max_; }
double min() const { return min_; }
// BasicRecord(const BasicRecord &) = delete; // BasicRecord(const BasicRecord &) = delete;
void operator=(const BasicTimer &) = delete; void operator=(const BasicTimer&) = delete;
}; };
/* /*
...@@ -139,28 +196,28 @@ class BasicTimer : TimerBase<BasicTimer> { ...@@ -139,28 +196,28 @@ class BasicTimer : TimerBase<BasicTimer> {
template <typename TimerT> template <typename TimerT>
class BasicProfiler { class BasicProfiler {
public: public:
explicit BasicProfiler(const std::string &name) : name_(name) {} explicit BasicProfiler(const std::string& name) : name_(name) {}
using record_t = TimerT; using record_t = TimerT;
static BasicProfiler &Global() { static BasicProfiler& Global() {
static std::unique_ptr<BasicProfiler> x(new BasicProfiler("[global]")); static std::unique_ptr<BasicProfiler> x(new BasicProfiler("[global]"));
return *x; return *x;
} }
record_t &NewRcd(const std::string &key) { record_t& NewRcd(const std::string& key) {
records_.emplace_back(); records_.emplace_back();
records_.back().SetId(records_.size() - 1); records_.back().SetId(records_.size() - 1);
records_.back().SetKey(key); records_.back().SetKey(key);
return records_.back(); return records_.back();
} }
const record_t &record(int id) { const record_t& record(int id) {
CHECK_LT(id, records_.size()); CHECK_LT(id, records_.size());
CHECK_GE(id, 0); CHECK_GE(id, 0);
return records_[id]; return records_[id];
} }
record_t *mutable_record(int id) { record_t* mutable_record(int id) {
CHECK_GE(id, 0); CHECK_GE(id, 0);
CHECK_LT(static_cast<size_t>(id), records_.size()); CHECK_LT(static_cast<size_t>(id), records_.size());
return &records_[id]; return &records_[id];
...@@ -168,33 +225,92 @@ class BasicProfiler { ...@@ -168,33 +225,92 @@ class BasicProfiler {
std::string basic_repr() const { std::string basic_repr() const {
STL::stringstream ss; STL::stringstream ss;
for (const auto &rcd : records_) { for (const auto& rcd : records_) {
ss << rcd.basic_repr() << "\n"; ss << rcd.basic_repr() << "\n";
} }
return ss.str(); return ss.str();
} }
~BasicProfiler() { std::string summary_repr_header() const {
LOG(INFO) << "Profile dumps:"; STL::stringstream ss;
LOG(INFO) << "\n" + BasicTimer::basic_repr_header() + "\n" + basic_repr(); // clang-format off
ss << "op" << "\t"
<< "average" << "\t"
<< "min" << "\t"
<< "max" << "\t"
<< "op_time" << "\t"
<< "total_time" << "\t"
<< "precent" << "\t"
<< "count";
// clang-format on
return ss.str();
} }
std::string summary_repr() const {
std::map<std::string, TimerInfo> op_summary;
uint64_t total{0};
for (const auto& rcd : records_) {
// We use kernel run time here
auto kernel_timer = rcd.GetTimerInfo("kernel");
auto op_type = rcd.GetCustomInfo("op_type");
auto& op_timer = op_summary[op_type];
total += kernel_timer.total_;
op_timer.total_ += kernel_timer.total_;
op_timer.max_ = std::max(kernel_timer.max_, op_timer.max_);
op_timer.min_ = std::min(kernel_timer.min_, op_timer.min_);
op_timer.count_ += kernel_timer.count_;
}
STL::stringstream ss;
for (auto& iter : op_summary) {
auto& op_timer = iter.second;
// clang-format off
ss << iter.first << "\t"
<< op_timer.ave() << "\t"
<< op_timer.min() << "\t"
<< op_timer.max() << "\t"
<< op_timer.total() << "\t"
<< total << "\t"
<< (op_timer.total() * 1. / total * 100) << "%\t"
<< op_timer.count() << "\t"
<< "\n";
// clang-format on
}
return ss.str();
}
~BasicProfiler();
private: private:
std::string name_; std::string name_;
std::vector<record_t> records_; std::vector<record_t> records_;
}; };
struct ProfileBlock { struct ProfileBlock {
explicit ProfileBlock(int id) : id_(id) { explicit ProfileBlock(int id, const std::string& key) : id_(id), key_(key) {
BasicProfiler<BasicTimer>::Global().mutable_record(id_)->Start(); BasicProfiler<BasicTimer>::Global().mutable_record(id_)->Start(key_);
}
void Record() {
if (has_recorded_) {
LOG(FATAL) << "You can only call Record() once";
}
BasicProfiler<BasicTimer>::Global().mutable_record(id_)->Stop(key_);
has_recorded_ = true;
} }
~ProfileBlock() { ~ProfileBlock() {
BasicProfiler<BasicTimer>::Global().mutable_record(id_)->Stop(); if (!has_recorded_) {
BasicProfiler<BasicTimer>::Global().mutable_record(id_)->Stop(key_);
}
} }
private: private:
int id_{}; int id_{};
bool has_recorded_{false};
std::string key_{};
}; };
#define LITE_PROFILE_ONE(key__) \ #define LITE_PROFILE_ONE(key__) \
...@@ -203,7 +319,8 @@ struct ProfileBlock { ...@@ -203,7 +319,8 @@ struct ProfileBlock {
::paddle::lite::profile::BasicTimer>::Global() \ ::paddle::lite::profile::BasicTimer>::Global() \
.NewRcd(#key__) \ .NewRcd(#key__) \
.id(); \ .id(); \
::paddle::lite::profile::ProfileBlock key__##profiler__(key__##__profiler_id); ::paddle::lite::profile::ProfileBlock key__##profiler__( \
key__##__profiler_id, #key__);
} // namespace profile } // namespace profile
} // namespace lite } // namespace lite
......
...@@ -27,18 +27,21 @@ TEST(basic_record, init) { ...@@ -27,18 +27,21 @@ TEST(basic_record, init) {
timer.SetKey("hello"); timer.SetKey("hello");
} }
TEST(basic_profile, init) { TEST(basic_profile, real_latency) {
auto& rcd = BasicProfiler<BasicTimer>::Global().NewRcd("fc"); auto profile_id = profile::BasicProfiler<profile::BasicTimer>::Global()
for (int i = 11; i < 100; i++) { .NewRcd("test0")
rcd.Log(i); .id();
} auto& profiler =
*BasicProfiler<profile::BasicTimer>::Global().mutable_record(profile_id);
// Set op info
profiler.SetCustomInfo("op_type", "fc");
profiler.SetCustomInfo("op_info", "size:5x6");
LOG(INFO) << BasicProfiler<BasicTimer>::Global().basic_repr(); profile::ProfileBlock x(profile_id, "instruction");
} std::this_thread::sleep_for(std::chrono::milliseconds(1000));
TEST(basic_profile, real_latency) { profile::ProfileBlock y(profile_id, "kernel");
LITE_PROFILE_ONE(test0); std::this_thread::sleep_for(std::chrono::milliseconds(500));
std::this_thread::sleep_for(std::chrono::milliseconds(1200));
} }
} // namespace profile } // namespace profile
......
...@@ -182,11 +182,11 @@ void Program::PrepareWorkspace(const cpp::ProgramDesc& prog) { ...@@ -182,11 +182,11 @@ void Program::PrepareWorkspace(const cpp::ProgramDesc& prog) {
} }
void Instruction::Run() { void Instruction::Run() {
#ifdef LITE_WITH_PROFILE
profile::ProfileBlock x(profile_id_);
#endif // LITE_WITH_PROFILE
CHECK(op_) << "op null"; CHECK(op_) << "op null";
CHECK(kernel_) << "kernel null"; CHECK(kernel_) << "kernel null";
#ifdef LITE_WITH_PROFILE
profile::ProfileBlock x(profile_id_, "instruction");
#endif // LITE_WITH_PROFILE
if (first_epoch_) { if (first_epoch_) {
first_epoch_ = false; first_epoch_ = false;
CHECK(op_->CheckShape()); CHECK(op_->CheckShape());
......
...@@ -92,6 +92,13 @@ struct Instruction { ...@@ -92,6 +92,13 @@ struct Instruction {
profile_id_ = profile::BasicProfiler<profile::BasicTimer>::Global() profile_id_ = profile::BasicProfiler<profile::BasicTimer>::Global()
.NewRcd(kernel_->SerializedKernelType()) .NewRcd(kernel_->SerializedKernelType())
.id(); .id();
kernel_->SetProfileID(profile_id_);
// Set profile custom info
auto& profiler =
*profile::BasicProfiler<profile::BasicTimer>::Global().mutable_record(
profile_id_);
profiler.SetCustomInfo("op_type", op_->Type());
profiler.SetCustomInfo("op_info", op_->SerializedOpInfo());
#endif // LITE_WITH_PROFILE #endif // LITE_WITH_PROFILE
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册