From aa6623b8e3041b0f9b5ebd67ef1eebeba39b5f8b Mon Sep 17 00:00:00 2001 From: sangoly Date: Fri, 27 Sep 2019 12:44:15 +0800 Subject: [PATCH] [Profile] add kernel runtime profile && add op runtime summary test=develop (#2136) --- lite/core/CMakeLists.txt | 4 +- lite/core/kernel.h | 17 ++ lite/core/op_lite.h | 2 + lite/core/profile/CMakeLists.txt | 2 +- lite/core/profile/basic_profiler.cc | 35 ++++ lite/core/profile/basic_profiler.h | 237 +++++++++++++++++------ lite/core/profile/basic_profiler_test.cc | 23 ++- lite/core/program.cc | 6 +- lite/core/program.h | 7 + 9 files changed, 258 insertions(+), 75 deletions(-) diff --git a/lite/core/CMakeLists.txt b/lite/core/CMakeLists.txt index 0983b86d3c..c46411ecde 100644 --- a/lite/core/CMakeLists.txt +++ b/lite/core/CMakeLists.txt @@ -92,7 +92,9 @@ add_custom_target(kernel_list_h DEPENDS kernels.h) add_custom_target(all_kernel_faked_cc DEPENDS all_kernel_faked.cc) #----------------------------------------------- NOT CHANGE ----------------------------------------------- -lite_cc_library(kernel SRCS kernel.cc DEPS context type_system target_wrapper any op_params tensor +lite_cc_library(kernel SRCS kernel.cc + DEPS context type_system target_wrapper any op_params tensor + PROFILE_DEPS basic_profiler ) lite_cc_library(op SRCS op_lite.cc DEPS scope op_registry target_wrapper kernel cpp_op_desc tensor diff --git a/lite/core/kernel.h b/lite/core/kernel.h index b0a4fab312..113ce967d5 100644 --- a/lite/core/kernel.h +++ b/lite/core/kernel.h @@ -30,6 +30,10 @@ #include "lite/utils/all.h" #include "lite/utils/replace_stl/stream.h" +#ifdef LITE_WITH_PROFILE +#include "lite/core/profile/basic_profiler.h" +#endif // LITE_WITH_PROFILE + namespace paddle { namespace lite { @@ -53,6 +57,10 @@ class KernelBase { /// Run the kernel. Before Run, both the param_ and context_ should be valid. virtual void Run() = 0; +#ifdef LITE_WITH_PROFILE + void SetProfileID(uint32_t id) { profile_id_ = id; } +#endif + void Launch() { /// First run, init kernel, do weights transform once if (is_first_epoch_) { @@ -72,6 +80,11 @@ class KernelBase { #if defined(LITE_WITH_CUDA) WorkSpace::Global_CUDA().AllocReset(); #endif + +#ifdef LITE_WITH_PROFILE + CHECK_GE(profile_id_, 0) << "Must set profile id first"; + profile::ProfileBlock x(profile_id_, "kernel"); +#endif Run(); } @@ -157,6 +170,10 @@ class KernelBase { // is the unique ID for the kernel. std::string alias_{}; bool is_first_epoch_{true}; + +#ifdef LITE_WITH_PROFILE + int profile_id_{-1}; +#endif }; // Light-weight kernel implementation. diff --git a/lite/core/op_lite.h b/lite/core/op_lite.h index f843ef6f2b..abba184830 100644 --- a/lite/core/op_lite.h +++ b/lite/core/op_lite.h @@ -80,6 +80,8 @@ class OpLite : public Registry { // Human-readable information. virtual std::string DebugString() const = 0; + virtual std::string SerializedOpInfo() const { return "N/A"; } + const Place &kernel_place() const { return kernel_place_; } // Create all the kernels for the valid targets. diff --git a/lite/core/profile/CMakeLists.txt b/lite/core/profile/CMakeLists.txt index de8a60bdc2..54a2390244 100644 --- a/lite/core/profile/CMakeLists.txt +++ b/lite/core/profile/CMakeLists.txt @@ -2,7 +2,7 @@ if (NOT LITE_WITH_PROFILE) return() endif() -lite_cc_library(basic_profiler SRCS basic_profiler.cc) +lite_cc_library(basic_profiler SRCS basic_profiler.cc DEPS gflags) lite_cc_test(test_basic_profiler SRCS basic_profiler_test.cc DEPS basic_profiler) diff --git a/lite/core/profile/basic_profiler.cc b/lite/core/profile/basic_profiler.cc index 031b86beb6..5f2ffad787 100644 --- a/lite/core/profile/basic_profiler.cc +++ b/lite/core/profile/basic_profiler.cc @@ -14,6 +14,14 @@ #include "lite/core/profile/basic_profiler.h" +DEFINE_string(time_profile_file, + "time_profile.txt", + "Lite time profile information dump file"); + +DEFINE_string(time_profile_summary_file, + "time_profile_summary.txt", + "Lite time profile summary information dump file"); + namespace paddle { namespace lite { namespace profile { @@ -21,6 +29,33 @@ namespace profile { const int BasicTimer::data_w = 10; const int BasicTimer::name_w = 15; +template class BasicProfiler; + +template +BasicProfiler::~BasicProfiler() { + LOG(INFO) << "Basic Profile dumps:"; + auto b_repr = TimerT::basic_repr_header() + "\n" + basic_repr(); + LOG(INFO) << "\n" + b_repr; + + // Dump to file + std::ofstream basic_ostream(FLAGS_time_profile_file); + CHECK(basic_ostream.is_open()) << "Open " << FLAGS_time_profile_file + << " failed"; + basic_ostream.write(b_repr.c_str(), b_repr.size()); + basic_ostream.close(); + + LOG(INFO) << "Summary Profile dumps:"; + auto s_repr = summary_repr_header() + "\n" + summary_repr(); + LOG(INFO) << "\n" + s_repr; + + // Dump to file + std::ofstream summary_ostream(FLAGS_time_profile_summary_file); + CHECK(summary_ostream.is_open()) << "Open " << FLAGS_time_profile_summary_file + << " failed"; + summary_ostream.write(s_repr.c_str(), s_repr.size()); + summary_ostream.close(); +} + } // namespace profile } // namespace lite } // namespace paddle diff --git a/lite/core/profile/basic_profiler.h b/lite/core/profile/basic_profiler.h index f55a5764a0..3ae6e1f159 100644 --- a/lite/core/profile/basic_profiler.h +++ b/lite/core/profile/basic_profiler.h @@ -18,10 +18,13 @@ * of each kernel. */ #pragma once +#include #include #include #include // NOLINT +#include #include +#include #include #include #include @@ -33,35 +36,48 @@ namespace paddle { namespace lite { namespace profile { +struct TimerInfo { + uint64_t total_{}; + uint64_t count_{}; + uint32_t max_{std::numeric_limits::min()}; + uint32_t min_{std::numeric_limits::max()}; + uint64_t timer_{}; + + double ave() const { return total_ * 1. / count_; } + double max() const { return max_; } + double min() const { return min_; } + uint64_t total() const { return total_; } + uint64_t count() const { return count_; } +}; + /* Base class of all the profile records */ template class TimerBase { public: - void Start() { self()->Start(); } - void Stop() { self()->Stop(); } - void Log(uint32_t x) { return self()->Log(x); } + void Start(const std::string& key) { self()->Start(key); } + void Stop(const std::string& key) { self()->Stop(key); } + void Log(TimerInfo* timer_info, uint32_t x) { + return self()->Log(timer_info, x); + } std::string basic_repr() const { return const_self()->basic_repr(); } void SetId(int id) { self()->SetId(id); } - void SetKey(const std::string &key) { self()->SetKey(key); } + void SetKey(const std::string& key) { self()->SetKey(key); } int id() const { return const_self()->id(); } protected: - ChildT *self() { return reinterpret_cast(this); } - const ChildT *const_self() const { - return reinterpret_cast(this); + ChildT* self() { return reinterpret_cast(this); } + const ChildT* const_self() const { + return reinterpret_cast(this); } }; class BasicTimer : TimerBase { - uint64_t total_{}; - uint64_t count_{}; - uint32_t max_{std::numeric_limits::min()}; - uint32_t min_{std::numeric_limits::max()}; int id_{-1}; std::string key_; - uint64_t timer_{}; + std::map timer_infos_; + std::map custom_infos_; // TODO(Superjomn) make static static const int name_w; @@ -69,68 +85,109 @@ class BasicTimer : TimerBase { public: BasicTimer() = default; - BasicTimer(int id, const std::string &key) : id_(id), key_(key) {} + BasicTimer(int id, const std::string& key) : id_(id), key_(key) {} void SetId(int id) { id_ = id; } - void SetKey(const std::string &key) { key_ = key; } - void Start() { - timer_ = static_cast( + void SetKey(const std::string& key) { key_ = key; } + void Start(const std::string& timer_key) { + TimerInfo& timer_info = timer_infos_[timer_key]; + timer_info.timer_ = static_cast( std::chrono::duration_cast( std::chrono::system_clock::now().time_since_epoch()) .count()); } - void Stop() { + void Stop(const std::string& timer_key) { + if (timer_infos_.find(timer_key) == timer_infos_.end()) { + LOG(FATAL) << "Error: Can't found timer key [" << timer_key << "] for " + << key_; + } + TimerInfo& timer_info = timer_infos_[timer_key]; auto duration = static_cast< uint64_t>( // timer unit: microsecond, 1second = 1e6 microsecond std::chrono::duration_cast( std::chrono::system_clock::now().time_since_epoch()) .count() - - timer_); - Log(duration); + timer_info.timer_); + Log(&timer_info, duration); + } + + void SetCustomInfo(const std::string& key, const std::string& value) { + if (custom_infos_.find(key) != custom_infos_.end()) { + LOG(FATAL) << "Error: Custom Info for key [" << key + << "] can't be overwritten"; + } + custom_infos_[key] = value; + } + + std::string GetCustomInfo(const std::string& key) const { + auto iter = custom_infos_.find(key); + if (iter == custom_infos_.end()) { + LOG(FATAL) << "Error: Custom Info for key [" << key << "] can't be found"; + } + return iter->second; } - int count() const { return count_; } + const TimerInfo& GetTimerInfo(const std::string& key) const { + auto iter = timer_infos_.find(key); + if (iter == timer_infos_.end()) { + LOG(FATAL) << "Error: Timer Info for key [" << key << "] can't be found"; + } + return iter->second; + } - void Log(uint32_t timespan) { - total_ += timespan; - max_ = std::max(max_, timespan); - min_ = std::min(min_, timespan); - count_++; + void Log(TimerInfo* timer_info, uint32_t timespan) { + CHECK(timer_info); + timer_info->total_ += timespan; + timer_info->max_ = std::max(timer_info->max_, timespan); + timer_info->min_ = std::min(timer_info->min_, timespan); + timer_info->count_++; } static std::string basic_repr_header() { STL::stringstream ss; - ss << std::setw(name_w) << "kernel" // - << std::setw(data_w) << "average" // - << std::setw(data_w) << "min" // - << std::setw(data_w) << "max" // - << std::setw(data_w) << "count"; + // clang-format off + ss << "op" << "\t" + << "kernel" << "\t" + << "k_average" << "\t" + << "k_min" << "\t" + << "k_max" << "\t" + << "i_average" << "\t" + << "i_min" << "\t" + << "i_max" << "\t" + << "count" << "\t" + << "op_info"; + // clang-format on return ss.str(); } std::string basic_repr() const { + auto& kernel_timer_info = GetTimerInfo("kernel"); + auto& inst_timer_info = GetTimerInfo("instruction"); STL::stringstream ss; - ss << std::setw(name_w) << key() // - << std::setw(data_w) << ave() // - << std::setw(data_w) << min() // - << std::setw(data_w) << max() // - << std::setw(data_w) << count_; + // clang-format off + ss << GetCustomInfo("op_type") << "\t" + << key() << "\t" + << kernel_timer_info.ave() << "\t" + << kernel_timer_info.min() << "\t" + << kernel_timer_info.max() << "\t" + << inst_timer_info.ave() << "\t" + << inst_timer_info.min() << "\t" + << inst_timer_info.max() << "\t" + << inst_timer_info.count() << "\t" + << GetCustomInfo("op_info"); + // clang-format on return ss.str(); } - const std::string &key() const { return key_; } + const std::string& key() const { return key_; } int id() const { CHECK_GE(id_, 0) << "id is not inited"; return id_; } - double ave() const { return total_ * 1. / count_; } - double max() const { return max_; } - double min() const { return min_; } - // BasicRecord(const BasicRecord &) = delete; - void operator=(const BasicTimer &) = delete; + void operator=(const BasicTimer&) = delete; }; /* @@ -139,28 +196,28 @@ class BasicTimer : TimerBase { template class BasicProfiler { public: - explicit BasicProfiler(const std::string &name) : name_(name) {} + explicit BasicProfiler(const std::string& name) : name_(name) {} using record_t = TimerT; - static BasicProfiler &Global() { + static BasicProfiler& Global() { static std::unique_ptr x(new BasicProfiler("[global]")); return *x; } - record_t &NewRcd(const std::string &key) { + record_t& NewRcd(const std::string& key) { records_.emplace_back(); records_.back().SetId(records_.size() - 1); records_.back().SetKey(key); return records_.back(); } - const record_t &record(int id) { + const record_t& record(int id) { CHECK_LT(id, records_.size()); CHECK_GE(id, 0); return records_[id]; } - record_t *mutable_record(int id) { + record_t* mutable_record(int id) { CHECK_GE(id, 0); CHECK_LT(static_cast(id), records_.size()); return &records_[id]; @@ -168,42 +225,102 @@ class BasicProfiler { std::string basic_repr() const { STL::stringstream ss; - for (const auto &rcd : records_) { + for (const auto& rcd : records_) { ss << rcd.basic_repr() << "\n"; } return ss.str(); } - ~BasicProfiler() { - LOG(INFO) << "Profile dumps:"; - LOG(INFO) << "\n" + BasicTimer::basic_repr_header() + "\n" + basic_repr(); + std::string summary_repr_header() const { + STL::stringstream ss; + // clang-format off + ss << "op" << "\t" + << "average" << "\t" + << "min" << "\t" + << "max" << "\t" + << "op_time" << "\t" + << "total_time" << "\t" + << "precent" << "\t" + << "count"; + // clang-format on + return ss.str(); } + std::string summary_repr() const { + std::map op_summary; + uint64_t total{0}; + + for (const auto& rcd : records_) { + // We use kernel run time here + auto kernel_timer = rcd.GetTimerInfo("kernel"); + auto op_type = rcd.GetCustomInfo("op_type"); + auto& op_timer = op_summary[op_type]; + + total += kernel_timer.total_; + op_timer.total_ += kernel_timer.total_; + op_timer.max_ = std::max(kernel_timer.max_, op_timer.max_); + op_timer.min_ = std::min(kernel_timer.min_, op_timer.min_); + op_timer.count_ += kernel_timer.count_; + } + + STL::stringstream ss; + for (auto& iter : op_summary) { + auto& op_timer = iter.second; + // clang-format off + ss << iter.first << "\t" + << op_timer.ave() << "\t" + << op_timer.min() << "\t" + << op_timer.max() << "\t" + << op_timer.total() << "\t" + << total << "\t" + << (op_timer.total() * 1. / total * 100) << "%\t" + << op_timer.count() << "\t" + << "\n"; + // clang-format on + } + return ss.str(); + } + + ~BasicProfiler(); + private: std::string name_; std::vector records_; }; struct ProfileBlock { - explicit ProfileBlock(int id) : id_(id) { - BasicProfiler::Global().mutable_record(id_)->Start(); + explicit ProfileBlock(int id, const std::string& key) : id_(id), key_(key) { + BasicProfiler::Global().mutable_record(id_)->Start(key_); + } + + void Record() { + if (has_recorded_) { + LOG(FATAL) << "You can only call Record() once"; + } + BasicProfiler::Global().mutable_record(id_)->Stop(key_); + has_recorded_ = true; } ~ProfileBlock() { - BasicProfiler::Global().mutable_record(id_)->Stop(); + if (!has_recorded_) { + BasicProfiler::Global().mutable_record(id_)->Stop(key_); + } } private: int id_{}; + bool has_recorded_{false}; + std::string key_{}; }; -#define LITE_PROFILE_ONE(key__) \ - static int key__##__profiler_id = \ - ::paddle::lite::profile::BasicProfiler< \ - ::paddle::lite::profile::BasicTimer>::Global() \ - .NewRcd(#key__) \ - .id(); \ - ::paddle::lite::profile::ProfileBlock key__##profiler__(key__##__profiler_id); +#define LITE_PROFILE_ONE(key__) \ + static int key__##__profiler_id = \ + ::paddle::lite::profile::BasicProfiler< \ + ::paddle::lite::profile::BasicTimer>::Global() \ + .NewRcd(#key__) \ + .id(); \ + ::paddle::lite::profile::ProfileBlock key__##profiler__( \ + key__##__profiler_id, #key__); } // namespace profile } // namespace lite diff --git a/lite/core/profile/basic_profiler_test.cc b/lite/core/profile/basic_profiler_test.cc index 928fdd61cb..e61d3383d0 100644 --- a/lite/core/profile/basic_profiler_test.cc +++ b/lite/core/profile/basic_profiler_test.cc @@ -27,18 +27,21 @@ TEST(basic_record, init) { timer.SetKey("hello"); } -TEST(basic_profile, init) { - auto& rcd = BasicProfiler::Global().NewRcd("fc"); - for (int i = 11; i < 100; i++) { - rcd.Log(i); - } +TEST(basic_profile, real_latency) { + auto profile_id = profile::BasicProfiler::Global() + .NewRcd("test0") + .id(); + auto& profiler = + *BasicProfiler::Global().mutable_record(profile_id); + // Set op info + profiler.SetCustomInfo("op_type", "fc"); + profiler.SetCustomInfo("op_info", "size:5x6"); - LOG(INFO) << BasicProfiler::Global().basic_repr(); -} + profile::ProfileBlock x(profile_id, "instruction"); + std::this_thread::sleep_for(std::chrono::milliseconds(1000)); -TEST(basic_profile, real_latency) { - LITE_PROFILE_ONE(test0); - std::this_thread::sleep_for(std::chrono::milliseconds(1200)); + profile::ProfileBlock y(profile_id, "kernel"); + std::this_thread::sleep_for(std::chrono::milliseconds(500)); } } // namespace profile diff --git a/lite/core/program.cc b/lite/core/program.cc index 2a1f748cb5..535272a69d 100644 --- a/lite/core/program.cc +++ b/lite/core/program.cc @@ -182,11 +182,11 @@ void Program::PrepareWorkspace(const cpp::ProgramDesc& prog) { } void Instruction::Run() { -#ifdef LITE_WITH_PROFILE - profile::ProfileBlock x(profile_id_); -#endif // LITE_WITH_PROFILE CHECK(op_) << "op null"; CHECK(kernel_) << "kernel null"; +#ifdef LITE_WITH_PROFILE + profile::ProfileBlock x(profile_id_, "instruction"); +#endif // LITE_WITH_PROFILE if (first_epoch_) { first_epoch_ = false; CHECK(op_->CheckShape()); diff --git a/lite/core/program.h b/lite/core/program.h index 1b3c036db5..f0f5304c08 100644 --- a/lite/core/program.h +++ b/lite/core/program.h @@ -92,6 +92,13 @@ struct Instruction { profile_id_ = profile::BasicProfiler::Global() .NewRcd(kernel_->SerializedKernelType()) .id(); + kernel_->SetProfileID(profile_id_); + // Set profile custom info + auto& profiler = + *profile::BasicProfiler::Global().mutable_record( + profile_id_); + profiler.SetCustomInfo("op_type", op_->Type()); + profiler.SetCustomInfo("op_info", op_->SerializedOpInfo()); #endif // LITE_WITH_PROFILE } -- GitLab