diff --git a/lite/core/kernel.h b/lite/core/kernel.h index 86193235a2984b15a33c2eeaff15865d9f126eeb..18a1243c11652afc181f13f0f5a497858a30885f 100644 --- a/lite/core/kernel.h +++ b/lite/core/kernel.h @@ -83,14 +83,11 @@ class KernelBase { #if defined(LITE_WITH_CUDA) WorkSpace::Global_CUDA().AllocReset(); #endif - #ifdef LITE_WITH_PROFILE - CHECK(profiler_) << "Profiler pointer of kernel can not be nullptr. " - "When LITE_WITH_PROFILE is defined, please set a " - "Profiler for Instruction."; - profiler_->StartTiming(profile_id_, ctx_.get()); + profiler_->StopTiming(profile::Type::kCreate, profile_id_, ctx_.get()); + profiler_->StartTiming(profile::Type::kDispatch, profile_id_, ctx_.get()); Run(); - profiler_->StopTiming(profile_id_, ctx_.get()); + profiler_->StopTiming(profile::Type::kDispatch, profile_id_, ctx_.get()); #else Run(); #endif diff --git a/lite/core/profile/profiler.cc b/lite/core/profile/profiler.cc index 78317f78ac6bf7024c1984c2127434d55b738ad6..f4d0e3c0afbe1f9df4e381a502e1800a3d58ba68 100644 --- a/lite/core/profile/profiler.cc +++ b/lite/core/profile/profiler.cc @@ -28,36 +28,55 @@ auto op_comp = [](const OpCharacter& c1, const OpCharacter& c2) { }; } -int Profiler::NewTimer(const OpCharacter& ch) { - StatisUnit unit; - unit.character = ch; +std::map TypeStr{ + {Type::kUnk, "Unknown"}, + {Type::kCreate, "Create"}, + {Type::kDispatch, "Dispatch"}, +}; + +StatisUnit::StatisUnit(const OpCharacter& ch) : character(ch) { + create_t.reset(new DeviceTimer()); if (ch.target == TargetType::kCUDA) { #ifdef LITE_WITH_CUDA - unit.timer.reset(new DeviceTimer()); + dispatch_t.reset(new DeviceTimer()); #else LOG(ERROR) << "The timer type specified as cuda is uninitialized, so the " "default x86 timer is used instead."; #endif } else { - unit.timer.reset(new DeviceTimer()); + dispatch_t.reset(new DeviceTimer()); } +} + +lite::profile::Timer* StatisUnit::Timer(Type type) { + if (type == Type::kCreate) { + return create_t.get(); + } else if (type == Type::kDispatch) { + return dispatch_t.get(); + } + LOG(FATAL) << "Timer cannot be returned for unknown platforms."; + return nullptr; +} + +int Profiler::NewTimer(const OpCharacter& ch) { + StatisUnit unit(ch); units_.push_back(std::move(unit)); return units_.size() - 1; } -void Profiler::StartTiming(const int index, KernelContext* ctx) { +void Profiler::StartTiming(Type type, const int index, KernelContext* ctx) { CHECK_LT(index, units_.size()) << "The timer index in the profiler is out of range."; - units_[index].timer->Start(ctx); + units_[index].Timer(type)->Start(ctx); } -float Profiler::StopTiming(const int index, KernelContext* ctx) { +float Profiler::StopTiming(Type type, const int index, KernelContext* ctx) { CHECK_LT(index, units_.size()) << "The timer index in the profiler is out of range."; - return units_[index].timer->Stop(ctx); + return units_[index].Timer(type)->Stop(ctx); } -std::string Profiler::Summary(bool concise, size_t w) { +std::string Profiler::Summary(Type type, bool concise, size_t w) { using std::setw; using std::left; using std::fixed; @@ -65,12 +84,14 @@ std::string Profiler::Summary(bool concise, size_t w) { std::string title; // Title. if (concise) { - ss << "Timing cycle = " << units_.front().timer->LapTimes().Size() + ss << "Timing cycle = " << units_.front().Timer(type)->LapTimes().Size() << std::endl; - ss << "===== Concise Profiler Summary: " << name_ << ", Exclude " << w + ss << "===== Concise " << TypeStr.find(type)->second + << " Profiler Summary: " << name_ << ", Exclude " << w << " warm-ups =====" << std::endl; } else { - ss << "===== Detailed Profiler Summary: " << name_ << ", Exclude " << w + ss << "===== Detailed " << TypeStr.find(type)->second + << " Profiler Summary: " << name_ << ", Exclude " << w << " warm-ups =====" << std::endl; } ss << setw(25) << left << "Operator Type" @@ -84,16 +105,16 @@ std::string Profiler::Summary(bool concise, size_t w) { if (concise) { std::map summary(op_comp); for (auto& unit : units_) { - auto ch = summary.find(unit.character); + auto ch = summary.find(unit.Character()); if (ch != summary.end()) { - ch->second.avg += unit.timer->LapTimes().Avg(w); - ch->second.min += unit.timer->LapTimes().Min(w); - ch->second.max += unit.timer->LapTimes().Max(w); + ch->second.avg += unit.Timer(type)->LapTimes().Avg(w); + ch->second.min += unit.Timer(type)->LapTimes().Min(w); + ch->second.max += unit.Timer(type)->LapTimes().Max(w); } else { - TimeInfo info({unit.timer->LapTimes().Avg(w), - unit.timer->LapTimes().Min(w), - unit.timer->LapTimes().Max(w)}); - summary.insert({unit.character, info}); + TimeInfo info({unit.Timer(type)->LapTimes().Avg(w), + unit.Timer(type)->LapTimes().Min(w), + unit.Timer(type)->LapTimes().Max(w)}); + summary.insert({unit.Character(), info}); } } for (const auto& item : summary) { @@ -109,14 +130,15 @@ std::string Profiler::Summary(bool concise, size_t w) { } } else { for (auto& unit : units_) { + const auto& times = unit.Timer(type)->LapTimes(); // clang-format off - ss << setw(25) << left << fixed << unit.character.op_type \ - << " " << setw(40) << left << fixed << unit.character.kernel_name \ - << " " << setw(12) << left << fixed << unit.character.remark \ - << " " << setw(12) << left << fixed << unit.timer->LapTimes().Avg(w) \ - << " " << setw(12) << left << fixed << unit.timer->LapTimes().Min(w) \ - << " " << setw(12) << left << fixed << unit.timer->LapTimes().Max(w) \ - << " " << setw(12) << left << fixed << unit.timer->LapTimes().Last(w) \ + ss << setw(25) << left << fixed << unit.Character().op_type \ + << " " << setw(40) << left << fixed << unit.Character().kernel_name \ + << " " << setw(12) << left << fixed << unit.Character().remark \ + << " " << setw(12) << left << fixed << times.Avg(w) \ + << " " << setw(12) << left << fixed << times.Min(w) \ + << " " << setw(12) << left << fixed << times.Max(w) \ + << " " << setw(12) << left << fixed << times.Last(w) \ << std::endl; // clang-format on } diff --git a/lite/core/profile/profiler.h b/lite/core/profile/profiler.h index 4e9e9ae31c1a6d7f331eac2e77c4971986bd42a1..3933e5ba01ebcb20420494a955cbc0e202879f76 100644 --- a/lite/core/profile/profiler.h +++ b/lite/core/profile/profiler.h @@ -13,6 +13,7 @@ // limitations under the License. #pragma once +#include #include #include #include @@ -22,6 +23,14 @@ namespace paddle { namespace lite { namespace profile { +enum class Type { + kUnk = 0, + kCreate, + kDispatch, +}; + +extern std::map TypeStr; + struct TimeInfo { float avg; float min; @@ -35,8 +44,15 @@ struct OpCharacter { std::string remark{std::string("N/A")}; }; -struct StatisUnit { - std::unique_ptr timer; +class StatisUnit final { + public: + explicit StatisUnit(const OpCharacter& ch); + lite::profile::Timer* Timer(Type type); + const OpCharacter& Character() const { return character; } + + protected: + std::unique_ptr create_t; + std::unique_ptr dispatch_t; OpCharacter character; }; @@ -45,9 +61,9 @@ class Profiler final { Profiler() = default; explicit Profiler(const std::string& name) : name_(name) {} int NewTimer(const OpCharacter& ch); - void StartTiming(const int index, KernelContext* ctx); - float StopTiming(const int index, KernelContext* ctx); - std::string Summary(bool concise = true, size_t warm_up = 10); + void StartTiming(Type type, const int index, KernelContext* ctx); + float StopTiming(Type type, const int index, KernelContext* ctx); + std::string Summary(Type type, bool concise = true, size_t warm_up = 10); private: std::string name_{std::string("N/A")}; diff --git a/lite/core/profile/test_timer.cc b/lite/core/profile/test_timer.cc index 6f49698ef4a8f83e4192a16801566fdcbd7baf9a..3841f0151890d377a87f4f5d4b6d069ee75b560e 100644 --- a/lite/core/profile/test_timer.cc +++ b/lite/core/profile/test_timer.cc @@ -69,10 +69,10 @@ TEST(profiler, real_latency) { ch.op_type = "operator/1"; ch.kernel_name = "kernel/1"; int idx = profiler.NewTimer(ch); - profiler.StartTiming(idx, &ctx); + profiler.StartTiming(Type::kDispatch, idx, &ctx); std::this_thread::sleep_for(std::chrono::milliseconds(10)); - profiler.StopTiming(idx, &ctx); - std::cout << profiler.Summary(); + profiler.StopTiming(Type::kDispatch, idx, &ctx); + std::cout << profiler.Summary(Type::kDispatch); } #endif diff --git a/lite/core/program.cc b/lite/core/program.cc index 8dc8fb0dddc54d7d83b2368b31b5f30725469296..a26e97886f6b7cc6bd894093ba744613acf31419 100644 --- a/lite/core/program.cc +++ b/lite/core/program.cc @@ -147,7 +147,7 @@ void RuntimeProgram::Run() { #endif // LITE_WITH_PROFILE } #ifdef LITE_WITH_PROFILE - LOG(INFO) << "\n" << profiler_.Summary(false, 0); + LOG(INFO) << "\n" << profiler_.Summary(profile::Type::kDispatch, false, 0); #endif // LITE_WITH_PROFILE } @@ -252,8 +252,16 @@ void Program::PrepareWorkspace(const cpp::ProgramDesc& prog) { } void Instruction::Run() { +#ifdef LITE_WITH_PROFILE + CHECK(profiler_) << "Profiler pointer of kernel can not be nullptr. " + "When LITE_WITH_PROFILE is defined, please set a " + "Profiler for Instruction."; + profiler_->StartTiming( + profile::Type::kCreate, profile_id_, kernel_->mutable_context()); +#endif CHECK(op_) << "op null"; CHECK(kernel_) << "kernel null"; + if (first_epoch_) { first_epoch_ = false; CHECK(op_->CheckShape()); @@ -263,10 +271,7 @@ void Instruction::Run() { return; } - // VLOG(4) << "kernel launch"; op_->InferShape(); - // VLOG(4) << ">> Running kernel: " << op_->op_info()->Repr() << " on Target " - // << TargetToStr(kernel_->target()); kernel_->Launch(); has_run_ = true; } diff --git a/lite/core/program.h b/lite/core/program.h index 291252619b396f18576b935a0189f4ecdba7867f..e3f6642d3a41add4af531f64c6ee697c755274e6 100644 --- a/lite/core/program.h +++ b/lite/core/program.h @@ -143,7 +143,8 @@ class LITE_API RuntimeProgram { } ~RuntimeProgram() { #ifdef LITE_WITH_PROFILE - LOG(INFO) << "\n" << profiler_.Summary(); + LOG(INFO) << "\n" << profiler_.Summary(profile::Type::kCreate); + LOG(INFO) << "\n" << profiler_.Summary(profile::Type::kDispatch); #endif // LITE_WITH_PROFILE }