提交 b82a9eec 编写于 作者: S sangoly 提交者: GitHub

[Profile] add kernel runtime profile && add op runtime summary test=develop (#2136)

上级 7ff455ac
......@@ -92,7 +92,9 @@ add_custom_target(kernel_list_h DEPENDS kernels.h)
add_custom_target(all_kernel_faked_cc DEPENDS all_kernel_faked.cc)
#----------------------------------------------- NOT CHANGE -----------------------------------------------
lite_cc_library(kernel SRCS kernel.cc DEPS context type_system target_wrapper any op_params tensor
lite_cc_library(kernel SRCS kernel.cc
DEPS context type_system target_wrapper any op_params tensor
PROFILE_DEPS basic_profiler
)
lite_cc_library(op SRCS op_lite.cc DEPS scope op_registry target_wrapper kernel
cpp_op_desc tensor
......
......@@ -30,6 +30,10 @@
#include "lite/utils/all.h"
#include "lite/utils/replace_stl/stream.h"
#ifdef LITE_WITH_PROFILE
#include "lite/core/profile/basic_profiler.h"
#endif // LITE_WITH_PROFILE
namespace paddle {
namespace lite {
......@@ -53,6 +57,10 @@ class KernelBase {
/// Run the kernel. Before Run, both the param_ and context_ should be valid.
virtual void Run() = 0;
#ifdef LITE_WITH_PROFILE
void SetProfileID(uint32_t id) { profile_id_ = id; }
#endif
void Launch() {
/// First run, init kernel, do weights transform once
if (is_first_epoch_) {
......@@ -72,6 +80,11 @@ class KernelBase {
#if defined(LITE_WITH_CUDA)
WorkSpace::Global_CUDA().AllocReset();
#endif
#ifdef LITE_WITH_PROFILE
CHECK_GE(profile_id_, 0) << "Must set profile id first";
profile::ProfileBlock x(profile_id_, "kernel");
#endif
Run();
}
......@@ -157,6 +170,10 @@ class KernelBase {
// is the unique ID for the kernel.
std::string alias_{};
bool is_first_epoch_{true};
#ifdef LITE_WITH_PROFILE
int profile_id_{-1};
#endif
};
// Light-weight kernel implementation.
......
......@@ -80,6 +80,8 @@ class OpLite : public Registry {
// Human-readable information.
virtual std::string DebugString() const = 0;
virtual std::string SerializedOpInfo() const { return "N/A"; }
const Place &kernel_place() const { return kernel_place_; }
// Create all the kernels for the valid targets.
......
......@@ -2,7 +2,7 @@ if (NOT LITE_WITH_PROFILE)
return()
endif()
lite_cc_library(basic_profiler SRCS basic_profiler.cc)
lite_cc_library(basic_profiler SRCS basic_profiler.cc DEPS gflags)
lite_cc_test(test_basic_profiler SRCS basic_profiler_test.cc DEPS basic_profiler)
......@@ -14,6 +14,14 @@
#include "lite/core/profile/basic_profiler.h"
DEFINE_string(time_profile_file,
"time_profile.txt",
"Lite time profile information dump file");
DEFINE_string(time_profile_summary_file,
"time_profile_summary.txt",
"Lite time profile summary information dump file");
namespace paddle {
namespace lite {
namespace profile {
......@@ -21,6 +29,33 @@ namespace profile {
const int BasicTimer::data_w = 10;
const int BasicTimer::name_w = 15;
template class BasicProfiler<BasicTimer>;
template <typename TimerT>
BasicProfiler<TimerT>::~BasicProfiler() {
LOG(INFO) << "Basic Profile dumps:";
auto b_repr = TimerT::basic_repr_header() + "\n" + basic_repr();
LOG(INFO) << "\n" + b_repr;
// Dump to file
std::ofstream basic_ostream(FLAGS_time_profile_file);
CHECK(basic_ostream.is_open()) << "Open " << FLAGS_time_profile_file
<< " failed";
basic_ostream.write(b_repr.c_str(), b_repr.size());
basic_ostream.close();
LOG(INFO) << "Summary Profile dumps:";
auto s_repr = summary_repr_header() + "\n" + summary_repr();
LOG(INFO) << "\n" + s_repr;
// Dump to file
std::ofstream summary_ostream(FLAGS_time_profile_summary_file);
CHECK(summary_ostream.is_open()) << "Open " << FLAGS_time_profile_summary_file
<< " failed";
summary_ostream.write(s_repr.c_str(), s_repr.size());
summary_ostream.close();
}
} // namespace profile
} // namespace lite
} // namespace paddle
......@@ -18,10 +18,13 @@
* of each kernel.
*/
#pragma once
#include <gflags/gflags.h>
#include <time.h>
#include <algorithm>
#include <chrono> // NOLINT
#include <fstream>
#include <limits>
#include <map>
#include <memory>
#include <string>
#include <vector>
......@@ -33,35 +36,48 @@ namespace paddle {
namespace lite {
namespace profile {
struct TimerInfo {
uint64_t total_{};
uint64_t count_{};
uint32_t max_{std::numeric_limits<uint32_t>::min()};
uint32_t min_{std::numeric_limits<uint32_t>::max()};
uint64_t timer_{};
double ave() const { return total_ * 1. / count_; }
double max() const { return max_; }
double min() const { return min_; }
uint64_t total() const { return total_; }
uint64_t count() const { return count_; }
};
/* Base class of all the profile records */
template <typename ChildT>
class TimerBase {
public:
void Start() { self()->Start(); }
void Stop() { self()->Stop(); }
void Log(uint32_t x) { return self()->Log(x); }
void Start(const std::string& key) { self()->Start(key); }
void Stop(const std::string& key) { self()->Stop(key); }
void Log(TimerInfo* timer_info, uint32_t x) {
return self()->Log(timer_info, x);
}
std::string basic_repr() const { return const_self()->basic_repr(); }
void SetId(int id) { self()->SetId(id); }
void SetKey(const std::string &key) { self()->SetKey(key); }
void SetKey(const std::string& key) { self()->SetKey(key); }
int id() const { return const_self()->id(); }
protected:
ChildT *self() { return reinterpret_cast<ChildT *>(this); }
const ChildT *const_self() const {
return reinterpret_cast<const ChildT *>(this);
ChildT* self() { return reinterpret_cast<ChildT*>(this); }
const ChildT* const_self() const {
return reinterpret_cast<const ChildT*>(this);
}
};
class BasicTimer : TimerBase<BasicTimer> {
uint64_t total_{};
uint64_t count_{};
uint32_t max_{std::numeric_limits<uint32_t>::min()};
uint32_t min_{std::numeric_limits<uint32_t>::max()};
int id_{-1};
std::string key_;
uint64_t timer_{};
std::map<std::string, TimerInfo> timer_infos_;
std::map<std::string, std::string> custom_infos_;
// TODO(Superjomn) make static
static const int name_w;
......@@ -69,68 +85,109 @@ class BasicTimer : TimerBase<BasicTimer> {
public:
BasicTimer() = default;
BasicTimer(int id, const std::string &key) : id_(id), key_(key) {}
BasicTimer(int id, const std::string& key) : id_(id), key_(key) {}
void SetId(int id) { id_ = id; }
void SetKey(const std::string &key) { key_ = key; }
void Start() {
timer_ = static_cast<uint64_t>(
void SetKey(const std::string& key) { key_ = key; }
void Start(const std::string& timer_key) {
TimerInfo& timer_info = timer_infos_[timer_key];
timer_info.timer_ = static_cast<uint64_t>(
std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::system_clock::now().time_since_epoch())
.count());
}
void Stop() {
void Stop(const std::string& timer_key) {
if (timer_infos_.find(timer_key) == timer_infos_.end()) {
LOG(FATAL) << "Error: Can't found timer key [" << timer_key << "] for "
<< key_;
}
TimerInfo& timer_info = timer_infos_[timer_key];
auto duration = static_cast<
uint64_t>( // timer unit: microsecond, 1second = 1e6 microsecond
std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::system_clock::now().time_since_epoch())
.count() -
timer_);
Log(duration);
timer_info.timer_);
Log(&timer_info, duration);
}
void SetCustomInfo(const std::string& key, const std::string& value) {
if (custom_infos_.find(key) != custom_infos_.end()) {
LOG(FATAL) << "Error: Custom Info for key [" << key
<< "] can't be overwritten";
}
custom_infos_[key] = value;
}
std::string GetCustomInfo(const std::string& key) const {
auto iter = custom_infos_.find(key);
if (iter == custom_infos_.end()) {
LOG(FATAL) << "Error: Custom Info for key [" << key << "] can't be found";
}
return iter->second;
}
int count() const { return count_; }
const TimerInfo& GetTimerInfo(const std::string& key) const {
auto iter = timer_infos_.find(key);
if (iter == timer_infos_.end()) {
LOG(FATAL) << "Error: Timer Info for key [" << key << "] can't be found";
}
return iter->second;
}
void Log(uint32_t timespan) {
total_ += timespan;
max_ = std::max(max_, timespan);
min_ = std::min(min_, timespan);
count_++;
void Log(TimerInfo* timer_info, uint32_t timespan) {
CHECK(timer_info);
timer_info->total_ += timespan;
timer_info->max_ = std::max(timer_info->max_, timespan);
timer_info->min_ = std::min(timer_info->min_, timespan);
timer_info->count_++;
}
static std::string basic_repr_header() {
STL::stringstream ss;
ss << std::setw(name_w) << "kernel" //
<< std::setw(data_w) << "average" //
<< std::setw(data_w) << "min" //
<< std::setw(data_w) << "max" //
<< std::setw(data_w) << "count";
// clang-format off
ss << "op" << "\t"
<< "kernel" << "\t"
<< "k_average" << "\t"
<< "k_min" << "\t"
<< "k_max" << "\t"
<< "i_average" << "\t"
<< "i_min" << "\t"
<< "i_max" << "\t"
<< "count" << "\t"
<< "op_info";
// clang-format on
return ss.str();
}
std::string basic_repr() const {
auto& kernel_timer_info = GetTimerInfo("kernel");
auto& inst_timer_info = GetTimerInfo("instruction");
STL::stringstream ss;
ss << std::setw(name_w) << key() //
<< std::setw(data_w) << ave() //
<< std::setw(data_w) << min() //
<< std::setw(data_w) << max() //
<< std::setw(data_w) << count_;
// clang-format off
ss << GetCustomInfo("op_type") << "\t"
<< key() << "\t"
<< kernel_timer_info.ave() << "\t"
<< kernel_timer_info.min() << "\t"
<< kernel_timer_info.max() << "\t"
<< inst_timer_info.ave() << "\t"
<< inst_timer_info.min() << "\t"
<< inst_timer_info.max() << "\t"
<< inst_timer_info.count() << "\t"
<< GetCustomInfo("op_info");
// clang-format on
return ss.str();
}
const std::string &key() const { return key_; }
const std::string& key() const { return key_; }
int id() const {
CHECK_GE(id_, 0) << "id is not inited";
return id_;
}
double ave() const { return total_ * 1. / count_; }
double max() const { return max_; }
double min() const { return min_; }
// BasicRecord(const BasicRecord &) = delete;
void operator=(const BasicTimer &) = delete;
void operator=(const BasicTimer&) = delete;
};
/*
......@@ -139,28 +196,28 @@ class BasicTimer : TimerBase<BasicTimer> {
template <typename TimerT>
class BasicProfiler {
public:
explicit BasicProfiler(const std::string &name) : name_(name) {}
explicit BasicProfiler(const std::string& name) : name_(name) {}
using record_t = TimerT;
static BasicProfiler &Global() {
static BasicProfiler& Global() {
static std::unique_ptr<BasicProfiler> x(new BasicProfiler("[global]"));
return *x;
}
record_t &NewRcd(const std::string &key) {
record_t& NewRcd(const std::string& key) {
records_.emplace_back();
records_.back().SetId(records_.size() - 1);
records_.back().SetKey(key);
return records_.back();
}
const record_t &record(int id) {
const record_t& record(int id) {
CHECK_LT(id, records_.size());
CHECK_GE(id, 0);
return records_[id];
}
record_t *mutable_record(int id) {
record_t* mutable_record(int id) {
CHECK_GE(id, 0);
CHECK_LT(static_cast<size_t>(id), records_.size());
return &records_[id];
......@@ -168,42 +225,102 @@ class BasicProfiler {
std::string basic_repr() const {
STL::stringstream ss;
for (const auto &rcd : records_) {
for (const auto& rcd : records_) {
ss << rcd.basic_repr() << "\n";
}
return ss.str();
}
~BasicProfiler() {
LOG(INFO) << "Profile dumps:";
LOG(INFO) << "\n" + BasicTimer::basic_repr_header() + "\n" + basic_repr();
std::string summary_repr_header() const {
STL::stringstream ss;
// clang-format off
ss << "op" << "\t"
<< "average" << "\t"
<< "min" << "\t"
<< "max" << "\t"
<< "op_time" << "\t"
<< "total_time" << "\t"
<< "precent" << "\t"
<< "count";
// clang-format on
return ss.str();
}
std::string summary_repr() const {
std::map<std::string, TimerInfo> op_summary;
uint64_t total{0};
for (const auto& rcd : records_) {
// We use kernel run time here
auto kernel_timer = rcd.GetTimerInfo("kernel");
auto op_type = rcd.GetCustomInfo("op_type");
auto& op_timer = op_summary[op_type];
total += kernel_timer.total_;
op_timer.total_ += kernel_timer.total_;
op_timer.max_ = std::max(kernel_timer.max_, op_timer.max_);
op_timer.min_ = std::min(kernel_timer.min_, op_timer.min_);
op_timer.count_ += kernel_timer.count_;
}
STL::stringstream ss;
for (auto& iter : op_summary) {
auto& op_timer = iter.second;
// clang-format off
ss << iter.first << "\t"
<< op_timer.ave() << "\t"
<< op_timer.min() << "\t"
<< op_timer.max() << "\t"
<< op_timer.total() << "\t"
<< total << "\t"
<< (op_timer.total() * 1. / total * 100) << "%\t"
<< op_timer.count() << "\t"
<< "\n";
// clang-format on
}
return ss.str();
}
~BasicProfiler();
private:
std::string name_;
std::vector<record_t> records_;
};
struct ProfileBlock {
explicit ProfileBlock(int id) : id_(id) {
BasicProfiler<BasicTimer>::Global().mutable_record(id_)->Start();
explicit ProfileBlock(int id, const std::string& key) : id_(id), key_(key) {
BasicProfiler<BasicTimer>::Global().mutable_record(id_)->Start(key_);
}
void Record() {
if (has_recorded_) {
LOG(FATAL) << "You can only call Record() once";
}
BasicProfiler<BasicTimer>::Global().mutable_record(id_)->Stop(key_);
has_recorded_ = true;
}
~ProfileBlock() {
BasicProfiler<BasicTimer>::Global().mutable_record(id_)->Stop();
if (!has_recorded_) {
BasicProfiler<BasicTimer>::Global().mutable_record(id_)->Stop(key_);
}
}
private:
int id_{};
bool has_recorded_{false};
std::string key_{};
};
#define LITE_PROFILE_ONE(key__) \
static int key__##__profiler_id = \
::paddle::lite::profile::BasicProfiler< \
::paddle::lite::profile::BasicTimer>::Global() \
.NewRcd(#key__) \
.id(); \
::paddle::lite::profile::ProfileBlock key__##profiler__(key__##__profiler_id);
#define LITE_PROFILE_ONE(key__) \
static int key__##__profiler_id = \
::paddle::lite::profile::BasicProfiler< \
::paddle::lite::profile::BasicTimer>::Global() \
.NewRcd(#key__) \
.id(); \
::paddle::lite::profile::ProfileBlock key__##profiler__( \
key__##__profiler_id, #key__);
} // namespace profile
} // namespace lite
......
......@@ -27,18 +27,21 @@ TEST(basic_record, init) {
timer.SetKey("hello");
}
TEST(basic_profile, init) {
auto& rcd = BasicProfiler<BasicTimer>::Global().NewRcd("fc");
for (int i = 11; i < 100; i++) {
rcd.Log(i);
}
TEST(basic_profile, real_latency) {
auto profile_id = profile::BasicProfiler<profile::BasicTimer>::Global()
.NewRcd("test0")
.id();
auto& profiler =
*BasicProfiler<profile::BasicTimer>::Global().mutable_record(profile_id);
// Set op info
profiler.SetCustomInfo("op_type", "fc");
profiler.SetCustomInfo("op_info", "size:5x6");
LOG(INFO) << BasicProfiler<BasicTimer>::Global().basic_repr();
}
profile::ProfileBlock x(profile_id, "instruction");
std::this_thread::sleep_for(std::chrono::milliseconds(1000));
TEST(basic_profile, real_latency) {
LITE_PROFILE_ONE(test0);
std::this_thread::sleep_for(std::chrono::milliseconds(1200));
profile::ProfileBlock y(profile_id, "kernel");
std::this_thread::sleep_for(std::chrono::milliseconds(500));
}
} // namespace profile
......
......@@ -182,11 +182,11 @@ void Program::PrepareWorkspace(const cpp::ProgramDesc& prog) {
}
void Instruction::Run() {
#ifdef LITE_WITH_PROFILE
profile::ProfileBlock x(profile_id_);
#endif // LITE_WITH_PROFILE
CHECK(op_) << "op null";
CHECK(kernel_) << "kernel null";
#ifdef LITE_WITH_PROFILE
profile::ProfileBlock x(profile_id_, "instruction");
#endif // LITE_WITH_PROFILE
if (first_epoch_) {
first_epoch_ = false;
CHECK(op_->CheckShape());
......
......@@ -92,6 +92,13 @@ struct Instruction {
profile_id_ = profile::BasicProfiler<profile::BasicTimer>::Global()
.NewRcd(kernel_->SerializedKernelType())
.id();
kernel_->SetProfileID(profile_id_);
// Set profile custom info
auto& profiler =
*profile::BasicProfiler<profile::BasicTimer>::Global().mutable_record(
profile_id_);
profiler.SetCustomInfo("op_type", op_->Type());
profiler.SetCustomInfo("op_info", op_->SerializedOpInfo());
#endif // LITE_WITH_PROFILE
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册