提交 e9bdfb67 编写于 作者: 石晓伟 提交者: GitHub

update profiler, test=develop (#2644)

上级 d47f309a
......@@ -83,14 +83,11 @@ class KernelBase {
#if defined(LITE_WITH_CUDA)
WorkSpace::Global_CUDA().AllocReset();
#endif
#ifdef LITE_WITH_PROFILE
CHECK(profiler_) << "Profiler pointer of kernel can not be nullptr. "
"When LITE_WITH_PROFILE is defined, please set a "
"Profiler for Instruction.";
profiler_->StartTiming(profile_id_, ctx_.get());
profiler_->StopTiming(profile::Type::kCreate, profile_id_, ctx_.get());
profiler_->StartTiming(profile::Type::kDispatch, profile_id_, ctx_.get());
Run();
profiler_->StopTiming(profile_id_, ctx_.get());
profiler_->StopTiming(profile::Type::kDispatch, profile_id_, ctx_.get());
#else
Run();
#endif
......
......@@ -28,36 +28,55 @@ auto op_comp = [](const OpCharacter& c1, const OpCharacter& c2) {
};
}
int Profiler::NewTimer(const OpCharacter& ch) {
StatisUnit unit;
unit.character = ch;
std::map<Type, std::string> TypeStr{
{Type::kUnk, "Unknown"},
{Type::kCreate, "Create"},
{Type::kDispatch, "Dispatch"},
};
StatisUnit::StatisUnit(const OpCharacter& ch) : character(ch) {
create_t.reset(new DeviceTimer<TargetType::kHost>());
if (ch.target == TargetType::kCUDA) {
#ifdef LITE_WITH_CUDA
unit.timer.reset(new DeviceTimer<TargetType::kCUDA>());
dispatch_t.reset(new DeviceTimer<TargetType::kCUDA>());
#else
LOG(ERROR) << "The timer type specified as cuda is uninitialized, so the "
"default x86 timer is used instead.";
#endif
} else {
unit.timer.reset(new DeviceTimer<TargetType::kHost>());
dispatch_t.reset(new DeviceTimer<TargetType::kHost>());
}
}
lite::profile::Timer* StatisUnit::Timer(Type type) {
if (type == Type::kCreate) {
return create_t.get();
} else if (type == Type::kDispatch) {
return dispatch_t.get();
}
LOG(FATAL) << "Timer cannot be returned for unknown platforms.";
return nullptr;
}
int Profiler::NewTimer(const OpCharacter& ch) {
StatisUnit unit(ch);
units_.push_back(std::move(unit));
return units_.size() - 1;
}
void Profiler::StartTiming(const int index, KernelContext* ctx) {
void Profiler::StartTiming(Type type, const int index, KernelContext* ctx) {
CHECK_LT(index, units_.size())
<< "The timer index in the profiler is out of range.";
units_[index].timer->Start(ctx);
units_[index].Timer(type)->Start(ctx);
}
float Profiler::StopTiming(const int index, KernelContext* ctx) {
float Profiler::StopTiming(Type type, const int index, KernelContext* ctx) {
CHECK_LT(index, units_.size())
<< "The timer index in the profiler is out of range.";
return units_[index].timer->Stop(ctx);
return units_[index].Timer(type)->Stop(ctx);
}
std::string Profiler::Summary(bool concise, size_t w) {
std::string Profiler::Summary(Type type, bool concise, size_t w) {
using std::setw;
using std::left;
using std::fixed;
......@@ -65,12 +84,14 @@ std::string Profiler::Summary(bool concise, size_t w) {
std::string title;
// Title.
if (concise) {
ss << "Timing cycle = " << units_.front().timer->LapTimes().Size()
ss << "Timing cycle = " << units_.front().Timer(type)->LapTimes().Size()
<< std::endl;
ss << "===== Concise Profiler Summary: " << name_ << ", Exclude " << w
ss << "===== Concise " << TypeStr.find(type)->second
<< " Profiler Summary: " << name_ << ", Exclude " << w
<< " warm-ups =====" << std::endl;
} else {
ss << "===== Detailed Profiler Summary: " << name_ << ", Exclude " << w
ss << "===== Detailed " << TypeStr.find(type)->second
<< " Profiler Summary: " << name_ << ", Exclude " << w
<< " warm-ups =====" << std::endl;
}
ss << setw(25) << left << "Operator Type"
......@@ -84,16 +105,16 @@ std::string Profiler::Summary(bool concise, size_t w) {
if (concise) {
std::map<OpCharacter, TimeInfo, decltype(op_comp)> summary(op_comp);
for (auto& unit : units_) {
auto ch = summary.find(unit.character);
auto ch = summary.find(unit.Character());
if (ch != summary.end()) {
ch->second.avg += unit.timer->LapTimes().Avg(w);
ch->second.min += unit.timer->LapTimes().Min(w);
ch->second.max += unit.timer->LapTimes().Max(w);
ch->second.avg += unit.Timer(type)->LapTimes().Avg(w);
ch->second.min += unit.Timer(type)->LapTimes().Min(w);
ch->second.max += unit.Timer(type)->LapTimes().Max(w);
} else {
TimeInfo info({unit.timer->LapTimes().Avg(w),
unit.timer->LapTimes().Min(w),
unit.timer->LapTimes().Max(w)});
summary.insert({unit.character, info});
TimeInfo info({unit.Timer(type)->LapTimes().Avg(w),
unit.Timer(type)->LapTimes().Min(w),
unit.Timer(type)->LapTimes().Max(w)});
summary.insert({unit.Character(), info});
}
}
for (const auto& item : summary) {
......@@ -109,14 +130,15 @@ std::string Profiler::Summary(bool concise, size_t w) {
}
} else {
for (auto& unit : units_) {
const auto& times = unit.Timer(type)->LapTimes();
// clang-format off
ss << setw(25) << left << fixed << unit.character.op_type \
<< " " << setw(40) << left << fixed << unit.character.kernel_name \
<< " " << setw(12) << left << fixed << unit.character.remark \
<< " " << setw(12) << left << fixed << unit.timer->LapTimes().Avg(w) \
<< " " << setw(12) << left << fixed << unit.timer->LapTimes().Min(w) \
<< " " << setw(12) << left << fixed << unit.timer->LapTimes().Max(w) \
<< " " << setw(12) << left << fixed << unit.timer->LapTimes().Last(w) \
ss << setw(25) << left << fixed << unit.Character().op_type \
<< " " << setw(40) << left << fixed << unit.Character().kernel_name \
<< " " << setw(12) << left << fixed << unit.Character().remark \
<< " " << setw(12) << left << fixed << times.Avg(w) \
<< " " << setw(12) << left << fixed << times.Min(w) \
<< " " << setw(12) << left << fixed << times.Max(w) \
<< " " << setw(12) << left << fixed << times.Last(w) \
<< std::endl;
// clang-format on
}
......
......@@ -13,6 +13,7 @@
// limitations under the License.
#pragma once
#include <map>
#include <memory>
#include <string>
#include <vector>
......@@ -22,6 +23,14 @@ namespace paddle {
namespace lite {
namespace profile {
enum class Type {
kUnk = 0,
kCreate,
kDispatch,
};
extern std::map<Type, std::string> TypeStr;
struct TimeInfo {
float avg;
float min;
......@@ -35,8 +44,15 @@ struct OpCharacter {
std::string remark{std::string("N/A")};
};
struct StatisUnit {
std::unique_ptr<Timer> timer;
class StatisUnit final {
public:
explicit StatisUnit(const OpCharacter& ch);
lite::profile::Timer* Timer(Type type);
const OpCharacter& Character() const { return character; }
protected:
std::unique_ptr<lite::profile::Timer> create_t;
std::unique_ptr<lite::profile::Timer> dispatch_t;
OpCharacter character;
};
......@@ -45,9 +61,9 @@ class Profiler final {
Profiler() = default;
explicit Profiler(const std::string& name) : name_(name) {}
int NewTimer(const OpCharacter& ch);
void StartTiming(const int index, KernelContext* ctx);
float StopTiming(const int index, KernelContext* ctx);
std::string Summary(bool concise = true, size_t warm_up = 10);
void StartTiming(Type type, const int index, KernelContext* ctx);
float StopTiming(Type type, const int index, KernelContext* ctx);
std::string Summary(Type type, bool concise = true, size_t warm_up = 10);
private:
std::string name_{std::string("N/A")};
......
......@@ -69,10 +69,10 @@ TEST(profiler, real_latency) {
ch.op_type = "operator/1";
ch.kernel_name = "kernel/1";
int idx = profiler.NewTimer(ch);
profiler.StartTiming(idx, &ctx);
profiler.StartTiming(Type::kDispatch, idx, &ctx);
std::this_thread::sleep_for(std::chrono::milliseconds(10));
profiler.StopTiming(idx, &ctx);
std::cout << profiler.Summary();
profiler.StopTiming(Type::kDispatch, idx, &ctx);
std::cout << profiler.Summary(Type::kDispatch);
}
#endif
......
......@@ -147,7 +147,7 @@ void RuntimeProgram::Run() {
#endif // LITE_WITH_PROFILE
}
#ifdef LITE_WITH_PROFILE
LOG(INFO) << "\n" << profiler_.Summary(false, 0);
LOG(INFO) << "\n" << profiler_.Summary(profile::Type::kDispatch, false, 0);
#endif // LITE_WITH_PROFILE
}
......@@ -252,8 +252,16 @@ void Program::PrepareWorkspace(const cpp::ProgramDesc& prog) {
}
void Instruction::Run() {
#ifdef LITE_WITH_PROFILE
CHECK(profiler_) << "Profiler pointer of kernel can not be nullptr. "
"When LITE_WITH_PROFILE is defined, please set a "
"Profiler for Instruction.";
profiler_->StartTiming(
profile::Type::kCreate, profile_id_, kernel_->mutable_context());
#endif
CHECK(op_) << "op null";
CHECK(kernel_) << "kernel null";
if (first_epoch_) {
first_epoch_ = false;
CHECK(op_->CheckShape());
......@@ -263,10 +271,7 @@ void Instruction::Run() {
return;
}
// VLOG(4) << "kernel launch";
op_->InferShape();
// VLOG(4) << ">> Running kernel: " << op_->op_info()->Repr() << " on Target "
// << TargetToStr(kernel_->target());
kernel_->Launch();
has_run_ = true;
}
......
......@@ -143,7 +143,8 @@ class LITE_API RuntimeProgram {
}
~RuntimeProgram() {
#ifdef LITE_WITH_PROFILE
LOG(INFO) << "\n" << profiler_.Summary();
LOG(INFO) << "\n" << profiler_.Summary(profile::Type::kCreate);
LOG(INFO) << "\n" << profiler_.Summary(profile::Type::kDispatch);
#endif // LITE_WITH_PROFILE
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册