From bc770dbe6bd5d57b97eec720d2fc3b732cd010b5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=9F=B3=E6=99=93=E4=BC=9F?= <39303645+Shixiaowei02@users.noreply.github.com>
Date: Wed, 4 Dec 2019 12:55:11 +0800
Subject: [PATCH] refactor profile tools, test=develop (#2536)

---
 lite/api/model_test.cc                        |  16 +--
 lite/core/CMakeLists.txt                      |   4 +-
 lite/core/arena/framework.cc                  |   3 +
 lite/core/kernel.h                            |  18 ++-
 lite/core/profile/CMakeLists.txt              |   3 +-
 lite/core/profile/profiler.cc                 | 117 ++++++++++++++++++
 lite/core/profile/profiler.h                  |  59 +++++++++
 lite/core/profile/test_timer.cc               |  81 ++++++++++++
 lite/core/profile/timer.h                     | 114 +++++++++++++++++
 lite/core/program.cc                          |   8 +-
 lite/core/program.h                           |  48 ++++---
 lite/tests/cv/image_convert_test.cc           |  12 +-
 lite/tests/math/conv_compute_test.cc          |  16 +--
 lite/tests/math/conv_int8_compute_test.cc     |  30 ++---
 .../tests/math/conv_transpose_compute_test.cc |  16 +--
 lite/tests/math/gemm_int8_compute_test.cc     |  30 ++---
 lite/tests/math/gemv_int8_compute_test.cc     |  30 ++---
 lite/tests/math/layout_compute_test.cc        |  52 ++++----
 lite/tests/math/pool_compute_test.cc          |  16 +--
 lite/tests/math/sgemm_c4_compute_test.cc      |  16 +--
 lite/tests/math/sgemm_compute_test.cc         |  16 +--
 lite/tests/math/sgemv_compute_test.cc         |  18 +--
 lite/tests/utils/timer.h                      | 105 ----------------
 23 files changed, 555 insertions(+), 273 deletions(-)
 create mode 100644 lite/core/profile/profiler.cc
 create mode 100644 lite/core/profile/profiler.h
 create mode 100644 lite/core/profile/test_timer.cc
 create mode 100644 lite/core/profile/timer.h
 delete mode 100644 lite/tests/utils/timer.h

diff --git a/lite/api/model_test.cc b/lite/api/model_test.cc
index 1358267000..a04e86b7d2 100644
--- a/lite/api/model_test.cc
+++ b/lite/api/model_test.cc
@@ -21,14 +21,14 @@
 #include "lite/api/paddle_use_passes.h"
 #include "lite/api/test_helper.h"
 #include "lite/core/device_info.h"
-#include "lite/tests/utils/timer.h"
+#include "lite/core/profile/timer.h"
 #include "lite/utils/cp_logging.h"
 #include "lite/utils/string.h"
 #ifdef LITE_WITH_PROFILE
 #include "lite/core/profile/basic_profiler.h"
 #endif  // LITE_WITH_PROFILE
 
-using paddle::lite::Timer;
+using paddle::lite::profile::Timer;
 
 DEFINE_string(input_shape,
               "1,3,224,224",
@@ -102,20 +102,20 @@ void Run(const std::vector<std::vector<int64_t>>& input_shapes,
 
   Timer ti;
   for (int j = 0; j < repeat; ++j) {
-    ti.start();
+    ti.Start();
     predictor->Run();
-    ti.end();
-    LOG(INFO) << "iter: " << j << ", time: " << ti.latest_time() << " ms";
+    float t = ti.Stop();
+    LOG(INFO) << "iter: " << j << ", time: " << t << " ms";
   }
 
   LOG(INFO) << "================== Speed Report ===================";
   LOG(INFO) << "Model: " << model_dir
             << ", power_mode: " << static_cast<int>(power_mode)
             << ", threads num " << thread_num << ", warmup: " << warmup_times
-            << ", repeats: " << repeat << ", avg time: " << ti.get_average_ms()
+            << ", repeats: " << repeat << ", avg time: " << ti.LapTimes().Avg()
             << " ms"
-            << ", min time: " << ti.get_min_time() << " ms"
-            << ", max time: " << ti.get_max_time() << " ms.";
+            << ", min time: " << ti.LapTimes().Min() << " ms"
+            << ", max time: " << ti.LapTimes().Max() << " ms.";
 
   auto output = predictor->GetOutput(0);
   auto out = output->data<float>();
diff --git a/lite/core/CMakeLists.txt b/lite/core/CMakeLists.txt
index 5eecf1d815..a93b962a47 100644
--- a/lite/core/CMakeLists.txt
+++ b/lite/core/CMakeLists.txt
@@ -99,7 +99,7 @@ add_custom_target(all_kernel_faked_cc DEPENDS all_kernel_faked.cc)
 #----------------------------------------------- NOT CHANGE -----------------------------------------------
 lite_cc_library(kernel SRCS kernel.cc
             DEPS context type_system target_wrapper any op_params tensor
-            PROFILE_DEPS basic_profiler
+            PROFILE_DEPS lite_profiler
   )
 lite_cc_library(op SRCS op_lite.cc
             DEPS scope op_registry target_wrapper kernel
             cpp_op_desc tensor
@@ -113,7 +113,7 @@ lite_cc_library(type_system SRCS type_system.cc DEPS tensor target_wrapper)
 
 lite_cc_library(program SRCS program.cc
             DEPS op kernel model_parser ${ops} ${cpp_wrapper}
-            PROFILE_DEPS basic_profiler)
+            PROFILE_DEPS lite_profiler)
 
 if (NOT LITE_ON_TINY_PUBLISH)
   lite_cc_library(optimizer SRCS optimizer.cc DEPS mir_pass_manager model_parser program)
diff --git a/lite/core/arena/framework.cc b/lite/core/arena/framework.cc
index c59c078787..561a508d20 100644
--- a/lite/core/arena/framework.cc
+++ b/lite/core/arena/framework.cc
@@ -37,6 +37,9 @@ void TestCase::CreateInstruction() {
   // prepare context
   (*it)->SetContext(std::move(ctx_));
   instruction_.reset(new Instruction(op, std::move(*it)));
+#ifdef LITE_WITH_PROFILE
+  instruction_->set_profiler(new profile::Profiler());
+#endif
 }
 
 void TestCase::PrepareInputsForInstruction() {
diff --git a/lite/core/kernel.h b/lite/core/kernel.h
index 05d7a6b333..86193235a2 100644
--- a/lite/core/kernel.h
+++ b/lite/core/kernel.h
@@ -31,7 +31,7 @@
 #include "lite/utils/replace_stl/stream.h"
 
 #ifdef LITE_WITH_PROFILE
-#include "lite/core/profile/basic_profiler.h"
+#include "lite/core/profile/profiler.h"
 #endif  // LITE_WITH_PROFILE
 
 namespace paddle {
@@ -58,7 +58,10 @@ class KernelBase {
   virtual void Run() = 0;
 
 #ifdef LITE_WITH_PROFILE
-  void SetProfileID(uint32_t id) { profile_id_ = id; }
+  void SetProfiler(profile::Profiler* profiler, int id) {
+    profiler_ = profiler;
+    profile_id_ = id;
+  }
 #endif
 
   void Launch() {
@@ -82,10 +85,12 @@
 #endif
 
 #ifdef LITE_WITH_PROFILE
-    if (profile_id_ >= 0) {
-      profile::ProfileBlock x(profile_id_, "kernel");
-      Run();
-    }
+    CHECK(profiler_) << "Profiler pointer of kernel can not be nullptr. "
+                        "When LITE_WITH_PROFILE is defined, please set a "
+                        "Profiler for Instruction.";
+    profiler_->StartTiming(profile_id_, ctx_.get());
+    Run();
+    profiler_->StopTiming(profile_id_, ctx_.get());
 #else
     Run();
 #endif
@@ -175,6 +180,7 @@ class KernelBase {
   bool is_first_epoch_{true};
 
 #ifdef LITE_WITH_PROFILE
+  profile::Profiler* profiler_{nullptr};
   int profile_id_{-1};
 #endif
 };
diff --git a/lite/core/profile/CMakeLists.txt b/lite/core/profile/CMakeLists.txt
index 54a2390244..b7ddd810af 100644
--- a/lite/core/profile/CMakeLists.txt
+++ b/lite/core/profile/CMakeLists.txt
@@ -5,4 +5,5 @@ endif()
 lite_cc_library(basic_profiler SRCS basic_profiler.cc DEPS gflags)
 lite_cc_test(test_basic_profiler SRCS basic_profiler_test.cc DEPS basic_profiler)
-
+lite_cc_library(lite_profiler SRCS profiler.cc DEPS context)
+lite_cc_test(test_lite_timer SRCS test_timer.cc DEPS lite_profiler)
diff --git a/lite/core/profile/profiler.cc b/lite/core/profile/profiler.cc
new file mode 100644
index 0000000000..a51b769c8f
--- /dev/null
+++ b/lite/core/profile/profiler.cc
@@ -0,0 +1,117 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/core/profile/profiler.h"
+#include <map>
+#include <string>
+#include <tuple>
+#include <utility>
+
+namespace paddle {
+namespace lite {
+namespace profile {
+
+int Profiler::NewTimer(const OpCharacter& ch) {
+  StatisUnit unit;
+  unit.character = ch;
+  if (ch.target == TargetType::kCUDA) {
+#ifdef LITE_WITH_CUDA
+    unit.timer.reset(new DeviceTimer<TargetType::kCUDA>());
+#else
+    LOG(ERROR) << "The timer type specified as cuda is uninitialized, so the "
+                  "default x86 timer is used instead.";
+#endif
+  } else {
+    unit.timer.reset(new DeviceTimer<TargetType::kHost>());
+  }
+  units_.push_back(std::move(unit));
+  return units_.size() - 1;
+}
+
+void Profiler::StartTiming(const int index, KernelContext* ctx) {
+  CHECK_LT(index, units_.size())
+      << "The timer index in the profiler is out of range.";
+  units_[index].timer->Start(ctx);
+}
+
+float Profiler::StopTiming(const int index, KernelContext* ctx) {
+  CHECK_LT(index, units_.size())
+      << "The timer index in the profiler is out of range.";
+  return units_[index].timer->Stop(ctx);
+}
+
+std::string Profiler::Summary(bool concise) {
+  STL::stringstream ss;
+  auto cout_title = [&ss](const std::string& title, const std::string& name) {
+    // clang-format off
+    ss << "===== " << title << ": " << name << " =====" << std::endl;
+    ss << std::setw(25) << std::left << "Operator Type" \
+       << std::setw(40) << std::left << "Kernel Name" \
+       << std::setw(10) << std::left << "Remark" \
+       << std::setw(10) << std::left << "Avg (ms)" \
+       << std::setw(10) << std::left << "Min (ms)" \
+       << std::setw(10) << std::left << "Max (ms)" \
+       << std::endl;
+    // clang-format on
+  };
+  if (concise) {
+    auto op_comp = [](const OpCharacter& c1, const OpCharacter& c2) {
+      return std::tie(c1.target, c1.op_type, c1.kernel_name, c1.remark) <
+             std::tie(c2.target, c2.op_type, c2.kernel_name, c2.remark);
+    };
+    std::map<OpCharacter, TimeInfo, decltype(op_comp)> summary(op_comp);
+    for (auto& unit : units_) {
+      auto ch = summary.find(unit.character);
+      if (ch != summary.end()) {
+        ch->second.avg += unit.timer->LapTimes().Avg();
+        ch->second.min += unit.timer->LapTimes().Min();
+        ch->second.max += unit.timer->LapTimes().Max();
+      } else {
+        TimeInfo info({unit.timer->LapTimes().Avg(),
+                       unit.timer->LapTimes().Min(),
+                       unit.timer->LapTimes().Max()});
+        summary.insert({unit.character, info});
+      }
+    }
+    cout_title("Concise Profiler Summary", name_);
+    for (const auto& item : summary) {
+      // clang-format off
+      ss << std::setw(25) << std::left << item.first.op_type \
+         << std::setw(40) << std::left << item.first.kernel_name \
+         << std::setw(10) << std::left << item.first.remark \
+         << std::setw(10) << std::left << item.second.avg \
+         << std::setw(10) << std::left << item.second.min \
+         << std::setw(10) << std::left << item.second.max \
+         << std::endl;
+      // clang-format on
+    }
+  } else {
+    cout_title("Detailed Profiler Summary", name_);
+    for (auto& unit : units_) {
+      // clang-format off
+      ss << std::setw(25) << std::left << unit.character.op_type \
+         << std::setw(40) << std::left << unit.character.kernel_name \
+         << std::setw(10) << std::left << unit.character.remark \
+         << std::setw(10) << std::left << unit.timer->LapTimes().Avg() \
+         << std::setw(10) << std::left << unit.timer->LapTimes().Min() \
+         << std::setw(10) << std::left << unit.timer->LapTimes().Max() \
+         << std::endl;
+      // clang-format on
+    }
+  }
+  return ss.str();
+}
+
+}  // namespace profile
+}  // namespace lite
+}  // namespace paddle
diff --git a/lite/core/profile/profiler.h b/lite/core/profile/profiler.h
new file mode 100644
index 0000000000..0fce8167cd
--- /dev/null
+++ b/lite/core/profile/profiler.h
@@ -0,0 +1,59 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <memory>
+#include <string>
+#include <vector>
+#include "lite/core/profile/timer.h"
+
+namespace paddle {
+namespace lite {
+namespace profile {
+
+struct TimeInfo {
+  float avg;
+  float min;
+  float max;
+};
+
+struct OpCharacter {
+  TargetType target;
+  std::string op_type{std::string("N/A")};
+  std::string kernel_name{std::string("N/A")};
+  std::string remark{std::string("N/A")};
+};
+
+struct StatisUnit {
+  std::unique_ptr<Timer> timer;
+  OpCharacter character;
+};
+
+class Profiler final {
+ public:
+  Profiler() = default;
+  explicit Profiler(const std::string& name) : name_(name) {}
+  int NewTimer(const OpCharacter& ch);
+  void StartTiming(const int index, KernelContext* ctx);
+  float StopTiming(const int index, KernelContext* ctx);
+  std::string Summary(bool concise = true);
+
+ private:
+  std::string name_{std::string("N/A")};
+  std::vector<StatisUnit> units_;
+};
+
+}  // namespace profile
+}  // namespace lite
+}  // namespace paddle
diff --git a/lite/core/profile/test_timer.cc b/lite/core/profile/test_timer.cc
new file mode 100644
index 0000000000..6f49698ef4
--- /dev/null
+++ b/lite/core/profile/test_timer.cc
@@ -0,0 +1,81 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <gtest/gtest.h>
+#include <chrono>    // NOLINT
+#include <iostream>
+#include <thread>    // NOLINT
+#include "lite/core/context.h"
+#include "lite/core/profile/profiler.h"
+#include "lite/core/profile/timer.h"
+#include "lite/utils/cp_logging.h"
+
+namespace paddle {
+namespace lite {
+namespace profile {
+
+TEST(timer, real_latency) {
+  Timer timer;
+
+  timer.Start();
+  std::this_thread::sleep_for(std::chrono::milliseconds(10));
+  timer.Stop();
+
+  timer.Start();
+  std::this_thread::sleep_for(std::chrono::milliseconds(50));
+  timer.Stop();
+
+  LOG(INFO) << "LapTimes().Avg() = " << timer.LapTimes().Avg();
+}
+
+#ifdef LITE_WITH_CUDA
+TEST(gpu_timer, real_latency) {
+  DeviceTimer<TargetType::kCUDA> timer;
+  KernelContext ctx;
+  cudaStream_t exec_stream;
+  cudaStreamCreate(&exec_stream);
+  (&ctx.As<CUDAContext>())->SetExecStream(exec_stream);
+
+  timer.Start(&ctx);
+  std::this_thread::sleep_for(std::chrono::milliseconds(10));
+  timer.Stop(&ctx);
+
+  timer.Start(&ctx);
+  std::this_thread::sleep_for(std::chrono::milliseconds(50));
+  timer.Stop(&ctx);
+
+  LOG(INFO) << "LapTimes().Avg() = " << timer.LapTimes().Avg();
+}
+
+TEST(profiler, real_latency) {
+  KernelContext ctx;
+  cudaStream_t exec_stream;
+  cudaStreamCreate(&exec_stream);
+  (&ctx.As<CUDAContext>())->SetExecStream(exec_stream);
+
+  Profiler profiler("name");
+  profile::OpCharacter ch;
+  ch.target = TargetType::kCUDA;
+  ch.op_type = "operator/1";
+  ch.kernel_name = "kernel/1";
+  int idx = profiler.NewTimer(ch);
+  profiler.StartTiming(idx, &ctx);
+  std::this_thread::sleep_for(std::chrono::milliseconds(10));
+  profiler.StopTiming(idx, &ctx);
+  std::cout << profiler.Summary();
+}
+#endif
+
+}  // namespace profile
+}  // namespace lite
+}  // namespace paddle
diff --git a/lite/core/profile/timer.h b/lite/core/profile/timer.h
new file mode 100644
index 0000000000..1e86f0d7b9
--- /dev/null
+++ b/lite/core/profile/timer.h
@@ -0,0 +1,114 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <algorithm>
+#include <chrono>  // NOLINT
+#include <list>
+#include <numeric>
+#ifdef LITE_WITH_CUDA
+#include "lite/backends/cuda/cuda_utils.h"
+#endif
+#include "lite/core/context.h"
+
+namespace paddle {
+namespace lite {
+namespace profile {
+
+template <typename T>
+class TimeList {
+ public:
+  void Clear() { laps_t_.clear(); }
+  void Add(T t) { laps_t_.push_back(t); }
+  T Max() const { return *std::max_element(laps_t_.begin(), laps_t_.end()); }
+  T Min() const { return *std::min_element(laps_t_.begin(), laps_t_.end()); }
+  T Sum() const { return std::accumulate(laps_t_.begin(), laps_t_.end(), 0.0); }
+  size_t Size() const { return laps_t_.size(); }
+  T Avg() const {
+    if (!Size()) {
+      return 0;
+    }
+    return Sum() / Size();
+  }
+  const std::list<T>& Raw() const { return laps_t_; }
+
+ private:
+  std::list<T> laps_t_;
+};
+
+class Timer {
+ public:
+  Timer() = default;
+  virtual ~Timer() = default;
+
+  void Reset() { laps_t_.Clear(); }
+  void Start() { t_start_ = std::chrono::system_clock::now(); }
+  float Stop() {
+    t_stop_ = std::chrono::system_clock::now();
+    auto ts = std::chrono::duration_cast<std::chrono::microseconds>(t_stop_ -
+                                                                    t_start_);
+    float elapse_ms = 1000.f * static_cast<float>(ts.count()) *
+                      std::chrono::microseconds::period::num /
+                      std::chrono::microseconds::period::den;
+    this->laps_t_.Add(elapse_ms);
+    return elapse_ms;
+  }
+  virtual void Start(KernelContext* ctx) { return Start(); }
+  virtual float Stop(KernelContext* ctx) { return Stop(); }
+  float AvgLapTimeMs() const { return laps_t_.Avg(); }
+  const TimeList<float>& LapTimes() const { return laps_t_; }
+
+ protected:
+  std::chrono::time_point<std::chrono::system_clock> t_start_, t_stop_;
+  TimeList<float> laps_t_;
+};
+
+template <TargetType Target>
+class DeviceTimer final : public Timer {};
+
+#ifdef LITE_WITH_CUDA
+template <>
+class DeviceTimer<TargetType::kCUDA> final : public Timer {
+ public:
+  DeviceTimer() {
+    CUDA_CALL(cudaEventCreate(&e_start_));
+    CUDA_CALL(cudaEventCreate(&e_stop_));
+  }
+  ~DeviceTimer() {
+    CUDA_CALL(cudaEventDestroy(e_start_));
+    CUDA_CALL(cudaEventDestroy(e_stop_));
+  }
+  void Start(KernelContext* ctx) {
+    cudaStream_t stream;
+    stream = ctx->As<CUDAContext>().exec_stream();
+    CUDA_CALL(cudaEventRecord(e_start_, stream));
+  }
+  float Stop(KernelContext* ctx) {
+    cudaStream_t stream;
+    stream = ctx->As<CUDAContext>().exec_stream();
+    CUDA_CALL(cudaEventRecord(e_stop_, stream));
+    CUDA_CALL(cudaEventSynchronize(e_stop_));
+    float elapse_ms = 1.f;
+    CUDA_CALL(cudaEventElapsedTime(&elapse_ms, e_start_, e_stop_));
+    this->laps_t_.Add(elapse_ms);
+    return elapse_ms;
+  }
+
+ private:
+  cudaEvent_t e_start_, e_stop_;
+};
+#endif
+
+}  // namespace profile
+}  // namespace lite
+}  // namespace paddle
diff --git a/lite/core/program.cc b/lite/core/program.cc
index b60f279c0f..45796a478b 100644
--- a/lite/core/program.cc
+++ b/lite/core/program.cc
@@ -122,6 +122,9 @@ void RuntimeProgram::Run() {
 #endif  // LITE_WITH_PRECISION_PROFILE
 #endif  // LITE_WITH_PROFILE
   }
+#ifdef LITE_WITH_PROFILE
+  LOG(INFO) << "\n" << profiler_.Summary();
+#endif  // LITE_WITH_PROFILE
 }
 
 void Program::Build(const cpp::ProgramDesc& prog) {
@@ -183,11 +186,6 @@ void Program::PrepareWorkspace(const cpp::ProgramDesc& prog) {
 void Instruction::Run() {
   CHECK(op_) << "op null";
   CHECK(kernel_) << "kernel null";
-#ifdef LITE_WITH_PROFILE
-  if (profile_id_ >= 0) {
-    profile::ProfileBlock x(profile_id_, "instruction");
-  }
-#endif  // LITE_WITH_PROFILE
   if (first_epoch_) {
     first_epoch_ = false;
     CHECK(op_->CheckShape());
diff --git a/lite/core/program.h b/lite/core/program.h
index 7a6700da61..1c1e4975c3 100644
--- a/lite/core/program.h
+++ b/lite/core/program.h
@@ -22,9 +22,6 @@
"lite/core/op_lite.h" #include "lite/core/op_registry.h" #include "lite/model_parser/cpp/program_desc.h" -#ifdef LITE_WITH_PROFILE -#include "lite/core/profile/basic_profiler.h" -#endif // LITE_WITH_PROFILE namespace paddle { namespace lite { @@ -87,22 +84,7 @@ struct Program { struct Instruction { Instruction(const std::shared_ptr& op, std::unique_ptr&& kernel) - : op_(op), kernel_(std::move(kernel)) { -#ifdef LITE_WITH_PROFILE - if (op_->Type() != "feed" && op_->Type() != "fetch") { - profile_id_ = profile::BasicProfiler::Global() - .NewRcd(kernel_->SerializedKernelType()) - .id(); - kernel_->SetProfileID(profile_id_); - // Set profile custom info - auto& profiler = - *profile::BasicProfiler::Global().mutable_record( - profile_id_); - profiler.SetCustomInfo("op_type", op_->Type()); - profiler.SetCustomInfo("op_info", op_->SerializedOpInfo()); - } -#endif // LITE_WITH_PROFILE - } + : op_(op), kernel_(std::move(kernel)) {} // Run the instruction. void Run(); @@ -113,6 +95,20 @@ struct Instruction { const KernelBase* kernel() const { return kernel_.get(); } KernelBase* mutable_kernel() { return kernel_.get(); } +#ifdef LITE_WITH_PROFILE + void set_profiler(profile::Profiler* profiler) { + profiler_ = profiler; + if (op_->Type() != "feed" && op_->Type() != "fetch") { + profile::OpCharacter ch; + ch.target = kernel()->target(); + ch.op_type = op_->Type(); + ch.kernel_name = kernel()->name(); + profile_id_ = profiler->NewTimer(ch); + kernel_->SetProfiler(profiler_, profile_id_); + } + } +#endif + private: std::shared_ptr op_; std::unique_ptr kernel_; @@ -120,7 +116,7 @@ struct Instruction { bool has_run_{false}; #ifdef LITE_WITH_PROFILE - // for profiler + profile::Profiler* profiler_; int profile_id_{-1}; #endif // LITE_WITH_PROFILE }; @@ -135,6 +131,9 @@ class LITE_API RuntimeProgram { if (instructions_.empty()) { LOG(FATAL) << "no instructions"; } +#ifdef LITE_WITH_PROFILE + set_profiler(); +#endif } void Run(); @@ -159,6 +158,15 @@ class LITE_API RuntimeProgram { RuntimeProgram(const RuntimeProgram&) = delete; std::vector instructions_; lite::Scope* exec_scope_{}; + +#ifdef LITE_WITH_PROFILE + profile::Profiler profiler_; + void set_profiler() { + for (auto i = instructions_.begin(); i != instructions_.end(); ++i) { + i->set_profiler(&profiler_); + } + } +#endif }; } // namespace lite diff --git a/lite/tests/cv/image_convert_test.cc b/lite/tests/cv/image_convert_test.cc index 7c0f867fae..eefd30f74f 100644 --- a/lite/tests/cv/image_convert_test.cc +++ b/lite/tests/cv/image_convert_test.cc @@ -17,8 +17,8 @@ #include #include #include "lite/core/context.h" +#include "lite/core/profile/timer.h" #include "lite/tests/cv/cv_basic.h" -#include "lite/tests/utils/timer.h" #include "lite/utils/cv/paddle_image_preprocess.h" DEFINE_int32(cluster, 3, "cluster id"); @@ -46,7 +46,7 @@ typedef paddle::lite::utils::cv::ImagePreprocess ImagePreprocess; typedef paddle::lite_api::Tensor Tensor_api; typedef paddle::lite::Tensor Tensor; -using paddle::lite::Timer; +using paddle::lite::profile::Timer; void fill_tensor_host_rand(uint8_t* dio, int64_t size) { uint seed = 256; @@ -285,8 +285,8 @@ void test_img(const std::vector& cluster_id, ImagePreprocess image_preprocess(srcFormat, dstFormat, tparam); for (int i = 0; i < test_iter; ++i) { - t1.clear(); - t1.start(); + t1.Reset(); + t1.Start(); LOG(INFO) << "image convert saber compute"; // 方法一: image_preprocess.imageCovert(src, lite_dst); @@ -329,8 +329,8 @@ void test_img(const std::vector& cluster_id, means, scales); - t1.end(); - double tdiff = 
-    double tdiff = t1.get_average_ms();
+    t1.Stop();
+    double tdiff = t1.LapTimes().Avg();
     to += tdiff;
     if (tdiff < min_time) {
       min_time = tdiff;
diff --git a/lite/tests/math/conv_compute_test.cc b/lite/tests/math/conv_compute_test.cc
index 194d7ab1c3..bda50d3563 100644
--- a/lite/tests/math/conv_compute_test.cc
+++ b/lite/tests/math/conv_compute_test.cc
@@ -15,10 +15,10 @@
 #include <gflags/gflags.h>
 #include <gtest/gtest.h>
 #include "lite/core/context.h"
+#include "lite/core/profile/timer.h"
 #include "lite/operators/op_params.h"
 #include "lite/tests/utils/naive_math_impl.h"
 #include "lite/tests/utils/tensor_utils.h"
-#include "lite/tests/utils/timer.h"
 
 #ifdef LITE_WITH_ARM
 #include "lite/kernels/arm/conv_compute.h"
@@ -59,7 +59,7 @@ DEFINE_bool(flag_bias, true, "with bias");
 typedef paddle::lite::DDim DDim;
 typedef paddle::lite::Tensor Tensor;
 typedef paddle::lite::operators::ConvParam ConvParam;
-using paddle::lite::Timer;
+using paddle::lite::profile::Timer;
 
 DDim compute_out_dim(const DDim& dim_in,
                      const paddle::lite::operators::ConvParam& param) {
@@ -205,19 +205,19 @@ void test_conv_fp32(const std::vector<DDim>& input_dims,
   /// compute
   Timer t0;
   for (int i = 0; i < FLAGS_repeats; ++i) {
-    t0.start();
+    t0.Start();
     conv.Launch();
-    t0.end();
+    t0.Stop();
   }
 
   double gops = 2.0 * dim_out.production() * dim_in[1] * weight_dim[2] *
                 weight_dim[3] / param.groups;
   LOG(INFO) << "conv fp32: input shape: " << dim_in << ", output shape"
-            << dim_out << ",running time, avg: " << t0.get_average_ms()
-            << ", min time: " << t0.get_min_time()
+            << dim_out << ",running time, avg: " << t0.LapTimes().Avg()
+            << ", min time: " << t0.LapTimes().Min()
             << ", total GOPS: " << 1e-9 * gops
-            << " GOPS, avg GOPs: " << 1e-6 * gops / t0.get_average_ms()
-            << " GOPs, max GOPs: " << 1e-6 * gops / t0.get_min_time();
+            << " GOPS, avg GOPs: " << 1e-6 * gops / t0.LapTimes().Avg()
+            << " GOPs, max GOPs: " << 1e-6 * gops / t0.LapTimes().Min();
 
   if (FLAGS_check_result) {
     double max_ratio = 0;
diff --git a/lite/tests/math/conv_int8_compute_test.cc b/lite/tests/math/conv_int8_compute_test.cc
index 6af9bbd431..27c186d7ce 100644
--- a/lite/tests/math/conv_int8_compute_test.cc
+++ b/lite/tests/math/conv_int8_compute_test.cc
@@ -15,10 +15,10 @@
 #include <gflags/gflags.h>
 #include <gtest/gtest.h>
 #include "lite/core/context.h"
+#include "lite/core/profile/timer.h"
 #include "lite/operators/op_params.h"
 #include "lite/tests/utils/naive_math_impl.h"
 #include "lite/tests/utils/tensor_utils.h"
-#include "lite/tests/utils/timer.h"
 
 #ifdef LITE_WITH_ARM
 #include "lite/kernels/arm/conv_compute.h"
@@ -59,7 +59,7 @@ DEFINE_bool(flag_bias, true, "with bias");
 typedef paddle::lite::DDim DDim;
 typedef paddle::lite::Tensor Tensor;
 typedef paddle::lite::operators::ConvParam ConvParam;
-using paddle::lite::Timer;
+using paddle::lite::profile::Timer;
 
 DDim compute_out_dim(const DDim& dim_in,
                      const paddle::lite::operators::ConvParam& param) {
@@ -309,30 +309,30 @@ void test_conv_int8(const std::vector<DDim>& input_dims,
   /// compute fp32 output
   Timer t0;
   for (int i = 0; i < FLAGS_repeats; ++i) {
-    t0.start();
+    t0.Start();
     conv_int8_fp32.Launch();
-    t0.end();
+    t0.Stop();
   }
   LOG(INFO) << "int8 conv, fp32 output: output shape" << dim_out
-            << ",running time, avg: " << t0.get_average_ms()
-            << ", min time: " << t0.get_min_time()
+            << ",running time, avg: " << t0.LapTimes().Avg()
+            << ", min time: " << t0.LapTimes().Min()
             << ", total GOPS: " << 1e-9 * gops
-            << " GOPS, avg GOPs: " << 1e-6 * gops / t0.get_average_ms()
-            << " GOPs, max GOPs: " << 1e-6 * gops / t0.get_min_time();
+            << " GOPS, avg GOPs: " << 1e-6 * gops / t0.LapTimes().Avg()
GOPs: " << 1e-6 * gops / t0.LapTimes().Min(); /// compute int8 output - t0.clear(); + t0.Reset(); for (int i = 0; i < FLAGS_repeats; ++i) { - t0.start(); + t0.Start(); conv_int8_int8.Launch(); - t0.end(); + t0.Stop(); } LOG(INFO) << "int8 conv, int8 output: output shape" << dim_out - << ",running time, avg: " << t0.get_average_ms() - << ", min time: " << t0.get_min_time() + << ",running time, avg: " << t0.LapTimes().Avg() + << ", min time: " << t0.LapTimes().Min() << ", total GOPS: " << 1e-9 * gops - << " GOPS, avg GOPs: " << 1e-6 * gops / t0.get_average_ms() - << " GOPs, max GOPs: " << 1e-6 * gops / t0.get_min_time(); + << " GOPS, avg GOPs: " << 1e-6 * gops / t0.LapTimes().Avg() + << " GOPs, max GOPs: " << 1e-6 * gops / t0.LapTimes().Min(); /// compare result fp32 output if (FLAGS_check_result) { diff --git a/lite/tests/math/conv_transpose_compute_test.cc b/lite/tests/math/conv_transpose_compute_test.cc index 7a56b5836b..398e745d94 100644 --- a/lite/tests/math/conv_transpose_compute_test.cc +++ b/lite/tests/math/conv_transpose_compute_test.cc @@ -15,10 +15,10 @@ #include #include #include "lite/core/context.h" +#include "lite/core/profile/timer.h" #include "lite/operators/op_params.h" #include "lite/tests/utils/naive_math_impl.h" #include "lite/tests/utils/tensor_utils.h" -#include "lite/tests/utils/timer.h" #ifdef LITE_WITH_ARM #include "lite/kernels/arm/conv_transpose_compute.h" @@ -59,7 +59,7 @@ DEFINE_bool(flag_bias, false, "with bias"); typedef paddle::lite::DDim DDim; typedef paddle::lite::Tensor Tensor; typedef paddle::lite::operators::ConvParam ConvParam; -using paddle::lite::Timer; +using paddle::lite::profile::Timer; DDim compute_out_dim(const DDim& dim_in, const paddle::lite::operators::ConvParam& param) { @@ -187,19 +187,19 @@ void test_conv_transpose_fp32(const std::vector& input_dims, /// compute Timer t0; for (int i = 0; i < FLAGS_repeats; ++i) { - t0.start(); + t0.Start(); conv_t.Launch(); - t0.end(); + t0.Stop(); } float gops = 2.f * tmp_weights.numel() * dim_in[0] * dim_in[2] * dim_in[3]; LOG(INFO) << "conv fp32: input shape: " << dim_in << ", output shape" - << dim_out << ",running time, avg: " << t0.get_average_ms() - << ", min time: " << t0.get_min_time() + << dim_out << ",running time, avg: " << t0.LapTimes().Avg() + << ", min time: " << t0.LapTimes().Min() << ", total GOPS: " << 1e-9 * gops - << " GOPS, avg GOPs: " << 1e-6 * gops / t0.get_average_ms() - << " GOPs, max GOPs: " << 1e-6 * gops / t0.get_min_time(); + << " GOPS, avg GOPs: " << 1e-6 * gops / t0.LapTimes().Avg() + << " GOPs, max GOPs: " << 1e-6 * gops / t0.LapTimes().Min(); if (FLAGS_check_result) { double max_ratio = 0; diff --git a/lite/tests/math/gemm_int8_compute_test.cc b/lite/tests/math/gemm_int8_compute_test.cc index 06a1a0a65e..fde5aacb1c 100644 --- a/lite/tests/math/gemm_int8_compute_test.cc +++ b/lite/tests/math/gemm_int8_compute_test.cc @@ -20,12 +20,12 @@ #include "lite/backends/arm/math/funcs.h" #endif // LITE_WITH_ARM #include "lite/core/context.h" +#include "lite/core/profile/timer.h" #include "lite/core/tensor.h" #include "lite/tests/utils/tensor_utils.h" -#include "lite/tests/utils/timer.h" typedef paddle::lite::Tensor Tensor; -using paddle::lite::Timer; +using paddle::lite::profile::Timer; DEFINE_int32(power_mode, 3, @@ -193,7 +193,7 @@ bool test_gemm_int8(bool tra, dbias_int8[l] = dbias[l] / scale_c[0]; } for (int i = 0; i < FLAGS_repeats; ++i) { - t0.start(); + t0.Start(); paddle::lite::arm::math::gemm_prepack_int8(tpackedA.data(), db, dbias_int8, @@ -206,21 +206,21 @@ bool 
                                                trb,
                                                scale_merge_int8.data(),
                                                &ctx);
-    t0.end();
+    t0.Stop();
   }
   LOG(INFO) << "gemm_int8_int8 output: M: " << m << ", N: " << n
             << ", K: " << k << ", power_mode: " << cls << ", threads: " << ths
             << ", GOPS: " << ops * 1e-9f
-            << " GOPS, avg time: " << t0.get_average_ms()
-            << " ms, min time: " << t0.get_min_time()
-            << " ms, mean GOPs: " << ops * 1e-6f / t0.get_average_ms()
-            << " GOPs, max GOPs: " << ops * 1e-6f / t0.get_min_time()
+            << " GOPS, avg time: " << t0.LapTimes().Avg()
+            << " ms, min time: " << t0.LapTimes().Min()
+            << " ms, mean GOPs: " << ops * 1e-6f / t0.LapTimes().Avg()
+            << " GOPs, max GOPs: " << ops * 1e-6f / t0.LapTimes().Min()
             << " GOPs";
 
   /// fp32 output compute
-  t0.clear();
+  t0.Reset();
   for (int i = 0; i < FLAGS_repeats; ++i) {
-    t0.start();
+    t0.Start();
     paddle::lite::arm::math::gemm_prepack_int8(tpackedA.data<int8_t>(),
                                                db,
                                                dbias,
@@ -233,15 +233,15 @@ bool test_gemm_int8(bool tra,
                                                trb,
                                                scale_merge_fp32.data(),
                                                &ctx);
-    t0.end();
+    t0.Stop();
   }
   LOG(INFO) << "gemm_int8_fp32 output: M: " << m << ", N: " << n
             << ", K: " << k << ", power_mode: " << cls << ", threads: " << ths
             << ", GOPS: " << ops * 1e-9f
-            << " GOPS, avg time: " << t0.get_average_ms()
-            << " ms, min time: " << t0.get_min_time()
-            << " ms, mean GOPs: " << ops * 1e-6f / t0.get_average_ms()
-            << " GOPs, max GOPs: " << ops * 1e-6f / t0.get_min_time()
+            << " GOPS, avg time: " << t0.LapTimes().Avg()
+            << " ms, min time: " << t0.LapTimes().Min()
+            << " ms, mean GOPs: " << ops * 1e-6f / t0.LapTimes().Avg()
+            << " GOPs, max GOPs: " << ops * 1e-6f / t0.LapTimes().Min()
             << " GOPs";
 
   if (FLAGS_check_result) {
diff --git a/lite/tests/math/gemv_int8_compute_test.cc b/lite/tests/math/gemv_int8_compute_test.cc
index c64e78d66a..623615c8da 100644
--- a/lite/tests/math/gemv_int8_compute_test.cc
+++ b/lite/tests/math/gemv_int8_compute_test.cc
@@ -20,12 +20,12 @@
 #include "lite/backends/arm/math/funcs.h"
 #endif  // LITE_WITH_ARM
 #include "lite/core/context.h"
+#include "lite/core/profile/timer.h"
 #include "lite/core/tensor.h"
 #include "lite/tests/utils/tensor_utils.h"
-#include "lite/tests/utils/timer.h"
 
 typedef paddle::lite::Tensor Tensor;
-using paddle::lite::Timer;
+using paddle::lite::profile::Timer;
 
 DEFINE_int32(power_mode,
              3,
@@ -165,7 +165,7 @@ bool test_gemv_int8(
     dbias_int8[l] = dbias[l] / scale_c[0];
   }
   for (int i = 0; i < FLAGS_repeats; ++i) {
-    t0.start();
+    t0.Start();
     paddle::lite::arm::math::gemv_int8(da,
                                        db,
                                        dc_fp32,
@@ -177,21 +177,21 @@ bool test_gemv_int8(
                                        dbias,
                                        has_relu,
                                        &ctx);
-    t0.end();
+    t0.Stop();
   }
   LOG(INFO) << "gemv_int8_int8 output: M: " << m << ", N: " << n
             << ", power_mode: " << cls << ", threads: " << ths
             << ", GOPS: " << ops * 1e-9f
-            << " GOPS, avg time: " << t0.get_average_ms()
-            << " ms, min time: " << t0.get_min_time()
-            << " ms, mean GOPs: " << ops * 1e-6f / t0.get_average_ms()
-            << " GOPs, max GOPs: " << ops * 1e-6f / t0.get_min_time()
+            << " GOPS, avg time: " << t0.LapTimes().Avg()
+            << " ms, min time: " << t0.LapTimes().Min()
+            << " ms, mean GOPs: " << ops * 1e-6f / t0.LapTimes().Avg()
+            << " GOPs, max GOPs: " << ops * 1e-6f / t0.LapTimes().Min()
             << " GOPs";
 
   /// fp32 output compute
-  t0.clear();
+  t0.Reset();
   for (int i = 0; i < FLAGS_repeats; ++i) {
-    t0.start();
+    t0.Start();
     paddle::lite::arm::math::gemv_int8(da,
                                        db,
                                        dc_int8,
@@ -203,15 +203,15 @@ bool test_gemv_int8(
                                        dbias_int8,
                                        has_relu,
                                        &ctx);
-    t0.end();
+    t0.Stop();
   }
   LOG(INFO) << "gemm_int8_fp32 output: M: " << m << ", N: " << n
             << ", power_mode: " << cls << ", threads: " << ths
             << ", GOPS: " << ops * 1e-9f
<< " GOPS, avg time: " << t0.get_average_ms() - << " ms, min time: " << t0.get_min_time() - << " ms, mean GOPs: " << ops * 1e-6f / t0.get_average_ms() - << " GOPs, max GOPs: " << ops * 1e-6f / t0.get_min_time() + << " GOPS, avg time: " << t0.LapTimes().Avg() + << " ms, min time: " << t0.LapTimes().Min() + << " ms, mean GOPs: " << ops * 1e-6f / t0.LapTimes().Avg() + << " GOPs, max GOPs: " << ops * 1e-6f / t0.LapTimes().Min() << " GOPs"; if (FLAGS_check_result) { diff --git a/lite/tests/math/layout_compute_test.cc b/lite/tests/math/layout_compute_test.cc index 29f8f749db..a566924548 100644 --- a/lite/tests/math/layout_compute_test.cc +++ b/lite/tests/math/layout_compute_test.cc @@ -15,10 +15,10 @@ #include #include #include "lite/core/context.h" +#include "lite/core/profile/timer.h" #include "lite/operators/op_params.h" #include "lite/tests/utils/naive_math_impl.h" #include "lite/tests/utils/tensor_utils.h" -#include "lite/tests/utils/timer.h" #ifdef LITE_WITH_ARM #include "lite/kernels/arm/layout_compute.h" @@ -48,7 +48,7 @@ typedef paddle::lite::DDim DDim; typedef paddle::lite::Tensor Tensor; typedef paddle::lite::operators::LayoutParam LayoutParam; -using paddle::lite::Timer; +using paddle::lite::profile::Timer; #define IN(n, c, h, w) \ input_data[w + h * input_w + c * input_h * input_w + \ @@ -165,17 +165,17 @@ void test_layout_fp32_nchw(DDim dim_in, /// compute Timer t0; for (int i = 0; i < FLAGS_repeats; ++i) { - t0.start(); + t0.Start(); layout.Run(); - t0.end(); + t0.Stop(); } double gops = 2.0 * dim_out.production(); LOG(INFO) << "layout fp32: input shape: " << dim_in << ", output shape" - << dim_out << ",running time, avg: " << t0.get_average_ms() - << ", min time: " << t0.get_min_time() + << dim_out << ",running time, avg: " << t0.LapTimes().Avg() + << ", min time: " << t0.LapTimes().Min() << ", total GOPS: " << 1e-9 * gops - << " GOPS, avg GOPs: " << 1e-6 * gops / t0.get_average_ms() - << " GOPs, max GOPs: " << 1e-6 * gops / t0.get_min_time(); + << " GOPS, avg GOPs: " << 1e-6 * gops / t0.LapTimes().Avg() + << " GOPs, max GOPs: " << 1e-6 * gops / t0.LapTimes().Min(); if (FLAGS_check_result) { double max_ratio = 0; @@ -268,17 +268,17 @@ void test_layout_fp32_nhwc(DDim dim_in, /// compute Timer t0; for (int i = 0; i < FLAGS_repeats; ++i) { - t0.start(); + t0.Start(); layout.Run(); - t0.end(); + t0.Stop(); } double gops = 2.0 * dim_out.production(); LOG(INFO) << "layout fp32: input shape: " << dim_in << ", output shape" - << dim_out << ",running time, avg: " << t0.get_average_ms() - << ", min time: " << t0.get_min_time() + << dim_out << ",running time, avg: " << t0.LapTimes().Avg() + << ", min time: " << t0.LapTimes().Min() << ", total GOPS: " << 1e-9 * gops - << " GOPS, avg GOPs: " << 1e-6 * gops / t0.get_average_ms() - << " GOPs, max GOPs: " << 1e-6 * gops / t0.get_min_time(); + << " GOPS, avg GOPs: " << 1e-6 * gops / t0.LapTimes().Avg() + << " GOPs, max GOPs: " << 1e-6 * gops / t0.LapTimes().Min(); if (FLAGS_check_result) { double max_ratio = 0; @@ -370,18 +370,18 @@ void test_layout_int8_nchw(DDim dim_in, /// compute Timer t0; for (int i = 0; i < FLAGS_repeats; ++i) { - t0.start(); + t0.Start(); layout.Run(); - t0.end(); + t0.Stop(); } LOG(INFO) << "saber compute end"; double gops = 2.0 * dim_out.production(); LOG(INFO) << "layout int8: input shape: " << dim_in << ", output shape" - << dim_out << ",running time, avg: " << t0.get_average_ms() - << ", min time: " << t0.get_min_time() + << dim_out << ",running time, avg: " << t0.LapTimes().Avg() + << ", min time: " << 
+            << ", min time: " << t0.LapTimes().Min()
             << ", total GOPS: " << 1e-9 * gops
-            << " GOPS, avg GOPs: " << 1e-6 * gops / t0.get_average_ms()
-            << " GOPs, max GOPs: " << 1e-6 * gops / t0.get_min_time();
+            << " GOPS, avg GOPs: " << 1e-6 * gops / t0.LapTimes().Avg()
+            << " GOPs, max GOPs: " << 1e-6 * gops / t0.LapTimes().Min();
 
   if (FLAGS_check_result) {
     double max_ratio = 0;
@@ -474,18 +474,18 @@ void test_layout_int8_nhwc(DDim dim_in,
   /// compute
   Timer t0;
   for (int i = 0; i < FLAGS_repeats; ++i) {
-    t0.start();
+    t0.Start();
     layout.Run();
-    t0.end();
+    t0.Stop();
   }
   LOG(INFO) << "run";
   double gops = 2.0 * dim_out.production();
   LOG(INFO) << "layout int8: input shape: " << dim_in << ", output shape"
-            << dim_out << ",running time, avg: " << t0.get_average_ms()
-            << ", min time: " << t0.get_min_time()
+            << dim_out << ",running time, avg: " << t0.LapTimes().Avg()
+            << ", min time: " << t0.LapTimes().Min()
             << ", total GOPS: " << 1e-9 * gops
-            << " GOPS, avg GOPs: " << 1e-6 * gops / t0.get_average_ms()
-            << " GOPs, max GOPs: " << 1e-6 * gops / t0.get_min_time();
+            << " GOPS, avg GOPs: " << 1e-6 * gops / t0.LapTimes().Avg()
+            << " GOPs, max GOPs: " << 1e-6 * gops / t0.LapTimes().Min();
 
   if (FLAGS_check_result) {
     double max_ratio = 0;
diff --git a/lite/tests/math/pool_compute_test.cc b/lite/tests/math/pool_compute_test.cc
index 2d6a0be628..73a5ba5606 100644
--- a/lite/tests/math/pool_compute_test.cc
+++ b/lite/tests/math/pool_compute_test.cc
@@ -15,10 +15,10 @@
 #include <gflags/gflags.h>
 #include <gtest/gtest.h>
 #include "lite/core/context.h"
+#include "lite/core/profile/timer.h"
 #include "lite/operators/op_params.h"
 #include "lite/tests/utils/naive_math_impl.h"
 #include "lite/tests/utils/tensor_utils.h"
-#include "lite/tests/utils/timer.h"
 
 #ifdef LITE_WITH_ARM
 #include "lite/kernels/arm/pool_compute.h"
@@ -60,7 +60,7 @@ DEFINE_string(pooling_type, "max", "do max pooling");
 typedef paddle::lite::DDim DDim;
 typedef paddle::lite::Tensor Tensor;
 typedef paddle::lite::operators::PoolParam PoolParam;
-using paddle::lite::Timer;
+using paddle::lite::profile::Timer;
 
 DDim compute_out_dim(const DDim& dim_in,
                      const paddle::lite::operators::PoolParam& param) {
@@ -320,18 +320,18 @@ void test_pool_fp32(const std::vector<DDim>& input_dims,
   /// compute
   Timer t0;
   for (int i = 0; i < FLAGS_repeats; ++i) {
-    t0.start();
+    t0.Start();
     pool.Launch();
-    t0.end();
+    t0.Stop();
   }
 
   double gops = 2.0 * dim_out.production() * ksize[0] * ksize[1];
   LOG(INFO) << "pool fp32: input shape: " << dim_in << ", output shape"
-            << dim_out << ", running time, avg: " << t0.get_average_ms()
-            << ", min time: " << t0.get_min_time()
+            << dim_out << ", running time, avg: " << t0.LapTimes().Avg()
+            << ", min time: " << t0.LapTimes().Min()
             << ", total GOPS: " << 1e-9 * gops
-            << " GOPS, avg GOPs: " << 1e-6 * gops / t0.get_average_ms()
-            << " GOPs, max GOPs: " << 1e-6 * gops / t0.get_min_time();
+            << " GOPS, avg GOPs: " << 1e-6 * gops / t0.LapTimes().Avg()
+            << " GOPs, max GOPs: " << 1e-6 * gops / t0.LapTimes().Min();
 
   if (FLAGS_check_result) {
     double max_ratio = 0;
diff --git a/lite/tests/math/sgemm_c4_compute_test.cc b/lite/tests/math/sgemm_c4_compute_test.cc
index 5fcc54f338..886dba6ac5 100644
--- a/lite/tests/math/sgemm_c4_compute_test.cc
+++ b/lite/tests/math/sgemm_c4_compute_test.cc
@@ -20,12 +20,12 @@
 #include "lite/backends/arm/math/funcs.h"
 #endif  // LITE_WITH_ARM
 #include "lite/core/context.h"
+#include "lite/core/profile/timer.h"
 #include "lite/core/tensor.h"
 #include "lite/tests/utils/tensor_utils.h"
-#include "lite/tests/utils/timer.h"
 
 typedef paddle::lite::Tensor Tensor;
-using paddle::lite::Timer;
+using paddle::lite::profile::Timer;
 
 DEFINE_int32(power_mode,
              3,
@@ -134,18 +134,18 @@ bool test_sgemm_c4(
   }
 
   for (int i = 0; i < FLAGS_repeats; ++i) {
-    t0.start();
+    t0.Start();
     paddle::lite::arm::math::sgemm_prepack_c4(
         m, n, k, da_c4, db_c4, dc, dbias, has_bias, has_relu, &ctx);
-    t0.end();
+    t0.Stop();
   }
   LOG(INFO) << "M: " << m << ", N: " << n << ", K: " << k
             << ", power_mode: " << cls << ", threads: " << ths
             << ", GOPS: " << ops * 1e-9f
-            << " GOPS, avg time: " << t0.get_average_ms()
-            << " ms, min time: " << t0.get_min_time()
-            << " ms, mean GOPs: " << ops * 1e-6f / t0.get_average_ms()
-            << " GOPs, max GOPs: " << ops * 1e-6f / t0.get_min_time()
+            << " GOPS, avg time: " << t0.LapTimes().Avg()
+            << " ms, min time: " << t0.LapTimes().Min()
+            << " ms, mean GOPs: " << ops * 1e-6f / t0.LapTimes().Avg()
+            << " GOPs, max GOPs: " << ops * 1e-6f / t0.LapTimes().Min()
             << " GOPs";
 
   if (FLAGS_check_result) {
diff --git a/lite/tests/math/sgemm_compute_test.cc b/lite/tests/math/sgemm_compute_test.cc
index 1621ceb904..6df5e671fe 100644
--- a/lite/tests/math/sgemm_compute_test.cc
+++ b/lite/tests/math/sgemm_compute_test.cc
@@ -20,12 +20,12 @@
 #include "lite/backends/arm/math/funcs.h"
 #endif  // LITE_WITH_ARM
 #include "lite/core/context.h"
+#include "lite/core/profile/timer.h"
 #include "lite/core/tensor.h"
 #include "lite/tests/utils/tensor_utils.h"
-#include "lite/tests/utils/timer.h"
 
 typedef paddle::lite::Tensor Tensor;
-using paddle::lite::Timer;
+using paddle::lite::profile::Timer;
 
 DEFINE_int32(power_mode,
              3,
@@ -171,7 +171,7 @@ bool test_sgemm(bool tra,
     if (i == FLAGS_repeats - 1) {
       memcpy(dc, dc_backup, sizeof(float) * m * ldc);
     }
-    t0.start();
+    t0.Start();
     paddle::lite::arm::math::sgemm_prepack(trb,
                                            m,
                                            n,
@@ -186,15 +186,15 @@ bool test_sgemm(bool tra,
                                            has_bias,
                                            has_relu,
                                            &ctx);
-    t0.end();
+    t0.Stop();
   }
   LOG(INFO) << "M: " << m << ", N: " << n << ", K: " << k
             << ", power_mode: " << cls << ", threads: " << ths
             << ", GOPS: " << ops * 1e-9f
-            << " GOPS, avg time: " << t0.get_average_ms()
-            << " ms, min time: " << t0.get_min_time()
-            << " ms, mean GOPs: " << ops * 1e-6f / t0.get_average_ms()
-            << " GOPs, max GOPs: " << ops * 1e-6f / t0.get_min_time()
+            << " GOPS, avg time: " << t0.LapTimes().Avg()
+            << " ms, min time: " << t0.LapTimes().Min()
+            << " ms, mean GOPs: " << ops * 1e-6f / t0.LapTimes().Avg()
+            << " GOPs, max GOPs: " << ops * 1e-6f / t0.LapTimes().Min()
             << " GOPs";
 
   if (FLAGS_check_result) {
diff --git a/lite/tests/math/sgemv_compute_test.cc b/lite/tests/math/sgemv_compute_test.cc
index 3c8965cb2c..5dd2d32295 100644
--- a/lite/tests/math/sgemv_compute_test.cc
+++ b/lite/tests/math/sgemv_compute_test.cc
@@ -20,9 +20,9 @@
 #include "lite/backends/arm/math/funcs.h"
 #endif  // LITE_WITH_ARM
 #include "lite/core/context.h"
+#include "lite/core/profile/timer.h"
 #include "lite/core/tensor.h"
 #include "lite/tests/utils/tensor_utils.h"
-#include "lite/tests/utils/timer.h"
 
 typedef paddle::lite::Tensor Tensor;
 
@@ -83,7 +83,7 @@ bool test_sgemv(
     basic_gemv(
         m, k, da, db, dbias, dc_basic, 1.f, 0.f, tra, has_bias, has_relu);
   }
-  paddle::lite::Timer t0;
+  paddle::lite::profile::Timer t0;
   //! compute
   double ops = 2.0 * m * k;
   std::unique_ptr<paddle::lite::KernelContext> ctx1(
@@ -96,19 +96,19 @@ bool test_sgemv(
         da, db, dc, tra, m, k, has_bias, dbias, has_relu, &ctx);
   }
-  t0.clear();
+  t0.Reset();
   for (int i = 0; i < FLAGS_repeats; ++i) {
-    t0.start();
+    t0.Start();
     paddle::lite::arm::math::sgemv(
         da, db, dc, tra, m, k, has_bias, dbias, has_relu, &ctx);
-    t0.end();
+    t0.Stop();
   }
   LOG(INFO) << "gemv output: M: " << m << ", K: " << k
             << ", cluster: " << cls << ", threads: " << ths
             << ", GOPS: " << ops * 1e-9f
-            << " GOPS, avg time: " << t0.get_average_ms()
-            << " ms, min time: " << t0.get_min_time()
-            << " ms, mean GOPs: " << ops * 1e-6f / t0.get_average_ms()
-            << " GOPs, max GOPs: " << ops * 1e-6f / t0.get_min_time()
+            << " GOPS, avg time: " << t0.LapTimes().Avg()
+            << " ms, min time: " << t0.LapTimes().Min()
+            << " ms, mean GOPs: " << ops * 1e-6f / t0.LapTimes().Avg()
+            << " GOPs, max GOPs: " << ops * 1e-6f / t0.LapTimes().Min()
             << " GOPs";
 
   if (FLAGS_check_result) {
diff --git a/lite/tests/utils/timer.h b/lite/tests/utils/timer.h
deleted file mode 100644
index 095f32046e..0000000000
--- a/lite/tests/utils/timer.h
+++ /dev/null
@@ -1,105 +0,0 @@
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include <chrono>  // NOLINT
-#include <list>
-
-namespace paddle {
-namespace lite {
-
-class Timer final {
- public:
-  Timer() {}
-
-  ~Timer() {}
-
-  void clear() { ms_time_.clear(); }
-
-  void start() { tstart_ = std::chrono::system_clock::now(); }
-
-  void end() {
-    tend_ = std::chrono::system_clock::now();
-    auto ts =
-        std::chrono::duration_cast<std::chrono::microseconds>(tend_ - tstart_);
-    latest_time_ = 1000.f * static_cast<float>(ts.count()) *
-                   std::chrono::microseconds::period::num /
-                   std::chrono::microseconds::period::den;
-    ms_time_.push_back(latest_time_);
-  }
-
-  float latest_time() const { return latest_time_; }
-
-  float get_average_ms() {
-    if (ms_time_.size() == 0) {
-      return 0.f;
-    }
-    float sum = 0.f;
-    for (auto i : ms_time_) {
-      sum += i;
-    }
-    return sum / ms_time_.size();
-  }
-
-  float get_sum_ms() {
-    if (ms_time_.size() == 0) {
-      return 0.f;
-    }
-    float sum = 0.f;
-    for (auto i : ms_time_) {
-      sum += i;
-    }
-    return sum;
-  }
-
-  // return tile (0-99) time.
-  float get_tile_time(float tile) {
-    if (tile < 0 || tile > 100) {
-      return -1.f;
-    }
-    int total_items = static_cast<int>(ms_time_.size());
-    if (total_items <= 0) {
-      return -2.f;
-    }
-    ms_time_.sort();
-    int pos = static_cast<int>(tile * total_items / 100);
-    auto it = ms_time_.begin();
-    for (int i = 0; i < pos; ++i) {
-      ++it;
-    }
-    return *it;
-  }
-
-  std::list<float> get_time_stat() { return ms_time_; }
-
-  float get_min_time() {
-    ms_time_.sort();
-    return *ms_time_.begin();
-  }
-
-  float get_max_time() {
-    ms_time_.sort([](int a, int b) { return a > b; });
-    return *ms_time_.begin();
-  }
-
- private:
-  std::chrono::time_point<std::chrono::system_clock> tstart_;
-  std::chrono::time_point<std::chrono::system_clock> tend_;
-  std::list<float> ms_time_;
-  float latest_time_;
-};
-
-}  // namespace lite
-}  // namespace paddle
-- 
GitLab
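
Usage sketch of the refactored API above. This is a minimal, host-only
illustration under stated assumptions: the kernel name "conv2d/fp32" and the
ProfileHostKernel function are hypothetical, and passing nullptr for the
KernelContext relies on the host Timer overloads ignoring the context argument
(see lite/core/profile/timer.h in this patch).

#include "lite/core/profile/profiler.h"
#include "lite/utils/cp_logging.h"

void ProfileHostKernel() {
  paddle::lite::profile::Profiler profiler("demo");

  // Describe the unit being measured; unset fields default to "N/A".
  paddle::lite::profile::OpCharacter ch;
  ch.target = paddle::lite::TargetType::kHost;  // selects DeviceTimer<kHost>
  ch.op_type = "conv2d";
  ch.kernel_name = "conv2d/fp32";
  const int idx = profiler.NewTimer(ch);

  for (int i = 0; i < 10; ++i) {
    profiler.StartTiming(idx, nullptr);  // host timers ignore the context
    // ... run the kernel under measurement ...
    profiler.StopTiming(idx, nullptr);   // records one lap time in ms
  }

  // Emits the per-unit Avg/Min/Max table; concise mode is the default.
  LOG(INFO) << "\n" << profiler.Summary();
}

This mirrors how RuntimeProgram wires the Profiler into each Instruction via
set_profiler() and prints profiler_.Summary() after Run().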