From f8ff5aa451f3e94ab2959788ee9970b1129e7f30 Mon Sep 17 00:00:00 2001 From: Xiaoyang LI Date: Wed, 23 Oct 2019 16:25:00 +0800 Subject: [PATCH] remove log in reshape, fix conv error when padding size=4 (#2199) * remove log in reshape, fix conv error when padding size=4, test=develop * fix style, test=develop * remove useless code, test=develop * remove redundant model test file, test=develop * change cluster to power_mode, test=develop * fix build error, test=develop * change cluster to power_mode, test=develop * change opt_nb to use_optimize_nb, test=develop * null, test=develop --- lite/api/model_test.cc | 62 ++++++++++++------- lite/api/test_helper.h | 7 +++ lite/operators/conv_op.cc | 4 -- lite/operators/reshape_op.cc | 1 - lite/tests/math/conv_compute_test.cc | 35 ++++++----- lite/tests/math/conv_int8_compute_test.cc | 37 ++++++----- .../tests/math/conv_transpose_compute_test.cc | 25 +++++--- lite/tests/math/gemm_int8_compute_test.cc | 19 ++++-- lite/tests/math/sgemm_compute_test.cc | 17 +++-- lite/tests/utils/timer.h | 15 +++-- 10 files changed, 141 insertions(+), 81 deletions(-) diff --git a/lite/api/model_test.cc b/lite/api/model_test.cc index e027a55413..14fecaad3a 100644 --- a/lite/api/model_test.cc +++ b/lite/api/model_test.cc @@ -21,13 +21,20 @@ #include "lite/api/paddle_use_passes.h" #include "lite/api/test_helper.h" #include "lite/core/device_info.h" +#include "lite/tests/utils/timer.h" #include "lite/utils/cp_logging.h" #include "lite/utils/string.h" +using paddle::lite::Timer; + DEFINE_string(input_shape, "1,3,224,224", "input shapes, separated by colon and comma"); +DEFINE_bool(use_optimize_nb, + false, + "optimized & naive buffer model for mobile devices"); + namespace paddle { namespace lite_api { @@ -58,15 +65,14 @@ void OutputOptModel(const std::string& load_model_dir, #ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK void Run(const std::vector>& input_shapes, const std::string& model_dir, - const int repeat, + const PowerMode power_mode, const int thread_num, + const int repeat, const int warmup_times = 0) { -#ifdef LITE_WITH_ARM - lite::DeviceInfo::Init(); - lite::DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, thread_num); -#endif lite_api::MobileConfig config; config.set_model_dir(model_dir); + config.set_power_mode(power_mode); + config.set_threads(thread_num); auto predictor = lite_api::CreatePaddlePredictor(config); @@ -87,17 +93,22 @@ void Run(const std::vector>& input_shapes, predictor->Run(); } - auto start = lite::GetCurrentUS(); - for (int i = 0; i < repeat; ++i) { + Timer ti; + for (int j = 0; j < repeat; ++j) { + ti.start(); predictor->Run(); + ti.end(); + LOG(INFO) << "iter: " << j << ", time: " << ti.latest_time() << " ms"; } - auto end = lite::GetCurrentUS(); LOG(INFO) << "================== Speed Report ==================="; - LOG(INFO) << "Model: " << model_dir << ", threads num " << thread_num - << ", warmup: " << warmup_times << ", repeats: " << repeat - << ", spend " << (end - start) / repeat / 1000.0 - << " ms in average."; + LOG(INFO) << "Model: " << model_dir + << ", power_mode: " << static_cast(power_mode) + << ", threads num " << thread_num << ", warmup: " << warmup_times + << ", repeats: " << repeat << ", avg time: " << ti.get_average_ms() + << " ms" + << ", min time: " << ti.get_min_time() << " ms" + << ", max time: " << ti.get_max_time() << " ms."; auto output = predictor->GetOutput(0); auto out = output->data(); @@ -122,7 +133,12 @@ int main(int argc, char** argv) { << "--model_dir /path/to/your/model"; exit(0); } - std::string 
save_optimized_model_dir = FLAGS_model_dir + "opt2"; + std::string save_optimized_model_dir = ""; + if (FLAGS_use_optimize_nb) { + save_optimized_model_dir = FLAGS_model_dir; + } else { + save_optimized_model_dir = FLAGS_model_dir + "opt2"; + } auto split_string = [](const std::string& str_in) -> std::vector { @@ -164,17 +180,21 @@ int main(int argc, char** argv) { input_shapes.push_back(get_shape(str_input_shapes[i])); } - // Output optimized model - paddle::lite_api::OutputOptModel( - FLAGS_model_dir, save_optimized_model_dir, input_shapes); + if (!FLAGS_use_optimize_nb) { + // Output optimized model + paddle::lite_api::OutputOptModel( + FLAGS_model_dir, save_optimized_model_dir, input_shapes); + } #ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK // Run inference using optimized model - paddle::lite_api::Run(input_shapes, - save_optimized_model_dir, - FLAGS_repeats, - FLAGS_threads, - FLAGS_warmup); + paddle::lite_api::Run( + input_shapes, + save_optimized_model_dir, + static_cast(FLAGS_power_mode), + FLAGS_threads, + FLAGS_repeats, + FLAGS_warmup); #endif return 0; } diff --git a/lite/api/test_helper.h b/lite/api/test_helper.h index d835c030f0..71752c942b 100644 --- a/lite/api/test_helper.h +++ b/lite/api/test_helper.h @@ -22,6 +22,13 @@ DEFINE_string(model_dir, "", "model dir"); DEFINE_int32(warmup, 0, "warmup times"); DEFINE_int32(repeats, 1, "repeats times"); +DEFINE_int32(power_mode, + 3, + "arm power mode: " + "0 for big cluster, " + "1 for little cluster, " + "2 for all cores, " + "3 for no bind"); DEFINE_int32(threads, 1, "threads num"); DEFINE_int32(im_width, 224, "image width"); DEFINE_int32(im_height, 224, "image height"); diff --git a/lite/operators/conv_op.cc b/lite/operators/conv_op.cc index 13c1e4abf6..ceca1a61ce 100644 --- a/lite/operators/conv_op.cc +++ b/lite/operators/conv_op.cc @@ -34,10 +34,6 @@ bool ConvOpLite::CheckShape() const { CHECK_EQ_OR_FALSE(in_dims.size(), filter_dims.size()); CHECK_OR_FALSE(in_dims.size() - param_.strides.size() == 2U); - // CHECK_EQ_OR_FALSE(param_.paddings.size(), param_.strides.size()); - - // CHECK_EQ_OR_FALSE(in_dims[1], filter_dims[1] * param_.groups); - // CHECK_EQ_OR_FALSE(filter_dims[0] % param_.groups, 0); CHECK_EQ_OR_FALSE(filter_dims.size(), 4UL); return true; diff --git a/lite/operators/reshape_op.cc b/lite/operators/reshape_op.cc index 63aaabcd74..89cf698f8e 100644 --- a/lite/operators/reshape_op.cc +++ b/lite/operators/reshape_op.cc @@ -48,7 +48,6 @@ bool ReshapeOp::InferShape() const { auto x_dims = param_.x->dims(); auto output_dims = ValidateShape(final_shape, x_dims); - LOG(INFO) << "output_dims:" << output_dims; param_.output->Resize(output_dims); auto out_lod = param_.output->mutable_lod(); *out_lod = param_.x->lod(); diff --git a/lite/tests/math/conv_compute_test.cc b/lite/tests/math/conv_compute_test.cc index aa83c5163a..9a9a8be81d 100644 --- a/lite/tests/math/conv_compute_test.cc +++ b/lite/tests/math/conv_compute_test.cc @@ -24,7 +24,13 @@ #include "lite/kernels/arm/conv_compute.h" #endif // LITE_WITH_ARM -DEFINE_int32(cluster, 3, "cluster id"); +DEFINE_int32(power_mode, + 3, + "power mode: " + "0 for POWER_HIGH;" + "1 for POWER_LOW;" + "2 for POWER_FULL;" + "3 for NO_BIND"); DEFINE_int32(threads, 1, "threads num"); DEFINE_int32(warmup, 0, "warmup times"); DEFINE_int32(repeats, 1, "repeats times"); @@ -53,6 +59,7 @@ DEFINE_bool(flag_bias, true, "with bias"); typedef paddle::lite::DDim DDim; typedef paddle::lite::Tensor Tensor; typedef paddle::lite::operators::ConvParam ConvParam; +using paddle::lite::Timer; DDim 
compute_out_dim(const DDim& dim_in, const paddle::lite::operators::ConvParam& param) { @@ -87,7 +94,7 @@ void test_conv_fp32(const std::vector& input_dims, bool flag_bias, bool flag_relu, const std::vector& thread_num, - const std::vector& cluster_id) { + const std::vector& power_mode) { #ifdef LITE_WITH_ARM paddle::lite::DeviceInfo::Init(); #endif @@ -120,7 +127,7 @@ void test_conv_fp32(const std::vector& input_dims, auto wptr = param.filter->data(); auto bias_ptr = flag_bias ? param.bias->data() : nullptr; - for (auto& cls : cluster_id) { + for (auto& cls : power_mode) { for (auto& th : thread_num) { paddle::lite::kernels::arm::ConvCompute @@ -192,7 +199,7 @@ void test_conv_fp32(const std::vector& input_dims, conv.Launch(); } /// compute - lite::test::Timer t0; + Timer t0; for (int i = 0; i < FLAGS_repeats; ++i) { t0.start(); conv.Launch(); @@ -233,7 +240,7 @@ void test_conv_fp32(const std::vector& input_dims, << ", dila_: " << dilas[0] << ", " << dilas[1] << ", bias: " << (flag_bias ? "true" : "false") << ", relu: " << (flag_relu ? "true" : "false") - << ", threads: " << th << ", cluster: " << cls + << ", threads: " << th << ", power_mode: " << cls << " failed!!\n"; } } @@ -245,7 +252,7 @@ void test_conv_fp32(const std::vector& input_dims, << ", dila_: " << dilas[0] << ", " << dilas[1] << ", bias: " << (flag_bias ? "true" : "false") << ", relu: " << (flag_relu ? "true" : "false") - << ", threads: " << th << ", cluster: " << cls + << ", threads: " << th << ", power_mode: " << cls << " successed!!\n"; } } @@ -266,7 +273,7 @@ void test_conv_fp32(const std::vector& input_dims, bool flag_bias, bool flag_relu, const std::vector& thread_num, - const std::vector& cluster_id) {} + const std::vector& power_mode) {} #endif // LITE_WITH_ARM #if 1 /// 3x3dw @@ -293,7 +300,7 @@ TEST(TestConv3x3DW, test_conv3x3_depthwise) { flag_bias, flag_relu, {1, 2, 4}, - {FLAGS_cluster}); + {FLAGS_power_mode}); } } } @@ -327,7 +334,7 @@ TEST(TestConv5x5DW, test_conv5x5_depthwise) { flag_bias, flag_relu, {1, 2, 4}, - {FLAGS_cluster}); + {FLAGS_power_mode}); } } } @@ -364,7 +371,7 @@ TEST(TestConv1x1s1, test_conv1x1s1) { flag_bias, flag_relu, {1, 2, 4}, - {FLAGS_cluster}); + {FLAGS_power_mode}); } } } @@ -398,7 +405,7 @@ TEST(TestConv3x3s1, test_conv_3x3s1) { flag_bias, flag_relu, {1, 2, 4}, - {FLAGS_cluster}); + {FLAGS_power_mode}); } } } @@ -432,7 +439,7 @@ TEST(TestConv3x3s2, test_conv_3x3s2) { flag_bias, flag_relu, {1, 2, 4}, - {FLAGS_cluster}); + {FLAGS_power_mode}); } } } @@ -474,7 +481,7 @@ TEST(TestConvRand, test_conv_rand) { flag_bias, flag_relu, {1, 2, 4}, - {FLAGS_cluster}); + {FLAGS_power_mode}); } } } @@ -508,6 +515,6 @@ TEST(TestConvCustom, test_conv_fp32_custom_size) { FLAGS_flag_bias, FLAGS_flag_relu, {FLAGS_threads}, - {FLAGS_cluster}); + {FLAGS_power_mode}); } #endif // custom diff --git a/lite/tests/math/conv_int8_compute_test.cc b/lite/tests/math/conv_int8_compute_test.cc index 2a0971a298..83cf75f2cb 100644 --- a/lite/tests/math/conv_int8_compute_test.cc +++ b/lite/tests/math/conv_int8_compute_test.cc @@ -24,7 +24,13 @@ #include "lite/kernels/arm/conv_compute.h" #endif // LITE_WITH_ARM -DEFINE_int32(cluster, 3, "cluster id"); +DEFINE_int32(power_mode, + 3, + "power mode: " + "0 for POWER_HIGH;" + "1 for POWER_LOW;" + "2 for POWER_FULL;" + "3 for NO_BIND"); DEFINE_int32(threads, 1, "threads num"); DEFINE_int32(warmup, 0, "warmup times"); DEFINE_int32(repeats, 1, "repeats times"); @@ -53,6 +59,7 @@ DEFINE_bool(flag_bias, true, "with bias"); typedef paddle::lite::DDim DDim; typedef 
paddle::lite::Tensor Tensor; typedef paddle::lite::operators::ConvParam ConvParam; +using paddle::lite::Timer; DDim compute_out_dim(const DDim& dim_in, const paddle::lite::operators::ConvParam& param) { @@ -124,7 +131,7 @@ void test_conv_int8(const std::vector& input_dims, bool flag_bias, bool flag_relu, const std::vector& thread_num, - const std::vector& cluster_id) { + const std::vector& power_mode) { paddle::lite::DeviceInfo::Init(); ConvParam param_int8_out; ConvParam param_fp32_out; @@ -181,7 +188,7 @@ void test_conv_int8(const std::vector& input_dims, 1, weight_dim.count(1, 4)); - for (auto& cls : cluster_id) { + for (auto& cls : power_mode) { for (auto& th : thread_num) { std::unique_ptr ctx1( new paddle::lite::KernelContext); @@ -300,7 +307,7 @@ void test_conv_int8(const std::vector& input_dims, conv_int8_int8.Launch(); } /// compute fp32 output - lite::test::Timer t0; + Timer t0; for (int i = 0; i < FLAGS_repeats; ++i) { t0.start(); conv_int8_fp32.Launch(); @@ -356,7 +363,7 @@ void test_conv_int8(const std::vector& input_dims, << ", dila_: " << dilas[0] << ", " << dilas[1] << ", bias: " << (flag_bias ? "true" : "false") << ", relu: " << (flag_relu ? "true" : "false") - << ", threads: " << th << ", cluster: " << cls + << ", threads: " << th << ", power_mode: " << cls << " failed!!\n"; } } @@ -414,7 +421,7 @@ void test_conv_int8(const std::vector& input_dims, << ", dila_: " << dilas[0] << ", " << dilas[1] << ", bias: " << (flag_bias ? "true" : "false") << ", relu: " << (flag_relu ? "true" : "false") - << ", threads: " << th << ", cluster: " << cls + << ", threads: " << th << ", power_mode: " << cls << " failed!!\n"; } } @@ -426,7 +433,7 @@ void test_conv_int8(const std::vector& input_dims, << ", dila_: " << dilas[0] << ", " << dilas[1] << ", bias: " << (flag_bias ? "true" : "false") << ", relu: " << (flag_relu ? 
"true" : "false") - << ", threads: " << th << ", cluster: " << cls + << ", threads: " << th << ", power_mode: " << cls << " successed!!\n"; } } @@ -444,7 +451,7 @@ void test_conv_int8(const std::vector& input_dims, bool flag_bias, bool flag_relu, const std::vector& thread_num, - const std::vector& cluster_id) {} + const std::vector& power_mode) {} #endif // LITE_WITH_ARM #if 1 /// 3x3dw @@ -471,7 +478,7 @@ TEST(TestConv3x3DWInt8, test_conv3x3_depthwise) { flag_bias, flag_relu, {1, 2, 4}, - {FLAGS_cluster}); + {FLAGS_power_mode}); } } } @@ -505,7 +512,7 @@ TEST(TestConv5x5DWInt8, test_conv5x5_depthwise) { flag_bias, flag_relu, {1, 2, 4}, - {FLAGS_cluster}); + {FLAGS_power_mode}); } } } @@ -542,7 +549,7 @@ TEST(TestConv1x1s1Int8, test_conv1x1s1) { flag_bias, flag_relu, {1, 2, 4}, - {FLAGS_cluster}); + {FLAGS_power_mode}); } } } @@ -576,7 +583,7 @@ TEST(TestConv3x3s1Int8, test_conv_3x3s1) { flag_bias, flag_relu, {1, 2, 4}, - {FLAGS_cluster}); + {FLAGS_power_mode}); } } } @@ -610,7 +617,7 @@ TEST(TestConv3x3s2Int8, test_conv_3x3s2) { flag_bias, flag_relu, {1, 2, 4}, - {FLAGS_cluster}); + {FLAGS_power_mode}); } } } @@ -652,7 +659,7 @@ TEST(TestConvRandInt8, test_conv_rand) { flag_bias, flag_relu, {1, 2, 4}, - {FLAGS_cluster}); + {FLAGS_power_mode}); } } } @@ -686,6 +693,6 @@ TEST(TestConvCustomInt8, test_conv_custom_size) { FLAGS_flag_bias, FLAGS_flag_relu, {FLAGS_threads}, - {FLAGS_cluster}); + {FLAGS_power_mode}); } #endif // custom diff --git a/lite/tests/math/conv_transpose_compute_test.cc b/lite/tests/math/conv_transpose_compute_test.cc index 3a1bbac04b..1a91bdd6c9 100644 --- a/lite/tests/math/conv_transpose_compute_test.cc +++ b/lite/tests/math/conv_transpose_compute_test.cc @@ -24,7 +24,13 @@ #include "lite/kernels/arm/conv_transpose_compute.h" #endif // LITE_WITH_ARM -DEFINE_int32(cluster, 3, "cluster id"); +DEFINE_int32(power_mode, + 3, + "power mode: " + "0 for POWER_HIGH;" + "1 for POWER_LOW;" + "2 for POWER_FULL;" + "3 for NO_BIND"); DEFINE_int32(threads, 1, "threads num"); DEFINE_int32(warmup, 0, "warmup times"); DEFINE_int32(repeats, 1, "repeats times"); @@ -53,6 +59,7 @@ DEFINE_bool(flag_bias, false, "with bias"); typedef paddle::lite::DDim DDim; typedef paddle::lite::Tensor Tensor; typedef paddle::lite::operators::ConvParam ConvParam; +using paddle::lite::Timer; DDim compute_out_dim(const DDim& dim_in, const paddle::lite::operators::ConvParam& param) { @@ -78,7 +85,7 @@ void test_conv_transpose_fp32(const std::vector& input_dims, bool flag_bias, bool flag_relu, const std::vector& thread_num, - const std::vector& cluster_id) { + const std::vector& power_mode) { #ifdef LITE_WITH_ARM paddle::lite::DeviceInfo::Init(); #endif @@ -114,7 +121,7 @@ void test_conv_transpose_fp32(const std::vector& input_dims, auto wptr = tmp_weights.data(); auto bias_ptr = flag_bias ? param.bias->data() : nullptr; - for (auto& cls : cluster_id) { + for (auto& cls : power_mode) { for (auto& th : thread_num) { paddle::lite::kernels::arm::Conv2DTransposeCompute conv_t; std::unique_ptr ctx1( @@ -185,7 +192,7 @@ void test_conv_transpose_fp32(const std::vector& input_dims, conv_t.Launch(); } /// compute - lite::test::Timer t0; + Timer t0; for (int i = 0; i < FLAGS_repeats; ++i) { t0.start(); conv_t.Launch(); @@ -226,7 +233,7 @@ void test_conv_transpose_fp32(const std::vector& input_dims, << ", dila_: " << dilas[0] << ", " << dilas[1] << ", bias: " << (flag_bias ? "true" : "false") << ", relu: " << (flag_relu ? 
"true" : "false") - << ", threads: " << th << ", cluster: " << cls + << ", threads: " << th << ", power_mode: " << cls << " failed!!\n"; } } @@ -238,7 +245,7 @@ void test_conv_transpose_fp32(const std::vector& input_dims, << ", dila_: " << dilas[0] << ", " << dilas[1] << ", bias: " << (flag_bias ? "true" : "false") << ", relu: " << (flag_relu ? "true" : "false") - << ", threads: " << th << ", cluster: " << cls + << ", threads: " << th << ", power_mode: " << cls << " successed!!\n"; } } @@ -259,7 +266,7 @@ void test_conv_transpose_fp32(const std::vector& input_dims, bool flag_bias, bool flag_relu, const std::vector& thread_num, - const std::vector& cluster_id) {} + const std::vector& power_mode) {} #endif // LITE_WITH_ARM #if 1 /// random param conv @@ -294,7 +301,7 @@ TEST(TestConvRand, test_conv_transpose_rand) { flag_bias, flag_relu, {1, 2, 4}, - {FLAGS_cluster}); + {FLAGS_power_mode}); } } } @@ -328,6 +335,6 @@ TEST(TestConvCustom, test_conv_transpose_fp32_custom_size) { FLAGS_flag_bias, FLAGS_flag_relu, {FLAGS_threads}, - {FLAGS_cluster}); + {FLAGS_power_mode}); } #endif // custom diff --git a/lite/tests/math/gemm_int8_compute_test.cc b/lite/tests/math/gemm_int8_compute_test.cc index 575dc27cb2..3faf79b866 100644 --- a/lite/tests/math/gemm_int8_compute_test.cc +++ b/lite/tests/math/gemm_int8_compute_test.cc @@ -25,8 +25,15 @@ #include "lite/tests/utils/timer.h" typedef paddle::lite::Tensor Tensor; +using paddle::lite::Timer; -DEFINE_int32(cluster, 3, "cluster id"); +DEFINE_int32(power_mode, + 3, + "power mode: " + "0 for POWER_HIGH;" + "1 for POWER_LOW;" + "2 for POWER_FULL;" + "3 for NO_BIND"); DEFINE_int32(threads, 1, "threads num"); DEFINE_int32(warmup, 0, "warmup times"); DEFINE_int32(repeats, 1, "repeats times"); @@ -146,7 +153,7 @@ bool test_gemm_int8(bool tra, 1, tc_basic_fp32.numel()); } - lite::test::Timer t0; + Timer t0; //! 
compute double ops = 2.0 * m * n * k; std::unique_ptr ctx1( @@ -202,7 +209,7 @@ bool test_gemm_int8(bool tra, t0.end(); } LOG(INFO) << "gemm_int8_int8 output: M: " << m << ", N: " << n << ", K: " << k - << ", cluster: " << cls << ", threads: " << ths + << ", power_mode: " << cls << ", threads: " << ths << ", GOPS: " << ops * 1e-9f << " GOPS, avg time: " << t0.get_average_ms() << " ms, min time: " << t0.get_min_time() @@ -229,7 +236,7 @@ bool test_gemm_int8(bool tra, t0.end(); } LOG(INFO) << "gemm_int8_fp32 output: M: " << m << ", N: " << n << ", K: " << k - << ", cluster: " << cls << ", threads: " << ths + << ", power_mode: " << cls << ", threads: " << ths << ", GOPS: " << ops * 1e-9f << " GOPS, avg time: " << t0.get_average_ms() << " ms, min time: " << t0.get_min_time() @@ -323,7 +330,7 @@ TEST(TestLiteGemmInt8, gemm_prepacked_int8) { k, has_bias, has_relu, - FLAGS_cluster, + FLAGS_power_mode, th); if (flag) { LOG(INFO) << "test m = " << m << ", n=" << n @@ -364,7 +371,7 @@ TEST(TestGemmInt8Custom, gemm_prepacked_int8_custom) { FLAGS_K, FLAGS_flag_bias, FLAGS_flag_relu, - FLAGS_cluster, + FLAGS_power_mode, FLAGS_threads); if (!flag) { LOG(FATAL) << "test m = " << FLAGS_M << ", n=" << FLAGS_N diff --git a/lite/tests/math/sgemm_compute_test.cc b/lite/tests/math/sgemm_compute_test.cc index 56cdf6179a..6135c3a4bb 100644 --- a/lite/tests/math/sgemm_compute_test.cc +++ b/lite/tests/math/sgemm_compute_test.cc @@ -25,8 +25,15 @@ #include "lite/tests/utils/timer.h" typedef paddle::lite::Tensor Tensor; +using paddle::lite::Timer; -DEFINE_int32(cluster, 3, "cluster id"); +DEFINE_int32(power_mode, + 3, + "power mode: " + "0 for POWER_HIGH;" + "1 for POWER_LOW;" + "2 for POWER_FULL;" + "3 for NO_BIND"); DEFINE_int32(threads, 1, "threads num"); DEFINE_int32(warmup, 0, "warmup times"); DEFINE_int32(repeats, 1, "repeats times"); @@ -128,7 +135,7 @@ bool test_sgemm(bool tra, has_bias, has_relu); } - lite::test::Timer t0; + Timer t0; #ifdef LITE_WITH_ARM //! 
compute double ops = 2.0 * m * n * k; @@ -182,7 +189,7 @@ bool test_sgemm(bool tra, t0.end(); } LOG(INFO) << "M: " << m << ", N: " << n << ", K: " << k - << ", cluster: " << cls << ", threads: " << ths + << ", power_mode: " << cls << ", threads: " << ths << ", GOPS: " << ops * 1e-9f << " GOPS, avg time: " << t0.get_average_ms() << " ms, min time: " << t0.get_min_time() @@ -258,7 +265,7 @@ TEST(TestSgemm, test_func_sgemm_prepacked) { beta, has_bias, has_relu, - FLAGS_cluster, + FLAGS_power_mode, th); if (flag) { LOG(INFO) @@ -318,7 +325,7 @@ TEST(TestSgemmCustom, test_func_sgemm_prepacked_custom) { FLAGS_beta, FLAGS_flag_bias, FLAGS_flag_relu, - FLAGS_cluster, + FLAGS_power_mode, FLAGS_threads); if (!flag) { LOG(FATAL) << "test m = " << FLAGS_M << ", n=" << FLAGS_N diff --git a/lite/tests/utils/timer.h b/lite/tests/utils/timer.h index 0a5efd5d26..095f32046e 100644 --- a/lite/tests/utils/timer.h +++ b/lite/tests/utils/timer.h @@ -17,8 +17,8 @@ #include // NOLINT #include +namespace paddle { namespace lite { -namespace test { class Timer final { public: @@ -34,12 +34,14 @@ class Timer final { tend_ = std::chrono::system_clock::now(); auto ts = std::chrono::duration_cast(tend_ - tstart_); - float elapse_ms = 1000.f * static_cast(ts.count()) * - std::chrono::microseconds::period::num / - std::chrono::microseconds::period::den; - ms_time_.push_back(elapse_ms); + latest_time_ = 1000.f * static_cast(ts.count()) * + std::chrono::microseconds::period::num / + std::chrono::microseconds::period::den; + ms_time_.push_back(latest_time_); } + float latest_time() const { return latest_time_; } + float get_average_ms() { if (ms_time_.size() == 0) { return 0.f; @@ -96,7 +98,8 @@ class Timer final { std::chrono::time_point tstart_; std::chrono::time_point tend_; std::list ms_time_; + float latest_time_; }; -} // namespace test } // namespace lite +} // namespace paddle -- GitLab
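
For reference, a minimal sketch (not part of the patch) of how the relocated paddle::lite::Timer introduced above might be used. It mirrors the per-iteration timing loop this patch adds to lite/api/model_test.cc and assumes it is compiled inside a Paddle-Lite source tree, where lite/tests/utils/timer.h and lite/utils/cp_logging.h are available; the dummy arithmetic loop is only a stand-in for predictor->Run().

// Sketch only: per-iteration timing with the patched Timer API
// (start/end/latest_time/get_average_ms/get_min_time/get_max_time).
#include <cmath>
#include "lite/tests/utils/timer.h"   // Timer now lives in namespace paddle::lite
#include "lite/utils/cp_logging.h"

using paddle::lite::Timer;

int main() {
  Timer ti;
  volatile double sink = 0.0;
  for (int j = 0; j < 10; ++j) {
    ti.start();
    // Dummy workload standing in for predictor->Run().
    for (int i = 0; i < 1000000; ++i) {
      sink += std::sqrt(static_cast<double>(i));
    }
    ti.end();
    LOG(INFO) << "iter: " << j << ", time: " << ti.latest_time() << " ms";
  }
  LOG(INFO) << "avg time: " << ti.get_average_ms() << " ms"
            << ", min time: " << ti.get_min_time() << " ms"
            << ", max time: " << ti.get_max_time() << " ms";
  return 0;
}

With this patch the benchmark also accepts the new flags defined in lite/api/test_helper.h and model_test.cc; a hypothetical invocation (the binary name depends on the build target) might look like:

  ./model_test --model_dir=/path/to/model --use_optimize_nb=true --power_mode=0 --threads=2 --warmup=5 --repeats=20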