Unverified commit 06d7a8f5, authored by Xiaoyang LI, committed by GitHub

remove log in reshape, fix conv error when padding size=4 (#2199)

* remove log in reshape, fix conv error when padding size=4, test=develop

* fix style, test=develop

* remove useless code, test=develop

* remove redundant model test file, test=develop

* change cluster to power_mode, test=develop

* fix build error, test=develop

* change cluster to power_mode, test=develop

* change opt_nb to use_optimize_nb, test=develop

* null, test=develop
Parent commit: b382a0dd
@@ -21,13 +21,20 @@
 #include "lite/api/paddle_use_passes.h"
 #include "lite/api/test_helper.h"
 #include "lite/core/device_info.h"
+#include "lite/tests/utils/timer.h"
 #include "lite/utils/cp_logging.h"
 #include "lite/utils/string.h"
+using paddle::lite::Timer;
 DEFINE_string(input_shape,
               "1,3,224,224",
               "input shapes, separated by colon and comma");
+DEFINE_bool(use_optimize_nb,
+            false,
+            "optimized & naive buffer model for mobile devices");
 namespace paddle {
 namespace lite_api {
@@ -58,15 +65,14 @@ void OutputOptModel(const std::string& load_model_dir,
 #ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
 void Run(const std::vector<std::vector<int64_t>>& input_shapes,
          const std::string& model_dir,
-         const int repeat,
+         const PowerMode power_mode,
          const int thread_num,
+         const int repeat,
          const int warmup_times = 0) {
-#ifdef LITE_WITH_ARM
-  lite::DeviceInfo::Init();
-  lite::DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, thread_num);
-#endif
   lite_api::MobileConfig config;
   config.set_model_dir(model_dir);
+  config.set_power_mode(power_mode);
+  config.set_threads(thread_num);
   auto predictor = lite_api::CreatePaddlePredictor(config);
@@ -87,17 +93,22 @@ void Run(const std::vector<std::vector<int64_t>>& input_shapes,
     predictor->Run();
   }
-  auto start = lite::GetCurrentUS();
-  for (int i = 0; i < repeat; ++i) {
+  Timer ti;
+  for (int j = 0; j < repeat; ++j) {
+    ti.start();
     predictor->Run();
+    ti.end();
+    LOG(INFO) << "iter: " << j << ", time: " << ti.latest_time() << " ms";
   }
-  auto end = lite::GetCurrentUS();
   LOG(INFO) << "================== Speed Report ===================";
-  LOG(INFO) << "Model: " << model_dir << ", threads num " << thread_num
-            << ", warmup: " << warmup_times << ", repeats: " << repeat
-            << ", spend " << (end - start) / repeat / 1000.0
-            << " ms in average.";
+  LOG(INFO) << "Model: " << model_dir
+            << ", power_mode: " << static_cast<int>(power_mode)
+            << ", threads num " << thread_num << ", warmup: " << warmup_times
+            << ", repeats: " << repeat << ", avg time: " << ti.get_average_ms()
+            << " ms"
+            << ", min time: " << ti.get_min_time() << " ms"
+            << ", max time: " << ti.get_max_time() << " ms.";
   auto output = predictor->GetOutput(0);
   auto out = output->data<float>();
@@ -122,7 +133,12 @@ int main(int argc, char** argv) {
        << "--model_dir /path/to/your/model";
     exit(0);
   }
-  std::string save_optimized_model_dir = FLAGS_model_dir + "opt2";
+  std::string save_optimized_model_dir = "";
+  if (FLAGS_use_optimize_nb) {
+    save_optimized_model_dir = FLAGS_model_dir;
+  } else {
+    save_optimized_model_dir = FLAGS_model_dir + "opt2";
+  }
   auto split_string =
       [](const std::string& str_in) -> std::vector<std::string> {
@@ -164,17 +180,21 @@ int main(int argc, char** argv) {
     input_shapes.push_back(get_shape(str_input_shapes[i]));
   }
-  // Output optimized model
-  paddle::lite_api::OutputOptModel(
-      FLAGS_model_dir, save_optimized_model_dir, input_shapes);
+  if (!FLAGS_use_optimize_nb) {
+    // Output optimized model
+    paddle::lite_api::OutputOptModel(
+        FLAGS_model_dir, save_optimized_model_dir, input_shapes);
+  }
 #ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
   // Run inference using optimized model
-  paddle::lite_api::Run(input_shapes,
-                        save_optimized_model_dir,
-                        FLAGS_repeats,
-                        FLAGS_threads,
-                        FLAGS_warmup);
+  paddle::lite_api::Run(
+      input_shapes,
+      save_optimized_model_dir,
+      static_cast<paddle::lite_api::PowerMode>(FLAGS_power_mode),
+      FLAGS_threads,
+      FLAGS_repeats,
+      FLAGS_warmup);
 #endif
   return 0;
 }
...
@@ -22,6 +22,13 @@
 DEFINE_string(model_dir, "", "model dir");
 DEFINE_int32(warmup, 0, "warmup times");
 DEFINE_int32(repeats, 1, "repeats times");
+DEFINE_int32(power_mode,
+             3,
+             "arm power mode: "
+             "0 for big cluster, "
+             "1 for little cluster, "
+             "2 for all cores, "
+             "3 for no bind");
 DEFINE_int32(threads, 1, "threads num");
 DEFINE_int32(im_width, 224, "image width");
 DEFINE_int32(im_height, 224, "image height");
...
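For reference, the sketch below shows how these integer flags would typically be fed into the runtime config on the API side. It is only an illustration: the helper function name and the "lite/api/paddle_api.h" header path are assumptions, while MobileConfig, set_model_dir(), set_power_mode(), set_threads() and the 0-3 power-mode values are taken from this diff.

    #include <string>
    #include "lite/api/paddle_api.h"  // assumed header exposing MobileConfig / PowerMode

    // Hypothetical helper: turn the integer flag values defined above into a MobileConfig.
    paddle::lite_api::MobileConfig MakeConfig(const std::string& model_dir,
                                              int power_mode_flag,  // 0 big cluster, 1 little, 2 all cores, 3 no bind
                                              int threads) {
      paddle::lite_api::MobileConfig config;
      config.set_model_dir(model_dir);
      config.set_power_mode(
          static_cast<paddle::lite_api::PowerMode>(power_mode_flag));
      config.set_threads(threads);
      return config;
    }

A benchmark run would then pass something like --power_mode=3 --threads=1 --warmup=0 --repeats=1 (the defaults defined above) on the command line.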
@@ -34,10 +34,6 @@ bool ConvOpLite::CheckShape() const {
   CHECK_EQ_OR_FALSE(in_dims.size(), filter_dims.size());
   CHECK_OR_FALSE(in_dims.size() - param_.strides.size() == 2U);
-  // CHECK_EQ_OR_FALSE(param_.paddings.size(), param_.strides.size());
-  // CHECK_EQ_OR_FALSE(in_dims[1], filter_dims[1] * param_.groups);
-  // CHECK_EQ_OR_FALSE(filter_dims[0] % param_.groups, 0);
   CHECK_EQ_OR_FALSE(filter_dims.size(), 4UL);
   return true;
...
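The commit title mentions a conv error when the padding vector has four entries. The kernel-side fix itself is not part of this hunk; as a sketch only, and assuming the four values are ordered {top, bottom, left, right}, the output spatial size with asymmetric padding would be computed as follows (ConvOutSize is a hypothetical helper name):

    // Standard convolution output-size formula with separate before/after padding.
    int ConvOutSize(int in, int pad_before, int pad_after,
                    int kernel, int stride, int dilation) {
      int dkernel = dilation * (kernel - 1) + 1;  // effective (dilated) kernel extent
      return (in + pad_before + pad_after - dkernel) / stride + 1;
    }
    // e.g. out_h = ConvOutSize(in_h, pad_top, pad_bottom, kernel_h, stride_h, dila_h);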
@@ -48,7 +48,6 @@ bool ReshapeOp::InferShape() const {
   auto x_dims = param_.x->dims();
   auto output_dims = ValidateShape(final_shape, x_dims);
-  LOG(INFO) << "output_dims:" << output_dims;
   param_.output->Resize(output_dims);
   auto out_lod = param_.output->mutable_lod();
   *out_lod = param_.x->lod();
...
@@ -24,7 +24,13 @@
 #include "lite/kernels/arm/conv_compute.h"
 #endif  // LITE_WITH_ARM
-DEFINE_int32(cluster, 3, "cluster id");
+DEFINE_int32(power_mode,
+             3,
+             "power mode: "
+             "0 for POWER_HIGH;"
+             "1 for POWER_LOW;"
+             "2 for POWER_FULL;"
+             "3 for NO_BIND");
 DEFINE_int32(threads, 1, "threads num");
 DEFINE_int32(warmup, 0, "warmup times");
 DEFINE_int32(repeats, 1, "repeats times");
@@ -53,6 +59,7 @@ DEFINE_bool(flag_bias, true, "with bias");
 typedef paddle::lite::DDim DDim;
 typedef paddle::lite::Tensor Tensor;
 typedef paddle::lite::operators::ConvParam ConvParam;
+using paddle::lite::Timer;
 DDim compute_out_dim(const DDim& dim_in,
                      const paddle::lite::operators::ConvParam& param) {
@@ -87,7 +94,7 @@ void test_conv_fp32(const std::vector<DDim>& input_dims,
                     bool flag_bias,
                     bool flag_relu,
                     const std::vector<int>& thread_num,
-                    const std::vector<int>& cluster_id) {
+                    const std::vector<int>& power_mode) {
 #ifdef LITE_WITH_ARM
   paddle::lite::DeviceInfo::Init();
 #endif
@@ -120,7 +127,7 @@ void test_conv_fp32(const std::vector<DDim>& input_dims,
   auto wptr = param.filter->data<float>();
   auto bias_ptr = flag_bias ? param.bias->data<float>() : nullptr;
-  for (auto& cls : cluster_id) {
+  for (auto& cls : power_mode) {
     for (auto& th : thread_num) {
       paddle::lite::kernels::arm::ConvCompute<PRECISION(kFloat),
                                               PRECISION(kFloat)>
@@ -192,7 +199,7 @@ void test_conv_fp32(const std::vector<DDim>& input_dims,
         conv.Launch();
       }
       /// compute
-      lite::test::Timer t0;
+      Timer t0;
       for (int i = 0; i < FLAGS_repeats; ++i) {
         t0.start();
         conv.Launch();
@@ -233,7 +240,7 @@ void test_conv_fp32(const std::vector<DDim>& input_dims,
                    << ", dila_: " << dilas[0] << ", " << dilas[1]
                    << ", bias: " << (flag_bias ? "true" : "false")
                    << ", relu: " << (flag_relu ? "true" : "false")
-                   << ", threads: " << th << ", cluster: " << cls
+                   << ", threads: " << th << ", power_mode: " << cls
                    << " failed!!\n";
       }
     }
@@ -245,7 +252,7 @@ void test_conv_fp32(const std::vector<DDim>& input_dims,
                << ", dila_: " << dilas[0] << ", " << dilas[1]
                << ", bias: " << (flag_bias ? "true" : "false")
                << ", relu: " << (flag_relu ? "true" : "false")
-               << ", threads: " << th << ", cluster: " << cls
+               << ", threads: " << th << ", power_mode: " << cls
                << " successed!!\n";
     }
   }
@@ -266,7 +273,7 @@ void test_conv_fp32(const std::vector<DDim>& input_dims,
                     bool flag_bias,
                     bool flag_relu,
                     const std::vector<int>& thread_num,
-                    const std::vector<int>& cluster_id) {}
+                    const std::vector<int>& power_mode) {}
 #endif  // LITE_WITH_ARM
 #if 1  /// 3x3dw
@@ -293,7 +300,7 @@ TEST(TestConv3x3DW, test_conv3x3_depthwise) {
                         flag_bias,
                         flag_relu,
                         {1, 2, 4},
-                        {FLAGS_cluster});
+                        {FLAGS_power_mode});
       }
     }
   }
@@ -327,7 +334,7 @@ TEST(TestConv5x5DW, test_conv5x5_depthwise) {
                         flag_bias,
                         flag_relu,
                         {1, 2, 4},
-                        {FLAGS_cluster});
+                        {FLAGS_power_mode});
       }
     }
   }
@@ -364,7 +371,7 @@ TEST(TestConv1x1s1, test_conv1x1s1) {
                         flag_bias,
                         flag_relu,
                         {1, 2, 4},
-                        {FLAGS_cluster});
+                        {FLAGS_power_mode});
       }
     }
   }
@@ -398,7 +405,7 @@ TEST(TestConv3x3s1, test_conv_3x3s1) {
                         flag_bias,
                         flag_relu,
                         {1, 2, 4},
-                        {FLAGS_cluster});
+                        {FLAGS_power_mode});
       }
     }
   }
@@ -432,7 +439,7 @@ TEST(TestConv3x3s2, test_conv_3x3s2) {
                         flag_bias,
                         flag_relu,
                         {1, 2, 4},
-                        {FLAGS_cluster});
+                        {FLAGS_power_mode});
       }
     }
   }
@@ -474,7 +481,7 @@ TEST(TestConvRand, test_conv_rand) {
                         flag_bias,
                         flag_relu,
                         {1, 2, 4},
-                        {FLAGS_cluster});
+                        {FLAGS_power_mode});
       }
     }
   }
@@ -508,6 +515,6 @@ TEST(TestConvCustom, test_conv_fp32_custom_size) {
                  FLAGS_flag_bias,
                  FLAGS_flag_relu,
                  {FLAGS_threads},
-                 {FLAGS_cluster});
+                 {FLAGS_power_mode});
 }
 #endif  // custom
...
@@ -24,7 +24,13 @@
 #include "lite/kernels/arm/conv_compute.h"
 #endif  // LITE_WITH_ARM
-DEFINE_int32(cluster, 3, "cluster id");
+DEFINE_int32(power_mode,
+             3,
+             "power mode: "
+             "0 for POWER_HIGH;"
+             "1 for POWER_LOW;"
+             "2 for POWER_FULL;"
+             "3 for NO_BIND");
 DEFINE_int32(threads, 1, "threads num");
 DEFINE_int32(warmup, 0, "warmup times");
 DEFINE_int32(repeats, 1, "repeats times");
@@ -53,6 +59,7 @@ DEFINE_bool(flag_bias, true, "with bias");
 typedef paddle::lite::DDim DDim;
 typedef paddle::lite::Tensor Tensor;
 typedef paddle::lite::operators::ConvParam ConvParam;
+using paddle::lite::Timer;
 DDim compute_out_dim(const DDim& dim_in,
                      const paddle::lite::operators::ConvParam& param) {
@@ -124,7 +131,7 @@ void test_conv_int8(const std::vector<DDim>& input_dims,
                     bool flag_bias,
                     bool flag_relu,
                     const std::vector<int>& thread_num,
-                    const std::vector<int>& cluster_id) {
+                    const std::vector<int>& power_mode) {
   paddle::lite::DeviceInfo::Init();
   ConvParam param_int8_out;
   ConvParam param_fp32_out;
@@ -181,7 +188,7 @@ void test_conv_int8(const std::vector<DDim>& input_dims,
                       1,
                       weight_dim.count(1, 4));
-  for (auto& cls : cluster_id) {
+  for (auto& cls : power_mode) {
     for (auto& th : thread_num) {
       std::unique_ptr<paddle::lite::KernelContext> ctx1(
           new paddle::lite::KernelContext);
@@ -300,7 +307,7 @@ void test_conv_int8(const std::vector<DDim>& input_dims,
        conv_int8_int8.Launch();
      }
      /// compute fp32 output
-     lite::test::Timer t0;
+     Timer t0;
      for (int i = 0; i < FLAGS_repeats; ++i) {
        t0.start();
        conv_int8_fp32.Launch();
@@ -356,7 +363,7 @@ void test_conv_int8(const std::vector<DDim>& input_dims,
                    << ", dila_: " << dilas[0] << ", " << dilas[1]
                    << ", bias: " << (flag_bias ? "true" : "false")
                    << ", relu: " << (flag_relu ? "true" : "false")
-                   << ", threads: " << th << ", cluster: " << cls
+                   << ", threads: " << th << ", power_mode: " << cls
                    << " failed!!\n";
       }
     }
@@ -414,7 +421,7 @@ void test_conv_int8(const std::vector<DDim>& input_dims,
                    << ", dila_: " << dilas[0] << ", " << dilas[1]
                    << ", bias: " << (flag_bias ? "true" : "false")
                    << ", relu: " << (flag_relu ? "true" : "false")
-                   << ", threads: " << th << ", cluster: " << cls
+                   << ", threads: " << th << ", power_mode: " << cls
                    << " failed!!\n";
       }
     }
@@ -426,7 +433,7 @@ void test_conv_int8(const std::vector<DDim>& input_dims,
                << ", dila_: " << dilas[0] << ", " << dilas[1]
                << ", bias: " << (flag_bias ? "true" : "false")
                << ", relu: " << (flag_relu ? "true" : "false")
-               << ", threads: " << th << ", cluster: " << cls
+               << ", threads: " << th << ", power_mode: " << cls
               << " successed!!\n";
     }
   }
@@ -444,7 +451,7 @@ void test_conv_int8(const std::vector<DDim>& input_dims,
                     bool flag_bias,
                     bool flag_relu,
                     const std::vector<int>& thread_num,
-                    const std::vector<int>& cluster_id) {}
+                    const std::vector<int>& power_mode) {}
 #endif  // LITE_WITH_ARM
 #if 1  /// 3x3dw
@@ -471,7 +478,7 @@ TEST(TestConv3x3DWInt8, test_conv3x3_depthwise) {
                         flag_bias,
                         flag_relu,
                         {1, 2, 4},
-                        {FLAGS_cluster});
+                        {FLAGS_power_mode});
       }
     }
   }
@@ -505,7 +512,7 @@ TEST(TestConv5x5DWInt8, test_conv5x5_depthwise) {
                         flag_bias,
                         flag_relu,
                         {1, 2, 4},
-                        {FLAGS_cluster});
+                        {FLAGS_power_mode});
       }
     }
   }
@@ -542,7 +549,7 @@ TEST(TestConv1x1s1Int8, test_conv1x1s1) {
                         flag_bias,
                         flag_relu,
                         {1, 2, 4},
-                        {FLAGS_cluster});
+                        {FLAGS_power_mode});
       }
     }
   }
@@ -576,7 +583,7 @@ TEST(TestConv3x3s1Int8, test_conv_3x3s1) {
                         flag_bias,
                         flag_relu,
                         {1, 2, 4},
-                        {FLAGS_cluster});
+                        {FLAGS_power_mode});
       }
     }
   }
@@ -610,7 +617,7 @@ TEST(TestConv3x3s2Int8, test_conv_3x3s2) {
                         flag_bias,
                         flag_relu,
                         {1, 2, 4},
-                        {FLAGS_cluster});
+                        {FLAGS_power_mode});
       }
     }
   }
@@ -652,7 +659,7 @@ TEST(TestConvRandInt8, test_conv_rand) {
                         flag_bias,
                         flag_relu,
                         {1, 2, 4},
-                        {FLAGS_cluster});
+                        {FLAGS_power_mode});
       }
     }
   }
@@ -686,6 +693,6 @@ TEST(TestConvCustomInt8, test_conv_custom_size) {
                  FLAGS_flag_bias,
                  FLAGS_flag_relu,
                  {FLAGS_threads},
-                 {FLAGS_cluster});
+                 {FLAGS_power_mode});
 }
 #endif  // custom
...
@@ -24,7 +24,13 @@
 #include "lite/kernels/arm/conv_transpose_compute.h"
 #endif  // LITE_WITH_ARM
-DEFINE_int32(cluster, 3, "cluster id");
+DEFINE_int32(power_mode,
+             3,
+             "power mode: "
+             "0 for POWER_HIGH;"
+             "1 for POWER_LOW;"
+             "2 for POWER_FULL;"
+             "3 for NO_BIND");
 DEFINE_int32(threads, 1, "threads num");
 DEFINE_int32(warmup, 0, "warmup times");
 DEFINE_int32(repeats, 1, "repeats times");
@@ -53,6 +59,7 @@ DEFINE_bool(flag_bias, false, "with bias");
 typedef paddle::lite::DDim DDim;
 typedef paddle::lite::Tensor Tensor;
 typedef paddle::lite::operators::ConvParam ConvParam;
+using paddle::lite::Timer;
 DDim compute_out_dim(const DDim& dim_in,
                      const paddle::lite::operators::ConvParam& param) {
@@ -78,7 +85,7 @@ void test_conv_transpose_fp32(const std::vector<DDim>& input_dims,
                               bool flag_bias,
                               bool flag_relu,
                               const std::vector<int>& thread_num,
-                              const std::vector<int>& cluster_id) {
+                              const std::vector<int>& power_mode) {
 #ifdef LITE_WITH_ARM
   paddle::lite::DeviceInfo::Init();
 #endif
@@ -114,7 +121,7 @@ void test_conv_transpose_fp32(const std::vector<DDim>& input_dims,
   auto wptr = tmp_weights.data<float>();
   auto bias_ptr = flag_bias ? param.bias->data<float>() : nullptr;
-  for (auto& cls : cluster_id) {
+  for (auto& cls : power_mode) {
     for (auto& th : thread_num) {
       paddle::lite::kernels::arm::Conv2DTransposeCompute conv_t;
       std::unique_ptr<paddle::lite::KernelContext> ctx1(
@@ -185,7 +192,7 @@ void test_conv_transpose_fp32(const std::vector<DDim>& input_dims,
        conv_t.Launch();
      }
      /// compute
-     lite::test::Timer t0;
+     Timer t0;
      for (int i = 0; i < FLAGS_repeats; ++i) {
        t0.start();
        conv_t.Launch();
@@ -226,7 +233,7 @@ void test_conv_transpose_fp32(const std::vector<DDim>& input_dims,
                    << ", dila_: " << dilas[0] << ", " << dilas[1]
                    << ", bias: " << (flag_bias ? "true" : "false")
                    << ", relu: " << (flag_relu ? "true" : "false")
-                   << ", threads: " << th << ", cluster: " << cls
+                   << ", threads: " << th << ", power_mode: " << cls
                    << " failed!!\n";
       }
     }
@@ -238,7 +245,7 @@ void test_conv_transpose_fp32(const std::vector<DDim>& input_dims,
                << ", dila_: " << dilas[0] << ", " << dilas[1]
                << ", bias: " << (flag_bias ? "true" : "false")
                << ", relu: " << (flag_relu ? "true" : "false")
-               << ", threads: " << th << ", cluster: " << cls
+               << ", threads: " << th << ", power_mode: " << cls
               << " successed!!\n";
     }
   }
@@ -259,7 +266,7 @@ void test_conv_transpose_fp32(const std::vector<DDim>& input_dims,
                               bool flag_bias,
                               bool flag_relu,
                               const std::vector<int>& thread_num,
-                              const std::vector<int>& cluster_id) {}
+                              const std::vector<int>& power_mode) {}
 #endif  // LITE_WITH_ARM
 #if 1  /// random param conv
@@ -294,7 +301,7 @@ TEST(TestConvRand, test_conv_transpose_rand) {
                                  flag_bias,
                                  flag_relu,
                                  {1, 2, 4},
-                                 {FLAGS_cluster});
+                                 {FLAGS_power_mode});
       }
     }
   }
@@ -328,6 +335,6 @@ TEST(TestConvCustom, test_conv_transpose_fp32_custom_size) {
                            FLAGS_flag_bias,
                            FLAGS_flag_relu,
                            {FLAGS_threads},
-                           {FLAGS_cluster});
+                           {FLAGS_power_mode});
 }
 #endif  // custom
...
@@ -25,8 +25,15 @@
 #include "lite/tests/utils/timer.h"
 typedef paddle::lite::Tensor Tensor;
+using paddle::lite::Timer;
-DEFINE_int32(cluster, 3, "cluster id");
+DEFINE_int32(power_mode,
+             3,
+             "power mode: "
+             "0 for POWER_HIGH;"
+             "1 for POWER_LOW;"
+             "2 for POWER_FULL;"
+             "3 for NO_BIND");
 DEFINE_int32(threads, 1, "threads num");
 DEFINE_int32(warmup, 0, "warmup times");
 DEFINE_int32(repeats, 1, "repeats times");
@@ -146,7 +153,7 @@ bool test_gemm_int8(bool tra,
                     1,
                     tc_basic_fp32.numel());
   }
-  lite::test::Timer t0;
+  Timer t0;
   //! compute
   double ops = 2.0 * m * n * k;
   std::unique_ptr<paddle::lite::KernelContext> ctx1(
@@ -202,7 +209,7 @@ bool test_gemm_int8(bool tra,
     t0.end();
   }
   LOG(INFO) << "gemm_int8_int8 output: M: " << m << ", N: " << n << ", K: " << k
-            << ", cluster: " << cls << ", threads: " << ths
+            << ", power_mode: " << cls << ", threads: " << ths
             << ", GOPS: " << ops * 1e-9f
             << " GOPS, avg time: " << t0.get_average_ms()
             << " ms, min time: " << t0.get_min_time()
@@ -229,7 +236,7 @@ bool test_gemm_int8(bool tra,
     t0.end();
   }
   LOG(INFO) << "gemm_int8_fp32 output: M: " << m << ", N: " << n << ", K: " << k
-            << ", cluster: " << cls << ", threads: " << ths
+            << ", power_mode: " << cls << ", threads: " << ths
             << ", GOPS: " << ops * 1e-9f
             << " GOPS, avg time: " << t0.get_average_ms()
             << " ms, min time: " << t0.get_min_time()
@@ -323,7 +330,7 @@ TEST(TestLiteGemmInt8, gemm_prepacked_int8) {
                                   k,
                                   has_bias,
                                   has_relu,
-                                  FLAGS_cluster,
+                                  FLAGS_power_mode,
                                   th);
         if (flag) {
           LOG(INFO) << "test m = " << m << ", n=" << n
@@ -364,7 +371,7 @@ TEST(TestGemmInt8Custom, gemm_prepacked_int8_custom) {
                             FLAGS_K,
                             FLAGS_flag_bias,
                             FLAGS_flag_relu,
-                            FLAGS_cluster,
+                            FLAGS_power_mode,
                             FLAGS_threads);
   if (!flag) {
     LOG(FATAL) << "test m = " << FLAGS_M << ", n=" << FLAGS_N
...
@@ -25,8 +25,15 @@
 #include "lite/tests/utils/timer.h"
 typedef paddle::lite::Tensor Tensor;
+using paddle::lite::Timer;
-DEFINE_int32(cluster, 3, "cluster id");
+DEFINE_int32(power_mode,
+             3,
+             "power mode: "
+             "0 for POWER_HIGH;"
+             "1 for POWER_LOW;"
+             "2 for POWER_FULL;"
+             "3 for NO_BIND");
 DEFINE_int32(threads, 1, "threads num");
 DEFINE_int32(warmup, 0, "warmup times");
 DEFINE_int32(repeats, 1, "repeats times");
@@ -128,7 +135,7 @@ bool test_sgemm(bool tra,
                 has_bias,
                 has_relu);
   }
-  lite::test::Timer t0;
+  Timer t0;
 #ifdef LITE_WITH_ARM
   //! compute
   double ops = 2.0 * m * n * k;
@@ -182,7 +189,7 @@ bool test_sgemm(bool tra,
     t0.end();
   }
   LOG(INFO) << "M: " << m << ", N: " << n << ", K: " << k
-            << ", cluster: " << cls << ", threads: " << ths
+            << ", power_mode: " << cls << ", threads: " << ths
            << ", GOPS: " << ops * 1e-9f
            << " GOPS, avg time: " << t0.get_average_ms()
            << " ms, min time: " << t0.get_min_time()
@@ -258,7 +265,7 @@ TEST(TestSgemm, test_func_sgemm_prepacked) {
                               beta,
                               has_bias,
                               has_relu,
-                              FLAGS_cluster,
+                              FLAGS_power_mode,
                               th);
         if (flag) {
           LOG(INFO)
@@ -318,7 +325,7 @@ TEST(TestSgemmCustom, test_func_sgemm_prepacked_custom) {
                         FLAGS_beta,
                         FLAGS_flag_bias,
                         FLAGS_flag_relu,
-                        FLAGS_cluster,
+                        FLAGS_power_mode,
                         FLAGS_threads);
   if (!flag) {
     LOG(FATAL) << "test m = " << FLAGS_M << ", n=" << FLAGS_N
...
@@ -17,8 +17,8 @@
 #include <chrono>  // NOLINT
 #include <list>
+namespace paddle {
 namespace lite {
-namespace test {
 class Timer final {
  public:
@@ -34,12 +34,14 @@ class Timer final {
     tend_ = std::chrono::system_clock::now();
     auto ts =
         std::chrono::duration_cast<std::chrono::microseconds>(tend_ - tstart_);
-    float elapse_ms = 1000.f * static_cast<float>(ts.count()) *
+    latest_time_ = 1000.f * static_cast<float>(ts.count()) *
                    std::chrono::microseconds::period::num /
                    std::chrono::microseconds::period::den;
-    ms_time_.push_back(elapse_ms);
+    ms_time_.push_back(latest_time_);
   }
+  float latest_time() const { return latest_time_; }
   float get_average_ms() {
     if (ms_time_.size() == 0) {
       return 0.f;
@@ -96,7 +98,8 @@ class Timer final {
   std::chrono::time_point<std::chrono::system_clock> tstart_;
   std::chrono::time_point<std::chrono::system_clock> tend_;
   std::list<float> ms_time_;
+  float latest_time_;
 };
-}  // namespace test
 }  // namespace lite
+}  // namespace paddle
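A minimal usage sketch of the updated Timer, which now lives in namespace paddle::lite and exposes latest_time() for the most recent start()/end() interval. The TimeIt wrapper below is hypothetical; the Timer methods and header paths come from this diff.

    #include "lite/tests/utils/timer.h"
    #include "lite/utils/cp_logging.h"

    using paddle::lite::Timer;

    void TimeIt(int repeats) {
      Timer t;
      for (int i = 0; i < repeats; ++i) {
        t.start();
        // ... workload under measurement ...
        t.end();  // records the interval and updates latest_time()
        LOG(INFO) << "iter " << i << ": " << t.latest_time() << " ms";
      }
      LOG(INFO) << "avg: " << t.get_average_ms() << " ms, min: " << t.get_min_time()
                << " ms, max: " << t.get_max_time() << " ms";
    }

This mirrors how the benchmark in this commit reports per-iteration, average, min and max latency.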