Update benchmark (#2746)

* update benchmark, test=develop

Update benchmark (#2746)
* update benchmark, test=develop
71b35779 · juncaipeng · GitHub · e5c62f96 · 71b35779 · 71b35779
隐藏空白更改
内联并排

Showing 2 changed files with 93 additions and 47 deletions

lite/api/benchmark.cc lite/api/benchmark.cc +69 -38

lite/tools/benchmark.sh lite/tools/benchmark.sh +24 -9

未找到文件。
--- a/lite/api/benchmark.cc
+++ b/lite/api/benchmark.cc
@@ -13,32 +13,61 @@
 // limitations under the License.

 #include <gflags/gflags.h>
+#include <sys/time.h>
+#include <time.h>
+#include <algorithm>
 #include <cstdio>
 #include <fstream>
+#include <iomanip>
+#include <numeric>
 #include <string>
 #include <vector>
 #include "lite/api/paddle_api.h"
 #include "lite/api/paddle_use_kernels.h"
 #include "lite/api/paddle_use_ops.h"
 #include "lite/api/paddle_use_passes.h"
-#include "lite/api/test_helper.h"
 #include "lite/core/device_info.h"
 #include "lite/utils/cp_logging.h"
 #include "lite/utils/string.h"

+DEFINE_string(model_dir, "", "model dir");
 DEFINE_string(input_shape,
              "1,3,224,224",
-              "input shapes, separated by colon and comma");
-DEFINE_string(result_filename, "", "save test result");
+              "set input shapes according to the model, "
+              "separated by colon and comma, "
+              "such as 1,3,244,244:1,3,300,300.");
+DEFINE_int32(warmup, 0, "warmup times");
+DEFINE_int32(repeats, 1, "repeats times");
+DEFINE_int32(power_mode,
+             3,
+             "arm power mode: "
+             "0 for big cluster, "
+             "1 for little cluster, "
+             "2 for all cores, "
+             "3 for no bind");
+DEFINE_int32(threads, 1, "threads num");
+DEFINE_string(result_filename,
+              "result.txt",
+              "save benchmark "
+              "result to the file");
 DEFINE_bool(run_model_optimize,
            false,
-            "if set true, apply model_optimize_tool to model, use optimized "
-            "model to test");
-DEFINE_bool(is_quantized_model, false, "if set true, test the quantized model");
+            "if set true, apply model_optimize_tool to "
+            "model and use optimized model to test. ");
+DEFINE_bool(is_quantized_model,
+            false,
+            "if set true, "
+            "test the performance of the quantized model. ");

 namespace paddle {
 namespace lite_api {

+inline double GetCurrentUS() {
+  struct timeval time;
+  gettimeofday(&time, NULL);
+  return 1e+6 * time.tv_sec + time.tv_usec;
+}
+
 void OutputOptModel(const std::string& load_model_dir,
                    const std::string& save_optimized_model_dir,
                    const std::vector<std::vector<int64_t>>& input_shapes) {
@@ -58,7 +87,7 @@ void OutputOptModel(const std::string& load_model_dir,
      paddle::lite::string_format("rm -rf %s", save_optimized_model_dir.c_str())
          .c_str());
  if (ret == 0) {
-    LOG(INFO) << "delete old optimized model " << save_optimized_model_dir;
+    LOG(INFO) << "Delete old optimized model " << save_optimized_model_dir;
  }
  predictor->SaveOptimizedModel(save_optimized_model_dir,
                                LiteModelType::kNaiveBuffer);
@@ -69,23 +98,22 @@ void OutputOptModel(const std::string& load_model_dir,
 #ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
 void Run(const std::vector<std::vector<int64_t>>& input_shapes,
         const std::string& model_dir,
-         const int repeat,
-         const int thread_num,
-         const int warmup_times,
         const std::string model_name) {
+  // set config and create predictor
  lite_api::MobileConfig config;
-  config.set_threads(thread_num);
-  config.set_power_mode(LITE_POWER_NO_BIND);
+  config.set_threads(FLAGS_threads);
+  config.set_power_mode(static_cast<PowerMode>(FLAGS_power_mode));
  config.set_model_dir(model_dir);

  auto predictor = lite_api::CreatePaddlePredictor(config);

+  // set input
  for (int j = 0; j < input_shapes.size(); ++j) {
    auto input_tensor = predictor->GetInput(j);
    input_tensor->Resize(input_shapes[j]);
    auto input_data = input_tensor->mutable_data<float>();
    int input_num = 1;
-    for (int i = 0; i < input_shapes[j].size(); ++i) {
+    for (size_t i = 0; i < input_shapes[j].size(); ++i) {
      input_num *= input_shapes[j][i];
    }
    for (int i = 0; i < input_num; ++i) {
@@ -93,26 +121,36 @@ void Run(const std::vector<std::vector<int64_t>>& input_shapes,
    }
  }

-  for (int i = 0; i < warmup_times; ++i) {
+  // warmup
+  for (int i = 0; i < FLAGS_warmup; ++i) {
    predictor->Run();
  }

-  auto start = lite::GetCurrentUS();
-  for (int i = 0; i < repeat; ++i) {
+  // run
+  std::vector<float> perf_vct;
+  for (int i = 0; i < FLAGS_repeats; ++i) {
+    auto start = GetCurrentUS();
    predictor->Run();
+    auto end = GetCurrentUS();
+    perf_vct.push_back((end - start) / 1000.0);
  }
-  auto end = lite::GetCurrentUS();
-
-  std::FILE* pf = std::fopen(FLAGS_result_filename.c_str(), "a");
-  if (nullptr == pf) {
-    LOG(INFO) << "create result file error";
-    exit(0);
+  std::sort(perf_vct.begin(), perf_vct.end());  // ascending order
+  float min_res = perf_vct.front();  // smallest latency is first after sort
+  float max_res = perf_vct.back();   // largest latency is last after sort
+  float total_res = std::accumulate(perf_vct.begin(), perf_vct.end(), 0.0);
+  float avg_res = total_res / FLAGS_repeats;
+
+  // save result
+  std::ofstream ofs(FLAGS_result_filename, std::ios::app);
+  if (!ofs.is_open()) {
+    LOG(FATAL) << "open result file failed";
  }
-  fprintf(pf,
-          "-- %-18s    avg = %5.4f ms\n",
-          model_name.c_str(),
-          (end - start) / repeat / 1000.0);
-  std::fclose(pf);
+  ofs.precision(5);
+  ofs << std::setw(20) << std::fixed << std::left << model_name;
+  ofs << "min = " << std::setw(12) << min_res;
+  ofs << "max = " << std::setw(12) << max_res;
+  ofs << "average = " << std::setw(12) << avg_res;
+  ofs << std::endl;
 }
 #endif

@@ -122,9 +160,7 @@ void Run(const std::vector<std::vector<int64_t>>& input_shapes,
 int main(int argc, char** argv) {
  gflags::ParseCommandLineFlags(&argc, &argv, true);
  if (FLAGS_model_dir == "" || FLAGS_result_filename == "") {
-    LOG(INFO) << "usage: "
-              << "--model_dir /path/to/your/model --result_filename "
-                 "/path/to/resultfile";
+    LOG(INFO) << "please run ./benchmark_bin --help to obtain usage.";
    exit(0);
  }

@@ -166,11 +202,11 @@ int main(int argc, char** argv) {

  std::vector<std::string> str_input_shapes = split_string(FLAGS_input_shape);
  std::vector<std::vector<int64_t>> input_shapes;
-  for (int i = 0; i < str_input_shapes.size(); ++i) {
+  for (size_t i = 0; i < str_input_shapes.size(); ++i) {
    input_shapes.push_back(get_shape(str_input_shapes[i]));
  }

-  // Output optimized model
+  // Output optimized model if needed
  if (FLAGS_run_model_optimize) {
    paddle::lite_api::OutputOptModel(
        FLAGS_model_dir, save_optimized_model_dir, input_shapes);
@@ -180,12 +216,7 @@ int main(int argc, char** argv) {
  // Run inference using optimized model
  std::string run_model_dir =
      FLAGS_run_model_optimize ? save_optimized_model_dir : FLAGS_model_dir;
-  paddle::lite_api::Run(input_shapes,
-                        run_model_dir,
-                        FLAGS_repeats,
-                        FLAGS_threads,
-                        FLAGS_warmup,
-                        model_name);
+  paddle::lite_api::Run(input_shapes, run_model_dir, model_name);
 #endif
  return 0;
 }
--- a/lite/tools/benchmark.sh
+++ b/lite/tools/benchmark.sh
@@ -2,13 +2,12 @@
 set -e

 # Check input
-if [ $# -lt  3 ];
+if [ $# -lt  2 ];
 then
    echo "Input error"
    echo "Usage:"
-    echo "  sh benchmark.sh <benchmark_bin_path> <benchmark_models_path> <result_filename>"
-    echo "  sh benchmark.sh <benchmark_bin_path> <benchmark_models_path> <result_filename> <is_run_model_optimize: [true|false]>"
-    echo "  sh benchmark.sh <benchmark_bin_path> <benchmark_models_path> <result_filename> <is_run_model_optimize: [true|false]> <is_run_quantized_model: [trur|false]>"
+    echo "  sh benchmark.sh benchmark_bin_path benchmark_models_path <result_filename> <input_shape> <power_mode: [0|1|2|3]> <is_run_model_optimize: [true|false]> <is_run_quantized_model: [true|false]>"
+    printf '\npower_mode refer: 0 for big cluster, 1 for little cluster, 2 for all cores,  3 for no bind.\n'
    exit
 fi

@@ -16,8 +15,10 @@ fi
 ANDROID_DIR=/data/local/tmp
 BENCHMARK_BIN=$1
 MODELS_DIR=$2
-RESULT_FILENAME=$3

+RESULT_FILENAME=result.txt
+INPUT_SHAPE=1,3,224,224  # same as the default of benchmark_bin's --input_shape flag
+POWER_MODE=3
 WARMUP=10
 REPEATS=30
 IS_RUN_MODEL_OPTIMIZE=false
@@ -26,13 +27,25 @@ NUM_THREADS_LIST=(1 2 4)
 MODELS_LIST=$(ls $MODELS_DIR)

 # Check input
+if [ $# -gt  2 ];
+then
+    RESULT_FILENAME=$3
+fi
 if [ $# -gt  3 ];
 then
-    IS_RUN_MODEL_OPTIMIZE=$4
+    INPUT_SHAPE=$4
 fi
 if [ $# -gt  4 ];
 then
-    IS_RUN_QUANTIZED_MODEL=$5
+    POWER_MODE=$5
+fi
+if [ $# -gt  5 ];
+then
+    IS_RUN_MODEL_OPTIMIZE=$6
+fi
+if [ $# -gt  6 ];
+then
+    IS_RUN_QUANTIZED_MODEL=$7
 fi

 # Adb push benchmark_bin, models
@@ -41,16 +54,18 @@ adb shell chmod +x $ANDROID_DIR/benchmark_bin
 adb push $MODELS_DIR $ANDROID_DIR

 # Run benchmark
-adb shell "echo 'PaddleLite Benchmark' > $ANDROID_DIR/$RESULT_FILENAME"
+adb shell "echo 'PaddleLite Benchmark (in ms)\n' > $ANDROID_DIR/$RESULT_FILENAME"
 for threads in ${NUM_THREADS_LIST[@]}; do
-    adb shell "echo Threads=$threads Warmup=$WARMUP Repeats=$REPEATS >> $ANDROID_DIR/$RESULT_FILENAME"
+    adb shell "echo threads=$threads warmup=$WARMUP repeats=$REPEATS input_shape=$INPUT_SHAPE power_mode=$POWER_MODE >> $ANDROID_DIR/$RESULT_FILENAME"
    for model_name in ${MODELS_LIST[@]}; do
      echo "Model=$model_name Threads=$threads"
      adb shell "$ANDROID_DIR/benchmark_bin \
                   --model_dir=$ANDROID_DIR/${MODELS_DIR}/$model_name \
+                   --input_shape=$INPUT_SHAPE \
                   --warmup=$WARMUP \
                   --repeats=$REPEATS \
                   --threads=$threads \
+                   --power_mode=$POWER_MODE \
                   --result_filename=$ANDROID_DIR/$RESULT_FILENAME \
                   --run_model_optimize=$IS_RUN_MODEL_OPTIMIZE \
                   --is_quantized_model=$IS_RUN_QUANTIZED_MODEL"