Commit e5fba539 authored by liuqi

Add a model benchmark for measuring the execution time of each Op.

Parent 383245be
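The diff below wires a `benchmark` mode through the tooling: the config gains a `BENCHMARK_FLAG`, the converter script skips obfuscation when that flag is set, and the Python driver dispatches `--mode=benchmark` to the new `tools/benchmark.sh`. A rough sketch of how the pieces fit together (the Python driver's file name below is an assumption; only the `--mode=benchmark` flag itself appears in this diff):

```bash
# 1. In tools/env.sh, enable benchmarking so the generated model code is
#    built without obfuscation (handled by the converter script below):
#      BENCHMARK_FLAG=1
#
# 2. Regenerate the model code, then run the new mode through the Python
#    driver (script name assumed; other required flags omitted):
python tools/mace_tools.py --mode=benchmark
```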
#!/bin/bash
# Build benchmark_model and run it against the generated model, either on the
# host or on an Android device over adb. Configuration (MODEL_TAG, shapes,
# device settings, ...) is sourced from env.sh next to this script.

Usage() {
  echo "Usage: bash tools/benchmark.sh model_output_dir"
}

if [ $# -lt 1 ]; then
  Usage
  exit 1
fi

CURRENT_DIR=`dirname $0`
source ${CURRENT_DIR}/env.sh

MODEL_OUTPUT_DIR=$1

if [ -f "$MODEL_OUTPUT_DIR/benchmark_model" ]; then
  rm -rf $MODEL_OUTPUT_DIR/benchmark_model
fi

if [ x"$RUNTIME" = x"local" ]; then
  # Build and run on the host.
  bazel build --verbose_failures -c opt --strip always benchmark:benchmark_model \
    --copt="-std=c++11" \
    --copt="-D_GLIBCXX_USE_C99_MATH_TR1" \
    --copt="-Werror=return-type" \
    --copt="-DMACE_MODEL_TAG=${MODEL_TAG}" \
    --define openmp=true \
    --define production=true || exit 1

  cp bazel-bin/benchmark/benchmark_model $MODEL_OUTPUT_DIR

  MACE_CPP_MIN_VLOG_LEVEL=$VLOG_LEVEL \
    ${MODEL_OUTPUT_DIR}/benchmark_model \
    --device=${DEVICE_TYPE} \
    --input_shape="${INPUT_SHAPE}" \
    --output_shape="${OUTPUT_SHAPE}" \
    --input_file=${MODEL_OUTPUT_DIR}/${INPUT_FILE_NAME} || exit 1
else
  # Cross-compile for Android, push the binary and input file to the device,
  # and run it over adb with OpenCL profiling enabled.
  bazel build --verbose_failures -c opt --strip always benchmark:benchmark_model \
    --crosstool_top=//external:android/crosstool \
    --host_crosstool_top=@bazel_tools//tools/cpp:toolchain \
    --cpu=${ANDROID_ABI} \
    --copt="-std=c++11" \
    --copt="-D_GLIBCXX_USE_C99_MATH_TR1" \
    --copt="-Werror=return-type" \
    --copt="-DMACE_MODEL_TAG=${MODEL_TAG}" \
    --define openmp=true \
    --define production=true || exit 1

  cp bazel-bin/benchmark/benchmark_model $MODEL_OUTPUT_DIR

  adb shell "mkdir -p ${PHONE_DATA_DIR}" || exit 1
  adb push ${MODEL_OUTPUT_DIR}/${INPUT_FILE_NAME} ${PHONE_DATA_DIR} || exit 1
  adb push ${MODEL_OUTPUT_DIR}/benchmark_model ${PHONE_DATA_DIR} || exit 1

  adb </dev/null shell \
    LD_LIBRARY_PATH=${PHONE_DATA_DIR} \
    MACE_CPP_MIN_VLOG_LEVEL=$VLOG_LEVEL \
    MACE_RUN_PARAMETER_PATH=${PHONE_DATA_DIR}/mace_run.config \
    MACE_LIMIT_OPENCL_KERNEL_TIME=${LIMIT_OPENCL_KERNEL_TIME} \
    MACE_OPENCL_PROFILING=1 \
    ${PHONE_DATA_DIR}/benchmark_model \
    --device=${DEVICE_TYPE} \
    --input_shape="${INPUT_SHAPE}" \
    --output_shape="${OUTPUT_SHAPE}" \
    --input_file=${PHONE_DATA_DIR}/${INPUT_FILE_NAME} || exit 1
fi
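The script above is driven by the variables it sources from env.sh (MODEL_TAG, INPUT_SHAPE, OUTPUT_SHAPE, RUNTIME, DEVICE_TYPE, INPUT_FILE_NAME, PHONE_DATA_DIR, ...). A minimal direct invocation might look like this; the output directory name is illustrative and not defined anywhere in this commit:

```bash
# Assumes the model has already been converted and its input file generated
# into this directory; the path is hypothetical.
bash tools/benchmark.sh builds/mobile_net
```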
@@ -10,6 +10,7 @@ MODEL_TAG=mobile_net
 INPUT_SHAPE=1,64,64,3
 OUTPUT_SHAPE=1,64,64,2
 RUNTIME=gpu
 TARGET_ABI=armeabi-v7a ; Or 'arm64-v8a', 'host' etc.
 LIMIT_OPENCL_KERNEL_TIME=0
 DSP_MODE=0 ; used only for dsp
+BENCHMARK_FLAG=0

#!/usr/bin/env bash
LIBMACE_TAG=`git describe --abbrev=0 --tags`
VLOG_LEVEL=0
...
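For orientation, the settings visible above might look roughly like this for a benchmark run. Values are illustrative, and the variables benchmark.sh also needs (DEVICE_TYPE, INPUT_FILE_NAME, PHONE_DATA_DIR) presumably live in parts of the file that are collapsed in this diff:

```bash
# Illustrative values only, based on the variables shown in this diff.
MODEL_TAG=mobile_net
INPUT_SHAPE=1,64,64,3
OUTPUT_SHAPE=1,64,64,2
RUNTIME=gpu
TARGET_ABI=armeabi-v7a
LIMIT_OPENCL_KERNEL_TIME=0
DSP_MODE=0
BENCHMARK_FLAG=1   # set to 1 for a benchmark build (the default added here is 0)
```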
@@ -10,6 +10,11 @@ if [ ${DSP_MODE} ]; then
   DSP_MODE_FLAG="--dsp_mode=${DSP_MODE}"
 fi
 
+OBFUSCATE=True
+if [ "${BENCHMARK_FLAG}" = "1" ]; then
+  OBFUSCATE=False
+fi
+
 bazel-bin/lib/python/tools/tf_converter --input=${TF_MODEL_FILE_PATH} \
   --output=${MODEL_CODEGEN_DIR}/model.cc \
   --input_node=${TF_INPUT_NODE} \
@@ -21,4 +26,4 @@ bazel-bin/lib/python/tools/tf_converter --input=${TF_MODEL_FILE_PATH} \
   --model_tag=${MODEL_TAG} \
   --input_shape=${INPUT_SHAPE} \
   ${DSP_MODE_FLAG} \
-  --obfuscate=True || exit 1
+  --obfuscate=${OBFUSCATE} || exit 1
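The OBFUSCATE toggle above presumably exists so that a benchmark build keeps real op names in the generated model code, which makes the per-op timing output readable. The toggle itself is plain shell and can be checked standalone:

```bash
# Standalone check of the toggle logic added above.
BENCHMARK_FLAG=1
OBFUSCATE=True
if [ "${BENCHMARK_FLAG}" = "1" ]; then
  OBFUSCATE=False
fi
echo "--obfuscate=${OBFUSCATE}"   # prints: --obfuscate=False
```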
@@ -89,6 +89,9 @@ def tuning_run(model_output_dir, running_round, tuning, production_mode):
       model_output_dir, running_round, int(tuning), int(production_mode))
   run_command(command)
 
+def benchmark_model(model_output_dir):
+  command = "bash tools/benchmark.sh {}".format(model_output_dir)
+  run_command(command)
 
 def run_model(model_output_dir, running_round):
   tuning_run(model_output_dir, running_round, False, False)
@@ -139,7 +142,6 @@ def build_production_code():
 def merge_libs_and_tuning_results(output_dir, model_output_dirs):
   pull_or_not = False
   generate_production_code(model_output_dirs, pull_or_not)
-  production_or_not = True
   build_production_code()
 
   model_output_dirs_str = ",".join(model_output_dirs)
@@ -282,6 +284,9 @@ def main(unused_args):
     if FLAGS.mode == "run" or FLAGS.mode == "validate" or FLAGS.mode == "all":
       run_model(model_output_dir, FLAGS.round)
 
+    if FLAGS.mode == "benchmark":
+      benchmark_model(model_output_dir)
+
     if FLAGS.mode == "validate" or FLAGS.mode == "all":
       validate_model(model_output_dir)
...
 #!/bin/bash
 Usage() {
-  echo "Usage: bash tools/run_and_tuning.sh model_output_dir round tuning production_mode"
+  echo "Usage: bash tools/tuning_run.sh model_output_dir round tuning production_mode"
 }
 if [ $# -lt 4 ]; then
...