From 8af285271fd3f94c5d23df9b20662ecd8e5d7f0a Mon Sep 17 00:00:00 2001
From: liuqi
Date: Wed, 30 May 2018 14:26:10 +0800
Subject: [PATCH] Update docs for latest usage.

---
 .../create_a_model_deployment.rst      |  39 ++--
 docs/getting_started/how_to_build.rst  |  30 +--
 .../models/demo_app_models.yaml        |   2 +-
 mace/benchmark/statistics.cc           |   2 +-
 tools/bazel_adb_run.py                 |   9 +-
 tools/{mace_tools.py => converter.py}  | 196 +++++++++++------
 tools/sh_commands.py                   | 201 ++++++++----------
 7 files changed, 263 insertions(+), 216 deletions(-)
 rename tools/{mace_tools.py => converter.py} (87%)

diff --git a/docs/getting_started/create_a_model_deployment.rst b/docs/getting_started/create_a_model_deployment.rst
index d949c7b7..918c84a2 100644
--- a/docs/getting_started/create_a_model_deployment.rst
+++ b/docs/getting_started/create_a_model_deployment.rst
@@ -30,22 +30,31 @@ Configurations
     :header-rows: 1
     :align: left

-    * - Configuration key
-      - Description
+    * - library_name
+      - The library name
     * - target_abis
       - The target ABI to build, can be one or more of 'host', 'armeabi-v7a' or 'arm64-v8a'
+    * - target_socs
+      - Build for the specified SoCs if you only want to use the model on those SoCs.
     * - embed_model_data
-      - Whether embedding model weights as the code, default to 1
+      - Whether to embed model weights in the code, defaults to 0
+    * - build_type
+      - The model build type, one of ['proto', 'code']. 'proto' converts the model to a ProtoBuf file and 'code' converts the model to C++ code.
+    * - model_name
+      - The model name, which should be unique if there are multiple models.
+        **LIMIT: if build_type is code, model_name will be used in the generated C++ code, so model_name must be a valid C++ identifier.**
     * - platform
-      - The source framework, tensorflow or caffe
+      - The source framework, one of [tensorflow, caffe]
     * - model_file_path
       - The path of the model file, can be local or remote
-    * - weight_file_path
-      - The path of the model weights file, used by Caffe model
     * - model_sha256_checksum
       - The SHA256 checksum of the model file
+    * - weight_file_path
+      - The path of the model weights file, used by Caffe model
     * - weight_sha256_checksum
       - The SHA256 checksum of the weight file, used by Caffe model
+    * - subgraphs
+      - The subgraphs key. ** DO NOT EDIT **
     * - input_tensors
       - The input tensor names (tensorflow), top name of inputs' layer (caffe). one or more strings
     * - output_tensors
       - The output tensor names (tensorflow), top name of outputs' layer (caffe). one or more strings
     * - input_shapes
       - The shapes of the input tensors, in NHWC order
     * - output_shapes
       - The shapes of the output tensors, in NHWC order
     * - runtime
-      - The running device, one of CPU, GPU or DSP
+      - The running device, one of [cpu, gpu, dsp, cpu_gpu]. cpu_gpu contains both the cpu and gpu model definitions, so the model can run on either cpu or gpu.
+    * - data_type
+      - [optional] The data type used for the specified runtime: [fp16_fp32, fp32_fp32] for gpu (default is fp16_fp32), [fp32] for cpu, [uint8] for dsp.
* - limit_opencl_kernel_time - - Whether splitting the OpenCL kernel within 1 ms to keep UI responsiveness, default to 0 - * - dsp_mode - - Control the DSP precision and performance, default to 0 usually works for most cases + - [optional] Whether splitting the OpenCL kernel within 1 ms to keep UI responsiveness, default to 0 + * - nnlib_graph_mode + - [optional] Control the DSP precision and performance, default to 0 usually works for most cases * - obfuscate - - Whether to obfuscate the model operator name, default to 0 - * - fast_conv - - Whether to enable Winograd convolution, **will increase memory consumption** + - [optional] Whether to obfuscate the model operator name, default to 0 + * - winograd + - [optional] Whether to enable Winograd convolution, **will increase memory consumption** * - input_files - - Specify Numpy validation inputs. When not provided, [-1, 1] random values will be used + - [optional] Specify Numpy validation inputs. When not provided, [-1, 1] random values will be used diff --git a/docs/getting_started/how_to_build.rst b/docs/getting_started/how_to_build.rst index ea9da202..b9ff430a 100644 --- a/docs/getting_started/how_to_build.rst +++ b/docs/getting_started/how_to_build.rst @@ -166,11 +166,11 @@ Caffe目前只支持最新版本,旧版本请使用Caffe的工具进行升级 模型静态库的生成需要使用目标机型,\ ***并且要求必须在目标SOC的机型上编译生成静态库。*** -我们提供了\ ``mace_tools.py``\ 工具,可以将模型文件转换成静态库。\ ``tools/mace_tools.py``\ 使用步骤: +我们提供了\ ``converter.py``\ 工具,可以将模型文件转换成静态库。\ ``tools/converter.py``\ 使用步骤: -3.2 运行\ ``tools/mace_tools.py``\ 脚本 +3.2 运行\ ``tools/converter.py``\ 脚本 **Commands** @@ -181,8 +181,8 @@ Caffe目前只支持最新版本,旧版本请使用Caffe的工具进行升级 build模型静态库以及测试工具。 * *--config* (type=str, default="", required):模型配置yaml文件路径. - * *--tuning* (optional):是否为特定SOC调制GPU参数. - * *--enable_openmp* (optional):是否启用openmp. + * *--tuning* (default=false, optional):是否为特定SOC调制GPU参数. + * *--enable_openmp* (default=true, optional):是否启用openmp. **run** @@ -192,10 +192,10 @@ Caffe目前只支持最新版本,旧版本请使用Caffe的工具进行升级 * *--config* (type=str, default="", required):模型配置yaml文件路径. * *--round* (type=int, default=1, optional):模型运行次数。 - * *--validate* (optional): 是否需要验证运行结果与框架运行结果是否一致。 + * *--validate* (default=false, optional): 是否需要验证运行结果与框架运行结果是否一致。 * *--caffe_env* (type=local/docker, default=docker, optional):当vaildate时,可以选择指定caffe环境,local表示本地,docker表示使用docker容器. * *--restart_round* (type=int, default=1, optional):模型重启次数。 - * *--check_gpu_out_of_memory* (optional): 是否需要检查gpu内存越界。 + * *--check_gpu_out_of_memory* (default=false, optional): 是否需要检查gpu内存越界。 * *--vlog_level* (type=int[0-5], default=0, optional):详细日志级别. .. warning:: @@ -256,26 +256,26 @@ Caffe目前只支持最新版本,旧版本请使用Caffe的工具进行升级 .. 
code:: sh # print help message - python tools/mace_tools.py -h - python tools/mace_tools.py build -h - python tools/mace_tools.py run -h - python tools/mace_tools.py benchmark -h + python tools/converter.py -h + python tools/converter.py build -h + python tools/converter.py run -h + python tools/converter.py benchmark -h # 仅编译模型和生成静态库 - python tools/mace_tools.py build --config=models/config.yaml + python tools/converter.py build --config=models/config.yaml # 测试模型的运行时间 - python tools/mace_tools.py run --config=models/config.yaml --round=100 + python tools/converter.py run --config=models/config.yaml --round=100 # 对比编译好的模型在mace上与直接使用tensorflow或者caffe运行的结果,相似度使用`余弦距离表示` # 其中使用OpenCL设备,默认相似度大于等于`0.995`为通过;DSP设备下,相似度需要达到`0.930`。 - python tools/mace_tools.py run --config=models/config.yaml --validate + python tools/converter.py run --config=models/config.yaml --validate # 模型Benchmark:查看每个Op的运行时间 - python tools/mace_tools.py benchmark --config=models/config.yaml + python tools/converter.py benchmark --config=models/config.yaml # 查看模型运行时占用内存(如果有多个模型,可能需要注释掉一部分配置,只剩一个模型的配置) - python tools/mace_tools.py run --config=models/config.yaml --round=10000 & + python tools/converter.py run --config=models/config.yaml --round=10000 & adb shell dumpsys meminfo | grep mace_run sleep 10 kill %1 diff --git a/docs/getting_started/models/demo_app_models.yaml b/docs/getting_started/models/demo_app_models.yaml index bdd68edd..f78dc40a 100644 --- a/docs/getting_started/models/demo_app_models.yaml +++ b/docs/getting_started/models/demo_app_models.yaml @@ -7,7 +7,7 @@ target_socs: [msm8998] embed_model_data: 1 build_type: code # 模型build类型。code表示将模型转为代码,proto表示将模型转为protobuf文件 models: # 一个配置文件可以包含多个模型的配置信息,最终生成的库中包含多个模型 - first_net: # 模型的标签,在调度模型的时候,会用这个变量,必须唯一 + model_name: # 模型的标签,在调度模型的时候,会用这个变量,必须唯一 platform: tensorflow model_file_path: path/to/model64.pb # also support http:// and https:// model_sha256_checksum: 7f7462333406e7dea87222737590ebb7d94490194d2f21a7d72bafa87e64e9f9 diff --git a/mace/benchmark/statistics.cc b/mace/benchmark/statistics.cc index ddc1c058..bc68dd64 100644 --- a/mace/benchmark/statistics.cc +++ b/mace/benchmark/statistics.cc @@ -238,7 +238,7 @@ std::string OpStat::StatByNodeType() const { std::string OpStat::Summary() const { std::stringstream stream; if (!records_.empty()) { - stream << total_time_.ToString("Summary") << std::endl; + stream << total_time_.ToString("Summary of Ops' Stat") << std::endl; } stream << records_.size() << " ops total." 
<< std::endl; diff --git a/tools/bazel_adb_run.py b/tools/bazel_adb_run.py index c67c2a85..40c4b5fb 100644 --- a/tools/bazel_adb_run.py +++ b/tools/bazel_adb_run.py @@ -136,11 +136,6 @@ def main(unused_args): sh_commands.gen_mace_version() sh_commands.gen_tuning_param_code([]) - strip = "always" - debug = False - if FLAGS.address_sanitizer: - strip = "never" - debug = True for target_abi in target_abis: sh_commands.bazel_build(target, abi=target_abi, enable_neon=FLAGS.enable_neon, @@ -158,10 +153,10 @@ def main(unused_args): host_bin_path, bin_name, args=FLAGS.args, - opencl_profiling=1, + opencl_profiling=True, vlog_level=0, device_bin_path="/data/local/tmp/mace", - out_of_range_check=1, + out_of_range_check=True, address_sanitizer=FLAGS.address_sanitizer) device_properties = sh_commands.adb_getprop_by_serialno( serialno) diff --git a/tools/mace_tools.py b/tools/converter.py similarity index 87% rename from tools/mace_tools.py rename to tools/converter.py index 7bafee33..58bd3a5a 100644 --- a/tools/mace_tools.py +++ b/tools/converter.py @@ -12,21 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -# python tools/mace_tools.py \ -# --config=tools/example.yaml \ -# --round=100 \ -# --mode=all - import argparse import filelock import hashlib import os +import re import sh import subprocess import sys import urllib import yaml -import re + from enum import Enum import sh_commands @@ -46,13 +42,23 @@ MODEL_OUTPUT_DIR_NAME = 'model' BUILD_TMP_DIR_NAME = '_tmp' BUILD_TMP_GENERAL_OUTPUT_DIR_NAME = 'general' OUTPUT_LIBRARY_DIR_NAME = 'library' +CL_BUILT_KERNEL_FILE_NAME = "mace_cl_compiled_program.bin" +CL_PLATFORM_INFO_FILE_NAME = "mace_cl_platform_info.txt" +CODEGEN_BASE_DIR = 'mace/codegen' +MODEL_CODEGEN_DIR = CODEGEN_BASE_DIR + '/models' +MACE_RUN_TARGET = "//mace/tools/validation:mace_run" ABITypeStrs = [ - "armeabi-v7a", - "arm64-v8a", - "host", + 'armeabi-v7a', + 'arm64-v8a', + 'host', ] -ABIType = Enum('ABIType', [(ele, ele) for ele in ABITypeStrs], type=str) + + +class ABIType(object): + armeabi_v7a = 'armeabi-v7a' + arm64_v8a = 'arm64-v8a' + host = 'host' PlatformTypeStrs = [ @@ -92,6 +98,13 @@ GPUDataTypeStrs = [ GPUDataType = Enum('GPUDataType', [(ele, ele) for ele in GPUDataTypeStrs], type=str) +DSPDataTypeStrs = [ + "uint8", +] + +DSPDataType = Enum('DSPDataType', [(ele, ele) for ele in DSPDataTypeStrs], + type=str) + class DefaultValues(object): omp_num_threads = -1, @@ -129,6 +142,8 @@ class YAMLKeyword(object): class ModuleName(object): YAML_CONFIG = 'YAML CONFIG' MODEL_CONVERTER = 'Model Converter' + RUN = 'RUN' + BENCHMARK = 'Benchmark' CPP_KEYWORDS = [ @@ -171,11 +186,13 @@ def parse_device_type(runtime): def get_hexagon_mode(configs): runtime_list = [] - for model_name in configs["models"]: - model_runtime = configs["models"][model_name].get("runtime", "") + for model_name in configs[YAMLKeyword.models]: + model_runtime =\ + configs[YAMLKeyword.models][model_name].get( + YAMLKeyword.runtime, "") runtime_list.append(model_runtime.lower()) - if "dsp" in runtime_list: + if RuntimeType.dsp in runtime_list: return True return False @@ -200,7 +217,7 @@ def format_model_config(config_file_path): library_name = configs.get(YAMLKeyword.library_name, "") mace_check(len(library_name) > 0, - ModuleName.YAML_CONFIG, "library name shuold not be empty") + ModuleName.YAML_CONFIG, "library name should not be empty") target_abis = configs.get(YAMLKeyword.target_abis, []) mace_check((isinstance(target_abis, list) and len(target_abis) 
> 0), @@ -216,7 +233,8 @@ def format_model_config(config_file_path): elif not isinstance(target_socs, list): configs[YAMLKeyword.target_socs] = [target_socs] - if ABIType.host not in target_abis: + if ABIType.armeabi_v7a in target_abis \ + or ABIType.arm64_v8a in target_abis: available_socs = sh_commands.adb_get_all_socs() if YAMLKeyword.target_socs in configs: target_socs = set(configs[YAMLKeyword.target_socs]) @@ -261,7 +279,7 @@ def format_model_config(config_file_path): mace_check((model_name[0] == '_' or model_name[0].isalpha()) and bool(model_name_reg.match(model_name)), ModuleName.YAML_CONFIG, - "model name shuold Meet the c++ naming convention" + "model name should Meet the c++ naming convention" " which start with '_' or alpha" " and only contain alpha, number and '_'") @@ -318,6 +336,15 @@ def format_model_config(config_file_path): else: model_config[YAMLKeyword.data_type] =\ GPUDataType.fp16_fp32.value + elif runtime == RuntimeType.dsp: + if len(data_type) > 0: + mace_check(data_type in DSPDataTypeStrs, + ModuleName.YAML_CONFIG, + "'data_type' must be in " + str(DSPDataTypeStrs) + + " for dsp runtime") + else: + model_config[YAMLKeyword.data_type] = \ + DSPDataType.uint8.value subgraphs = model_config.get(YAMLKeyword.subgraphs, "") mace_check(len(subgraphs) > 0, ModuleName.YAML_CONFIG, @@ -342,15 +369,17 @@ def format_model_config(config_file_path): if value == "": model_config[key] = 0 - validation_inputs_data = model_config.get("validation_inputs_data", - []) - model_config["validation_inputs_data"] = validation_inputs_data + validation_inputs_data = model_config.get( + YAMLKeyword.validation_inputs_data, []) if not isinstance(validation_inputs_data, list): - model_config["validation_inputs_data"] = [ + model_config[YAMLKeyword.validation_inputs_data] = [ validation_inputs_data] + else: + model_config[YAMLKeyword.validation_inputs_data] = \ + validation_inputs_data - weight_file_path = model_config.get("weight_file_path", "") - model_config["weight_file_path"] = weight_file_path + weight_file_path = model_config.get(YAMLKeyword.weight_file_path, "") + model_config[YAMLKeyword.weight_file_path] = weight_file_path return configs @@ -359,9 +388,9 @@ def get_build_binary_dir(library_name, target_abi, target_soc, serial_num): if not target_soc or not serial_num: binary_path_digest = md5sum(target_abi) + binary_path_digest = "%s_%s" % (target_abi, binary_path_digest) else: - device_name = sh_commands.adb_get_device_name_by_serialno(serial_num)\ - .replace(' ', '') + device_name = sh_commands.adb_get_device_name_by_serialno(serial_num) binary_path_digest = md5sum(target_abi + target_soc + serial_num) binary_path_digest = "%s_%s_%s" % \ (device_name, target_soc, binary_path_digest) @@ -386,7 +415,7 @@ def get_build_model_dirs(library_name, model_name, target_abi, target_soc, device_name = \ sh_commands.adb_get_device_name_by_serialno(serial_num) model_output_dir = "%s/%s_%s/%s" % ( - model_output_base_dir, device_name.replace(' ', ''), + model_output_base_dir, device_name, target_soc, target_abi) mace_model_dir = \ @@ -401,21 +430,16 @@ def get_build_model_dirs(library_name, model_name, target_abi, target_soc, def pull_opencl_binary_and_tuning_param(target_abi, serialno, model_output_dirs): - cl_built_kernel_file_name = "mace_cl_compiled_program.bin" - cl_platform_info_file_name = "mace_cl_platform_info.txt" sh_commands.pull_binaries(target_abi, serialno, model_output_dirs, - cl_built_kernel_file_name, - cl_platform_info_file_name) + CL_BUILT_KERNEL_FILE_NAME, + 
CL_PLATFORM_INFO_FILE_NAME) def gen_opencl_and_tuning_code(model_output_dirs): - cl_built_kernel_file_name = "mace_cl_compiled_program.bin" - cl_platform_info_file_name = "mace_cl_platform_info.txt" - # generate opencl binary code sh_commands.gen_opencl_binary_code(model_output_dirs, - cl_built_kernel_file_name, - cl_platform_info_file_name) + CL_BUILT_KERNEL_FILE_NAME, + CL_PLATFORM_INFO_FILE_NAME) sh_commands.gen_tuning_param_code(model_output_dirs) @@ -475,8 +499,9 @@ def convert_model(configs): library_name = configs[YAMLKeyword.library_name] if not os.path.exists(BUILD_OUTPUT_DIR): os.makedirs(BUILD_OUTPUT_DIR) - elif not os.path.exists(os.path.join(BUILD_OUTPUT_DIR, library_name)): - os.makedirs(os.path.join(BUILD_OUTPUT_DIR, library_name)) + elif os.path.exists(os.path.join(BUILD_OUTPUT_DIR, library_name)): + sh.rm("-rf", os.path.join(BUILD_OUTPUT_DIR, library_name)) + os.makedirs(os.path.join(BUILD_OUTPUT_DIR, library_name)) model_output_dir = \ '%s/%s/%s' % (BUILD_OUTPUT_DIR, library_name, MODEL_OUTPUT_DIR_NAME) @@ -527,12 +552,10 @@ def convert_model(configs): "weight file sha256checksum not match") data_type = model_config[YAMLKeyword.data_type] - if ABIType.host.value in configs[YAMLKeyword.target_abis]: - data_type = CPUDataType.fp32.value # TODO(liuqi): support multiple subgraphs subgraphs = model_config[YAMLKeyword.subgraphs] - model_codegen_dir = "mace/codegen/models/%s" % model_name + model_codegen_dir = "%s/%s" % (MODEL_CODEGEN_DIR, model_name) sh_commands.gen_model_code( model_codegen_dir, model_config[YAMLKeyword.platform], @@ -561,14 +584,13 @@ def convert_model(configs): output_dir=model_output_dir ) - MaceLogger.header( + MaceLogger.summary( StringFormatter.block("Model %s converted" % model_name)) def build_specific_lib(target_abi, target_soc, serial_num, configs, tuning, enable_openmp, address_sanitizer): - mace_run_target = "//mace/tools/validation:mace_run" library_name = configs[YAMLKeyword.library_name] build_type = configs[YAMLKeyword.build_type] embed_model_data = configs[YAMLKeyword.embed_model_data] @@ -583,7 +605,7 @@ def build_specific_lib(target_abi, target_soc, serial_num, gen_opencl_and_tuning_code([]) sh_commands.bazel_build( - mace_run_target, + MACE_RUN_TARGET, abi=target_abi, hexagon_mode=hexagon_mode, enable_openmp=enable_openmp, @@ -608,7 +630,7 @@ def build_specific_lib(target_abi, target_soc, serial_num, os.makedirs(model_output_dir) # build for specified soc - if not address_sanitizer and target_abi != ABIType.host \ + if not address_sanitizer and tuning and target_abi != ABIType.host \ and target_soc is not None and \ model_runtime in [RuntimeType.gpu, RuntimeType.cpu_gpu]: sh_commands.clear_phone_data_dir(serial_num, PHONE_DATA_DIR) @@ -653,7 +675,7 @@ def build_specific_lib(target_abi, target_soc, serial_num, if binary_changed: gen_opencl_and_tuning_code(model_output_dirs) sh_commands.bazel_build( - mace_run_target, + MACE_RUN_TARGET, abi=target_abi, hexagon_mode=hexagon_mode, enable_openmp=enable_openmp, @@ -671,6 +693,7 @@ def build_specific_lib(target_abi, target_soc, serial_num, # generate library sh_commands.merge_libs(target_soc, + serial_num, target_abi, library_name, BUILD_OUTPUT_DIR, @@ -705,17 +728,18 @@ def generate_library(configs, tuning, enable_openmp, address_sanitizer): target_socs = configs[YAMLKeyword.target_socs] for target_abi in configs[YAMLKeyword.target_abis]: - if not target_socs or target_abi == ABIType.host.value: + if not target_socs or target_abi == ABIType.host: build_specific_lib(target_abi, None, None, 
configs, tuning, enable_openmp, address_sanitizer) else: for target_soc in target_socs: - serial_num = sh_commands.get_target_soc_serial_number( - target_soc) - with sh_commands.device_lock(serial_num): - build_specific_lib(target_abi, target_soc, serial_num, - configs, tuning, enable_openmp, - address_sanitizer) + serial_nums = \ + sh_commands.get_target_socs_serialnos([target_soc]) + for serial_num in serial_nums: + with sh_commands.device_lock(serial_num): + build_specific_lib(target_abi, target_soc, serial_num, + configs, tuning, enable_openmp, + address_sanitizer) # package library sh_commands.packaging_lib(BUILD_OUTPUT_DIR, @@ -804,8 +828,19 @@ def run_specific_target(flags, configs, target_abi, else: build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi, target_soc, serial_num) + mace_check(os.path.exists(build_tmp_binary_dir), + ModuleName.RUN, + 'You should build before run.') for model_name in configs[YAMLKeyword.models]: + if target_abi == ABIType.host: + device_name = ABIType.host + else: + device_name =\ + sh_commands.adb_get_device_name_by_serialno(serial_num) + MaceLogger.header( + StringFormatter.block( + "Run model %s on %s" % (model_name, device_name))) model_config = configs[YAMLKeyword.models][model_name] model_runtime = model_config[YAMLKeyword.runtime] subgraphs = model_config[YAMLKeyword.subgraphs] @@ -820,6 +855,10 @@ def run_specific_target(flags, configs, target_abi, get_build_model_dirs(library_name, model_name, target_abi, target_soc, serial_num, model_config[YAMLKeyword.model_file_path]) + mace_check(os.path.exists(model_output_dir) + and os.path.exists(mace_model_dir), + ModuleName.RUN, + 'You should build before run.') if target_abi != ABIType.host: sh_commands.clear_phone_data_dir(serial_num, PHONE_DATA_DIR) @@ -869,9 +908,9 @@ def run_specific_target(flags, configs, target_abi, ) if flags.validate: model_file_path, weight_file_path = get_model_files_path( - model_config["model_file_path"], + model_config[YAMLKeyword.model_file_path], model_output_base_dir, - model_config["weight_file_path"]) + model_config[YAMLKeyword.weight_file_path]) sh_commands.validate_model( abi=target_abi, @@ -899,20 +938,21 @@ def run_mace(flags): target_socs = configs[YAMLKeyword.target_socs] if not target_socs: target_socs = sh_commands.adb_get_all_socs() - if ABIType.host not in configs[YAMLKeyword.target_abis] \ - and not target_socs: - MaceLogger.warning('There is no device plugin the computer.') for target_abi in configs[YAMLKeyword.target_abis]: if target_abi == ABIType.host: run_specific_target(flags, configs, target_abi, None, None) else: for target_soc in target_socs: - serial_num = sh_commands.get_target_soc_serial_number( - target_soc) - with sh_commands.device_lock(serial_num): - run_specific_target(flags, configs, target_abi, - target_soc, serial_num) + serial_nums = \ + sh_commands.get_target_socs_serialnos([target_soc]) + mace_check(serial_nums, + ModuleName.RUN, + 'There is no device with soc: ' + target_soc) + for serial_num in serial_nums: + with sh_commands.device_lock(serial_num): + run_specific_target(flags, configs, target_abi, + target_soc, serial_num) ################################ @@ -928,8 +968,19 @@ def bm_specific_target(flags, configs, target_abi, target_soc, serial_num): else: build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi, target_soc, serial_num) + mace_check(os.path.exists(build_tmp_binary_dir), + ModuleName.BENCHMARK, + 'You should build before benchmark.') for model_name in configs[YAMLKeyword.models]: + if 
target_abi == ABIType.host: + device_name = ABIType.host + else: + device_name = \ + sh_commands.adb_get_device_name_by_serialno(serial_num) + MaceLogger.header( + StringFormatter.block( + "Benchmark model %s on %s" % (model_name, device_name))) model_config = configs[YAMLKeyword.models][model_name] model_runtime = model_config[YAMLKeyword.runtime] subgraphs = model_config[YAMLKeyword.subgraphs] @@ -944,6 +995,10 @@ def bm_specific_target(flags, configs, target_abi, target_soc, serial_num): get_build_model_dirs(library_name, model_name, target_abi, target_soc, serial_num, model_config[YAMLKeyword.model_file_path]) + mace_check(os.path.exists(model_output_dir) + and os.path.exists(mace_model_dir), + ModuleName.BENCHMARK, + 'You should build before benchmark.') if target_abi != ABIType.host: sh_commands.clear_phone_data_dir(serial_num, PHONE_DATA_DIR) @@ -990,20 +1045,21 @@ def benchmark_model(flags): target_socs = configs[YAMLKeyword.target_socs] if not target_socs: target_socs = sh_commands.adb_get_all_socs() - if ABIType.host.value not in configs[YAMLKeyword.target_abis] \ - and not target_socs: - MaceLogger.warning('There is no device plugin the computer.') for target_abi in configs[YAMLKeyword.target_abis]: - if target_abi == ABIType.host.value: + if target_abi == ABIType.host: bm_specific_target(flags, configs, target_abi, None, None) else: for target_soc in target_socs: - serial_num = sh_commands.get_target_soc_serial_number( - target_soc) - with sh_commands.device_lock(serial_num): - bm_specific_target(flags, configs, target_abi, - target_soc, serial_num) + serial_nums = \ + sh_commands.get_target_socs_serialnos([target_soc]) + mace_check(serial_nums, + ModuleName.BENCHMARK, + 'There is no device with soc: ' + target_soc) + for serial_num in serial_nums: + with sh_commands.device_lock(serial_num): + bm_specific_target(flags, configs, target_abi, + target_soc, serial_num) ################################ diff --git a/tools/sh_commands.py b/tools/sh_commands.py index 81f69398..28bcaca0 100644 --- a/tools/sh_commands.py +++ b/tools/sh_commands.py @@ -165,7 +165,7 @@ def adb_getprop_by_serialno(serialno): def adb_get_device_name_by_serialno(serialno): props = adb_getprop_by_serialno(serialno) - return props.get("ro.product.model", "") + return props.get("ro.product.model", "").replace(' ', '') def adb_supported_abis(serialno): @@ -201,10 +201,10 @@ def adb_run(abi, host_bin_path, bin_name, args="", - opencl_profiling=1, + opencl_profiling=True, vlog_level=0, device_bin_path="/data/local/tmp/mace", - out_of_range_check=1, + out_of_range_check=True, address_sanitizer=False): host_bin_full_path = "%s/%s" % (host_bin_path, bin_name) device_bin_full_path = "%s/%s" % (device_bin_path, bin_name) @@ -225,11 +225,13 @@ def adb_run(abi, adb_push(find_asan_rt_library(abi), device_bin_path, serialno) ld_preload = "LD_PRELOAD=%s/%s" % (device_bin_path, asan_rt_library_names(abi)), + opencl_profiling = 1 if opencl_profiling else 0 + out_of_range_check = 1 if out_of_range_check else 0 print("Run %s" % device_bin_full_path) stdout_buff = [] process_output = make_output_processor(stdout_buff) - p = sh.adb( + sh.adb( "-s", serialno, "shell", @@ -239,9 +241,7 @@ def adb_run(abi, "MACE_CPP_MIN_VLOG_LEVEL=%d" % vlog_level, "%s %s" % (device_bin_full_path, args), _out=process_output, - _bg=True, - _err_to_out=True) - p.wait() + _fg=True) return "".join(stdout_buff) @@ -309,12 +309,10 @@ def bazel_build(target, bazel_args += ("--config", "asan") else: bazel_args += ("--config", "optimization") - p = sh.bazel( 
+ sh.bazel( _out=process_output, - _bg=True, - _err_to_out=True, + _fg=True, *bazel_args) - p.wait() print("Build done!\n") return "".join(stdout_buff) @@ -322,13 +320,11 @@ def bazel_build(target, def bazel_build_common(target, build_args=""): stdout_buff = [] process_output = make_output_processor(stdout_buff) - p = sh.bazel( + sh.bazel( "build", target + build_args, _out=process_output, - _bg=True, - _err_to_out=True) - p.wait() + _fg=True) return "".join(stdout_buff) @@ -462,30 +458,28 @@ def gen_model_code(model_codegen_dir, stdout_buff = [] process_output = make_output_processor(stdout_buff) - p = sh.python("bazel-bin/mace/python/tools/converter", - "-u", - "--platform=%s" % platform, - "--model_file=%s" % model_file_path, - "--weight_file=%s" % weight_file_path, - "--model_checksum=%s" % model_sha256_checksum, - "--weight_checksum=%s" % weight_sha256_checksum, - "--input_node=%s" % input_nodes, - "--output_node=%s" % output_nodes, - "--runtime=%s" % runtime, - "--template=%s" % "mace/python/tools", - "--model_tag=%s" % model_tag, - "--input_shape=%s" % input_shapes, - "--dsp_mode=%s" % dsp_mode, - "--embed_model_data=%s" % embed_model_data, - "--winograd=%s" % fast_conv, - "--obfuscate=%s" % obfuscate, - "--output_dir=%s" % model_codegen_dir, - "--model_build_type=%s" % model_build_type, - "--data_type=%s" % data_type, - _out=process_output, - _bg=True, - _err_to_out=True) - p.wait() + sh.python("bazel-bin/mace/python/tools/converter", + "-u", + "--platform=%s" % platform, + "--model_file=%s" % model_file_path, + "--weight_file=%s" % weight_file_path, + "--model_checksum=%s" % model_sha256_checksum, + "--weight_checksum=%s" % weight_sha256_checksum, + "--input_node=%s" % input_nodes, + "--output_node=%s" % output_nodes, + "--runtime=%s" % runtime, + "--template=%s" % "mace/python/tools", + "--model_tag=%s" % model_tag, + "--input_shape=%s" % input_shapes, + "--dsp_mode=%s" % dsp_mode, + "--embed_model_data=%s" % embed_model_data, + "--winograd=%s" % fast_conv, + "--obfuscate=%s" % obfuscate, + "--output_dir=%s" % model_codegen_dir, + "--model_build_type=%s" % model_build_type, + "--data_type=%s" % data_type, + _out=process_output, + _fg=True) def gen_random_input(model_output_dir, @@ -692,15 +686,13 @@ def tuning_run(abi, "--model_file=%s" % mace_model_phone_path, ]) adb_cmd = ' '.join(adb_cmd) - p = sh.adb( + sh.adb( "-s", serialno, "shell", adb_cmd, _out=process_output, - _bg=True, - _err_to_out=True) - p.wait() + _fg=True) print("Running finished!\n") return "".join(stdout_buff) @@ -802,26 +794,24 @@ def validate_model(abi, stdout_buff = [] process_output = make_output_processor(stdout_buff) - p = sh.docker( - "exec", - container_name, - "python", - "-u", - "/mace/validate.py", - "--platform=caffe", - "--model_file=/mace/%s" % model_file_name, - "--weight_file=/mace/%s" % weight_file_name, - "--input_file=/mace/%s" % input_file_name, - "--mace_out_file=/mace/%s" % output_file_name, - "--device_type=%s" % device_type, - "--input_node=%s" % ",".join(input_nodes), - "--output_node=%s" % ",".join(output_nodes), - "--input_shape=%s" % ":".join(input_shapes), - "--output_shape=%s" % ":".join(output_shapes), - _out=process_output, - _bg=True, - _err_to_out=True) - p.wait() + sh.docker( + "exec", + container_name, + "python", + "-u", + "/mace/validate.py", + "--platform=caffe", + "--model_file=/mace/%s" % model_file_name, + "--weight_file=/mace/%s" % weight_file_name, + "--input_file=/mace/%s" % input_file_name, + "--mace_out_file=/mace/%s" % output_file_name, + "--device_type=%s" % 
device_type, + "--input_node=%s" % ",".join(input_nodes), + "--output_node=%s" % ",".join(output_nodes), + "--input_shape=%s" % ":".join(input_shapes), + "--output_shape=%s" % ":".join(output_shapes), + _out=process_output, + _fg=True) print("Validation done!\n") @@ -843,6 +833,7 @@ def build_host_libraries(model_build_type, abi): def merge_libs(target_soc, + serial_num, abi, project_name, build_output_dir, @@ -911,8 +902,10 @@ def merge_libs(target_soc, mri_stream += "create %s/libmace_%s.a\n" % \ (model_bin_dir, project_name) else: - mri_stream += "create %s/libmace_%s.%s.a\n" % \ - (model_bin_dir, project_name, target_soc) + device_name = adb_get_device_name_by_serialno(serial_num) + mri_stream += "create %s/libmace_%s.%s.%s.a\n" % \ + (model_bin_dir, project_name, + device_name, target_soc) if model_build_type == BuildType.code: mri_stream += ( "addlib " @@ -969,16 +962,14 @@ def packaging_lib(libmace_output_dir, project_name): tar_package_path)) stdout_buff = [] process_output = make_output_processor(stdout_buff) - p = sh.tar( - "cvzf", - "%s" % tar_package_path, - glob.glob("%s/*" % project_dir), - "--exclude", - "%s/_tmp" % project_dir, - _out=process_output, - _bg=True, - _err_to_out=True) - p.wait() + sh.tar( + "cvzf", + "%s" % tar_package_path, + glob.glob("%s/*" % project_dir), + "--exclude", + "%s/_tmp" % project_dir, + _out=process_output, + _fg=True) print("Packaging Done!\n") @@ -1068,7 +1059,7 @@ def benchmark_model(abi, adb_push("%s/benchmark_model" % benchmark_binary_dir, phone_data_dir, serialno) - p = sh.adb( + sh.adb( "-s", serialno, "shell", @@ -1093,9 +1084,7 @@ def benchmark_model(abi, "--gpu_priority_hint=%s" % gpu_priority_hint, "--model_file=%s" % mace_model_phone_path, _out=process_output, - _bg=True, - _err_to_out=True) - p.wait() + _fg=True) print("Benchmark done!\n") return "".join(stdout_buff) @@ -1134,7 +1123,7 @@ def build_run_throughput_test(abi, sh.cp("-f", merged_lib_file, "mace/benchmark/libmace_merged.a") stdout_buff = [] process_output = make_output_processor(stdout_buff) - p = sh.bazel( + sh.bazel( "build", "-c", "opt", @@ -1155,9 +1144,7 @@ def build_run_throughput_test(abi, "openmp=true", model_tag_build_flag, _out=process_output, - _bg=True, - _err_to_out=True) - p.wait() + _fg=True) sh.rm("mace/benchmark/libmace_merged.a") sh.adb("-s", @@ -1187,31 +1174,29 @@ def build_run_throughput_test(abi, phone_data_dir, serialno) - p = sh.adb( - "-s", - serialno, - "shell", - "LD_LIBRARY_PATH=%s" % phone_data_dir, - "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level, - "MACE_RUN_PARAMETER_PATH=%s/mace_run.config" % - phone_data_dir, - "%s/model_throughput_test" % phone_data_dir, - "--input_node=%s" % ",".join(input_nodes), - "--output_node=%s" % ",".join(output_nodes), - "--input_shape=%s" % ":".join(input_shapes), - "--output_shape=%s" % ":".join(output_shapes), - "--input_file=%s/%s" % (phone_data_dir, input_file_name), - "--cpu_model_data_file=%s/%s.data" % (phone_data_dir, - cpu_model_tag), - "--gpu_model_data_file=%s/%s.data" % (phone_data_dir, - gpu_model_tag), - "--dsp_model_data_file=%s/%s.data" % (phone_data_dir, - dsp_model_tag), - "--run_seconds=%s" % run_seconds, - _out=process_output, - _bg=True, - _err_to_out=True) - p.wait() + sh.adb( + "-s", + serialno, + "shell", + "LD_LIBRARY_PATH=%s" % phone_data_dir, + "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level, + "MACE_RUN_PARAMETER_PATH=%s/mace_run.config" % + phone_data_dir, + "%s/model_throughput_test" % phone_data_dir, + "--input_node=%s" % ",".join(input_nodes), + "--output_node=%s" % 
",".join(output_nodes), + "--input_shape=%s" % ":".join(input_shapes), + "--output_shape=%s" % ":".join(output_shapes), + "--input_file=%s/%s" % (phone_data_dir, input_file_name), + "--cpu_model_data_file=%s/%s.data" % (phone_data_dir, + cpu_model_tag), + "--gpu_model_data_file=%s/%s.data" % (phone_data_dir, + gpu_model_tag), + "--dsp_model_data_file=%s/%s.data" % (phone_data_dir, + dsp_model_tag), + "--run_seconds=%s" % run_seconds, + _out=process_output, + _fg=True) print("throughput_test done!\n") -- GitLab