Merge branch 'device_support' into 'master'

Device support See merge request !898

Merge branch 'device_support' into 'master'
Device support See merge request !898
3cdf9973 · 叶剑武 · 66cf184f · 12c4dace · 3cdf9973 · 3cdf9973
28 changed files
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -47,8 +47,13 @@ ops_test:
  stage: ops_test
  script:
    - if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi
-    - python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS
-    - python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS --enable_neon=false
+    - >
+      if ping -c 1 v9.git.n.xiaomi.com 1>/dev/null 2>&1; then
+        GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git
+        DEVICE_CONF_FILE=generic-mobile-devices/devices.yml
+      fi
+    - python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64,armhf --target_socs=$TARGET_SOCS
+    - python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64,armhf --target_socs=$TARGET_SOCS --enable_neon=false

 api_test:
  stage: api_test
@@ -68,14 +73,19 @@ extra_tests:
  stage: extra_tests
  script:
    - if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi
-    - python tools/bazel_adb_run.py --target="//mace/utils:tuner_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS
+    - >
+      if ping -c 1 v9.git.n.xiaomi.com 1>/dev/null 2>&1; then
+        GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git
+        DEVICE_CONF_FILE=generic-mobile-devices/devices.yml
+      fi
+    - python tools/bazel_adb_run.py --target="//mace/utils:tuner_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64 --target_socs=$TARGET_SOCS

 platform_compatible_tests:
  stage: platform_compatible_tests
  script:
    - bazel build mace/core:core --define openmp=true
-    - bazel build --config arm_linux --define openmp=true --define opencl=true --define neon=true //mace/libmace:libmace.so
-    - bazel build --config aarch64_linux --define openmp=true --define opencl=true --define neon=true //mace/libmace:libmace.so
+    - bazel build --config arm_linux_gnueabihf --define openmp=true --define opencl=true --define neon=true //mace/libmace:libmace.so
+    - bazel build --config aarch64_linux_gnu --define openmp=true --define opencl=true --define neon=true //mace/libmace:libmace.so

 build_libraries:
  stage: build_libraries
@@ -87,6 +97,11 @@ ndk_versions_compatible_tests:
  script:
    - DEFAULT_NDK_PATH=$ANDROID_NDK_HOME
    - prefix_path=${DEFAULT_NDK_PATH%android-ndk-*}
+    - >
+      if ping -c 1 v9.git.n.xiaomi.com 1>/dev/null 2>&1; then
+        GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git
+        DEVICE_CONF_FILE=generic-mobile-devices/devices.yml
+      fi
    - >
      for ndk in android-ndk-r12b android-ndk-r15c android-ndk-r16 android-ndk-r17b;
      do
@@ -96,8 +111,8 @@ ndk_versions_compatible_tests:
        export PATH=$ANDROID_NDK_HOME:$PATH;
        echo "ndk path: $ANDROID_NDK_HOME";
        if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi
-        python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS --args="--gtest_filter=ActivationOpTest*";
-        python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS --enable_neon=false --args="--gtest_filter=ActivationOpTest*";
+        python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64 --target_socs=$TARGET_SOCS --args="--gtest_filter=ActivationOpTest*";
+        python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64 --target_socs=$TARGET_SOCS --enable_neon=false --args="--gtest_filter=ActivationOpTest*";
      fi
      done
    - export ANDROID_NDK_HOME=$DEFAULT_NDK_PATH
@@ -111,16 +126,27 @@ python_tools_tests:
    - GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@github.com:XiaoMi/mace-models.git
    - CONF_FILE=mace-models/mobilenet-v2/mobilenet-v2.yml
    - >
-      python tools/converter.py convert --config=${CONF_FILE} --target_abis=armeabi-v7a --model_graph_format=file --model_data_format=file || exit 1;
-      python tools/converter.py run --config=${CONF_FILE} --round=1 --target_abis=armeabi-v7a --validate --model_graph_format=file --model_data_format=file || exit 1;
-      python tools/converter.py run --config=${CONF_FILE} --example --target_abis=armeabi-v7a --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1;
+      if ping -c 1 v9.git.n.xiaomi.com 1>/dev/null 2>&1; then
+        GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git
+        DEVICE_CONF_FILE=generic-mobile-devices/devices.yml
+      fi
+    - >
+      python tools/converter.py convert --config=${CONF_FILE} --target_abis=armeabi-v7a,arm64 --model_graph_format=file --model_data_format=file || exit 1;
+      python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --round=1 --target_abis=armeabi-v7a,arm64 --validate --model_graph_format=file --model_data_format=file || exit 1;
+      python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --example --target_abis=armeabi-v7a,arm64 --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1;

 model_tests:
  stage: model_tests
  script:
    - pwd
    - rm -rf mace-models
+    - rm -rf generic-mobile-devices
    - GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@github.com:XiaoMi/mace-models.git
+    - >
+      if ping -c 1 v9.git.n.xiaomi.com 1>/dev/null 2>&1; then
+        GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git
+        DEVICE_CONF_FILE=generic-mobile-devices/devices.yml
+      fi
    - >
      for CONF_FILE in mace-models/mobilenet-v1/mobilenet-v1.yml mace-models/mobilenet-v1/mobilenet-v1-quantize-retrain.yml;
      do
@@ -131,8 +157,8 @@ model_tests:
    - CONF_FILE=mace-models/mobilenet-v2/mobilenet-v2-host.yml
    - >
      python tools/converter.py convert --config=${CONF_FILE} --model_graph_format=file --model_data_format=file || exit 1;
-      python tools/converter.py run --config=${CONF_FILE} --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1;
-      python tools/converter.py run --config=${CONF_FILE} --example --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1;
+      python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1;
+      python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --example --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1;
    - rm -rf mace-models

 build_android_demo:

--- a/docs/installation/env_requirement.rst
+++ b/docs/installation/env_requirement.rst
@@ -35,7 +35,7 @@ Required dependencies
      - Required by model validation
    * - six
      - pip install -I six==1.11.0
-      - Required for Python 2 and 3 compatibility (TODO)
+      - Required for Python 2 and 3 compatibility

 Optional dependencies
 ---------------------

--- a/docs/user_guide/advanced_usage.rst
+++ b/docs/user_guide/advanced_usage.rst
@@ -109,13 +109,66 @@ in one deployment file.
        sha256sum /path/to/your/file


+
 Advanced usage
 --------------

-There are two common advanced use cases:
+There are three common advanced use cases:
+ - running your model on an embedded device (ARM Linux).
  - converting model to C++ code.
  - tuning GPU kernels for a specific SoC.

+Run your model on the embedded device(ARM Linux)
+------------------------------------------------
+
+Running your model on ARM Linux is nearly the same as on Android, except that you need to specify a device config file.
+
+.. code:: bash
+
+    python tools/converter.py run --config=/path/to/your/model_deployment_file.yml --device_yml=/path/to/devices.yml
+
+There are two steps to complete before running:
+
+1. configure login without password
+
+    MACE uses ssh to connect to the embedded device, so you should copy your public key to the embedded device with the command below.
+
+    .. code:: bash
+
+      cat ~/.ssh/id_rsa.pub | ssh -q {user}@{ip} "cat >> ~/.ssh/authorized_keys"
+
+2. write your own device yaml configuration file.
+
+    * **Example**
+
+        Here is a demo device yaml config.
+
+        .. literalinclude:: devices/demo_device_nanopi.yml
+            :language: yaml
+
+    * **Configuration**
+        The detailed explanation is listed in the table below.
+
+        .. list-table::
+            :header-rows: 1
+
+            * - Options
+              - Usage
+            * - target_abis
+              - Device supported abis, you can get it via ``dpkg --print-architecture`` and
+                ``dpkg --print-foreign-architectures`` command, if more than one abi is supported,
+                separate them by commas.
+            * - target_socs
+              - Device SoC. You can get it from the device manual; we haven't found a way to get it in a shell.
+            * - models
+              - Full name of the device model. You can get it via the ``lshw`` command (a third-party package; install it via your package manager);
+                see its product value.
+            * - address
+              - Since we use ssh to connect device, ip address is required.
+            * - username
+              - login username, required.
+
+
 Convert model(s) to C++ code
 --------------------------------

@@ -403,6 +456,7 @@ Reduce Library Size
        - It is recommended to use ``version script`` and ``strip`` feature when linking mace static library. The effect is remarkable.

 * Remove the unused ops.
+
 Remove the registration of the ops unused for your models in the ``mace/ops/ops_register.cc``,
 which will reduce the library size significantly. the final binary just link the registered ops' code.


--- a/docs/user_guide/basic_usage.rst
+++ b/docs/user_guide/basic_usage.rst
@@ -68,7 +68,8 @@ Here we use the mobilenet-v2 model as an example.

    .. note::

-        If you want to run on device/phone, please plug in at least one device/phone.
+        If you want to run on phone, please plug in at least one phone.
+        Or if you want to run on an embedded device, please refer to :doc:`advanced_usage`.

    .. code:: sh

@@ -245,10 +246,14 @@ to run and validate your model.
    	# Test model run time
        python tools/converter.py run --config=/path/to/your/model_deployment_file.yml --round=100

-    	# Validate the correctness by comparing the results against the
+        # Validate the correctness by comparing the results against the
    	# original model and framework, measured with cosine distance for similarity.
    	python tools/converter.py run --config=/path/to/your/model_deployment_file.yml --validate

+        # If you want to run model on specified arm linux device, you should put device config file in the working directory or run with flag `--device_yml`
+        python tools/converter.py run --config=/path/to/your/model_deployment_file.yml --device_yml=/path/to/devices.yml
+
+
 * **benchmark**

    benchmark and profile the model.

--- a/docs/user_guide/devices/demo_device_nanopi.yml
+++ b/docs/user_guide/devices/demo_device_nanopi.yml
@@ -12,12 +12,9 @@ devices:
    address: 10.0.0.0
  # login username
    username: user
-  # login password, is required when you can login into device without password
-    password: 1234567
  raspberry:
    target_abis: [armv7l]
    target_socs: BCM2837
    models: Raspberry Pi 3 Model B Plus Rev 1.3
    address: 10.0.0.1
    username: user
-    password: 123456
--- a/mace/BUILD
+++ b/mace/BUILD
@@ -24,6 +24,24 @@ config_setting(
    visibility = ["//visibility:public"],
 )

+config_setting(
+    name = "arm_linux_aarch64",
+    values = {
+        "crosstool_top": "//tools/aarch64_compiler:toolchain",
+        "cpu": "aarch64",
+    },
+    visibility = ["//visibility:public"],
+)
+
+config_setting(
+    name = "arm_linux_armhf",
+    values = {
+        "crosstool_top": "//tools/arm_compiler:toolchain",
+        "cpu": "armeabi-v7a",
+    },
+    visibility = ["//visibility:public"],
+)
+
 config_setting(
    name = "neon_enabled",
    define_values = {

--- a/mace/core/runtime/cpu/cpu_runtime.cc
+++ b/mace/core/runtime/cpu/cpu_runtime.cc
@@ -42,7 +42,7 @@ struct CPUFreq {
 };

 namespace {
-#if defined(__ANDROID__)
+
 int GetCPUCount() {
  int cpu_count = 0;
  std::string cpu_sys_conf = "/proc/cpuinfo";
@@ -69,10 +69,8 @@ int GetCPUCount() {
  VLOG(2) << "CPU cores: " << cpu_count;
  return cpu_count;
 }
-#endif

 int GetCPUMaxFreq(std::vector<float> *max_freqs) {
-#if defined(__ANDROID__)
  int cpu_count = GetCPUCount();
  for (int cpu_id = 0; cpu_id < cpu_count; ++cpu_id) {
    std::string cpuinfo_max_freq_sys_conf = MakeString(
@@ -94,34 +92,6 @@ int GetCPUMaxFreq(std::vector<float> *max_freqs) {
    }
    f.close();
  }
-#else
-  std::string cpu_sys_conf = "/proc/cpuinfo";
-  std::ifstream f(cpu_sys_conf);
-  if (!f.is_open()) {
-    LOG(ERROR) << "failed to open " << cpu_sys_conf;
-    return -1;
-  }
-  std::string line;
-  const std::string freq_key = "cpu MHz";
-  while (std::getline(f, line)) {
-    if (line.size() >= freq_key.size()
-        && line.compare(0, freq_key.size(), freq_key) == 0) {
-      size_t pos = line.find(":");
-      if (pos != std::string::npos) {
-        std::string freq_str = line.substr(pos + 1);
-        float freq = atof(freq_str.c_str());
-        max_freqs->push_back(freq);
-      }
-    }
-  }
-  if (f.bad()) {
-    LOG(ERROR) << "failed to read " << cpu_sys_conf;
-  }
-  if (!f.eof()) {
-    LOG(ERROR) << "failed to read end of " << cpu_sys_conf;
-  }
-  f.close();
-#endif

  for (float freq : *max_freqs) {
    VLOG(2) << "CPU freq: " << freq;

--- a/mace/examples/cli/BUILD
+++ b/mace/examples/cli/BUILD
 # Examples
 load(
    "//mace:mace.bzl",
-    "if_openmp_enabled",
    "if_android",
    "if_hexagon_enabled",
    "if_opencl_enabled",
+    "if_openmp_enabled",
 )

 cc_binary(
@@ -18,8 +18,9 @@ cc_binary(
    ]),
    linkopts = [
        "-lm",
+        "-ldl",
    ] + if_openmp_enabled([
-        "-fopenmp"
+        "-fopenmp",
    ]) + if_android([
        "-ldl",
        "-pie",
@@ -47,6 +48,7 @@ cc_binary(
    ]),
    linkopts = [
        "-lm",
+        "-ldl",
    ] + if_android([
        "-ldl",
        "-pie",
@@ -55,8 +57,7 @@ cc_binary(
    linkstatic = 0,
    deps = [
        "//external:gflags_nothreads",
-        "//mace/codegen:generated_mace_engine_factory",
        "//mace/codegen:generated_libmace",
+        "//mace/codegen:generated_mace_engine_factory",
    ],
 )
-
--- a/mace/mace.bzl
+++ b/mace/mace.bzl
@@ -24,6 +24,18 @@ def if_android_arm64(a):
      "//conditions:default": [],
  })

+def if_arm_linux_aarch64(a):
+  return select({
+      "//mace:arm_linux_aarch64": a,
+      "//conditions:default": [],
+  })
+
+def if_arm_linux_armhf(a):
+  return select({
+      "//mace:arm_linux_armhf": a,
+      "//conditions:default": []
+  })
+
 def if_neon_enabled(a):
  return select({
      "//mace:neon_enabled": a,
@@ -81,4 +93,3 @@ def encrypt_opencl_kernel_genrule():
      outs = ["opencl/encrypt_opencl_kernel.cc"],
      cmd = "cat $(SRCS) > $@;"
  )
-
--- a/mace/ops/depthwise_conv2d_test.cc
+++ b/mace/ops/depthwise_conv2d_test.cc
@@ -233,7 +233,7 @@ void TestNxNS12(const index_t height, const index_t width) {
  auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w,
                  Padding type) {
    // generate random input
-    static unsigned int seed = time(NULL);
+    // static unsigned int seed = time(NULL);
    index_t batch = 1;
    index_t channel = 32;
    index_t multiplier = 1;

--- a/mace/ops/local_response_norm.cc
+++ b/mace/ops/local_response_norm.cc
@@ -13,6 +13,7 @@
 // limitations under the License.

 #include <algorithm>
+#include <cmath>

 #include "mace/core/operator.h"


--- a/mace/ops/resize_bicubic.cc
+++ b/mace/ops/resize_bicubic.cc
@@ -15,6 +15,7 @@
 #include "mace/ops/resize_bicubic.h"

 #include <algorithm>
+#include <cmath>
 #include <memory>
 #include <vector>


--- a/mace/ops/scalar_math.cc
+++ b/mace/ops/scalar_math.cc
@@ -13,6 +13,7 @@
 // limitations under the License.

 #include <algorithm>
+#include <cmath>
 #include <vector>

 #include "mace/core/operator.h"

--- a/mace/ops/softmax.cc
+++ b/mace/ops/softmax.cc
@@ -13,6 +13,7 @@
 // limitations under the License.

 #include <algorithm>
+#include <cmath>
 #include <limits>
 #include <memory>
 #include <vector>
@@ -106,7 +107,7 @@ class SoftmaxOp<DeviceType::CPU, float> : public Operation {

        float sum = 0;
        for (index_t c = 0; c < class_count; ++c) {
-          float exp_value = ::exp(input_ptr[c] - max_val);
+          float exp_value = std::exp(input_ptr[c] - max_val);
          sum += exp_value;
          output_ptr[c] = exp_value;
        }

--- a/mace/ops/strided_slice.cc
+++ b/mace/ops/strided_slice.cc
@@ -13,6 +13,7 @@
 // limitations under the License.

 #include <algorithm>
+#include <cmath>
 #include <vector>

 #include "mace/core/operator.h"

--- a/mace/ops/transpose.cc
+++ b/mace/ops/transpose.cc
@@ -16,8 +16,9 @@
 #include <arm_neon.h>
 #endif

-#include <vector>
 #include <algorithm>
+#include <cmath>
+#include <vector>

 #include "mace/core/operator.h"
 #include "mace/ops/transpose.h"

--- a/mace/python/tools/converter_tool/tensorflow_converter.py
+++ b/mace/python/tools/converter_tool/tensorflow_converter.py
@@ -112,6 +112,8 @@ TFSupportedOps = [

 TFOpType = Enum('TFOpType', [(op, op) for op in TFSupportedOps], type=str)

+TFSupportedOps = [six.b(op) for op in TFSupportedOps]
+

 class TensorflowConverter(base_converter.ConverterInterface):
    """A class for convert tensorflow frozen model to mace model.

--- a/mace/python/tools/model_saver.py
+++ b/mace/python/tools/model_saver.py
@@ -14,6 +14,7 @@

 import datetime
 import os
+import six
 import uuid
 import numpy as np
 import hashlib
@@ -34,8 +35,8 @@ class ModelFormat(object):

 def generate_obfuscated_name(namespace, name):
    md5 = hashlib.md5()
-    md5.update(namespace)
-    md5.update(name)
+    md5.update(six.b(namespace))
+    md5.update(six.b(name))
    md5_digest = md5.hexdigest()

    name = md5_digest[:8]

--- a/mace/utils/quantize.h
+++ b/mace/utils/quantize.h
@@ -15,8 +15,9 @@
 #ifndef MACE_UTILS_QUANTIZE_H_
 #define MACE_UTILS_QUANTIZE_H_

-#include <limits>
 #include <algorithm>
+#include <cmath>
+#include <limits>

 namespace mace {


--- a/tools/bazel.rc
+++ b/tools/bazel.rc
 # Partially borrowed from tensorflow tools/bazel.rc

 # By default, we don't distinct target and host platfroms.
-# When doing cross compilation, use --config=cross_compile to distinct them.
 build --distinct_host_configuration=false
-build:cross_compile --distinct_host_configuration=true

 build --verbose_failures
 build --copt=-std=c++11
@@ -17,34 +15,33 @@ build --copt=-DMACE_USE_NNLIB_CAF
 build:symbol_hidden --copt=-fvisibility=hidden

 # Usage example: bazel build --config android
-build:android --config=cross_compile
+build:android --distinct_host_configuration=true
 build:android --crosstool_top=//external:android/crosstool
 build:android --host_crosstool_top=@bazel_tools//tools/cpp:toolchain

-# Usage example: bazel build --config arm_linux
-build:arm_linux --config=cross_compile
-build:arm_linux --crosstool_top=//tools/arm_compiler:toolchain
-build:arm_linux --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
-build:arm_linux --cpu=armeabi-v7a
-build:arm_linux --copt -mfloat-abi=hard
-build:arm_linux --copt -mfpu=neon 
-build:arm_linux --copt -Wno-ignored-attributes
-build:arm_linux --copt -Wno-unused-function
-build:arm_linux --copt -Wno-sequence-point
-build:arm_linux --copt -Wno-implicit-fallthrough
-build:arm_linux --copt -Wno-psabi
+# Usage example: bazel build --config arm_linux_gnueabihf
+build:arm_linux_gnueabihf --distinct_host_configuration=true
+build:arm_linux_gnueabihf --crosstool_top=//tools/arm_compiler:toolchain
+build:arm_linux_gnueabihf --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
+build:arm_linux_gnueabihf --cpu=armeabi-v7a
+build:arm_linux_gnueabihf --copt -mfloat-abi=hard
+build:arm_linux_gnueabihf --copt -mfpu=neon
+build:arm_linux_gnueabihf --copt -Wno-ignored-attributes
+build:arm_linux_gnueabihf --copt -Wno-unused-function
+build:arm_linux_gnueabihf --copt -Wno-sequence-point
+build:arm_linux_gnueabihf --copt -Wno-implicit-fallthrough

-# Usage example: bazel build --config aarch64_linux
-build:aarch64_linux --config=cross_compile
-build:aarch64_linux --crosstool_top=//tools/aarch64_compiler:toolchain
-build:aarch64_linux --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
-build:aarch64_linux --cpu=aarch64
-build:aarch64_linux --copt -Wno-ignored-attributes
-build:aarch64_linux --copt -Wno-unused-function
-build:aarch64_linux --copt -Wno-sequence-point
-build:aarch64_linux --copt -Wno-implicit-fallthrough
+# Usage example: bazel build --config aarch64_linux_gnu
+build:aarch64_linux_gnu --distinct_host_configuration=true
+build:aarch64_linux_gnu --crosstool_top=//tools/aarch64_compiler:toolchain
+build:aarch64_linux_gnu --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
+build:aarch64_linux_gnu --cpu=aarch64
+build:aarch64_linux_gnu --copt -Wno-ignored-attributes
+build:aarch64_linux_gnu --copt -Wno-unused-function
+build:aarch64_linux_gnu --copt -Wno-sequence-point
+build:aarch64_linux_gnu --copt -Wno-implicit-fallthrough

-# Usage example: bazel build --config optimization 
+# Usage example: bazel build --config optimization
 build:optimization -c opt
 build:optimization --copt=-O3
 build:optimization --linkopt=-Wl,--strip-all

--- a/tools/bazel_adb_run.py
+++ b/tools/bazel_adb_run.py
@@ -26,9 +26,9 @@ import sys

 import sh_commands

+from common import *

-def stdout_processor(stdout, device_properties, abi):
-    pass
+from device import DeviceWrapper, DeviceManager


 def unittest_stdout_processor(stdout, device_properties, abi):
@@ -39,7 +39,7 @@ def unittest_stdout_processor(stdout, device_properties, abi):
            raise Exception("Command failed")


-def ops_benchmark_stdout_processor(stdout, device_properties, abi):
+def ops_benchmark_stdout_processor(stdout, dev, abi):
    stdout_lines = stdout.split("\n")
    metrics = {}
    for line in stdout_lines:
@@ -52,13 +52,13 @@ def ops_benchmark_stdout_processor(stdout, device_properties, abi):
            metrics["%s.input_mb_per_sec" % parts[0]] = parts[3]
            metrics["%s.gmacc_per_sec" % parts[0]] = parts[4]

-    platform = device_properties["ro.board.platform"].replace(" ", "-")
-    model = device_properties["ro.product.model"].replace(" ", "-")
-    tags = {
-        "ro.board.platform": platform,
-        "ro.product.model": model,
-        "abi": abi
-    }
+    # platform = dev[YAMLKeyword.target_socs]
+    # model = dev[YAMLKeyword.device_name]
+    # tags = {
+    #     "ro.board.platform": platform,
+    #     "ro.product.model": model,
+    #     "abi": abi
+    # }
    # sh_commands.falcon_push_metrics(server,
    #    metrics, tags=tags, endpoint="mace_ops_benchmark")

@@ -87,7 +87,7 @@ def parse_args():
        type=str,
        default="all",
        help="SoCs (ro.board.platform from getprop) to build, "
-        "comma seperated list or all/random")
+             "comma seperated list or all/random")
    parser.add_argument(
        "--target", type=str, default="//...", help="Bazel target to build")
    parser.add_argument(
@@ -99,7 +99,7 @@ def parse_args():
    parser.add_argument(
        "--stdout_processor",
        type=str,
-        default="stdout_processor",
+        default="unittest_stdout_processor",
        help="Stdout processing function, default: stdout_processor")
    parser.add_argument(
        "--enable_neon",
@@ -115,14 +115,22 @@ def parse_args():
        type=str2bool,
        default=False,
        help="Whether to use simpleperf stat")
+    parser.add_argument(
+        '--device_yml',
+        type=str,
+        default='',
+        help='embedded linux device config yml file'
+    )
    return parser.parse_known_args()


 def main(unused_args):
    target_socs = None
+    target_devices = DeviceManager.list_devices(FLAGS.device_yml)
    if FLAGS.target_socs != "all" and FLAGS.target_socs != "random":
        target_socs = set(FLAGS.target_socs.split(','))
-    target_devices = sh_commands.get_target_socs_serialnos(target_socs)
+        target_devices = [dev for dev in target_devices
+                          if dev[YAMLKeyword.target_socs] in target_socs]
    if FLAGS.target_socs == "random":
        unlocked_devices = \
            [d for d in target_devices if not sh_commands.is_device_locked(d)]
@@ -136,31 +144,29 @@ def main(unused_args):
    target_abis = FLAGS.target_abis.split(',')

    for target_abi in target_abis:
+        toolchain = infer_toolchain(target_abi)
        sh_commands.bazel_build(target, abi=target_abi,
+                                toolchain=toolchain,
                                enable_neon=FLAGS.enable_neon,
                                address_sanitizer=FLAGS.address_sanitizer)
        if FLAGS.run_target:
-            for serialno in target_devices:
-                if target_abi not in set(
-                        sh_commands.adb_supported_abis(serialno)):
+            for dev in target_devices:
+                if target_abi not in dev[YAMLKeyword.target_abis]:
                    print("Skip device %s which does not support ABI %s" %
-                          (serialno, target_abi))
+                          (dev, target_abi))
                    continue
-                stdouts = sh_commands.adb_run(
+                device_wrapper = DeviceWrapper(dev)
+                stdouts = device_wrapper.run(
                    target_abi,
-                    serialno,
                    host_bin_path,
                    bin_name,
                    args=FLAGS.args,
                    opencl_profiling=True,
                    vlog_level=0,
-                    device_bin_path="/data/local/tmp/mace",
                    out_of_range_check=True,
                    address_sanitizer=FLAGS.address_sanitizer,
                    simpleperf=FLAGS.simpleperf)
-                device_properties = sh_commands.adb_getprop_by_serialno(
-                    serialno)
-                globals()[FLAGS.stdout_processor](stdouts, device_properties,
+                globals()[FLAGS.stdout_processor](stdouts, dev,
                                                  target_abi)



--- a/tools/build-standalone-lib.sh
+++ b/tools/build-standalone-lib.sh
@@ -22,6 +22,14 @@ mkdir -p $LIB_DIR/arm64-v8a/cpu_gpu
 rm -rf $LIB_DIR/linux-x86-64
 mkdir -p $LIB_DIR/linux-x86-64

+rm -rf $LIB_DIR/arm_linux_gnueabihf
+mkdir -p $LIB_DIR/arm_linux_gnueabihf/cpu_gpu
+
+rm -rf $LIB_DIR/aarch64_linux_gnu
+mkdir -p $LIB_DIR/aarch64_linux_gnu/cpu_gpu
+
+
+
 # build shared libraries
 echo "build shared lib for armeabi-v7a + cpu_gpu_dsp"
 bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --define hexagon=true --define quantize=true --cpu=armeabi-v7a
@@ -36,6 +44,14 @@ echo "build shared lib for arm64-v8a + cpu_gpu"
 bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --define quantize=true --cpu=arm64-v8a
 cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/arm64-v8a/cpu_gpu/

+echo "build shared lib for arm_linux_gnueabihf + cpu_gpu"
+bazel build --config arm_linux_gnueabihf --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --define quantize=true
+cp bazel-bin/mace/libmace/libmace.so  $LIB_DIR/arm_linux_gnueabihf/cpu_gpu/
+
+echo "build shared lib for aarch64_linux_gnu + cpu_gpu"
+bazel build --config aarch64_linux_gnu  --config optimization mace/libmace:libmace_dynamic  --define neon=true --define openmp=true --define opencl=true --define quantize=true
+cp bazel-bin/mace/libmace/libmace.so  $LIB_DIR/aarch64_linux_gnu/cpu_gpu/
+
 if [[ "$OSTYPE" != "darwin"* ]];then
 	echo "build shared lib for linux-x86-64"
 	bazel build mace/libmace:libmace_dynamic --config optimization --define quantize=true --define openmp=true
@@ -56,6 +72,14 @@ echo "build static lib for arm64-v8a + cpu_gpu"
 bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=true --define opencl=true --define quantize=true --cpu=arm64-v8a
 cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/arm64-v8a/cpu_gpu/

+echo "build static lib for arm_linux_gnueabihf + cpu_gpu"
+bazel build --config arm_linux_gnueabihf --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=true --define opencl=true --define quantize=true
+cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/arm_linux_gnueabihf/cpu_gpu/
+
+echo "build static lib for aarch64_linux_gnu + cpu_gpu"
+bazel build --config aarch64_linux_gnu --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=true --define opencl=true --define quantize=true
+cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/aarch64_linux_gnu/cpu_gpu/
+
 if [[ "$OSTYPE" != "darwin"* ]];then
 	echo "build static lib for linux-x86-64"
 	bazel build mace/libmace:libmace_static --config optimization --define quantize=true --define openmp=true

--- a/tools/common.py
+++ b/tools/common.py
@@ -13,7 +13,9 @@
 # limitations under the License.

 import enum
+import hashlib
 import re
+import os

 import six

@@ -135,3 +137,340 @@ def formatted_file_name(input_file_name, input_name):
    for c in input_name:
        res += c if c.isalnum() else '_'
    return res
+
+
+def md5sum(s):
+    """Return the hexadecimal MD5 digest of text ``s`` encoded as UTF-8."""
+    return hashlib.md5(s.encode('utf-8')).hexdigest()
+
+
+def get_build_binary_dir(library_name, target_abi):
+    """Return ``BUILD_OUTPUT_DIR/<library_name>/BUILD_TMP_DIR_NAME/<abi>``."""
+    path_parts = (BUILD_OUTPUT_DIR, library_name,
+                  BUILD_TMP_DIR_NAME, target_abi)
+    return "%s/%s/%s/%s" % path_parts
+
+
+def get_model_lib_output_path(library_name, abi):
+    """Return the path of the model archive ``<library_name>.a``.
+
+    The archive is located under
+    ``BUILD_OUTPUT_DIR/<library_name>/MODEL_OUTPUT_DIR_NAME/<abi>/``.
+    """
+    archive_name = "%s.a" % library_name
+    return os.path.join(BUILD_OUTPUT_DIR, library_name,
+                        MODEL_OUTPUT_DIR_NAME, abi, archive_name)
+
+
+def check_model_converted(library_name, model_name,
+                          model_graph_format, model_data_format,
+                          abi):
+    """Check through ``mace_check`` that converted model artifacts exist.
+
+    * Graph: ``<model_name>.pb`` in the model output directory when
+      ``model_graph_format`` is ``ModelFormat.file``; otherwise the model
+      library returned by ``get_model_lib_output_path(library_name, abi)``.
+    * Data: ``<model_name>.data`` in the model output directory, checked
+      only when ``model_data_format`` is ``ModelFormat.file``.
+
+    ``abi`` is only used for the model library lookup.
+    """
+    not_converted_msg = "You should convert model first."
+    model_output_dir = \
+        '%s/%s/%s' % (BUILD_OUTPUT_DIR, library_name, MODEL_OUTPUT_DIR_NAME)
+
+    if model_graph_format == ModelFormat.file:
+        graph_artifact = "%s/%s.pb" % (model_output_dir, model_name)
+    else:
+        graph_artifact = get_model_lib_output_path(library_name, abi)
+    mace_check(os.path.exists(graph_artifact),
+               ModuleName.RUN,
+               not_converted_msg)
+
+    if model_data_format == ModelFormat.file:
+        data_artifact = "%s/%s.data" % (model_output_dir, model_name)
+        mace_check(os.path.exists(data_artifact),
+                   ModuleName.RUN,
+                   not_converted_msg)
+
+
+def parse_device_type(runtime):
+    """Map a ``RuntimeType`` value to the matching ``DeviceType``.
+
+    ``dsp`` maps to ``HEXAGON``, ``gpu`` to ``GPU`` and ``cpu`` to ``CPU``.
+    Any other value (including ``RuntimeType.cpu_gpu``) yields ``""``.
+    """
+    if runtime == RuntimeType.dsp:
+        return DeviceType.HEXAGON
+    if runtime == RuntimeType.gpu:
+        return DeviceType.GPU
+    if runtime == RuntimeType.cpu:
+        return DeviceType.CPU
+    return ""
+
+
+def sha256_checksum(fname):
+    """Return the hexadecimal SHA-256 digest of the file at ``fname``.
+
+    The file is opened in binary mode and hashed in 4096-byte chunks.
+    """
+    digest = hashlib.sha256()
+    with open(fname, "rb") as stream:
+        while True:
+            block = stream.read(4096)
+            if block == b"":
+                break
+            digest.update(block)
+    return digest.hexdigest()
+
+
+def get_model_files(model_file_path,
+                    model_sha256_checksum,
+                    model_output_dir,
+                    weight_file_path="",
+                    weight_sha256_checksum=""):
+    """Resolve local paths for the model file and optional weight file.
+
+    A path starting with ``http://`` or ``https://`` is downloaded into
+    ``model_output_dir`` under a name derived from the MD5 of the URL
+    (``.pb`` for the model, ``.caffemodel`` for the weights), unless a file
+    with the expected SHA-256 checksum already exists there. The resolved
+    model file, and the weight file when one is given, are then compared
+    with their expected checksums; ``MaceLogger.error`` is called on a
+    mismatch.
+
+    Returns:
+        ``(model_file, weight_file)``. A non-URL path is returned unchanged,
+        so ``weight_file`` may be the empty string.
+    """
+    remote_schemes = ("http://", "https://")
+
+    model_file = model_file_path
+    if model_file_path.startswith(remote_schemes):
+        model_file = model_output_dir + "/" + md5sum(model_file_path) + ".pb"
+        # Reuse a previous download only if its checksum still matches.
+        model_cached = os.path.exists(model_file) and \
+            sha256_checksum(model_file) == model_sha256_checksum
+        if not model_cached:
+            MaceLogger.info("Downloading model, please wait ...")
+            six.moves.urllib.request.urlretrieve(model_file_path, model_file)
+            MaceLogger.info("Model downloaded successfully.")
+
+    if sha256_checksum(model_file) != model_sha256_checksum:
+        MaceLogger.error(ModuleName.MODEL_CONVERTER,
+                         "model file sha256checksum not match")
+
+    weight_file = weight_file_path
+    if weight_file_path.startswith(remote_schemes):
+        weight_file = \
+            model_output_dir + "/" + md5sum(weight_file_path) + ".caffemodel"
+        weight_cached = os.path.exists(weight_file) and \
+            sha256_checksum(weight_file) == weight_sha256_checksum
+        if not weight_cached:
+            MaceLogger.info("Downloading model weight, please wait ...")
+            six.moves.urllib.request.urlretrieve(weight_file_path, weight_file)
+            MaceLogger.info("Model weight downloaded successfully.")
+
+    # Weights are optional: an empty path skips the checksum comparison.
+    if weight_file and \
+            sha256_checksum(weight_file) != weight_sha256_checksum:
+        MaceLogger.error(ModuleName.MODEL_CONVERTER,
+                         "weight file sha256checksum not match")
+
+    return model_file, weight_file
+
+
+def get_opencl_binary_output_path(library_name, target_abi, device):
+    """Return the output path of the compiled OpenCL kernel binary.
+
+    The path has the form ``BUILD_OUTPUT_DIR/<library_name>/
+    OUTPUT_OPENCL_BINARY_DIR_NAME/<target_abi>/<library_name>_
+    OUTPUT_OPENCL_BINARY_FILE_NAME.<device_name>.<target_socs>.bin``, where
+    the last two fields are read from ``device``.
+    """
+    target_soc = device.target_socs
+    device_name = device.device_name
+    path_fields = (BUILD_OUTPUT_DIR,
+                   library_name,
+                   OUTPUT_OPENCL_BINARY_DIR_NAME,
+                   target_abi,
+                   library_name,
+                   OUTPUT_OPENCL_BINARY_FILE_NAME,
+                   device_name,
+                   target_soc)
+    return '%s/%s/%s/%s/%s_%s.%s.%s.bin' % path_fields
+
+
+def get_opencl_parameter_output_path(library_name, target_abi, device):
+    """Return the output path of the tuned OpenCL parameter file.
+
+    Same layout as ``get_opencl_binary_output_path`` (it also lives under
+    ``OUTPUT_OPENCL_BINARY_DIR_NAME``), but the file stem uses
+    ``OUTPUT_OPENCL_PARAMETER_FILE_NAME``.
+    """
+    target_soc = device.target_socs
+    device_name = device.device_name
+    path_fields = (BUILD_OUTPUT_DIR,
+                   library_name,
+                   OUTPUT_OPENCL_BINARY_DIR_NAME,
+                   target_abi,
+                   library_name,
+                   OUTPUT_OPENCL_PARAMETER_FILE_NAME,
+                   device_name,
+                   target_soc)
+    return '%s/%s/%s/%s/%s_%s.%s.%s.bin' % path_fields
+
+
+def get_build_model_dirs(library_name,
+                         model_name,
+                         target_abi,
+                         device,
+                         model_file_path):
+    """Compute the build directories used for one model.
+
+    Returns:
+        A tuple ``(model_output_base_dir, model_output_dir, mace_model_dir)``:
+
+        * ``model_output_base_dir`` is keyed by the model name and the MD5 of
+          ``model_file_path`` under the library's temporary build directory.
+        * ``model_output_dir`` is ``<base>/<abi>`` for the host ABI,
+          ``<base>/<device_name>_<target_socs>/<abi>`` when the device has
+          both ``target_socs`` and ``address`` set, and
+          ``<base>/BUILD_TMP_GENERAL_OUTPUT_DIR_NAME/<abi>`` otherwise.
+        * ``mace_model_dir`` is the library's model output directory.
+    """
+    device_name = device.device_name
+    target_socs = device.target_socs
+    path_digest = md5sum(model_file_path)
+    base_dir = '{}/{}/{}/{}/{}'.format(
+        BUILD_OUTPUT_DIR, library_name, BUILD_TMP_DIR_NAME,
+        model_name, path_digest)
+
+    if target_abi == ABIType.host:
+        output_dir = '%s/%s' % (base_dir, target_abi)
+    elif target_socs and device.address:
+        # A concrete, addressable device gets its own sub-directory.
+        output_dir = '{}/{}_{}/{}'.format(
+            base_dir, device_name, target_socs, target_abi)
+    else:
+        output_dir = '%s/%s/%s' % (base_dir,
+                                   BUILD_TMP_GENERAL_OUTPUT_DIR_NAME,
+                                   target_abi)
+
+    mace_model_dir = '{}/{}/{}'.format(
+        BUILD_OUTPUT_DIR, library_name, MODEL_OUTPUT_DIR_NAME)
+
+    return base_dir, output_dir, mace_model_dir
+
+
+def abi_to_internal(abi):
+    """Translate a configured ABI name to the internal ABI name.
+
+    ``armeabi-v7a`` and ``arm64-v8a`` are returned unchanged, ``arm64`` maps
+    to ``aarch64`` and ``armhf`` maps to ``armeabi-v7a``. Any other value
+    (for example ``host``) yields ``None``.
+    """
+    internal_abi = None
+    if abi in (ABIType.armeabi_v7a, ABIType.arm64_v8a):
+        internal_abi = abi
+    elif abi == ABIType.arm64:
+        internal_abi = ABIType.aarch64
+    elif abi == ABIType.armhf:
+        internal_abi = ABIType.armeabi_v7a
+    return internal_abi
+
+
+def infer_toolchain(abi):
+    """Return the ``ToolchainType`` used to build for ``abi``.
+
+    ``armeabi-v7a`` and ``arm64-v8a`` use the android toolchain, ``armhf``
+    uses ``arm_linux_gnueabihf`` and ``arm64`` uses ``aarch64_linux_gnu``.
+    Any other ABI yields the empty string.
+    """
+    toolchain = ''
+    if abi in (ABIType.armeabi_v7a, ABIType.arm64_v8a):
+        toolchain = ToolchainType.android
+    elif abi == ABIType.armhf:
+        toolchain = ToolchainType.arm_linux_gnueabihf
+    elif abi == ABIType.arm64:
+        toolchain = ToolchainType.aarch64_linux_gnu
+    return toolchain
+
+
+################################
+# YAML key word
+################################
+class YAMLKeyword(object):
+    """String keys looked up in the parsed YAML configuration dictionaries.
+
+    Each attribute's value equals its own name, so code can write
+    ``configs[YAMLKeyword.models]`` instead of repeating string literals.
+    """
+    library_name = 'library_name'
+    target_abis = 'target_abis'
+    target_socs = 'target_socs'
+    model_graph_format = 'model_graph_format'
+    model_data_format = 'model_data_format'
+    models = 'models'
+    platform = 'platform'
+    # NOTE(review): the next five keys look like per-device settings
+    # (presumably read from the device YAML) -- confirm against device.py.
+    device_name = 'device_name'
+    system = 'system'
+    address = 'address'
+    username = 'username'
+    password = 'password'
+    model_file_path = 'model_file_path'
+    model_sha256_checksum = 'model_sha256_checksum'
+    weight_file_path = 'weight_file_path'
+    weight_sha256_checksum = 'weight_sha256_checksum'
+    subgraphs = 'subgraphs'
+    input_tensors = 'input_tensors'
+    input_shapes = 'input_shapes'
+    input_ranges = 'input_ranges'
+    output_tensors = 'output_tensors'
+    output_shapes = 'output_shapes'
+    check_tensors = 'check_tensors'
+    check_shapes = 'check_shapes'
+    runtime = 'runtime'
+    data_type = 'data_type'
+    input_data_types = 'input_data_types'
+    input_data_formats = 'input_data_formats'
+    output_data_formats = 'output_data_formats'
+    limit_opencl_kernel_time = 'limit_opencl_kernel_time'
+    nnlib_graph_mode = 'nnlib_graph_mode'
+    obfuscate = 'obfuscate'
+    winograd = 'winograd'
+    quantize = 'quantize'
+    quantize_range_file = 'quantize_range_file'
+    change_concat_ranges = 'change_concat_ranges'
+    validation_inputs_data = 'validation_inputs_data'
+    validation_threshold = 'validation_threshold'
+    graph_optimize_options = 'graph_optimize_options'  # internal use for now
+    cl_mem_type = 'cl_mem_type'
+
+
+################################
+# SystemType
+################################
+class SystemType(object):
+    """Names of the systems a target device can run.
+
+    Derives from ``object`` like the other constant holders in this module,
+    so it is a new-style class on Python 2 as well.
+    """
+    host = 'host'
+    android = 'android'
+    arm_linux = 'arm_linux'
+
+
+################################
+# common constants: device paths, build dirs and bazel targets
+################################
+
+# Working directories on the target.
+# NOTE(review): presumably PHONE_DATA_DIR is used for Android devices and
+# DEVICE_DATA_DIR for other (e.g. ARM Linux) devices -- confirm in device.py.
+PHONE_DATA_DIR = '/data/local/tmp/mace_run'
+DEVICE_DATA_DIR = '/tmp/data/mace_run'
+DEVICE_INTERIOR_DIR = PHONE_DATA_DIR + "/interior"
+# Local build output layout; BUILD_DOWNLOADS_DIR is derived from
+# BUILD_OUTPUT_DIR, so the order of these assignments matters.
+BUILD_OUTPUT_DIR = 'builds'
+BUILD_TMP_DIR_NAME = '_tmp'
+BUILD_DOWNLOADS_DIR = BUILD_OUTPUT_DIR + '/downloads'
+BUILD_TMP_GENERAL_OUTPUT_DIR_NAME = 'general'
+MODEL_OUTPUT_DIR_NAME = 'model'
+# Binary names and the bazel targets built from them.
+EXAMPLE_STATIC_NAME = "example_static"
+EXAMPLE_DYNAMIC_NAME = "example_dynamic"
+EXAMPLE_STATIC_TARGET = "//mace/examples/cli:" + EXAMPLE_STATIC_NAME
+EXAMPLE_DYNAMIC_TARGET = "//mace/examples/cli:" + EXAMPLE_DYNAMIC_NAME
+MACE_RUN_STATIC_NAME = "mace_run_static"
+MACE_RUN_DYNAMIC_NAME = "mace_run_dynamic"
+MACE_RUN_STATIC_TARGET = "//mace/tools/validation:" + MACE_RUN_STATIC_NAME
+MACE_RUN_DYNAMIC_TARGET = "//mace/tools/validation:" + MACE_RUN_DYNAMIC_NAME
+CL_COMPILED_BINARY_FILE_NAME = "mace_cl_compiled_program.bin"
+BUILD_TMP_OPENCL_BIN_DIR = 'opencl_bin'
+LIBMACE_DYNAMIC_PATH = "bazel-bin/mace/libmace/libmace.so"
+CL_TUNED_PARAMETER_FILE_NAME = "mace_run.config"
+MODEL_HEADER_DIR_PATH = 'include/mace/public'
+# Names used by get_opencl_binary_output_path() and
+# get_opencl_parameter_output_path() to build their output paths.
+OUTPUT_LIBRARY_DIR_NAME = 'lib'
+OUTPUT_OPENCL_BINARY_DIR_NAME = 'opencl'
+OUTPUT_OPENCL_BINARY_FILE_NAME = 'compiled_opencl_kernel'
+OUTPUT_OPENCL_PARAMETER_FILE_NAME = 'tuned_opencl_parameter'
+# Code generation directories, all rooted at CODEGEN_BASE_DIR.
+CODEGEN_BASE_DIR = 'mace/codegen'
+MODEL_CODEGEN_DIR = CODEGEN_BASE_DIR + '/models'
+ENGINE_CODEGEN_DIR = CODEGEN_BASE_DIR + '/engine'
+LIB_CODEGEN_DIR = CODEGEN_BASE_DIR + '/lib'
+# libmace / model library bazel targets and their output paths.
+LIBMACE_SO_TARGET = "//mace/libmace:libmace.so"
+LIBMACE_STATIC_TARGET = "//mace/libmace:libmace_static"
+LIBMACE_STATIC_PATH = "bazel-genfiles/mace/libmace/libmace.a"
+MODEL_LIB_TARGET = "//mace/codegen:generated_models"
+MODEL_LIB_PATH = "bazel-genfiles/mace/codegen/libgenerated_models.a"
+QUANTIZE_STAT_TARGET = "//mace/tools/quantization:quantize_stat"
+BM_MODEL_STATIC_NAME = "benchmark_model_static"
+BM_MODEL_DYNAMIC_NAME = "benchmark_model_dynamic"
+BM_MODEL_STATIC_TARGET = "//mace/benchmark:" + BM_MODEL_STATIC_NAME
+BM_MODEL_DYNAMIC_TARGET = "//mace/benchmark:" + BM_MODEL_DYNAMIC_NAME
+# Value of target_socs that selects every available SoC.
+ALL_SOC_TAG = 'all'
+
+
+################################
+# Model File Format
+################################
+class ModelFormat(object):
+    """Values accepted for the model graph/data format settings.
+
+    check_model_converted() expects ``.pb`` / ``.data`` files for ``file``
+    and otherwise looks for the built model library.
+    """
+    file = 'file'
+    code = 'code'
+
+
+################################
+# ABI Type
+################################
+class ABIType(object):
+    """ABI names understood by the tools.
+
+    See abi_to_internal() and infer_toolchain() for how each name is mapped
+    to an internal ABI and to a build toolchain.
+    """
+    # Built with the android toolchain (see infer_toolchain()).
+    armeabi_v7a = 'armeabi-v7a'
+    arm64_v8a = 'arm64-v8a'
+    # Built with aarch64_linux_gnu; abi_to_internal() maps it to aarch64.
+    arm64 = 'arm64'
+    aarch64 = 'aarch64'
+    # Built with arm_linux_gnueabihf; abi_to_internal() maps it to
+    # armeabi-v7a.
+    armhf = 'armhf'
+    host = 'host'
+
+
+################################
+# Module name
+################################
+class ModuleName(object):
+    """Module labels passed to mace_check() and MaceLogger.error()."""
+    YAML_CONFIG = 'YAML CONFIG'
+    MODEL_CONVERTER = 'Model Converter'
+    RUN = 'RUN'
+    BENCHMARK = 'Benchmark'
+
+
+#################################
+# mace lib type
+#################################
+class MACELibType(object):
+    """Integer tags distinguishing the static and dynamic MACE library."""
+    static = 0
+    dynamic = 1
+
+
+#################################
+# Run time type
+#################################
+class RuntimeType(object):
+    """Runtime names accepted in the configuration.
+
+    parse_device_type() maps ``cpu``, ``gpu`` and ``dsp`` to a DeviceType;
+    ``cpu+gpu`` has no mapping there and yields the empty string.
+    """
+    cpu = 'cpu'
+    gpu = 'gpu'
+    dsp = 'dsp'
+    cpu_gpu = 'cpu+gpu'
+
+
+#################################
+# Tool chain Type
+#################################
+class ToolchainType(object):
+    """Toolchain names returned by infer_toolchain().
+
+    Derives from ``object`` like the other constant holders in this module,
+    so it is a new-style class on Python 2 as well.
+    """
+    android = 'android'
+    arm_linux_gnueabihf = 'arm_linux_gnueabihf'
+    aarch64_linux_gnu = 'aarch64_linux_gnu'
--- a/tools/converter.py
+++ b/tools/converter.py
@@ -18,7 +18,6 @@ import hashlib
 import os
 import re
 import sh
-import subprocess
 import sys
 import urllib
 import yaml
@@ -27,14 +26,9 @@ from enum import Enum
 import six

 import sh_commands
-from sh_commands import BuildType
-from sh_commands import ModelFormat

-from common import CaffeEnvType
-from common import DeviceType
-from common import mace_check
-from common import MaceLogger
-from common import StringFormatter
+from common import *
+from device import DeviceWrapper, DeviceManager

 ################################
 # set environment
@@ -44,69 +38,20 @@ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
 ################################
 # common definitions
 ################################
-BUILD_OUTPUT_DIR = 'builds'
-BUILD_DOWNLOADS_DIR = BUILD_OUTPUT_DIR + '/downloads'
-PHONE_DATA_DIR = "/data/local/tmp/mace_run"
-MODEL_OUTPUT_DIR_NAME = 'model'
-MODEL_HEADER_DIR_PATH = 'include/mace/public'
-BUILD_TMP_DIR_NAME = '_tmp'
-BUILD_TMP_GENERAL_OUTPUT_DIR_NAME = 'general'
-OUTPUT_LIBRARY_DIR_NAME = 'lib'
-OUTPUT_OPENCL_BINARY_DIR_NAME = 'opencl'
-OUTPUT_OPENCL_BINARY_FILE_NAME = 'compiled_opencl_kernel'
-OUTPUT_OPENCL_PARAMETER_FILE_NAME = 'tuned_opencl_parameter'
-CL_COMPILED_BINARY_FILE_NAME = "mace_cl_compiled_program.bin"
-CL_TUNED_PARAMETER_FILE_NAME = "mace_run.config"
-CODEGEN_BASE_DIR = 'mace/codegen'
-MODEL_CODEGEN_DIR = CODEGEN_BASE_DIR + '/models'
-ENGINE_CODEGEN_DIR = CODEGEN_BASE_DIR + '/engine'
-LIB_CODEGEN_DIR = CODEGEN_BASE_DIR + '/lib'
-LIBMACE_SO_TARGET = "//mace/libmace:libmace.so"
-LIBMACE_STATIC_TARGET = "//mace/libmace:libmace_static"
-LIBMACE_STATIC_PATH = "bazel-genfiles/mace/libmace/libmace.a"
-LIBMACE_DYNAMIC_PATH = "bazel-bin/mace/libmace/libmace.so"
-MODEL_LIB_TARGET = "//mace/codegen:generated_models"
-MODEL_LIB_PATH = "bazel-genfiles/mace/codegen/libgenerated_models.a"
-MACE_RUN_STATIC_NAME = "mace_run_static"
-MACE_RUN_DYNAMIC_NAME = "mace_run_dynamic"
-MACE_RUN_STATIC_TARGET = "//mace/tools/validation:" + MACE_RUN_STATIC_NAME
-MACE_RUN_DYNAMIC_TARGET = "//mace/tools/validation:" + MACE_RUN_DYNAMIC_NAME
-EXAMPLE_STATIC_NAME = "example_static"
-EXAMPLE_DYNAMIC_NAME = "example_dynamic"
-EXAMPLE_STATIC_TARGET = "//mace/examples/cli:" + EXAMPLE_STATIC_NAME
-EXAMPLE_DYNAMIC_TARGET = "//mace/examples/cli:" + EXAMPLE_DYNAMIC_NAME
-BM_MODEL_STATIC_NAME = "benchmark_model_static"
-BM_MODEL_DYNAMIC_NAME = "benchmark_model_dynamic"
-BM_MODEL_STATIC_TARGET = "//mace/benchmark:" + BM_MODEL_STATIC_NAME
-BM_MODEL_DYNAMIC_TARGET = "//mace/benchmark:" + BM_MODEL_DYNAMIC_NAME
-DEVICE_INTERIOR_DIR = PHONE_DATA_DIR + "/interior"
-BUILD_TMP_OPENCL_BIN_DIR = 'opencl_bin'
-ALL_SOC_TAG = 'all'

 ABITypeStrs = [
    'armeabi-v7a',
    'arm64-v8a',
+    'arm64',
+    'armhf',
    'host',
 ]

-
-class ABIType(object):
-    armeabi_v7a = 'armeabi-v7a'
-    arm64_v8a = 'arm64-v8a'
-    host = 'host'
-
-
 ModelFormatStrs = [
    "file",
    "code",
 ]

-
-class MACELibType(object):
-    static = 0
-    dynamic = 1
-
-
 PlatformTypeStrs = [
    "tensorflow",
    "caffe",
@@ -121,14 +66,6 @@ RuntimeTypeStrs = [
    "cpu+gpu"
 ]

-
-class RuntimeType(object):
-    cpu = 'cpu'
-    gpu = 'gpu'
-    dsp = 'dsp'
-    cpu_gpu = 'cpu+gpu'
-
-
 InputDataTypeStrs = [
    "int32",
    "float32",
@@ -174,49 +111,11 @@ class DefaultValues(object):
    gpu_priority_hint = 3,


-class YAMLKeyword(object):
-    library_name = 'library_name'
-    target_abis = 'target_abis'
-    target_socs = 'target_socs'
-    model_graph_format = 'model_graph_format'
-    model_data_format = 'model_data_format'
-    models = 'models'
-    platform = 'platform'
-    model_file_path = 'model_file_path'
-    model_sha256_checksum = 'model_sha256_checksum'
-    weight_file_path = 'weight_file_path'
-    weight_sha256_checksum = 'weight_sha256_checksum'
-    subgraphs = 'subgraphs'
-    input_tensors = 'input_tensors'
-    input_shapes = 'input_shapes'
-    input_ranges = 'input_ranges'
-    output_tensors = 'output_tensors'
-    output_shapes = 'output_shapes'
-    check_tensors = 'check_tensors'
-    check_shapes = 'check_shapes'
-    runtime = 'runtime'
-    data_type = 'data_type'
-    input_data_types = 'input_data_types'
-    input_data_formats = 'input_data_formats'
-    output_data_formats = 'output_data_formats'
-    limit_opencl_kernel_time = 'limit_opencl_kernel_time'
-    nnlib_graph_mode = 'nnlib_graph_mode'
-    obfuscate = 'obfuscate'
-    winograd = 'winograd'
-    quantize = 'quantize'
-    quantize_range_file = 'quantize_range_file'
-    change_concat_ranges = 'change_concat_ranges'
-    validation_inputs_data = 'validation_inputs_data'
-    validation_threshold = 'validation_threshold'
-    graph_optimize_options = 'graph_optimize_options'  # internal use for now
-    cl_mem_type = 'cl_mem_type'
-
-
-class ModuleName(object):
-    YAML_CONFIG = 'YAML CONFIG'
-    MODEL_CONVERTER = 'Model Converter'
-    RUN = 'RUN'
-    BENCHMARK = 'Benchmark'
+class ValidationThreshold(object):
+    """Default similarity thresholds, used to seed ``threshold_dict``.
+
+    The values must be plain floats so they have the same type as the
+    user-supplied overrides that replace them in ``threshold_dict``.
+    """
+    # No trailing commas here: ``name = 0.999,`` binds the 1-tuple
+    # ``(0.999,)`` rather than the float ``0.999``.
+    cpu_threshold = 0.999
+    gpu_threshold = 0.995
+    hexagon_threshold = 0.930
+    cpu_quantize_threshold = 0.980


 CPP_KEYWORDS = [
@@ -260,7 +159,7 @@ def parse_device_type(runtime):
 def get_hexagon_mode(configs):
    runtime_list = []
    for model_name in configs[YAMLKeyword.models]:
-        model_runtime =\
+        model_runtime = \
            configs[YAMLKeyword.models][model_name].get(
                YAMLKeyword.runtime, "")
        runtime_list.append(model_runtime.lower())
@@ -273,7 +172,7 @@ def get_hexagon_mode(configs):
 def get_opencl_mode(configs):
    runtime_list = []
    for model_name in configs[YAMLKeyword.models]:
-        model_runtime =\
+        model_runtime = \
            configs[YAMLKeyword.models][model_name].get(
                YAMLKeyword.runtime, "")
        runtime_list.append(model_runtime.lower())
@@ -331,7 +230,7 @@ def format_model_config(flags):
    target_socs = configs.get(YAMLKeyword.target_socs, "")
    if flags.target_socs:
        configs[YAMLKeyword.target_socs] = \
-               [soc.lower() for soc in flags.target_socs.split(',')]
+            [soc.lower() for soc in flags.target_socs.split(',')]
    elif not target_socs:
        configs[YAMLKeyword.target_socs] = []
    elif not isinstance(target_socs, list):
@@ -347,7 +246,9 @@ def format_model_config(flags):
        if ALL_SOC_TAG in target_socs:
            mace_check(available_socs,
                       ModuleName.YAML_CONFIG,
-                       "Build for all SOCs plugged in computer, "
+                       "Android abi is listed in config file and "
+                       "build for all SOCs plugged in computer, "
+                       "But no android phone found, "
                       "you at least plug in one phone")
        else:
            for soc in target_socs:
@@ -412,7 +313,7 @@ def format_model_config(flags):

        weight_file_path = model_config.get(YAMLKeyword.weight_file_path, "")
        if weight_file_path:
-            weight_checksum =\
+            weight_checksum = \
                model_config.get(YAMLKeyword.weight_sha256_checksum, "")
            mace_check(weight_checksum != "", ModuleName.YAML_CONFIG,
                       "'%s' is necessary" %
@@ -538,14 +439,15 @@ def format_model_config(flags):
                YAMLKeyword.validation_threshold, {})
            if not isinstance(validation_threshold, dict):
                raise argparse.ArgumentTypeError(
-                        'similarity threshold must be a dict.')
+                    'similarity threshold must be a dict.')

            threshold_dict = {
-                    DeviceType.CPU: 0.999,
-                    DeviceType.GPU: 0.995,
-                    DeviceType.HEXAGON: 0.930,
-                    DeviceType.CPU + "_QUANTIZE": 0.980,
-                    }
+                DeviceType.CPU: ValidationThreshold.cpu_threshold,
+                DeviceType.GPU: ValidationThreshold.gpu_threshold,
+                DeviceType.HEXAGON: ValidationThreshold.hexagon_threshold,
+                DeviceType.CPU + "_QUANTIZE":
+                    ValidationThreshold.cpu_quantize_threshold,
+            }
            for k, v in six.iteritems(validation_threshold):
                if k.upper() == 'DSP':
                    k = DeviceType.HEXAGON
@@ -554,7 +456,7 @@ def format_model_config(flags):
                                     DeviceType.HEXAGON,
                                     DeviceType.CPU + "_QUANTIZE"):
                    raise argparse.ArgumentTypeError(
-                            'Unsupported validation threshold runtime: %s' % k)
+                        'Unsupported validation threshold runtime: %s' % k)
                threshold_dict[k.upper()] = v

            subgraph[YAMLKeyword.validation_threshold] = threshold_dict
@@ -573,7 +475,7 @@ def format_model_config(flags):
                subgraph[YAMLKeyword.input_ranges] = [input_ranges]
            else:
                subgraph[YAMLKeyword.input_ranges] = input_ranges
-            subgraph[YAMLKeyword.input_ranges] =\
+            subgraph[YAMLKeyword.input_ranges] = \
                [str(v) for v in subgraph[YAMLKeyword.input_ranges]]

        for key in [YAMLKeyword.limit_opencl_kernel_time,
@@ -598,67 +500,6 @@ def format_model_config(flags):
    return configs


-def get_build_binary_dir(library_name, target_abi):
-    return "%s/%s/%s/%s" % (
-        BUILD_OUTPUT_DIR, library_name, BUILD_TMP_DIR_NAME, target_abi)
-
-
-def get_build_model_dirs(library_name, model_name, target_abi, target_soc,
-                         serial_num, model_file_path):
-    model_path_digest = md5sum(model_file_path)
-    model_output_base_dir = "%s/%s/%s/%s/%s" % (
-        BUILD_OUTPUT_DIR, library_name, BUILD_TMP_DIR_NAME,
-        model_name, model_path_digest)
-
-    if target_abi == ABIType.host:
-        model_output_dir = "%s/%s" % (model_output_base_dir, target_abi)
-    elif not target_soc or not serial_num:
-        model_output_dir = "%s/%s/%s" % (
-            model_output_base_dir, BUILD_TMP_GENERAL_OUTPUT_DIR_NAME,
-            target_abi)
-    else:
-        device_name = \
-            sh_commands.adb_get_device_name_by_serialno(serial_num)
-        model_output_dir = "%s/%s_%s/%s" % (
-            model_output_base_dir, device_name,
-            target_soc, target_abi)
-
-    mace_model_dir = \
-        '%s/%s/%s' % (BUILD_OUTPUT_DIR, library_name, MODEL_OUTPUT_DIR_NAME)
-
-    return model_output_base_dir, model_output_dir, mace_model_dir
-
-
-def get_opencl_binary_output_path(library_name, target_abi,
-                                  target_soc, serial_num):
-    device_name = \
-        sh_commands.adb_get_device_name_by_serialno(serial_num)
-    return '%s/%s/%s/%s/%s_%s.%s.%s.bin' % \
-           (BUILD_OUTPUT_DIR,
-            library_name,
-            OUTPUT_OPENCL_BINARY_DIR_NAME,
-            target_abi,
-            library_name,
-            OUTPUT_OPENCL_BINARY_FILE_NAME,
-            device_name,
-            target_soc)
-
-
-def get_opencl_parameter_output_path(library_name, target_abi,
-                                     target_soc, serial_num):
-    device_name = \
-        sh_commands.adb_get_device_name_by_serialno(serial_num)
-    return '%s/%s/%s/%s/%s_%s.%s.%s.bin' % \
-           (BUILD_OUTPUT_DIR,
-            library_name,
-            OUTPUT_OPENCL_BINARY_DIR_NAME,
-            target_abi,
-            library_name,
-            OUTPUT_OPENCL_PARAMETER_FILE_NAME,
-            device_name,
-            target_soc)
-
-
 def clear_build_dirs(library_name):
    # make build dir
    if not os.path.exists(BUILD_OUTPUT_DIR):
@@ -676,27 +517,6 @@ def clear_build_dirs(library_name):
        sh.rm('-rf', lib_output_dir)


-def check_model_converted(library_name, model_name,
-                          model_graph_format, model_data_format,
-                          abi):
-    model_output_dir = \
-        '%s/%s/%s' % (BUILD_OUTPUT_DIR, library_name, MODEL_OUTPUT_DIR_NAME)
-    if model_graph_format == ModelFormat.file:
-        mace_check(os.path.exists("%s/%s.pb" % (model_output_dir, model_name)),
-                   ModuleName.RUN,
-                   "You should convert model first.")
-    else:
-        model_lib_path = get_model_lib_output_path(library_name, abi)
-        mace_check(os.path.exists(model_lib_path),
-                   ModuleName.RUN,
-                   "You should convert model first.")
-    if model_data_format == ModelFormat.file:
-        mace_check(os.path.exists("%s/%s.data" %
-                                  (model_output_dir, model_name)),
-                   ModuleName.RUN,
-                   "You should convert model first.")
-
-
 ################################
 # convert
 ################################
@@ -883,13 +703,6 @@ def convert_model(configs, cl_mem_type):
            StringFormatter.block("Model %s converted" % model_name))


-def get_model_lib_output_path(library_name, abi):
-    lib_output_path = os.path.join(BUILD_OUTPUT_DIR, library_name,
-                                   MODEL_OUTPUT_DIR_NAME, abi,
-                                   "%s.a" % library_name)
-    return lib_output_path
-
-
 def build_model_lib(configs, address_sanitizer):
    MaceLogger.header(StringFormatter.block("Building model library"))

@@ -902,10 +715,11 @@ def build_model_lib(configs, address_sanitizer):
        library_out_dir = os.path.dirname(model_lib_output_path)
        if not os.path.exists(library_out_dir):
            os.makedirs(library_out_dir)
-
+        toolchain = infer_toolchain(target_abi)
        sh_commands.bazel_build(
            MODEL_LIB_TARGET,
            abi=target_abi,
+            toolchain=toolchain,
            hexagon_mode=hexagon_mode,
            enable_opencl=get_opencl_mode(configs),
            enable_quantize=get_quantize_mode(configs),
@@ -994,8 +808,8 @@ def report_run_statistics(stdout,
        f.write(data_str)


-def build_mace_run(configs, target_abi, enable_openmp, address_sanitizer,
-                   mace_lib_type):
+def build_mace_run(configs, target_abi, toolchain, enable_openmp,
+                   address_sanitizer, mace_lib_type):
    library_name = configs[YAMLKeyword.library_name]
    hexagon_mode = get_hexagon_mode(configs)

@@ -1019,6 +833,7 @@ def build_mace_run(configs, target_abi, enable_openmp, address_sanitizer,
    sh_commands.bazel_build(
        mace_run_target,
        abi=target_abi,
+        toolchain=toolchain,
        hexagon_mode=hexagon_mode,
        enable_openmp=enable_openmp,
        enable_opencl=get_opencl_mode(configs),
@@ -1031,8 +846,8 @@ def build_mace_run(configs, target_abi, enable_openmp, address_sanitizer,
                                       mace_lib_type == MACELibType.dynamic)


-def build_example(configs, target_abi, enable_openmp, address_sanitizer,
-                  mace_lib_type):
+def build_example(configs, target_abi, toolchain,
+                  enable_openmp, mace_lib_type):
    library_name = configs[YAMLKeyword.library_name]
    hexagon_mode = get_hexagon_mode(configs)

@@ -1042,6 +857,7 @@ def build_example(configs, target_abi, enable_openmp, address_sanitizer,
    os.makedirs(build_tmp_binary_dir)

    symbol_hidden = True
+
    libmace_target = LIBMACE_STATIC_TARGET
    if mace_lib_type == MACELibType.dynamic:
        symbol_hidden = False
@@ -1049,11 +865,12 @@ def build_example(configs, target_abi, enable_openmp, address_sanitizer,

    sh_commands.bazel_build(libmace_target,
                            abi=target_abi,
+                            toolchain=toolchain,
                            enable_openmp=enable_openmp,
                            enable_opencl=get_opencl_mode(configs),
                            enable_quantize=get_quantize_mode(configs),
                            hexagon_mode=hexagon_mode,
-                            address_sanitizer=address_sanitizer,
+                            address_sanitizer=flags.address_sanitizer,
                            symbol_hidden=symbol_hidden)

    if os.path.exists(LIB_CODEGEN_DIR):
@@ -1079,11 +896,12 @@ def build_example(configs, target_abi, enable_openmp, address_sanitizer,

    sh_commands.bazel_build(example_target,
                            abi=target_abi,
+                            toolchain=toolchain,
                            enable_openmp=enable_openmp,
                            enable_opencl=get_opencl_mode(configs),
                            enable_quantize=get_quantize_mode(configs),
                            hexagon_mode=hexagon_mode,
-                            address_sanitizer=address_sanitizer,
+                            address_sanitizer=flags.address_sanitizer,
                            extra_args=build_arg)

    target_bin = "/".join(sh_commands.bazel_target_to_bin(example_target))
@@ -1092,296 +910,6 @@ def build_example(configs, target_abi, enable_openmp, address_sanitizer,
        sh.rm("-rf", LIB_CODEGEN_DIR)


-def tuning(library_name, model_name, model_config,
-           model_graph_format, model_data_format,
-           target_abi, target_soc, serial_num,
-           mace_lib_type):
-    six.print_('* Tuning, it may take some time...')
-
-    build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi)
-    mace_run_name = MACE_RUN_STATIC_NAME
-    link_dynamic = False
-    if mace_lib_type == MACELibType.dynamic:
-        mace_run_name = MACE_RUN_DYNAMIC_NAME
-        link_dynamic = True
-
-    embed_model_data = model_data_format == ModelFormat.code
-
-    model_output_base_dir, model_output_dir, mace_model_dir = \
-        get_build_model_dirs(library_name, model_name, target_abi,
-                             target_soc, serial_num,
-                             model_config[YAMLKeyword.model_file_path])
-
-    # build for specified soc
-    sh_commands.clear_phone_data_dir(serial_num, PHONE_DATA_DIR)
-
-    subgraphs = model_config[YAMLKeyword.subgraphs]
-    # generate input data
-    sh_commands.gen_random_input(
-        model_output_dir,
-        subgraphs[0][YAMLKeyword.input_tensors],
-        subgraphs[0][YAMLKeyword.input_shapes],
-        subgraphs[0][YAMLKeyword.validation_inputs_data],
-        input_ranges=subgraphs[0][YAMLKeyword.input_ranges],
-        input_data_types=subgraphs[0][YAMLKeyword.input_data_types])
-
-    sh_commands.tuning_run(
-        abi=target_abi,
-        serialno=serial_num,
-        target_dir=build_tmp_binary_dir,
-        target_name=mace_run_name,
-        vlog_level=0,
-        embed_model_data=embed_model_data,
-        model_output_dir=model_output_dir,
-        input_nodes=subgraphs[0][YAMLKeyword.input_tensors],
-        output_nodes=subgraphs[0][YAMLKeyword.output_tensors],
-        input_shapes=subgraphs[0][YAMLKeyword.input_shapes],
-        output_shapes=subgraphs[0][YAMLKeyword.output_shapes],
-        mace_model_dir=mace_model_dir,
-        model_tag=model_name,
-        device_type=DeviceType.GPU,
-        running_round=0,
-        restart_round=1,
-        limit_opencl_kernel_time=model_config[YAMLKeyword.limit_opencl_kernel_time],  # noqa
-        tuning=True,
-        out_of_range_check=False,
-        phone_data_dir=PHONE_DATA_DIR,
-        model_graph_format=model_graph_format,
-        opencl_binary_file="",
-        opencl_parameter_file="",
-        libmace_dynamic_library_path=LIBMACE_DYNAMIC_PATH,
-        link_dynamic=link_dynamic,
-    )
-    # pull opencl binary
-    sh_commands.pull_file_from_device(
-        serial_num,
-        DEVICE_INTERIOR_DIR,
-        CL_COMPILED_BINARY_FILE_NAME,
-        "%s/%s" % (model_output_dir, BUILD_TMP_OPENCL_BIN_DIR))
-
-    # pull opencl parameter
-    sh_commands.pull_file_from_device(
-        serial_num,
-        PHONE_DATA_DIR,
-        CL_TUNED_PARAMETER_FILE_NAME,
-        "%s/%s" % (model_output_dir, BUILD_TMP_OPENCL_BIN_DIR))
-
-    six.print_('Tuning done\n')
-
-
-def run_specific_target(flags, configs, target_abi,
-                        target_soc, serial_num):
-    library_name = configs[YAMLKeyword.library_name]
-    mace_lib_type = flags.mace_lib_type
-    embed_model_data = \
-        configs[YAMLKeyword.model_data_format] == ModelFormat.code
-    build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi)
-
-    # get target name for run
-    if flags.example:
-        if mace_lib_type == MACELibType.static:
-            target_name = EXAMPLE_STATIC_NAME
-        else:
-            target_name = EXAMPLE_DYNAMIC_NAME
-    else:
-        if mace_lib_type == MACELibType.static:
-            target_name = MACE_RUN_STATIC_NAME
-        else:
-            target_name = MACE_RUN_DYNAMIC_NAME
-
-    link_dynamic = mace_lib_type == MACELibType.dynamic
-    model_output_dirs = []
-
-    for model_name in configs[YAMLKeyword.models]:
-        check_model_converted(library_name, model_name,
-                              configs[YAMLKeyword.model_graph_format],
-                              configs[YAMLKeyword.model_data_format],
-                              target_abi)
-        if target_abi == ABIType.host:
-            device_name = ABIType.host
-        else:
-            device_name = \
-                sh_commands.adb_get_device_name_by_serialno(serial_num)
-            sh_commands.clear_phone_data_dir(serial_num, PHONE_DATA_DIR)
-
-        MaceLogger.header(
-            StringFormatter.block(
-                "Run model %s on %s" % (model_name, device_name)))
-
-        model_config = configs[YAMLKeyword.models][model_name]
-        model_runtime = model_config[YAMLKeyword.runtime]
-        subgraphs = model_config[YAMLKeyword.subgraphs]
-
-        if not configs[YAMLKeyword.target_socs] or target_abi == ABIType.host:
-            model_output_base_dir, model_output_dir, mace_model_dir = \
-                get_build_model_dirs(library_name, model_name, target_abi,
-                                     None, None,
-                                     model_config[YAMLKeyword.model_file_path])
-        else:
-            model_output_base_dir, model_output_dir, mace_model_dir = \
-                get_build_model_dirs(library_name, model_name, target_abi,
-                                     target_soc, serial_num,
-                                     model_config[YAMLKeyword.model_file_path])
-        # clear temp model output dir
-        if os.path.exists(model_output_dir):
-            sh.rm("-rf", model_output_dir)
-        os.makedirs(model_output_dir)
-
-        is_tuned = False
-        model_opencl_output_bin_path = ""
-        model_opencl_parameter_path = ""
-        # tuning for specified soc
-        if not flags.address_sanitizer \
-                and not flags.example \
-                and target_abi != ABIType.host \
-                and configs[YAMLKeyword.target_socs] \
-                and target_soc \
-                and model_runtime in [RuntimeType.gpu, RuntimeType.cpu_gpu] \
-                and not flags.disable_tuning:
-            tuning(library_name, model_name, model_config,
-                   configs[YAMLKeyword.model_graph_format],
-                   configs[YAMLKeyword.model_data_format],
-                   target_abi, target_soc, serial_num,
-                   mace_lib_type)
-            model_output_dirs.append(model_output_dir)
-            model_opencl_output_bin_path =\
-                "%s/%s/%s" % (model_output_dir,
-                              BUILD_TMP_OPENCL_BIN_DIR,
-                              CL_COMPILED_BINARY_FILE_NAME)
-            model_opencl_parameter_path = \
-                "%s/%s/%s" % (model_output_dir,
-                              BUILD_TMP_OPENCL_BIN_DIR,
-                              CL_TUNED_PARAMETER_FILE_NAME)
-            sh_commands.clear_phone_data_dir(serial_num, PHONE_DATA_DIR)
-            is_tuned = True
-        elif target_abi != ABIType.host and target_soc:
-            model_opencl_output_bin_path = get_opencl_binary_output_path(
-                library_name, target_abi, target_soc, serial_num
-            )
-            model_opencl_parameter_path = get_opencl_parameter_output_path(
-                library_name, target_abi, target_soc, serial_num
-            )
-
-        # generate input data
-        sh_commands.gen_random_input(
-            model_output_dir,
-            subgraphs[0][YAMLKeyword.input_tensors],
-            subgraphs[0][YAMLKeyword.input_shapes],
-            subgraphs[0][YAMLKeyword.validation_inputs_data],
-            input_ranges=subgraphs[0][YAMLKeyword.input_ranges],
-            input_data_types=subgraphs[0][YAMLKeyword.input_data_types])
-
-        runtime_list = []
-        if target_abi == ABIType.host:
-            runtime_list.extend([RuntimeType.cpu])
-        elif model_runtime == RuntimeType.cpu_gpu:
-            runtime_list.extend([RuntimeType.cpu, RuntimeType.gpu])
-        else:
-            runtime_list.extend([model_runtime])
-        for runtime in runtime_list:
-            device_type = parse_device_type(runtime)
-            # run for specified soc
-            if not subgraphs[0][YAMLKeyword.check_tensors]:
-                output_nodes = subgraphs[0][YAMLKeyword.output_tensors]
-                output_shapes = subgraphs[0][YAMLKeyword.output_shapes]
-            else:
-                output_nodes = subgraphs[0][YAMLKeyword.check_tensors]
-                output_shapes = subgraphs[0][YAMLKeyword.check_shapes]
-            run_output = sh_commands.tuning_run(
-                abi=target_abi,
-                serialno=serial_num,
-                target_dir=build_tmp_binary_dir,
-                target_name=target_name,
-                vlog_level=flags.vlog_level,
-                embed_model_data=embed_model_data,
-                model_output_dir=model_output_dir,
-                input_nodes=subgraphs[0][YAMLKeyword.input_tensors],
-                output_nodes=output_nodes,
-                input_shapes=subgraphs[0][YAMLKeyword.input_shapes],
-                output_shapes=output_shapes,
-                mace_model_dir=mace_model_dir,
-                model_tag=model_name,
-                device_type=device_type,
-                running_round=flags.round,
-                restart_round=flags.restart_round,
-                limit_opencl_kernel_time=model_config[YAMLKeyword.limit_opencl_kernel_time],  # noqa
-                tuning=False,
-                out_of_range_check=flags.gpu_out_of_range_check,
-                phone_data_dir=PHONE_DATA_DIR,
-                model_graph_format=configs[YAMLKeyword.model_graph_format],
-                omp_num_threads=flags.omp_num_threads,
-                cpu_affinity_policy=flags.cpu_affinity_policy,
-                gpu_perf_hint=flags.gpu_perf_hint,
-                gpu_priority_hint=flags.gpu_priority_hint,
-                input_dir=flags.input_dir,
-                output_dir=flags.output_dir,
-                runtime_failure_ratio=flags.runtime_failure_ratio,
-                address_sanitizer=flags.address_sanitizer,
-                opencl_binary_file=model_opencl_output_bin_path,
-                opencl_parameter_file=model_opencl_parameter_path,
-                libmace_dynamic_library_path=LIBMACE_DYNAMIC_PATH,
-                link_dynamic=link_dynamic,
-                quantize_stat=flags.quantize_stat,
-            )
-            if flags.validate:
-                model_file_path, weight_file_path = get_model_files(
-                    model_config[YAMLKeyword.model_file_path],
-                    model_config[YAMLKeyword.model_sha256_checksum],
-                    BUILD_DOWNLOADS_DIR,
-                    model_config[YAMLKeyword.weight_file_path],
-                    model_config[YAMLKeyword.weight_sha256_checksum])
-
-                validate_type = device_type
-                if model_config[YAMLKeyword.quantize] == 1 \
-                        and device_type == DeviceType.CPU:
-                    validate_type = device_type + "_QUANTIZE"
-
-                sh_commands.validate_model(
-                    abi=target_abi,
-                    serialno=serial_num,
-                    model_file_path=model_file_path,
-                    weight_file_path=weight_file_path,
-                    platform=model_config[YAMLKeyword.platform],
-                    device_type=device_type,
-                    input_nodes=subgraphs[0][YAMLKeyword.input_tensors],
-                    output_nodes=output_nodes,
-                    input_shapes=subgraphs[0][YAMLKeyword.input_shapes],
-                    output_shapes=output_shapes,
-                    model_output_dir=model_output_dir,
-                    phone_data_dir=PHONE_DATA_DIR,
-                    input_data_types=subgraphs[0][YAMLKeyword.input_data_types],  # noqa
-                    caffe_env=flags.caffe_env,
-                    validation_threshold=subgraphs[0][YAMLKeyword.validation_threshold][validate_type])  # noqa
-            if flags.report and flags.round > 0:
-                tuned = is_tuned and device_type == DeviceType.GPU
-                report_run_statistics(
-                    run_output, target_abi, serial_num,
-                    model_name, device_type, flags.report_dir,
-                    tuned)
-
-    if model_output_dirs:
-        opencl_output_bin_path = get_opencl_binary_output_path(
-            library_name, target_abi, target_soc, serial_num
-        )
-        opencl_parameter_bin_path = get_opencl_parameter_output_path(
-            library_name, target_abi, target_soc, serial_num
-        )
-        # clear opencl output dir
-        if os.path.exists(opencl_output_bin_path):
-            sh.rm('-rf', opencl_output_bin_path)
-        if os.path.exists(opencl_parameter_bin_path):
-            sh.rm('-rf', opencl_parameter_bin_path)
-
-        # merge all models' OpenCL binaries together
-        sh_commands.merge_opencl_binaries(
-            model_output_dirs, CL_COMPILED_BINARY_FILE_NAME,
-            opencl_output_bin_path)
-        # merge all models' OpenCL parameters together
-        sh_commands.merge_opencl_parameters(
-            model_output_dirs, CL_TUNED_PARAMETER_FILE_NAME,
-            opencl_parameter_bin_path)
-
-
 def print_package_summary(package_path):
    title = "Library"
    header = ["key", "value"]
@@ -1398,36 +926,37 @@ def run_mace(flags):
    clear_build_dirs(configs[YAMLKeyword.library_name])

    target_socs = configs[YAMLKeyword.target_socs]
-    if not target_socs or ALL_SOC_TAG in target_socs:
-        target_socs = sh_commands.adb_get_all_socs()
-
+    device_list = DeviceManager.list_devices(flags.device_yml)
+    if target_socs and ALL_SOC_TAG not in target_socs:
+        device_list = [dev for dev in device_list
+                       if dev[YAMLKeyword.target_socs].lower() in target_socs]
    for target_abi in configs[YAMLKeyword.target_abis]:
        # build target
-        if flags.example:
-            build_example(configs, target_abi,
-                          not flags.disable_openmp,
-                          flags.address_sanitizer,
-                          flags.mace_lib_type)
-        else:
-            build_mace_run(configs, target_abi,
-                           not flags.disable_openmp,
-                           flags.address_sanitizer,
-                           flags.mace_lib_type)
-
-        # run
-        if target_abi == ABIType.host:
-            run_specific_target(flags, configs, target_abi, None, None)
-        else:
-            for target_soc in target_socs:
-                serial_nums = \
-                    sh_commands.get_target_socs_serialnos([target_soc])
-                mace_check(serial_nums,
-                           ModuleName.RUN,
-                           'There is no device with soc: ' + target_soc)
-                for serial_num in serial_nums:
-                    with sh_commands.device_lock(serial_num):
-                        run_specific_target(flags, configs, target_abi,
-                                            target_soc, serial_num)
+        for dev in device_list:
+            if target_abi in dev[YAMLKeyword.target_abis]:
+                # get toolchain
+                toolchain = infer_toolchain(target_abi)
+                if flags.example:
+                    build_example(configs,
+                                  target_abi,
+                                  toolchain,
+                                  not flags.disable_openmp,
+                                  flags.mace_lib_type)
+                else:
+                    build_mace_run(configs,
+                                   target_abi,
+                                   toolchain,
+                                   not flags.disable_openmp,
+                                   flags.address_sanitizer,
+                                   flags.mace_lib_type)
+                # run
+                device = DeviceWrapper(dev)
+                with device.lock():
+                    device.run_specify_abi(flags, configs, target_abi)
+            elif dev[YAMLKeyword.device_name] != SystemType.host:
+                six.print_('The device with soc %s do not support abi %s' %
+                           (dev[YAMLKeyword.target_socs], target_abi),
+                           file=sys.stderr)

    # package the output files
    package_path = sh_commands.packaging_lib(BUILD_OUTPUT_DIR,
@@ -1438,7 +967,11 @@ def run_mace(flags):
 ################################
 #  benchmark model
 ################################
-def build_benchmark_model(configs, target_abi, enable_openmp, mace_lib_type):
+def build_benchmark_model(configs,
+                          target_abi,
+                          toolchain,
+                          enable_openmp,
+                          mace_lib_type):
    library_name = configs[YAMLKeyword.library_name]
    hexagon_mode = get_hexagon_mode(configs)

@@ -1459,6 +992,7 @@ def build_benchmark_model(configs, target_abi, enable_openmp, mace_lib_type):

    sh_commands.bazel_build(benchmark_target,
                            abi=target_abi,
+                            toolchain=toolchain,
                            enable_openmp=enable_openmp,
                            enable_opencl=get_opencl_mode(configs),
                            enable_quantize=get_quantize_mode(configs),
@@ -1475,133 +1009,34 @@ def build_benchmark_model(configs, target_abi, enable_openmp, mace_lib_type):
    sh.cp("-f", target_bin, build_tmp_binary_dir)


-def bm_specific_target(flags, configs, target_abi, target_soc, serial_num):
-    library_name = configs[YAMLKeyword.library_name]
-    embed_model_data = \
-        configs[YAMLKeyword.model_data_format] == ModelFormat.code
-    opencl_output_bin_path = ""
-    opencl_parameter_path = ""
-    link_dynamic = flags.mace_lib_type == MACELibType.dynamic
-
-    if link_dynamic:
-        bm_model_binary_name = BM_MODEL_DYNAMIC_NAME
-    else:
-        bm_model_binary_name = BM_MODEL_STATIC_NAME
-    build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi)
-
-    if configs[YAMLKeyword.target_socs] and target_abi != ABIType.host:
-        opencl_output_bin_path = get_opencl_binary_output_path(
-            library_name, target_abi, target_soc, serial_num
-        )
-        opencl_parameter_path = get_opencl_parameter_output_path(
-            library_name, target_abi, target_soc, serial_num
-        )
-
-    for model_name in configs[YAMLKeyword.models]:
-        check_model_converted(library_name, model_name,
-                              configs[YAMLKeyword.model_graph_format],
-                              configs[YAMLKeyword.model_data_format],
-                              target_abi)
-        if target_abi == ABIType.host:
-            device_name = ABIType.host
-        else:
-            device_name = \
-                sh_commands.adb_get_device_name_by_serialno(serial_num)
-        MaceLogger.header(
-            StringFormatter.block(
-                "Benchmark model %s on %s" % (model_name, device_name)))
-        model_config = configs[YAMLKeyword.models][model_name]
-        model_runtime = model_config[YAMLKeyword.runtime]
-        subgraphs = model_config[YAMLKeyword.subgraphs]
-
-        if not configs[YAMLKeyword.target_socs] or target_abi == ABIType.host:
-            model_output_base_dir, model_output_dir, mace_model_dir = \
-                get_build_model_dirs(library_name, model_name, target_abi,
-                                     None, None,
-                                     model_config[YAMLKeyword.model_file_path])
-        else:
-            model_output_base_dir, model_output_dir, mace_model_dir = \
-                get_build_model_dirs(library_name, model_name, target_abi,
-                                     target_soc, serial_num,
-                                     model_config[YAMLKeyword.model_file_path])
-        if os.path.exists(model_output_dir):
-            sh.rm("-rf", model_output_dir)
-        os.makedirs(model_output_dir)
-
-        if target_abi != ABIType.host:
-            sh_commands.clear_phone_data_dir(serial_num, PHONE_DATA_DIR)
-
-        sh_commands.gen_random_input(
-            model_output_dir,
-            subgraphs[0][YAMLKeyword.input_tensors],
-            subgraphs[0][YAMLKeyword.input_shapes],
-            subgraphs[0][YAMLKeyword.validation_inputs_data],
-            input_ranges=subgraphs[0][YAMLKeyword.input_ranges],
-            input_data_types=subgraphs[0][YAMLKeyword.input_data_types])
-        runtime_list = []
-        if target_abi == ABIType.host:
-            runtime_list.extend([RuntimeType.cpu])
-        elif model_runtime == RuntimeType.cpu_gpu:
-            runtime_list.extend([RuntimeType.cpu, RuntimeType.gpu])
-        else:
-            runtime_list.extend([model_runtime])
-        for runtime in runtime_list:
-            device_type = parse_device_type(runtime)
-            sh_commands.benchmark_model(
-                abi=target_abi,
-                serialno=serial_num,
-                benchmark_binary_dir=build_tmp_binary_dir,
-                benchmark_binary_name=bm_model_binary_name,
-                vlog_level=0,
-                embed_model_data=embed_model_data,
-                model_output_dir=model_output_dir,
-                input_nodes=subgraphs[0][YAMLKeyword.input_tensors],
-                output_nodes=subgraphs[0][YAMLKeyword.output_tensors],
-                input_shapes=subgraphs[0][YAMLKeyword.input_shapes],
-                output_shapes=subgraphs[0][YAMLKeyword.output_shapes],
-                mace_model_dir=mace_model_dir,
-                model_tag=model_name,
-                device_type=device_type,
-                phone_data_dir=PHONE_DATA_DIR,
-                model_graph_format=configs[YAMLKeyword.model_graph_format],
-                omp_num_threads=flags.omp_num_threads,
-                cpu_affinity_policy=flags.cpu_affinity_policy,
-                gpu_perf_hint=flags.gpu_perf_hint,
-                gpu_priority_hint=flags.gpu_priority_hint,
-                opencl_binary_file=opencl_output_bin_path,
-                opencl_parameter_file=opencl_parameter_path,
-                libmace_dynamic_library_path=LIBMACE_DYNAMIC_PATH,
-                link_dynamic=link_dynamic)
-
-
 def benchmark_model(flags):
    configs = format_model_config(flags)

    clear_build_dirs(configs[YAMLKeyword.library_name])

    target_socs = configs[YAMLKeyword.target_socs]
-    if not target_socs or ALL_SOC_TAG in target_socs:
-        target_socs = sh_commands.adb_get_all_socs()
+    device_list = DeviceManager.list_devices(flags.device_yml)
+    if target_socs and ALL_SOC_TAG not in target_socs:
+        device_list = [dev for dev in device_list
+                       if dev[YAMLKeyword.target_socs].lower() in target_socs]

    for target_abi in configs[YAMLKeyword.target_abis]:
        # build benchmark_model binary
-        build_benchmark_model(configs, target_abi,
-                              not flags.disable_openmp,
-                              flags.mace_lib_type)
-
-        if target_abi == ABIType.host:
-            bm_specific_target(flags, configs, target_abi, None, None)
-        else:
-            for target_soc in target_socs:
-                serial_nums = \
-                    sh_commands.get_target_socs_serialnos([target_soc])
-                mace_check(serial_nums,
-                           ModuleName.BENCHMARK,
-                           'There is no device with soc: ' + target_soc)
-                for serial_num in serial_nums:
-                    with sh_commands.device_lock(serial_num):
-                        bm_specific_target(flags, configs, target_abi,
-                                           target_soc, serial_num)
+        for dev in device_list:
+            if target_abi in dev[YAMLKeyword.target_abis]:
+                toolchain = infer_toolchain(target_abi)
+                build_benchmark_model(configs,
+                                      target_abi,
+                                      toolchain,
+                                      not flags.disable_openmp,
+                                      flags.mace_lib_type)
+                device = DeviceWrapper(dev)
+                with device.lock():
+                    device.bm_specific_target(flags, configs, target_abi)
+            else:
+                six.print_('There is no abi %s with soc %s' %
+                           (target_abi, dev[YAMLKeyword.target_socs]),
+                           file=sys.stderr)


 ################################
@@ -1698,7 +1133,12 @@ def parse_args():
        type=int,
        default=DefaultValues.gpu_priority_hint,
        help="0:DEFAULT/1:LOW/2:NORMAL/3:HIGH")
-
+    run_bm_parent_parser.add_argument(
+        "--device_yml",
+        type=str,
+        default='',
+        help='embedded linux device config yml file'
+    )
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers()
    convert = subparsers.add_parser(

--- a/tools/device.py
+++ b/tools/device.py
+# Copyright 2018 Xiaomi, Inc.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+import socket
+import subprocess
+import time
+
+import six
+import sh
+import yaml
+
+import common
+from common import *
+
+import sh_commands
+
+
+class DeviceWrapper:
+    allow_scheme = ('ssh', 'adb')
+
+    def __init__(self, device_dict):
+        """
+        Build a wrapper around one device description.
+
+        :param device_dict: key-value mapping describing the device. Every
+                       key must be an attribute name of YAMLKeyword; the
+                       entries are copied onto the instance as attributes
+                       (this method reads system, address and username).
+        :raises KeyError: when device_dict holds a key that YAMLKeyword
+                       does not define.
+        :raises sh.ErrorReturnCode: when the ssh probe of an arm_linux
+                       device fails.
+        """
+        known_keys = set(YAMLKeyword.__dict__.keys())
+        unknown_keys = set(device_dict.keys()) - known_keys
+        if unknown_keys:
+            six.print_('Wrong key detected: ')
+            six.print_(unknown_keys)
+            raise KeyError(str(unknown_keys))
+
+        # Expose every entry of the description as an instance attribute.
+        self.__dict__.update(device_dict)
+
+        if self.system == SystemType.android:
+            base_dir = PHONE_DATA_DIR
+        elif self.system == SystemType.arm_linux:
+            # Probe the ssh login first so a bad address or credential
+            # surfaces here rather than in the middle of a run.
+            login = '{}@{}'.format(self.username, self.address)
+            try:
+                sh.ssh('-q', login, 'exit')
+            except sh.ErrorReturnCode as e:
+                six.print_('device connect failed, '
+                           'please check your authentication')
+                raise e
+            base_dir = DEVICE_DATA_DIR
+        else:
+            # Any other system type gets no data_dir / interior_dir.
+            return
+
+        self.data_dir = base_dir
+        self.interior_dir = self.data_dir + '/interior'
+
+    ##################
+    #  internal use  #
+    ##################
+
+    def exec_command(self, command, *args, **kwargs):
+        """
+        Run a command on the device and hand its result back to the caller.
+
+        Android devices are reached through ``adb -s <address> shell``;
+        arm_linux devices through ``ssh <username>@<address>``.
+
+        :param command: command (or full command line) to run on the device.
+        :param args: extra positional arguments forwarded to the sh call.
+        :param kwargs: keyword arguments forwarded to the sh call
+                       (for example ``_fg=True``).
+        :return: the value returned by the underlying ``sh.adb`` / ``sh.ssh``
+                 call, so callers can inspect the command's output; None
+                 when the system type is neither android nor arm_linux.
+        """
+        if self.system == SystemType.android:
+            return sh.adb('-s', self.address, 'shell',
+                          command, *args, **kwargs)
+        elif self.system == SystemType.arm_linux:
+            return sh.ssh('{}@{}'.format(self.username, self.address),
+                          command, *args, **kwargs)
+        # No remote transport is defined here for other system types.
+        return None
+
+    #####################
+    #  public interface #
+    #####################
+
+    def is_lock(self):
+        """
+        Ask sh_commands whether this device is currently locked.
+
+        :return: the result of ``sh_commands.is_device_locked`` for this
+                 device's address.
+        """
+        locked = sh_commands.is_device_locked(self.address)
+        return locked
+
+    def lock(self):
+        """
+        Obtain the lock object for this device.
+
+        :return: the result of ``sh_commands.device_lock`` for this device's
+                 address; callers use it in a ``with`` statement.
+        """
+        device_lock = sh_commands.device_lock(self.address)
+        return device_lock
+
+    def clear_data_dir(self):
+        """
+        Clear the working data directory on the device.
+
+        Android delegates to ``sh_commands.clear_phone_data_dir`` with
+        PHONE_DATA_DIR; arm_linux removes ``self.data_dir`` with ``rm -rf``.
+        Other system types are left untouched.
+        """
+        if self.system == SystemType.android:
+            sh_commands.clear_phone_data_dir(self.address, PHONE_DATA_DIR)
+            return
+        if self.system == SystemType.arm_linux:
+            wipe_cmd = 'rm -rf {}'.format(self.data_dir)
+            self.exec_command(wipe_cmd)
+
+    def pull_from_data_dir(self, filename, dst_path):
+        """
+        Fetch ``filename`` from the device's data directory.
+
+        :param filename: name of the file inside the data directory.
+        :param dst_path: local directory that receives the file.
+
+        The remote directory is PHONE_DATA_DIR on android and
+        DEVICE_DATA_DIR on arm_linux; nothing is pulled for other systems.
+        """
+        if self.system == SystemType.android:
+            remote_dir = PHONE_DATA_DIR
+        elif self.system == SystemType.arm_linux:
+            remote_dir = DEVICE_DATA_DIR
+        else:
+            return
+        self.pull(remote_dir, filename, dst_path)
+
+    def create_internal_storage_dir(self):
+        """
+        Create the ``interior/`` sub-directory under the device data dir.
+
+        :return: the path ``<data_dir>/interior/``; it is returned even when
+                 the system type is neither android nor arm_linux and no
+                 directory was created.
+        """
+        storage_dir = '{}/interior/'.format(self.data_dir)
+        on_android = self.system == SystemType.android
+        if on_android:
+            sh_commands.create_internal_storage_dir(
+                self.address, storage_dir)
+        elif self.system == SystemType.arm_linux:
+            mkdir_cmd = 'mkdir -p {}'.format(storage_dir)
+            self.exec_command(mkdir_cmd)
+        return storage_dir
+
+    def rm(self, file):
+        """
+        Force-remove ``file`` (recursively) on the device.
+
+        :param file: path on the device to delete with ``rm -rf``.
+
+        The command runs with ``_fg=True`` on both android (adb shell) and
+        arm_linux (via exec_command); other system types are ignored.
+        """
+        if self.system == SystemType.android:
+            adb_args = ['-s', self.address, 'shell', 'rm', '-rf', file]
+            sh.adb(*adb_args, _fg=True)
+            return
+        if self.system == SystemType.arm_linux:
+            remove_cmd = 'rm -rf {}'.format(file)
+            self.exec_command(remove_cmd, _fg=True)
+
+    def push(self, src_path, dst_path):
+        """
+        Copy a local path onto the device.
+
+        :param src_path: local path to send; its existence is verified
+                         through mace_check before anything is transferred.
+        :param dst_path: destination path on the device.
+        :raises sh.ErrorReturnCode_1: re-raised after logging when scp to an
+                         arm_linux device exits with status 1.
+        """
+        mace_check(os.path.exists(src_path), "Device",
+                   '{} not found'.format(src_path))
+        six.print_("Push %s to %s" % (src_path, dst_path))
+
+        if self.system == SystemType.android:
+            sh_commands.adb_push(src_path, dst_path, self.address)
+            return
+        if self.system != SystemType.arm_linux:
+            # No transfer mechanism is defined here for other systems.
+            return
+
+        remote_target = '{}@{}:{}'.format(self.username,
+                                          self.address,
+                                          dst_path)
+        try:
+            sh.scp(src_path, remote_target)
+        except sh.ErrorReturnCode_1 as e:
+            six.print_('Push Failed !', e, file=sys.stderr)
+            raise e
+
+    def pull(self, src_path, file_name, dst_path='.'):
+        """
+        Copy ``file_name`` from ``src_path`` on the device into ``dst_path``.
+
+        :param src_path: directory on the device that holds the file.
+        :param file_name: name of the entry to fetch from ``src_path``.
+        :param dst_path: local directory to receive it (created when
+                         missing); defaults to the current directory.
+        :raises sh.ErrorReturnCode_1: re-raised after logging when scp from
+                         an arm_linux device exits with status 1.
+        """
+        if not os.path.exists(dst_path):
+            sh.mkdir("-p", dst_path)
+        src_file = "%s/%s" % (src_path, file_name)
+        dst_file = "%s/%s" % (dst_path, file_name)
+        # Drop any stale local copy so the transfer never mixes old and new.
+        if os.path.exists(dst_file):
+            sh.rm('-f', dst_file)
+        # Report the actual file paths being transferred, not only the
+        # directories that contain them.
+        six.print_("Pull %s to %s" % (src_file, dst_file))
+        if self.system == SystemType.android:
+            sh_commands.adb_pull(
+                src_file, dst_file, self.address)
+        elif self.system == SystemType.arm_linux:
+            try:
+                sh.scp('-r', '%s@%s:%s' % (self.username,
+                                           self.address,
+                                           src_file),
+                       dst_file)
+            except sh.ErrorReturnCode_1 as e:
+                # Include the error detail, matching what push() reports.
+                six.print_("Pull Failed !", e, file=sys.stderr)
+                raise e
+
+    def tuning_run(self,
+                   abi,
+                   target_dir,
+                   target_name,
+                   vlog_level,
+                   embed_model_data,
+                   model_output_dir,
+                   input_nodes,
+                   output_nodes,
+                   input_shapes,
+                   output_shapes,
+                   mace_model_dir,
+                   model_tag,
+                   device_type,
+                   running_round,
+                   restart_round,
+                   limit_opencl_kernel_time,
+                   tuning,
+                   out_of_range_check,
+                   model_graph_format,
+                   opencl_binary_file,
+                   opencl_parameter_file,
+                   libmace_dynamic_library_path,
+                   omp_num_threads=-1,
+                   cpu_affinity_policy=1,
+                   gpu_perf_hint=3,
+                   gpu_priority_hint=3,
+                   input_file_name='model_input',
+                   output_file_name='model_out',
+                   runtime_failure_ratio=0.0,
+                   address_sanitizer=False,
+                   link_dynamic=False
+                   ):
+        six.print_("* Run '%s' with round=%s, restart_round=%s, tuning=%s, "
+                   "out_of_range_check=%s, omp_num_threads=%s, "
+                   "cpu_affinity_policy=%s, gpu_perf_hint=%s, "
+                   "gpu_priority_hint=%s" %
+                   (model_tag, running_round, restart_round, str(tuning),
+                    str(out_of_range_check), omp_num_threads,
+                    cpu_affinity_policy, gpu_perf_hint, gpu_priority_hint))
+        mace_model_path = ""
+        if model_graph_format == ModelFormat.file:
+            mace_model_path = "%s/%s.pb" % (mace_model_dir, model_tag)
+        if self.system == SystemType.host:
+            libmace_dynamic_lib_path = \
+                os.path.dirname(libmace_dynamic_library_path)
+            p = subprocess.Popen(
+                [
+                    "env",
+                    "LD_LIBRARY_PATH=%s" % libmace_dynamic_lib_path,
+                    "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
+                    "MACE_RUNTIME_FAILURE_RATIO=%f" % runtime_failure_ratio,
+                    "%s/%s" % (target_dir, target_name),
+                    "--model_name=%s" % model_tag,
+                    "--input_node=%s" % ",".join(input_nodes),
+                    "--output_node=%s" % ",".join(output_nodes),
+                    "--input_shape=%s" % ":".join(input_shapes),
+                    "--output_shape=%s" % ":".join(output_shapes),
+                    "--input_file=%s/%s" % (model_output_dir,
+                                            input_file_name),
+                    "--output_file=%s/%s" % (model_output_dir,
+                                             output_file_name),
+                    "--model_data_file=%s/%s.data" % (mace_model_dir,
+                                                      model_tag),
+                    "--device=%s" % device_type,
+                    "--round=%s" % running_round,
+                    "--restart_round=%s" % restart_round,
+                    "--omp_num_threads=%s" % omp_num_threads,
+                    "--cpu_affinity_policy=%s" % cpu_affinity_policy,
+                    "--gpu_perf_hint=%s" % gpu_perf_hint,
+                    "--gpu_priority_hint=%s" % gpu_priority_hint,
+                    "--model_file=%s" % mace_model_path,
+                ],
+                stderr=subprocess.PIPE,
+                stdout=subprocess.PIPE)
+            out, err = p.communicate()
+            self.stdout = err + out
+            six.print_(self.stdout)
+            six.print_("Running finished!\n")
+        elif self.system in [SystemType.android, SystemType.arm_linux]:
+            self.rm(self.data_dir)
+            self.exec_command('mkdir -p {}'.format(self.data_dir))
+            internal_storage_dir = self.create_internal_storage_dir()
+
+            for input_name in input_nodes:
+                formatted_name = common.formatted_file_name(input_file_name,
+                                                            input_name)
+                self.push("%s/%s" % (model_output_dir, formatted_name),
+                          self.data_dir)
+            if self.system == SystemType.android and address_sanitizer:
+                self.push(sh_commands.find_asan_rt_library(abi),
+                          self.data_dir)
+
+            if not embed_model_data:
+                model_data_path = "%s/%s.data" % (mace_model_dir, model_tag)
+                mace_check(os.path.exists(model_data_path), "Device",
+                           'model data file not found,'
+                           ' please convert model first')
+                self.push(model_data_path, self.data_dir)
+
+            if device_type == common.DeviceType.GPU:
+                if os.path.exists(opencl_binary_file):
+                    self.push(opencl_binary_file, self.data_dir)
+                if os.path.exists(opencl_parameter_file):
+                    self.push(opencl_parameter_file, self.data_dir)
+
+            self.push("third_party/nnlib/libhexagon_controller.so",
+                      self.data_dir)
+
+            mace_model_phone_path = ""
+            if model_graph_format == ModelFormat.file:
+                mace_model_phone_path = "%s/%s.pb" % (self.data_dir,
+                                                      model_tag)
+                self.push(mace_model_path, mace_model_phone_path)
+            if link_dynamic:
+                self.push(libmace_dynamic_library_path, self.data_dir)
+                if self.system == SystemType.android:
+                    sh_commands.push_depended_so_libs(
+                        libmace_dynamic_library_path, abi, self.data_dir,
+                        self.address)
+            self.push("%s/%s" % (target_dir, target_name), self.data_dir)
+
+            stdout_buff = []
+            process_output = sh_commands.make_output_processor(stdout_buff)
+            cmd = [
+                "LD_LIBRARY_PATH=%s" % self.data_dir,
+                "MACE_TUNING=%s" % int(tuning),
+                "MACE_OUT_OF_RANGE_CHECK=%s" % int(out_of_range_check),
+                "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
+                "MACE_RUN_PARAMETER_PATH=%s/mace_run.config" % self.data_dir,
+                "MACE_INTERNAL_STORAGE_PATH=%s" % internal_storage_dir,
+                "MACE_LIMIT_OPENCL_KERNEL_TIME=%s" % limit_opencl_kernel_time,
+                "MACE_RUNTIME_FAILURE_RATIO=%f" % runtime_failure_ratio,
+            ]
+            if self.system == SystemType.android and address_sanitizer:
+                cmd.extend([
+                    "LD_PRELOAD=%s/%s" %
+                    (self.data_dir,
+                     sh_commands.asan_rt_library_names(abi))
+                ])
+            cmd.extend([
+                "%s/%s" % (self.data_dir, target_name),
+                "--model_name=%s" % model_tag,
+                "--input_node=%s" % ",".join(input_nodes),
+                "--output_node=%s" % ",".join(output_nodes),
+                "--input_shape=%s" % ":".join(input_shapes),
+                "--output_shape=%s" % ":".join(output_shapes),
+                "--input_file=%s/%s" % (self.data_dir, input_file_name),
+                "--output_file=%s/%s" % (self.data_dir, output_file_name),
+                "--model_data_file=%s/%s.data" % (self.data_dir, model_tag),
+                "--device=%s" % device_type,
+                "--round=%s" % running_round,
+                "--restart_round=%s" % restart_round,
+                "--omp_num_threads=%s" % omp_num_threads,
+                "--cpu_affinity_policy=%s" % cpu_affinity_policy,
+                "--gpu_perf_hint=%s" % gpu_perf_hint,
+                "--gpu_priority_hint=%s" % gpu_priority_hint,
+                "--model_file=%s" % mace_model_phone_path,
+                "--opencl_binary_file=%s/%s" %
+                (self.data_dir, os.path.basename(opencl_binary_file)),
+                "--opencl_parameter_file=%s/%s" %
+                (self.data_dir, os.path.basename(opencl_parameter_file)),
+            ])
+            cmd = ' '.join(cmd)
+            cmd_file_name = "%s-%s-%s" % ('cmd_file',
+                                          model_tag,
+                                          str(time.time()))
+            cmd_file = "%s/%s" % (self.data_dir, cmd_file_name)
+            tmp_cmd_file = "%s/%s" % ('/tmp', cmd_file_name)
+            with open(tmp_cmd_file, 'w') as file:
+                file.write(cmd)
+            self.push(tmp_cmd_file, cmd_file)
+            os.remove(tmp_cmd_file)
+            self.exec_command('sh {}'.format(cmd_file),
+                              _tty_in=True,
+                              _out=process_output,
+                              _err_to_out=True)
+            self.stdout = "".join(stdout_buff)
+            if not sh_commands.stdout_success(self.stdout):
+                common.MaceLogger.error("Mace Run", "Mace run failed.")
+
+            six.print_("Running finished!\n")
+        else:
+            six.print_('Unsupported system %s' % self.system, file=sys.stderr)
+            raise Exception('Wrong device')
+
+        return self.stdout
+
+    def tuning(self, library_name, model_name, model_config,
+               model_graph_format, model_data_format,
+               target_abi, mace_lib_type):
+        six.print_('* Tuning, it may take some time')
+        build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi)
+        mace_run_name = MACE_RUN_STATIC_NAME
+        link_dynamic = False
+        if mace_lib_type == MACELibType.dynamic:
+            mace_run_name = MACE_RUN_DYNAMIC_NAME
+            link_dynamic = True
+        embed_model_data = model_data_format == ModelFormat.code
+
+        # build for specified soc
+        # device_wrapper = DeviceWrapper(device)
+
+        model_output_base_dir, model_output_dir, mace_model_dir = \
+            get_build_model_dirs(
+                library_name, model_name, target_abi, self,
+                model_config[YAMLKeyword.model_file_path])
+
+        self.clear_data_dir()
+
+        subgraphs = model_config[YAMLKeyword.subgraphs]
+        # generate input data
+        sh_commands.gen_random_input(
+            model_output_dir,
+            subgraphs[0][YAMLKeyword.input_tensors],
+            subgraphs[0][YAMLKeyword.input_shapes],
+            subgraphs[0][YAMLKeyword.validation_inputs_data],
+            input_ranges=subgraphs[0][YAMLKeyword.input_ranges],
+            input_data_types=subgraphs[0][YAMLKeyword.input_data_types]
+        )
+
+        self.tuning_run(
+            abi=target_abi,
+            target_dir=build_tmp_binary_dir,
+            target_name=mace_run_name,
+            vlog_level=0,
+            embed_model_data=embed_model_data,
+            model_output_dir=model_output_dir,
+            input_nodes=subgraphs[0][YAMLKeyword.input_tensors],
+            output_nodes=subgraphs[0][YAMLKeyword.output_tensors],
+            input_shapes=subgraphs[0][YAMLKeyword.input_shapes],
+            output_shapes=subgraphs[0][YAMLKeyword.output_shapes],
+            mace_model_dir=mace_model_dir,
+            model_tag=model_name,
+            device_type=DeviceType.GPU,
+            running_round=0,
+            restart_round=1,
+            limit_opencl_kernel_time=model_config[
+                YAMLKeyword.limit_opencl_kernel_time],
+            tuning=True,
+            out_of_range_check=False,
+            model_graph_format=model_graph_format,
+            opencl_binary_file='',
+            opencl_parameter_file='',
+            libmace_dynamic_library_path=LIBMACE_DYNAMIC_PATH,
+            link_dynamic=link_dynamic,
+        )
+
+        # pull opencl library
+        self.pull(self.interior_dir, CL_COMPILED_BINARY_FILE_NAME,
+                  '{}/{}'.format(model_output_dir,
+                                 BUILD_TMP_OPENCL_BIN_DIR))
+
+        # pull opencl parameter
+        self.pull_from_data_dir(CL_TUNED_PARAMETER_FILE_NAME,
+                                '{}/{}'.format(model_output_dir,
+                                               BUILD_TMP_OPENCL_BIN_DIR))
+
+        six.print_('Tuning done! \n')
+
+    def run_specify_abi(self, flags, configs, target_abi):
+        if target_abi not in self.target_abis:
+            six.print_('There is no device with soc: %s abi: %s' %
+                       (self.target_socs, target_abi))
+            return
+        library_name = configs[YAMLKeyword.library_name]
+        mace_lib_type = flags.mace_lib_type
+        embed_model_data = \
+            configs[YAMLKeyword.model_data_format] == ModelFormat.code
+        build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi)
+
+        # get target name for run
+        if flags.example:
+            if mace_lib_type == MACELibType.static:
+                target_name = EXAMPLE_STATIC_NAME
+            else:
+                target_name = EXAMPLE_DYNAMIC_NAME
+        else:
+            if mace_lib_type == MACELibType.static:
+                target_name = MACE_RUN_STATIC_NAME
+            else:
+                target_name = MACE_RUN_DYNAMIC_NAME
+        link_dynamic = mace_lib_type == MACELibType.dynamic
+        model_output_dirs = []
+
+        for model_name in configs[YAMLKeyword.models]:
+            check_model_converted(library_name, model_name,
+                                  configs[YAMLKeyword.model_graph_format],
+                                  configs[YAMLKeyword.model_data_format],
+                                  target_abi)
+            if target_abi != ABIType.host:
+                self.clear_data_dir()
+            MaceLogger.header(
+                StringFormatter.block(
+                    'Run model {} on {}'.format(model_name, self.device_name)))
+
+            model_config = configs[YAMLKeyword.models][model_name]
+            model_runtime = model_config[YAMLKeyword.runtime]
+            subgraphs = model_config[YAMLKeyword.subgraphs]
+
+            if not configs[YAMLKeyword.target_socs] \
+                    or target_abi == ABIType.host:
+                model_output_base_dir, model_output_dir, mace_model_dir = \
+                    get_build_model_dirs(
+                        library_name, model_name, target_abi, self,
+                        model_config[YAMLKeyword.model_file_path])
+            else:
+                model_output_base_dir, model_output_dir, mace_model_dir = \
+                    get_build_model_dirs(
+                        library_name, model_name, target_abi, self,
+                        model_config[YAMLKeyword.model_file_path])
+
+            # clear temp model output dir
+            if os.path.exists(model_output_dir):
+                sh.rm('-rf', model_output_dir)
+            os.makedirs(model_output_dir)
+
+            is_tuned = False
+            model_opencl_output_bin_path = ''
+            model_opencl_parameter_path = ''
+            if not flags.address_sanitizer \
+                    and not flags.example \
+                    and target_abi != ABIType.host \
+                    and configs[YAMLKeyword.target_socs] \
+                    and self.target_socs \
+                    and model_runtime in [RuntimeType.gpu,
+                                          RuntimeType.cpu_gpu] \
+                    and not flags.disable_tuning:
+                self.tuning(library_name, model_name, model_config,
+                            configs[YAMLKeyword.model_graph_format],
+                            configs[YAMLKeyword.model_data_format],
+                            target_abi, mace_lib_type)
+                model_output_dirs.append(model_output_dir)
+                model_opencl_output_bin_path = \
+                    '{}/{}/{}'.format(model_output_dir,
+                                      BUILD_TMP_OPENCL_BIN_DIR,
+                                      CL_COMPILED_BINARY_FILE_NAME)
+                model_opencl_parameter_path = \
+                    '{}/{}/{}'.format(model_output_dir,
+                                      BUILD_TMP_OPENCL_BIN_DIR,
+                                      CL_TUNED_PARAMETER_FILE_NAME)
+                self.clear_data_dir()
+                is_tuned = True
+            elif target_abi != ABIType.host and self.target_socs:
+                model_opencl_output_bin_path = get_opencl_binary_output_path(
+                    library_name, target_abi, self
+                )
+                model_opencl_parameter_path = get_opencl_parameter_output_path(
+                    library_name, target_abi, self
+                )
+            sh_commands.gen_random_input(
+                model_output_dir,
+                subgraphs[0][YAMLKeyword.input_tensors],
+                subgraphs[0][YAMLKeyword.input_shapes],
+                subgraphs[0][YAMLKeyword.validation_inputs_data],
+                input_ranges=subgraphs[0][YAMLKeyword.input_ranges],
+                input_data_types=subgraphs[0][YAMLKeyword.input_data_types]
+            )
+            runtime_list = []
+            if target_abi == ABIType.host:
+                runtime_list.append(RuntimeType.cpu)
+            elif model_runtime == RuntimeType.cpu_gpu:
+                runtime_list.extend([RuntimeType.cpu, RuntimeType.gpu])
+            else:
+                runtime_list.append(model_runtime)
+            for runtime in runtime_list:
+                device_type = parse_device_type(runtime)
+                # run for specified soc
+                run_output = self.tuning_run(
+                    abi=target_abi,
+                    target_dir=build_tmp_binary_dir,
+                    target_name=target_name,
+                    vlog_level=flags.vlog_level,
+                    embed_model_data=embed_model_data,
+                    model_output_dir=model_output_dir,
+                    input_nodes=subgraphs[0][YAMLKeyword.input_tensors],
+                    output_nodes=subgraphs[0][YAMLKeyword.output_tensors],
+                    input_shapes=subgraphs[0][YAMLKeyword.input_shapes],
+                    output_shapes=subgraphs[0][YAMLKeyword.output_shapes],
+                    mace_model_dir=mace_model_dir,
+                    model_tag=model_name,
+                    device_type=device_type,
+                    running_round=flags.round,
+                    restart_round=flags.restart_round,
+                    limit_opencl_kernel_time=model_config[
+                        YAMLKeyword.limit_opencl_kernel_time],
+                    tuning=False,
+                    out_of_range_check=flags.gpu_out_of_range_check,
+                    model_graph_format=configs[YAMLKeyword.model_graph_format],
+                    omp_num_threads=flags.omp_num_threads,
+                    cpu_affinity_policy=flags.cpu_affinity_policy,
+                    gpu_perf_hint=flags.gpu_perf_hint,
+                    gpu_priority_hint=flags.gpu_priority_hint,
+                    runtime_failure_ratio=flags.runtime_failure_ratio,
+                    address_sanitizer=flags.address_sanitizer,
+                    opencl_binary_file=model_opencl_output_bin_path,
+                    opencl_parameter_file=model_opencl_parameter_path,
+                    libmace_dynamic_library_path=LIBMACE_DYNAMIC_PATH,
+                    link_dynamic=link_dynamic
+                )
+                if flags.validate:
+                    model_file_path, weight_file_path = get_model_files(
+                        model_config[YAMLKeyword.model_file_path],
+                        model_config[YAMLKeyword.model_sha256_checksum],
+                        BUILD_DOWNLOADS_DIR,
+                        model_config[YAMLKeyword.weight_file_path],
+                        model_config[YAMLKeyword.weight_sha256_checksum]
+                    )
+
+                    validate_type = device_type
+                    if model_config[YAMLKeyword.quantize] == 1:
+                        validate_type = device_type + '_QUANTIZE'
+                    sh_commands.validate_model(
+                        abi=target_abi,
+                        device=self,
+                        model_file_path=model_file_path,
+                        weight_file_path=weight_file_path,
+                        platform=model_config[YAMLKeyword.platform],
+                        device_type=device_type,
+                        input_nodes=subgraphs[0][YAMLKeyword.input_tensors],
+                        output_nodes=subgraphs[0][YAMLKeyword.output_tensors],
+                        input_shapes=subgraphs[0][YAMLKeyword.input_shapes],
+                        output_shapes=subgraphs[0][YAMLKeyword.output_shapes],
+                        model_output_dir=model_output_dir,
+                        input_data_types=subgraphs[0][
+                            YAMLKeyword.input_data_types],
+                        caffe_env=flags.caffe_env,
+                        validation_threshold=subgraphs[0][
+                            YAMLKeyword.validation_threshold][validate_type]
+                    )
+                if flags.report and flags.round > 0:
+                    tuned = is_tuned and device_type == DeviceType.GPU
+                    self.report_run_statistics(
+                        target_abi=target_abi,
+                        model_name=model_name,
+                        device_type=device_type,
+                        output_dir=flags.report_dir,
+                        tuned=tuned
+                    )
+        if model_output_dirs:
+            opencl_output_bin_path = get_opencl_binary_output_path(
+                library_name, target_abi, self
+            )
+            opencl_parameter_bin_path = get_opencl_parameter_output_path(
+                library_name, target_abi, self
+            )
+
+            # clear opencl output dir
+            if os.path.exists(opencl_output_bin_path):
+                sh.rm('-rf', opencl_output_bin_path)
+            if os.path.exists(opencl_parameter_bin_path):
+                sh.rm('-rf', opencl_parameter_bin_path)
+
+            # merge all model's opencl binaries together
+            sh_commands.merge_opencl_binaries(
+                model_output_dirs, CL_COMPILED_BINARY_FILE_NAME,
+                opencl_output_bin_path
+            )
+            # merge all model's opencl parameter together
+            sh_commands.merge_opencl_parameters(
+                model_output_dirs, CL_TUNED_PARAMETER_FILE_NAME,
+                opencl_parameter_bin_path
+            )
+
+    def report_run_statistics(self,
+                              target_abi,
+                              model_name,
+                              device_type,
+                              output_dir,
+                              tuned):
+        metrics = [0] * 3
+        for line in self.stdout.split('\n'):
+            line = line.strip()
+            parts = line.split()
+            if len(parts) == 4 and parts[0].startswith('time'):
+                metrics[0] = str(float(parts[1]))
+                metrics[1] = str(float(parts[2]))
+                metrics[2] = str(float(parts[3]))
+                break
+        report_filename = output_dir + '/report.csv'
+        if not os.path.exists(report_filename):
+            with open(report_filename, 'w') as f:
+                f.write('model_name,device_name,soc,abi,runtime,'
+                        'init(ms),warmup(ms),run_avg(ms),tuned\n')
+
+        data_str = '{model_name},{device_name},{soc},{abi},{device_type},' \
+                   '{init},{warmup},{run_avg},{tuned}\n'.format(
+                    model_name=model_name,
+                    device_name=self.device_name,
+                    soc=self.target_socs,
+                    abi=target_abi,
+                    device_type=device_type,
+                    init=metrics[0],
+                    warmup=metrics[1],
+                    run_avg=metrics[2],
+                    tuned=tuned)
+        with open(report_filename, 'a') as f:
+            f.write(data_str)
+
+    def benchmark_model(self,
+                        abi,
+                        benchmark_binary_dir,
+                        benchmark_binary_name,
+                        vlog_level,
+                        embed_model_data,
+                        model_output_dir,
+                        mace_model_dir,
+                        input_nodes,
+                        output_nodes,
+                        input_shapes,
+                        output_shapes,
+                        model_tag,
+                        device_type,
+                        model_graph_format,
+                        opencl_binary_file,
+                        opencl_parameter_file,
+                        libmace_dynamic_library_path,
+                        omp_num_threads=-1,
+                        cpu_affinity_policy=1,
+                        gpu_perf_hint=3,
+                        gpu_priority_hint=3,
+                        input_file_name='model_input',
+                        link_dynamic=False):
+        six.print_('* Benchmark for %s' % model_tag)
+
+        mace_model_path = ''
+        if model_graph_format == ModelFormat.file:
+            mace_model_path = '%s/%s.pb' % (mace_model_dir, model_tag)
+        if abi == ABIType.host:
+            libmace_dynamic_lib_dir_path = \
+                os.path.dirname(libmace_dynamic_library_path)
+            p = subprocess.Popen(
+                [
+                    'env',
+                    'LD_LIBRARY_PATH=%s' % libmace_dynamic_lib_dir_path,
+                    'MACE_CPP_MIN_VLOG_LEVEL=%s' % vlog_level,
+                    '%s/%s' % (benchmark_binary_dir, benchmark_binary_name),
+                    '--model_name=%s' % model_tag,
+                    '--input_node=%s' % ','.join(input_nodes),
+                    '--output_node=%s' % ','.join(output_nodes),
+                    '--input_shape=%s' % ':'.join(input_shapes),
+                    '--output_shapes=%s' % ':'.join(output_shapes),
+                    '--input_file=%s/%s' % (model_output_dir, input_file_name),
+                    '--model_data_file=%s/%s.data' % (mace_model_dir,
+                                                      model_tag),
+                    '--device=%s' % device_type,
+                    '--omp_num_threads=%s' % omp_num_threads,
+                    '--cpu_addinity_policy=%s' % cpu_affinity_policy,
+                    '--gpu_perf_hint=%s' % gpu_perf_hint,
+                    '--gpu_priority_hint=%s' % gpu_priority_hint,
+                    '--model_file=%s' % mace_model_path
+                ])
+            p.wait()
+        elif self.system in [SystemType.android, SystemType.arm_linux]:
+            self.exec_command('mkdir -p %s' % self.data_dir)
+            internal_storage_dir = self.create_internal_storage_dir()
+            for input_name in input_nodes:
+                formatted_name = formatted_file_name(input_file_name,
+                                                     input_name)
+                self.push('%s/%s' % (model_output_dir, formatted_name),
+                          self.data_dir)
+            if not embed_model_data:
+                self.push('%s/%s.data' % (mace_model_dir, model_tag),
+                          self.data_dir)
+            if device_type == common.DeviceType.GPU:
+                if os.path.exists(opencl_binary_file):
+                    self.push(opencl_binary_file, self.data_dir)
+                if os.path.exists(opencl_parameter_file):
+                    self.push(opencl_parameter_file, self.data_dir)
+            mace_model_device_path = ''
+            if model_graph_format == ModelFormat.file:
+                mace_model_device_path = '%s/%s.pb' % \
+                                         (self.data_dir, model_tag)
+                self.push(mace_model_path, mace_model_device_path)
+            if link_dynamic:
+                self.push(libmace_dynamic_library_path, self.data_dir)
+                if self.system == SystemType.android:
+                    sh_commands.push_depended_so_libs(
+                        libmace_dynamic_library_path, abi, self.data_dir,
+                        self.address)
+            self.rm('%s/%s' % (self.data_dir, benchmark_binary_name))
+            self.push('%s/%s' % (benchmark_binary_dir, benchmark_binary_name),
+                      self.data_dir)
+
+            cmd = [
+                'LD_LIBRARY_PATH=%s' % self.data_dir,
+                'MACE_CPP_MIN_VLOG_LEVEL=%s' % vlog_level,
+                'MACE_RUN_PARAMETER_PATH=%s/mace_run.config' % self.data_dir,
+                'MACE_INTERNAL_STORAGE_PATH=%s' % internal_storage_dir,
+                'MACE_OPENCL_PROFILING=1',
+                '%s/%s' % (self.data_dir, benchmark_binary_name),
+                '--model_name=%s' % model_tag,
+                '--input_node=%s' % ','.join(input_nodes),
+                '--output_node=%s' % ','.join(output_nodes),
+                '--input_shape=%s' % ':'.join(input_shapes),
+                '--output_shape=%s' % ':'.join(output_shapes),
+                '--input_file=%s/%s' % (self.data_dir, input_file_name),
+                '--model_data_file=%s/%s.data' % (self.data_dir, model_tag),
+                '--device=%s' % device_type,
+                '--omp_num_threads=%s' % omp_num_threads,
+                '--cpu_affinity_policy=%s' % cpu_affinity_policy,
+                '--gpu_perf_hint=%s' % gpu_perf_hint,
+                '--gpu_priority_hint=%s' % gpu_priority_hint,
+                '--model_file=%s' % mace_model_device_path,
+                '--opencl_binary_file=%s/%s' %
+                (self.data_dir, os.path.basename(opencl_binary_file)),
+                '--opencl_parameter_file=%s/%s' %
+                (self.data_dir, os.path.basename(opencl_parameter_file))
+            ]
+
+            cmd = ' '.join(cmd)
+            cmd_file_name = '%s-%s-%s' % \
+                            ('cmd_file', model_tag, str(time.time()))
+
+            cmd_file_path = '%s/%s' % (self.data_dir, cmd_file_name)
+            tmp_cmd_file = '%s/%s' % ('/tmp', cmd_file_name)
+            with open(tmp_cmd_file, 'w') as f:
+                f.write(cmd)
+            self.push(tmp_cmd_file, cmd_file_path)
+            os.remove(tmp_cmd_file)
+
+            if self.system == SystemType.android:
+                sh.adb('-s', self.address, 'shell', 'sh', cmd_file_path,
+                       _fg=True)
+            elif self.system == SystemType.arm_linux:
+                sh.ssh('%s@%s' % (self.username, self.address),
+                       'sh', cmd_file_path, _fg=True)
+            self.rm(cmd_file_path)
+            six.print_('Benchmark done! \n')
+
+    def bm_specific_target(self, flags, configs, target_abi):
+        library_name = configs[YAMLKeyword.library_name]
+        embed_model_data = \
+            configs[YAMLKeyword.model_data_format] == ModelFormat.code
+        opencl_output_bin_path = ''
+        opencl_parameter_path = ''
+        link_dynamic = flags.mace_lib_type == MACELibType.dynamic
+
+        if link_dynamic:
+            bm_model_binary_name = BM_MODEL_DYNAMIC_NAME
+        else:
+            bm_model_binary_name = BM_MODEL_STATIC_NAME
+        build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi)
+        if configs[YAMLKeyword.target_socs] and target_abi != ABIType.host:
+            opencl_output_bin_path = get_opencl_binary_output_path(
+                library_name, target_abi, self
+            )
+            opencl_parameter_path = get_opencl_parameter_output_path(
+                library_name, target_abi, self
+            )
+
+        for model_name in configs[YAMLKeyword.models]:
+            check_model_converted(library_name,
+                                  model_name,
+                                  configs[YAMLKeyword.model_graph_format],
+                                  configs[YAMLKeyword.model_data_format],
+                                  target_abi)
+            MaceLogger.header(
+                StringFormatter.block(
+                    'Benchmark model %s on %s' % (model_name,
+                                                  self.device_name)))
+            model_config = configs[YAMLKeyword.models][model_name]
+            model_runtime = model_config[YAMLKeyword.runtime]
+            subgraphs = model_config[YAMLKeyword.subgraphs]
+
+            model_output_base_dir, model_output_dir, mace_model_dir = \
+                get_build_model_dirs(library_name, model_name,
+                                     target_abi, self,
+                                     model_config[YAMLKeyword.model_file_path])
+            if os.path.exists(model_output_dir):
+                sh.rm('-rf', model_output_dir)
+            os.makedirs(model_output_dir)
+
+            if target_abi != ABIType.host:
+                self.clear_data_dir()
+            sh_commands.gen_random_input(
+                model_output_dir,
+                subgraphs[0][YAMLKeyword.input_tensors],
+                subgraphs[0][YAMLKeyword.input_shapes],
+                subgraphs[0][YAMLKeyword.validation_inputs_data],
+                input_ranges=subgraphs[0][YAMLKeyword.input_ranges],
+                input_data_types=subgraphs[0][YAMLKeyword.input_data_types]
+            )
+            runtime_list = []
+            if target_abi == ABIType.host:
+                runtime_list.append(RuntimeType.cpu)
+            elif model_runtime == RuntimeType.cpu_gpu:
+                runtime_list.extend([RuntimeType.cpu, RuntimeType.cpu_gpu])
+            else:
+                runtime_list.append(model_runtime)
+            for runtime in runtime_list:
+                device_type = parse_device_type(runtime)
+                self.benchmark_model(
+                    abi=target_abi,
+                    benchmark_binary_dir=build_tmp_binary_dir,
+                    benchmark_binary_name=bm_model_binary_name,
+                    vlog_level=0,
+                    embed_model_data=embed_model_data,
+                    model_output_dir=model_output_dir,
+                    input_nodes=subgraphs[0][YAMLKeyword.input_tensors],
+                    output_nodes=subgraphs[0][YAMLKeyword.output_tensors],
+                    input_shapes=subgraphs[0][YAMLKeyword.input_shapes],
+                    output_shapes=subgraphs[0][YAMLKeyword.output_shapes],
+                    mace_model_dir=mace_model_dir,
+                    model_tag=model_name,
+                    device_type=device_type,
+                    model_graph_format=configs[YAMLKeyword.model_graph_format],
+                    omp_num_threads=flags.omp_num_threads,
+                    cpu_affinity_policy=flags.cpu_affinity_policy,
+                    gpu_perf_hint=flags.gpu_perf_hint,
+                    gpu_priority_hint=flags.gpu_priority_hint,
+                    opencl_binary_file=opencl_output_bin_path,
+                    opencl_parameter_file=opencl_parameter_path,
+                    libmace_dynamic_library_path=LIBMACE_DYNAMIC_PATH,
+                    link_dynamic=link_dynamic
+                )
+
+    def run(self,
+            abi,
+            host_bin_path,
+            bin_name,
+            args='',
+            opencl_profiling=True,
+            vlog_level=0,
+            out_of_range_check=True,
+            address_sanitizer=False,
+            simpleperf=False):
+        host_bin_full_path = '%s/%s' % (host_bin_path, bin_name)
+        device_bin_full_path = '%s/%s' % (self.data_dir, bin_name)
+        print(
+            '================================================================'
+        )
+        print('Trying to lock device %s' % self.address)
+        with self.lock():
+            print('Run on device: %s, %s, %s' %
+                  (self.address, self.target_socs, self.device_name))
+            self.rm(self.data_dir)
+            self.exec_command('mkdir -p %s' % self.data_dir)
+            self.push(host_bin_full_path, device_bin_full_path)
+            ld_preload = ''
+            if address_sanitizer:
+                self.push(sh_commands.find_asan_rt_library(abi),
+                          self.data_dir)
+                ld_preload = 'LD_PRELOAD=%s/%s' % \
+                             (self.data_dir,
+                              sh_commands.asan_rt_library_names(abi))
+            opencl_profiling = 1 if opencl_profiling else 0
+            out_of_range_check = 1 if out_of_range_check else 0
+            print('Run %s' % device_bin_full_path)
+            stdout_buf = []
+            process_output = sh_commands.make_output_processor(stdout_buf)
+
+            if simpleperf and self.system == SystemType.android:
+                self.push(sh_commands.find_simpleperf_library(abi),
+                          self.data_dir)
+                simpleperf_cmd = '%s/simpleperf' % self.data_dir
+                exec_cmd = [
+                    ld_preload,
+                    'MACE_OUT_OF_RANGE_CHECK=%s' % out_of_range_check,
+                    'MACE_OPENCL_PROFILING=%d' % opencl_profiling,
+                    'MACE_CPP_MIN_VLOG_LEVEL=%d' % vlog_level,
+                    simpleperf_cmd,
+                    'stat',
+                    '--group',
+                    'raw-l1-dcache,raw-l1-dcache-refill',
+                    '--group',
+                    'raw-l2-dcache,raw-l2-dcache-refill',
+                    '--group',
+                    'raw-l1-dtlb,raw-l1-dtlb-refill',
+                    '--group',
+                    'raw-l2-dtlb,raw-l2-dtlb-refill',
+                    device_bin_full_path,
+                    args,
+                ]
+            else:
+                exec_cmd = [
+                    ld_preload,
+                    'MACE_OUT_OF_RANGE_CHECK=%d' % out_of_range_check,
+                    'MACE_OPENCL_PROFILNG=%d' % opencl_profiling,
+                    'MACE_CPP_MIN_VLOG_LEVEL=%d' % vlog_level,
+                    device_bin_full_path,
+                    args
+                ]
+            exec_cmd = ' '.join(exec_cmd)
+            self.exec_command(exec_cmd, _tty_in=True,
+                              _out=process_output, _err_to_out=True)
+            return ''.join(stdout_buf)
+
+
+class DeviceManager:
+    @classmethod
+    def list_adb_device(cls):
+        adb_list = sh.adb('devices').stdout.decode('utf-8'). \
+                       strip().split('\n')[1:]
+        adb_list = [tuple(pair.split('\t')) for pair in adb_list]
+        devices = []
+        for adb in adb_list:
+            prop = sh_commands.adb_getprop_by_serialno(adb[0])
+            android = {
+                YAMLKeyword.device_name:
+                    prop['ro.product.model'].replace(' ', ''),
+                YAMLKeyword.target_abis:
+                    prop['ro.product.cpu.abilist'].split(','),
+                YAMLKeyword.target_socs: prop['ro.board.platform'],
+                YAMLKeyword.system: SystemType.android,
+                YAMLKeyword.address: adb[0],
+                YAMLKeyword.username: '',
+            }
+            devices.append(android)
+        return devices
+
+    @classmethod
+    def list_ssh_device(cls, yml):
+        with open(yml) as f:
+            devices = yaml.load(f.read())
+        devices = devices['devices']
+        device_list = []
+        for name, dev in six.iteritems(devices):
+            dev[YAMLKeyword.device_name] = \
+                dev[YAMLKeyword.models].replace(' ', '')
+            dev[YAMLKeyword.system] = SystemType.arm_linux
+            device_list.append(dev)
+        return device_list
+
+    @classmethod
+    def list_devices(cls, yml):
+        devices_list = []
+        devices_list.extend(cls.list_adb_device())
+        if not yml:
+            if os.path.exists('devices.yml'):
+                devices_list.extend(cls.list_ssh_device('devices.yml'))
+        else:
+            if os.path.exists(yml):
+                devices_list.extend(cls.list_ssh_device(yml))
+            else:
+                MaceLogger.error(ModuleName.RUN,
+                                 'no ARM linux device config file found')
+        host = {
+            YAMLKeyword.device_name: SystemType.host,
+            YAMLKeyword.target_abis: [ABIType.host],
+            YAMLKeyword.target_socs: '',
+            YAMLKeyword.system: SystemType.host,
+            YAMLKeyword.address: None,
+
+        }
+        devices_list.append(host)
+        return devices_list
+
+
+# Running this file directly is a no-op; the guard body is just `pass`.
+if __name__ == '__main__':
+    pass
--- a/tools/image/image_to_tensor.py
+++ b/tools/image/image_to_tensor.py
 import argparse
 import os
 import sys
+
+import six
+
 import tensorflow as tf

 # TODO(liyin): use dataset api and estimator with distributed strategy
@@ -70,7 +73,7 @@ def images_to_tensors(input_files, image_shape, mean_values=None):

 def main(unused_args):
    if not os.path.exists(FLAGS.input):
-        print ("input does not exist: %s" % FLAGS.input)
+        print("input does not exist: %s" % FLAGS.input)
        sys.exit(-1)

    input_files = []

--- a/tools/image/tensor_to_image.py
+++ b/tools/image/tensor_to_image.py
 import argparse
 import os
 import sys
+
+import six
+
 import numpy as np
 import tensorflow as tf

@@ -53,7 +56,7 @@ def tensors_to_images(input_files, image_shape):

 def main(unused_args):
    if not os.path.exists(FLAGS.input):
-        print ("input does not exist: %s" % FLAGS.input)
+        print("input does not exist: %s" % FLAGS.input)
        sys.exit(-1)

    input_files = []

--- a/tools/sh_commands.py
+++ b/tools/sh_commands.py
@@ -20,16 +20,14 @@ import os
 import re
 import sh
 import struct
-import subprocess
 import sys
 import time
-import urllib
 import platform
-from enum import Enum

 import six

 import common
+from common import abi_to_internal

 sys.path.insert(0, "mace/python/tools")
 try:
@@ -89,11 +87,6 @@ class BuildType(object):
    code = 'code'


-class ModelFormat(object):
-    file = 'file'
-    code = 'code'
-
-
 def stdout_success(stdout):
    stdout_lines = stdout.split("\n")
    for line in stdout_lines:
@@ -181,97 +174,14 @@ def adb_get_all_socs():


 def adb_push(src_path, dst_path, serialno):
-    six.print_("Push %s to %s" % (src_path, dst_path))
    sh.adb("-s", serialno, "push", src_path, dst_path)


 def adb_pull(src_path, dst_path, serialno):
-    six.print_("Pull %s to %s" % (src_path, dst_path))
    try:
        sh.adb("-s", serialno, "pull", src_path, dst_path)
    except Exception as e:
-        six.print_("Error msg: %s" % e.stderr)
-
-
-def adb_run(abi,
-            serialno,
-            host_bin_path,
-            bin_name,
-            args="",
-            opencl_profiling=True,
-            vlog_level=0,
-            device_bin_path="/data/local/tmp/mace",
-            out_of_range_check=True,
-            address_sanitizer=False,
-            simpleperf=False):
-    host_bin_full_path = "%s/%s" % (host_bin_path, bin_name)
-    device_bin_full_path = "%s/%s" % (device_bin_path, bin_name)
-    props = adb_getprop_by_serialno(serialno)
-    six.print_(
-        "====================================================================="
-    )
-    six.print_("Trying to lock device %s" % serialno)
-    with device_lock(serialno):
-        six.print_("Run on device: %s, %s, %s" %
-                   (serialno, props["ro.board.platform"],
-                    props["ro.product.model"]))
-        sh.adb("-s", serialno, "shell", "rm -rf %s" % device_bin_path)
-        sh.adb("-s", serialno, "shell", "mkdir -p %s" % device_bin_path)
-        adb_push(host_bin_full_path, device_bin_full_path, serialno)
-        ld_preload = ""
-        if address_sanitizer:
-            adb_push(find_asan_rt_library(abi), device_bin_path, serialno)
-            ld_preload = "LD_PRELOAD=%s/%s" % (device_bin_path,
-                                               asan_rt_library_names(abi)),
-
-        opencl_profiling = 1 if opencl_profiling else 0
-        out_of_range_check = 1 if out_of_range_check else 0
-        six.print_("Run %s" % device_bin_full_path)
-
-        stdout_buff = []
-        process_output = make_output_processor(stdout_buff)
-
-        if simpleperf:
-            adb_push(find_simpleperf_library(abi), device_bin_path, serialno)
-            simpleperf_cmd = "%s/simpleperf" % device_bin_path
-            sh.adb(
-                "-s",
-                serialno,
-                "shell",
-                ld_preload,
-                "MACE_OUT_OF_RANGE_CHECK=%d" % out_of_range_check,
-                "MACE_OPENCL_PROFILING=%d" % opencl_profiling,
-                "MACE_CPP_MIN_VLOG_LEVEL=%d" % vlog_level,
-                simpleperf_cmd,
-                "stat",
-                "--group",
-                "raw-l1-dcache,raw-l1-dcache-refill",
-                "--group",
-                "raw-l2-dcache,raw-l2-dcache-refill",
-                "--group",
-                "raw-l1-dtlb,raw-l1-dtlb-refill",
-                "--group",
-                "raw-l2-dtlb,raw-l2-dtlb-refill",
-                device_bin_full_path,
-                args,
-                _tty_in=True,
-                _out=process_output,
-                _err_to_out=True)
-        else:
-            sh.adb(
-                "-s",
-                serialno,
-                "shell",
-                ld_preload,
-                "MACE_OUT_OF_RANGE_CHECK=%d" % out_of_range_check,
-                "MACE_OPENCL_PROFILING=%d" % opencl_profiling,
-                "MACE_CPP_MIN_VLOG_LEVEL=%d" % vlog_level,
-                device_bin_full_path,
-                args,
-                _tty_in=True,
-                _out=process_output,
-                _err_to_out=True)
-        return "".join(stdout_buff)
+        six.print_("Error msg: %s" % e, file=sys.stderr)


 ################################
@@ -293,7 +203,7 @@ def find_asan_rt_library(abi, asan_rt_path=''):
        if len(candidates) == 0:
            common.MaceLogger.error(
                "Toolchain",
-                "Can't find AddressSanitizer runtime library in % s" %
+                "Can't find AddressSanitizer runtime library in %s" %
                find_path)
        elif len(candidates) > 1:
            common.MaceLogger.info(
@@ -338,6 +248,7 @@ def find_simpleperf_library(abi, simpleperf_path=''):
 ################################
 def bazel_build(target,
                abi="armeabi-v7a",
+                toolchain='android',
                hexagon_mode=False,
                enable_openmp=True,
                enable_neon=True,
@@ -361,8 +272,8 @@ def bazel_build(target,
            "build",
            target,
            "--config",
-            "android",
-            "--cpu=%s" % abi,
+            toolchain,
+            "--cpu=%s" % abi_to_internal(abi),
            "--define",
            "neon=%s" % str(enable_neon).lower(),
            "--define",
@@ -434,15 +345,6 @@ def gen_mace_engine_factory_source(model_tags,
    six.print_("Generate mace engine creator source done!\n")


-def pull_file_from_device(serial_num, file_path, file_name, output_dir):
-    if not os.path.exists(output_dir):
-        sh.mkdir("-p", output_dir)
-    output_path = "%s/%s" % (output_dir, file_path)
-    if os.path.exists(output_path):
-        sh.rm('-rf', output_path)
-    adb_pull(file_path + '/' + file_name, output_dir, serial_num)
-
-
 def merge_opencl_binaries(binaries_dirs,
                          cl_compiled_program_file_name,
                          output_file_path):
@@ -691,233 +593,21 @@ def push_depended_so_libs(libmace_dynamic_library_path,
                          abi, phone_data_dir, serialno):
    dep_so_libs = sh.bash(os.environ["ANDROID_NDK_HOME"] + "/ndk-depends",
                          libmace_dynamic_library_path)
+    src_file = ""
    for dep in split_stdout(dep_so_libs):
        if dep == "libgnustl_shared.so":
-            adb_push(
-                    "%s/sources/cxx-stl/gnu-libstdc++/4.9/libs/%s/libgnustl_shared.so"  # noqa
-                    % (os.environ["ANDROID_NDK_HOME"], abi),
-                    phone_data_dir,
-                    serialno)
+            src_file = "%s/sources/cxx-stl/gnu-libstdc++/4.9/libs/" \
+                "%s/libgnustl_shared.so"\
+                       % (os.environ["ANDROID_NDK_HOME"], abi)
        elif dep == "libc++_shared.so":
-            adb_push(
-                    "%s/sources/cxx-stl/llvm-libc++/libs/%s/libc++_shared.so"  # noqa
-                    % (os.environ["ANDROID_NDK_HOME"], abi),
-                    phone_data_dir,
-                    serialno)
-
-
-def tuning_run(abi,
-               serialno,
-               target_dir,
-               target_name,
-               vlog_level,
-               embed_model_data,
-               model_output_dir,
-               input_nodes,
-               output_nodes,
-               input_shapes,
-               output_shapes,
-               mace_model_dir,
-               model_tag,
-               device_type,
-               running_round,
-               restart_round,
-               limit_opencl_kernel_time,
-               tuning,
-               out_of_range_check,
-               phone_data_dir,
-               model_graph_format,
-               opencl_binary_file,
-               opencl_parameter_file,
-               libmace_dynamic_library_path,
-               omp_num_threads=-1,
-               cpu_affinity_policy=1,
-               gpu_perf_hint=3,
-               gpu_priority_hint=3,
-               input_file_name="model_input",
-               output_file_name="model_out",
-               input_dir="",
-               output_dir="",
-               runtime_failure_ratio=0.0,
-               address_sanitizer=False,
-               link_dynamic=False,
-               quantize_stat=False):
-    six.print_("* Run '%s' with round=%s, restart_round=%s, tuning=%s, "
-               "out_of_range_check=%s, omp_num_threads=%s, "
-               "cpu_affinity_policy=%s, gpu_perf_hint=%s, "
-               "gpu_priority_hint=%s" %
-               (model_tag, running_round, restart_round, str(tuning),
-                str(out_of_range_check), omp_num_threads, cpu_affinity_policy,
-                gpu_perf_hint, gpu_priority_hint))
-    sys.stdout.flush()
-
-    mace_model_path = ""
-    if model_graph_format == ModelFormat.file:
-        mace_model_path = "%s/%s.pb" % (mace_model_dir, model_tag)
-    if abi == "host":
-        libmace_dynamic_lib_path = \
-            os.path.dirname(libmace_dynamic_library_path)
-        cmd = [
-            "env",
-            "LD_LIBRARY_PATH=%s" % libmace_dynamic_lib_path,
-            "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
-            "MACE_RUNTIME_FAILURE_RATIO=%f" % runtime_failure_ratio,
-        ]
-        if quantize_stat:
-            cmd.append("MACE_LOG_TENSOR_RANGE=1")
-        cmd.extend([
-            "%s/%s" % (target_dir, target_name),
-            "--model_name=%s" % model_tag,
-            "--input_node=%s" % ",".join(input_nodes),
-            "--output_node=%s" % ",".join(output_nodes),
-            "--input_shape=%s" % ":".join(input_shapes),
-            "--output_shape=%s" % ":".join(output_shapes),
-            "--input_file=%s/%s" % (model_output_dir, input_file_name),
-            "--output_file=%s/%s" % (model_output_dir, output_file_name),
-            "--input_dir=%s" % input_dir,
-            "--output_dir=%s" % output_dir,
-            "--model_data_file=%s/%s.data" % (mace_model_dir, model_tag),
-            "--device=%s" % device_type,
-            "--round=%s" % running_round,
-            "--restart_round=%s" % restart_round,
-            "--omp_num_threads=%s" % omp_num_threads,
-            "--cpu_affinity_policy=%s" % cpu_affinity_policy,
-            "--gpu_perf_hint=%s" % gpu_perf_hint,
-            "--gpu_priority_hint=%s" % gpu_priority_hint,
-            "--model_file=%s" % mace_model_path,
-        ])
-        p = subprocess.Popen(
-            cmd,
-            stderr=subprocess.PIPE,
-            stdout=subprocess.PIPE)
-        out, err = p.communicate()
-        stdout = err + out
-        six.print_(stdout)
-        six.print_("Running finished!\n")
-    else:
-        sh.adb("-s", serialno, "shell", "mkdir", "-p", phone_data_dir)
-        internal_storage_dir = create_internal_storage_dir(
-            serialno, phone_data_dir)
-
-        for input_name in input_nodes:
-            formatted_name = common.formatted_file_name(input_file_name,
-                                                        input_name)
-            adb_push("%s/%s" % (model_output_dir, formatted_name),
-                     phone_data_dir, serialno)
-        if address_sanitizer:
-            adb_push(find_asan_rt_library(abi), phone_data_dir, serialno)
-
-        if not embed_model_data:
-            adb_push("%s/%s.data" % (mace_model_dir, model_tag),
-                     phone_data_dir, serialno)
-
-        if device_type == common.DeviceType.GPU:
-            if os.path.exists(opencl_binary_file):
-                adb_push(opencl_binary_file, phone_data_dir, serialno)
-            if os.path.exists(opencl_parameter_file):
-                adb_push(opencl_parameter_file, phone_data_dir, serialno)
-
-        adb_push("third_party/nnlib/libhexagon_controller.so",
-                 phone_data_dir, serialno)
-
-        mace_model_phone_path = ""
-        if model_graph_format == ModelFormat.file:
-            mace_model_phone_path = "%s/%s.pb" % (phone_data_dir, model_tag)
-            adb_push(mace_model_path,
-                     mace_model_phone_path,
-                     serialno)
-
-        if link_dynamic:
-            adb_push(libmace_dynamic_library_path, phone_data_dir,
-                     serialno)
-            push_depended_so_libs(libmace_dynamic_library_path, abi,
-                                  phone_data_dir, serialno)
-
-        adb_push("%s/%s" % (target_dir, target_name), phone_data_dir,
-                 serialno)
-
-        stdout_buff = []
-        process_output = make_output_processor(stdout_buff)
-        adb_cmd = [
-            "LD_LIBRARY_PATH=%s" % phone_data_dir,
-            "MACE_TUNING=%s" % int(tuning),
-            "MACE_OUT_OF_RANGE_CHECK=%s" % int(out_of_range_check),
-            "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
-            "MACE_RUN_PARAMETER_PATH=%s/mace_run.config" % phone_data_dir,
-            "MACE_INTERNAL_STORAGE_PATH=%s" % internal_storage_dir,
-            "MACE_LIMIT_OPENCL_KERNEL_TIME=%s" % limit_opencl_kernel_time,
-            "MACE_RUNTIME_FAILURE_RATIO=%f" % runtime_failure_ratio,
-        ]
-        if quantize_stat:
-            adb_cmd.append("MACE_LOG_TENSOR_RANGE=1")
-        if address_sanitizer:
-            adb_cmd.extend([
-                "LD_PRELOAD=%s/%s" % (phone_data_dir,
-                                      asan_rt_library_names(abi))
-            ])
-        adb_cmd.extend([
-            "%s/%s" % (phone_data_dir, target_name),
-            "--model_name=%s" % model_tag,
-            "--input_node=%s" % ",".join(input_nodes),
-            "--output_node=%s" % ",".join(output_nodes),
-            "--input_shape=%s" % ":".join(input_shapes),
-            "--output_shape=%s" % ":".join(output_shapes),
-            "--input_file=%s/%s" % (phone_data_dir, input_file_name),
-            "--output_file=%s/%s" % (phone_data_dir, output_file_name),
-            "--input_dir=%s" % input_dir,
-            "--output_dir=%s" % output_dir,
-            "--model_data_file=%s/%s.data" % (phone_data_dir, model_tag),
-            "--device=%s" % device_type,
-            "--round=%s" % running_round,
-            "--restart_round=%s" % restart_round,
-            "--omp_num_threads=%s" % omp_num_threads,
-            "--cpu_affinity_policy=%s" % cpu_affinity_policy,
-            "--gpu_perf_hint=%s" % gpu_perf_hint,
-            "--gpu_priority_hint=%s" % gpu_priority_hint,
-            "--model_file=%s" % mace_model_phone_path,
-            "--opencl_binary_file=%s/%s" %
-            (phone_data_dir, os.path.basename(opencl_binary_file)),
-            "--opencl_parameter_file=%s/%s" %
-            (phone_data_dir, os.path.basename(opencl_parameter_file)),
-        ])
-        adb_cmd = ' '.join(adb_cmd)
-        cmd_file_name = "%s-%s-%s" % ('cmd_file', model_tag, str(time.time()))
-        adb_cmd_file = "%s/%s" % (phone_data_dir, cmd_file_name)
-        tmp_cmd_file = "%s/%s" % ('/tmp', cmd_file_name)
-        with open(tmp_cmd_file, 'w') as cmd_file:
-            cmd_file.write(adb_cmd)
-        adb_push(tmp_cmd_file, adb_cmd_file, serialno)
-        os.remove(tmp_cmd_file)
-
-        sh.adb(
-            "-s",
-            serialno,
-            "shell",
-            "sh",
-            adb_cmd_file,
-            _tty_in=True,
-            _out=process_output,
-            _err_to_out=True)
-        stdout = "".join(stdout_buff)
-        if not stdout_success(stdout):
-            common.MaceLogger.error("Mace Run", "Mace run failed.")
-
-        sh.adb(
-            "-s",
-            serialno,
-            "shell",
-            "rm",
-            adb_cmd_file,
-            _fg=True)
-
-        six.print_("Running finished!\n")
-
-    sys.stdout.flush()
-    return stdout
+            src_file = "%s/sources/cxx-stl/llvm-libc++/libs/" \
+                 "%s/libc++_shared.so" % (os.environ["ANDROID_NDK_HOME"], abi)
+    print("push %s to %s" % (src_file, phone_data_dir))
+    adb_push(src_file, phone_data_dir, serialno)


 def validate_model(abi,
-                   serialno,
+                   device,
                   model_file_path,
                   weight_file_path,
                   platform,
@@ -927,7 +617,6 @@ def validate_model(abi,
                   input_shapes,
                   output_shapes,
                   model_output_dir,
-                   phone_data_dir,
                   input_data_types,
                   caffe_env,
                   input_file_name="model_input",
@@ -941,8 +630,7 @@ def validate_model(abi,
            if os.path.exists("%s/%s" % (model_output_dir,
                                         formatted_name)):
                sh.rm("-rf", "%s/%s" % (model_output_dir, formatted_name))
-            adb_pull("%s/%s" % (phone_data_dir, formatted_name),
-                     model_output_dir, serialno)
+            device.pull_from_data_dir(formatted_name, model_output_dir)

    if platform == "tensorflow":
        validate(platform, model_file_path, "",
@@ -956,11 +644,10 @@ def validate_model(abi,
        container_name = "mace_caffe_validator"

        if caffe_env == common.CaffeEnvType.LOCAL:
-            import imp
            try:
-                imp.find_module('caffe')
+                import caffe
            except ImportError:
-                logger.error('There is no caffe python module.')
+                logging.error('There is no caffe python module.')
            validate(platform, model_file_path, weight_file_path,
                     "%s/%s" % (model_output_dir, input_file_name),
                     "%s/%s" % (model_output_dir, output_file_name),
@@ -1075,149 +762,6 @@ def packaging_lib(libmace_output_dir, project_name):
 ################################
 # benchmark
 ################################
-def benchmark_model(abi,
-                    serialno,
-                    benchmark_binary_dir,
-                    benchmark_binary_name,
-                    vlog_level,
-                    embed_model_data,
-                    model_output_dir,
-                    mace_model_dir,
-                    input_nodes,
-                    output_nodes,
-                    input_shapes,
-                    output_shapes,
-                    model_tag,
-                    device_type,
-                    phone_data_dir,
-                    model_graph_format,
-                    opencl_binary_file,
-                    opencl_parameter_file,
-                    libmace_dynamic_library_path,
-                    omp_num_threads=-1,
-                    cpu_affinity_policy=1,
-                    gpu_perf_hint=3,
-                    gpu_priority_hint=3,
-                    input_file_name="model_input",
-                    link_dynamic=False):
-    six.print_("* Benchmark for %s" % model_tag)
-
-    mace_model_path = ""
-    if model_graph_format == ModelFormat.file:
-        mace_model_path = "%s/%s.pb" % (mace_model_dir, model_tag)
-    if abi == "host":
-        libmace_dynamic_lib_dir_path = \
-            os.path.dirname(libmace_dynamic_library_path)
-        p = subprocess.Popen(
-            [
-                "env",
-                "LD_LIBRARY_PATH=%s" % libmace_dynamic_lib_dir_path,
-                "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
-                "%s/%s" % (benchmark_binary_dir, benchmark_binary_name),
-                "--model_name=%s" % model_tag,
-                "--input_node=%s" % ",".join(input_nodes),
-                "--output_node=%s" % ",".join(output_nodes),
-                "--input_shape=%s" % ":".join(input_shapes),
-                "--output_shape=%s" % ":".join(output_shapes),
-                "--input_file=%s/%s" % (model_output_dir, input_file_name),
-                "--model_data_file=%s/%s.data" % (mace_model_dir, model_tag),
-                "--device=%s" % device_type,
-                "--omp_num_threads=%s" % omp_num_threads,
-                "--cpu_affinity_policy=%s" % cpu_affinity_policy,
-                "--gpu_perf_hint=%s" % gpu_perf_hint,
-                "--gpu_priority_hint=%s" % gpu_priority_hint,
-                "--model_file=%s" % mace_model_path,
-            ])
-        p.wait()
-    else:
-        sh.adb("-s", serialno, "shell", "mkdir", "-p", phone_data_dir)
-        internal_storage_dir = create_internal_storage_dir(
-            serialno, phone_data_dir)
-
-        for input_name in input_nodes:
-            formatted_name = common.formatted_file_name(input_file_name,
-                                                        input_name)
-            adb_push("%s/%s" % (model_output_dir, formatted_name),
-                     phone_data_dir, serialno)
-        if not embed_model_data:
-            adb_push("%s/%s.data" % (mace_model_dir, model_tag),
-                     phone_data_dir, serialno)
-        if device_type == common.DeviceType.GPU:
-            if os.path.exists(opencl_binary_file):
-                adb_push(opencl_binary_file, phone_data_dir, serialno)
-            if os.path.exists(opencl_parameter_file):
-                adb_push(opencl_parameter_file, phone_data_dir, serialno)
-        mace_model_phone_path = ""
-        if model_graph_format == ModelFormat.file:
-            mace_model_phone_path = "%s/%s.pb" % (phone_data_dir, model_tag)
-            adb_push(mace_model_path,
-                     mace_model_phone_path,
-                     serialno)
-
-        if link_dynamic:
-            adb_push(libmace_dynamic_library_path, phone_data_dir,
-                     serialno)
-            push_depended_so_lib(libmace_dynamic_library_path, abi,
-                                 phone_data_dir, serialno)
-
-        adb_push("%s/%s" % (benchmark_binary_dir, benchmark_binary_name),
-                 phone_data_dir,
-                 serialno)
-
-        adb_cmd = [
-            "LD_LIBRARY_PATH=%s" % phone_data_dir,
-            "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
-            "MACE_RUN_PARAMETER_PATH=%s/mace_run.config" %
-            phone_data_dir,
-            "MACE_INTERNAL_STORAGE_PATH=%s" % internal_storage_dir,
-            "MACE_OPENCL_PROFILING=1",
-            "%s/%s" % (phone_data_dir, benchmark_binary_name),
-            "--model_name=%s" % model_tag,
-            "--input_node=%s" % ",".join(input_nodes),
-            "--output_node=%s" % ",".join(output_nodes),
-            "--input_shape=%s" % ":".join(input_shapes),
-            "--output_shape=%s" % ":".join(output_shapes),
-            "--input_file=%s/%s" % (phone_data_dir, input_file_name),
-            "--model_data_file=%s/%s.data" % (phone_data_dir, model_tag),
-            "--device=%s" % device_type,
-            "--omp_num_threads=%s" % omp_num_threads,
-            "--cpu_affinity_policy=%s" % cpu_affinity_policy,
-            "--gpu_perf_hint=%s" % gpu_perf_hint,
-            "--gpu_priority_hint=%s" % gpu_priority_hint,
-            "--model_file=%s" % mace_model_phone_path,
-            "--opencl_binary_file=%s/%s" %
-            (phone_data_dir, os.path.basename(opencl_binary_file)),
-            "--opencl_parameter_file=%s/%s" %
-            (phone_data_dir, os.path.basename(opencl_parameter_file)),
-        ]
-        adb_cmd = ' '.join(adb_cmd)
-        cmd_file_name = "%s-%s-%s" % ('cmd_file', model_tag, str(time.time()))
-        adb_cmd_file = "%s/%s" % (phone_data_dir, cmd_file_name)
-        tmp_cmd_file = "%s/%s" % ('/tmp', cmd_file_name)
-        with open(tmp_cmd_file, 'w') as cmd_file:
-            cmd_file.write(adb_cmd)
-        adb_push(tmp_cmd_file, adb_cmd_file, serialno)
-        os.remove(tmp_cmd_file)
-
-        sh.adb(
-            "-s",
-            serialno,
-            "shell",
-            "sh",
-            adb_cmd_file,
-            _fg=True)
-
-        sh.adb(
-            "-s",
-            serialno,
-            "shell",
-            "rm",
-            adb_cmd_file,
-            _fg=True)
-
-    six.print_("Benchmark done!\n")
-
-
 def build_run_throughput_test(abi,
                              serialno,
                              vlog_level,