Commit 3cdf9973 authored by 叶剑武

Merge branch 'device_support' into 'master'

Device support

See merge request !898
...@@ -47,8 +47,13 @@ ops_test: ...@@ -47,8 +47,13 @@ ops_test:
stage: ops_test stage: ops_test
script: script:
- if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi - if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi
- python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS - >
- python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS --enable_neon=false if ping -c 1 v9.git.n.xiaomi.com 1>/dev/null 2>&1; then
GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git
DEVICE_CONF_FILE=generic-mobile-devices/devices.yml
fi
- python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64,armhf --target_socs=$TARGET_SOCS
- python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64,armhf --target_socs=$TARGET_SOCS --enable_neon=false
api_test: api_test:
stage: api_test stage: api_test
...@@ -68,14 +73,19 @@ extra_tests: ...@@ -68,14 +73,19 @@ extra_tests:
stage: extra_tests stage: extra_tests
script: script:
- if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi - if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi
- python tools/bazel_adb_run.py --target="//mace/utils:tuner_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS - >
if ping -c 1 v9.git.n.xiaomi.com 1>/dev/null 2>&1; then
GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git
DEVICE_CONF_FILE=generic-mobile-devices/devices.yml
fi
- python tools/bazel_adb_run.py --target="//mace/utils:tuner_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64 --target_socs=$TARGET_SOCS
platform_compatible_tests: platform_compatible_tests:
stage: platform_compatible_tests stage: platform_compatible_tests
script: script:
- bazel build mace/core:core --define openmp=true - bazel build mace/core:core --define openmp=true
- bazel build --config arm_linux --define openmp=true --define opencl=true --define neon=true //mace/libmace:libmace.so - bazel build --config arm_linux_gnueabihf --define openmp=true --define opencl=true --define neon=true //mace/libmace:libmace.so
- bazel build --config aarch64_linux --define openmp=true --define opencl=true --define neon=true //mace/libmace:libmace.so - bazel build --config aarch64_linux_gnu --define openmp=true --define opencl=true --define neon=true //mace/libmace:libmace.so
build_libraries: build_libraries:
stage: build_libraries stage: build_libraries
...@@ -87,6 +97,11 @@ ndk_versions_compatible_tests: ...@@ -87,6 +97,11 @@ ndk_versions_compatible_tests:
script: script:
- DEFAULT_NDK_PATH=$ANDROID_NDK_HOME - DEFAULT_NDK_PATH=$ANDROID_NDK_HOME
- prefix_path=${DEFAULT_NDK_PATH%android-ndk-*} - prefix_path=${DEFAULT_NDK_PATH%android-ndk-*}
- >
if ping -c 1 v9.git.n.xiaomi.com 1>/dev/null 2>&1; then
GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git
DEVICE_CONF_FILE=generic-mobile-devices/devices.yml
fi
- > - >
for ndk in android-ndk-r12b android-ndk-r15c android-ndk-r16 android-ndk-r17b; for ndk in android-ndk-r12b android-ndk-r15c android-ndk-r16 android-ndk-r17b;
do do
...@@ -96,8 +111,8 @@ ndk_versions_compatible_tests: ...@@ -96,8 +111,8 @@ ndk_versions_compatible_tests:
export PATH=$ANDROID_NDK_HOME:$PATH; export PATH=$ANDROID_NDK_HOME:$PATH;
echo "ndk path: $ANDROID_NDK_HOME"; echo "ndk path: $ANDROID_NDK_HOME";
if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi
python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS --args="--gtest_filter=ActivationOpTest*"; python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64 --target_socs=$TARGET_SOCS --args="--gtest_filter=ActivationOpTest*";
python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS --enable_neon=false --args="--gtest_filter=ActivationOpTest*"; python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64 --target_socs=$TARGET_SOCS --enable_neon=false --args="--gtest_filter=ActivationOpTest*";
fi fi
done done
- export ANDROID_NDK_HOME=$DEFAULT_NDK_PATH - export ANDROID_NDK_HOME=$DEFAULT_NDK_PATH
...@@ -111,16 +126,27 @@ python_tools_tests: ...@@ -111,16 +126,27 @@ python_tools_tests:
- GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@github.com:XiaoMi/mace-models.git - GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@github.com:XiaoMi/mace-models.git
- CONF_FILE=mace-models/mobilenet-v2/mobilenet-v2.yml - CONF_FILE=mace-models/mobilenet-v2/mobilenet-v2.yml
- > - >
python tools/converter.py convert --config=${CONF_FILE} --target_abis=armeabi-v7a --model_graph_format=file --model_data_format=file || exit 1; if ping -c 1 v9.git.n.xiaomi.com 1>/dev/null 2>&1; then
python tools/converter.py run --config=${CONF_FILE} --round=1 --target_abis=armeabi-v7a --validate --model_graph_format=file --model_data_format=file || exit 1; GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git
python tools/converter.py run --config=${CONF_FILE} --example --target_abis=armeabi-v7a --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1; DEVICE_CONF_FILE=generic-mobile-devices/devices.yml
fi
- >
python tools/converter.py convert --config=${CONF_FILE} --target_abis=armeabi-v7a,arm64 --model_graph_format=file --model_data_format=file || exit 1;
python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --round=1 --target_abis=armeabi-v7a,arm64 --validate --model_graph_format=file --model_data_format=file || exit 1;
python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --example --target_abis=armeabi-v7a,arm64 --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1;
model_tests: model_tests:
stage: model_tests stage: model_tests
script: script:
- pwd - pwd
- rm -rf mace-models - rm -rf mace-models
- rm -rf generic-mobile-devices
- GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@github.com:XiaoMi/mace-models.git - GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@github.com:XiaoMi/mace-models.git
- >
if ping -c 1 v9.git.n.xiaomi.com 1>/dev/null 2>&1; then
GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git
DEVICE_CONF_FILE=generic-mobile-devices/devices.yml
fi
- > - >
for CONF_FILE in mace-models/mobilenet-v1/mobilenet-v1.yml mace-models/mobilenet-v1/mobilenet-v1-quantize-retrain.yml; for CONF_FILE in mace-models/mobilenet-v1/mobilenet-v1.yml mace-models/mobilenet-v1/mobilenet-v1-quantize-retrain.yml;
do do
...@@ -131,8 +157,8 @@ model_tests: ...@@ -131,8 +157,8 @@ model_tests:
- CONF_FILE=mace-models/mobilenet-v2/mobilenet-v2-host.yml - CONF_FILE=mace-models/mobilenet-v2/mobilenet-v2-host.yml
- > - >
python tools/converter.py convert --config=${CONF_FILE} --model_graph_format=file --model_data_format=file || exit 1; python tools/converter.py convert --config=${CONF_FILE} --model_graph_format=file --model_data_format=file || exit 1;
python tools/converter.py run --config=${CONF_FILE} --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1; python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1;
python tools/converter.py run --config=${CONF_FILE} --example --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1; python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --example --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1;
- rm -rf mace-models - rm -rf mace-models
build_android_demo: build_android_demo:
......
...@@ -35,7 +35,7 @@ Required dependencies ...@@ -35,7 +35,7 @@ Required dependencies
- Required by model validation - Required by model validation
* - six * - six
- pip install -I six==1.11.0 - pip install -I six==1.11.0
- Required for Python 2 and 3 compatibility (TODO) - Required for Python 2 and 3 compatibility
Optional dependencies Optional dependencies
--------------------- ---------------------
......
...@@ -109,13 +109,66 @@ in one deployment file. ...@@ -109,13 +109,66 @@ in one deployment file.
sha256sum /path/to/your/file sha256sum /path/to/your/file
Advanced usage Advanced usage
-------------- --------------
There are two common advanced use cases: There are three common advanced use cases:
- running your model on an embedded device (ARM Linux)
- converting model to C++ code. - converting model to C++ code.
- tuning GPU kernels for a specific SoC. - tuning GPU kernels for a specific SoC.
Run your model on an embedded device (ARM Linux)
--------------------------------------------------
Running your model on ARM Linux is nearly the same as on Android, except that you need to specify a device config file.
.. code:: bash
python tools/converter.py run --config=/path/to/your/model_deployment_file.yml --device_yml=/path/to/devices.yml
There are two steps to complete before running:
1. Configure passwordless login
MACE uses SSH to connect to the embedded device, so you should copy your public key to the device with the command below.
.. code:: bash
cat ~/.ssh/id_rsa.pub | ssh -q {user}@{ip} "cat >> ~/.ssh/authorized_keys"
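If the host does not yet have an SSH key pair, generate one first with ``ssh-keygen`` (standard OpenSSH usage; the default key path ``~/.ssh/id_rsa.pub`` is the one used by the command above):

.. code:: bash

    ssh-keygen -t rsa -b 4096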
2. Write your own device YAML configuration file.
* **Example**
Here is a device YAML config demo.
.. literalinclude:: devices/demo_device_nanopi.yml
:language: yaml
* **Configuration**
The detailed explanation is listed in the table below; a short command sketch for collecting these values follows the table.
.. list-table::
:header-rows: 1
* - Options
- Usage
* - target_abis
- The ABIs supported by the device. You can get them via the ``dpkg --print-architecture`` and
``dpkg --print-foreign-architectures`` commands; if more than one ABI is supported,
separate them with commas.
* - target_socs
- The device SoC. You can get it from the device manual; we have not found a way to query it from the shell.
* - models
- The device model's full name. You can get it via the ``lshw`` command (a third-party package; install it via your package manager) and read its ``product`` value.
* - address
- Since we use SSH to connect to the device, the IP address is required.
* - username
- Login username, required.
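As a convenience, the values above can be collected on the device itself with standard shell commands (a minimal sketch; package names and output fields may vary across distributions):

.. code:: bash

    # Run these on the embedded device (e.g. over SSH).
    dpkg --print-architecture             # primary ABI, e.g. armhf  -> target_abis
    dpkg --print-foreign-architectures    # additional enabled ABIs, if any
    sudo lshw -c system | grep product    # device model name -> models (lshw is a third-party package)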
Convert model(s) to C++ code Convert model(s) to C++ code
-------------------------------- --------------------------------
...@@ -403,6 +456,7 @@ Reduce Library Size ...@@ -403,6 +456,7 @@ Reduce Library Size
- It is recommended to use ``version script`` and ``strip`` feature when linking mace static library. The effect is remarkable. - It is recommended to use ``version script`` and ``strip`` feature when linking mace static library. The effect is remarkable.
* Remove the unused ops. * Remove the unused ops.
Remove the registration of the ops unused by your models in ``mace/ops/ops_register.cc``, which will reduce the library size significantly; the final binary only links the registered ops' code.
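As a reference for the ``version script`` and ``strip`` recommendation above, here is a minimal sketch of linking the MACE static library into your own shared library (the file and library names are illustrative, not part of MACE):

.. code:: bash

    # Illustrative version script: export only the symbols your app needs and hide the rest.
    cat > mace_version_script.lds <<'EOF'
    {
      global:
        *MaceEngine*;
      local:
        *;
    };
    EOF

    # Link against libmace.a (objects must be built with -fPIC), apply the version
    # script, and strip symbols to shrink the resulting binary.
    g++ -shared -o libyour_app.so your_objects.o libmace.a \
        -Wl,--version-script=mace_version_script.lds -Wl,--strip-all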
......
...@@ -68,7 +68,8 @@ Here we use the mobilenet-v2 model as an example. ...@@ -68,7 +68,8 @@ Here we use the mobilenet-v2 model as an example.
.. note:: .. note::
If you want to run on device/phone, please plug in at least one device/phone. If you want to run on phone, please plug in at least one phone.
Or if you want to run on an embedded device, please refer to :doc:`advanced_usage`.
.. code:: sh .. code:: sh
...@@ -245,10 +246,14 @@ to run and validate your model. ...@@ -245,10 +246,14 @@ to run and validate your model.
# Test model run time # Test model run time
python tools/converter.py run --config=/path/to/your/model_deployment_file.yml --round=100 python tools/converter.py run --config=/path/to/your/model_deployment_file.yml --round=100
# Validate the correctness by comparing the results against the # Validate the correctness by comparing the results against the
# original model and framework, measured with cosine distance for similarity. # original model and framework, measured with cosine distance for similarity.
python tools/converter.py run --config=/path/to/your/model_deployment_file.yml --validate python tools/converter.py run --config=/path/to/your/model_deployment_file.yml --validate
# If you want to run the model on a specified ARM Linux device, put the device config file in the working directory or pass the `--device_yml` flag
python tools/converter.py run --config=/path/to/your/model_deployment_file.yml --device_yml=/path/to/devices.yml
* **benchmark** * **benchmark**
benchmark and profile the model. benchmark and profile the model.
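For example, a typical invocation looks like the following (the ``--config`` flag is the same one used by the ``run`` command above):

.. code:: sh

    # Benchmark and profile the model
    python tools/converter.py benchmark --config=/path/to/your/model_deployment_file.yml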
......
...@@ -12,12 +12,9 @@ devices: ...@@ -12,12 +12,9 @@ devices:
address: 10.0.0.0 address: 10.0.0.0
# login username # login username
username: user username: user
# login password, is required when you can login into device without password
password: 1234567
raspberry: raspberry:
target_abis: [armv7l] target_abis: [armv7l]
target_socs: BCM2837 target_socs: BCM2837
models: Raspberry Pi 3 Model B Plus Rev 1.3 models: Raspberry Pi 3 Model B Plus Rev 1.3
address: 10.0.0.1 address: 10.0.0.1
username: user username: user
password: 123456
...@@ -24,6 +24,24 @@ config_setting( ...@@ -24,6 +24,24 @@ config_setting(
visibility = ["//visibility:public"], visibility = ["//visibility:public"],
) )
config_setting(
name = "arm_linux_aarch64",
values = {
"crosstool_top": "//tools/aarch64_compiler:toolchain",
"cpu": "aarch64",
},
visibility = ["//visibility:public"],
)
config_setting(
name = "arm_linux_armhf",
values = {
"crosstool_top": "//tools/arm_compiler:toolchain",
"cpu": "armeabi-v7a",
},
visibility = ["//visibility:public"],
)
config_setting( config_setting(
name = "neon_enabled", name = "neon_enabled",
define_values = { define_values = {
......
...@@ -42,7 +42,7 @@ struct CPUFreq { ...@@ -42,7 +42,7 @@ struct CPUFreq {
}; };
namespace { namespace {
#if defined(__ANDROID__)
int GetCPUCount() { int GetCPUCount() {
int cpu_count = 0; int cpu_count = 0;
std::string cpu_sys_conf = "/proc/cpuinfo"; std::string cpu_sys_conf = "/proc/cpuinfo";
...@@ -69,10 +69,8 @@ int GetCPUCount() { ...@@ -69,10 +69,8 @@ int GetCPUCount() {
VLOG(2) << "CPU cores: " << cpu_count; VLOG(2) << "CPU cores: " << cpu_count;
return cpu_count; return cpu_count;
} }
#endif
int GetCPUMaxFreq(std::vector<float> *max_freqs) { int GetCPUMaxFreq(std::vector<float> *max_freqs) {
#if defined(__ANDROID__)
int cpu_count = GetCPUCount(); int cpu_count = GetCPUCount();
for (int cpu_id = 0; cpu_id < cpu_count; ++cpu_id) { for (int cpu_id = 0; cpu_id < cpu_count; ++cpu_id) {
std::string cpuinfo_max_freq_sys_conf = MakeString( std::string cpuinfo_max_freq_sys_conf = MakeString(
...@@ -94,34 +92,6 @@ int GetCPUMaxFreq(std::vector<float> *max_freqs) { ...@@ -94,34 +92,6 @@ int GetCPUMaxFreq(std::vector<float> *max_freqs) {
} }
f.close(); f.close();
} }
#else
std::string cpu_sys_conf = "/proc/cpuinfo";
std::ifstream f(cpu_sys_conf);
if (!f.is_open()) {
LOG(ERROR) << "failed to open " << cpu_sys_conf;
return -1;
}
std::string line;
const std::string freq_key = "cpu MHz";
while (std::getline(f, line)) {
if (line.size() >= freq_key.size()
&& line.compare(0, freq_key.size(), freq_key) == 0) {
size_t pos = line.find(":");
if (pos != std::string::npos) {
std::string freq_str = line.substr(pos + 1);
float freq = atof(freq_str.c_str());
max_freqs->push_back(freq);
}
}
}
if (f.bad()) {
LOG(ERROR) << "failed to read " << cpu_sys_conf;
}
if (!f.eof()) {
LOG(ERROR) << "failed to read end of " << cpu_sys_conf;
}
f.close();
#endif
for (float freq : *max_freqs) { for (float freq : *max_freqs) {
VLOG(2) << "CPU freq: " << freq; VLOG(2) << "CPU freq: " << freq;
......
# Examples # Examples
load( load(
"//mace:mace.bzl", "//mace:mace.bzl",
"if_openmp_enabled",
"if_android", "if_android",
"if_hexagon_enabled", "if_hexagon_enabled",
"if_opencl_enabled", "if_opencl_enabled",
"if_openmp_enabled",
) )
cc_binary( cc_binary(
...@@ -18,8 +18,9 @@ cc_binary( ...@@ -18,8 +18,9 @@ cc_binary(
]), ]),
linkopts = [ linkopts = [
"-lm", "-lm",
"-ldl",
] + if_openmp_enabled([ ] + if_openmp_enabled([
"-fopenmp" "-fopenmp",
]) + if_android([ ]) + if_android([
"-ldl", "-ldl",
"-pie", "-pie",
...@@ -47,6 +48,7 @@ cc_binary( ...@@ -47,6 +48,7 @@ cc_binary(
]), ]),
linkopts = [ linkopts = [
"-lm", "-lm",
"-ldl",
] + if_android([ ] + if_android([
"-ldl", "-ldl",
"-pie", "-pie",
...@@ -55,8 +57,7 @@ cc_binary( ...@@ -55,8 +57,7 @@ cc_binary(
linkstatic = 0, linkstatic = 0,
deps = [ deps = [
"//external:gflags_nothreads", "//external:gflags_nothreads",
"//mace/codegen:generated_mace_engine_factory",
"//mace/codegen:generated_libmace", "//mace/codegen:generated_libmace",
"//mace/codegen:generated_mace_engine_factory",
], ],
) )
...@@ -24,6 +24,18 @@ def if_android_arm64(a): ...@@ -24,6 +24,18 @@ def if_android_arm64(a):
"//conditions:default": [], "//conditions:default": [],
}) })
def if_arm_linux_aarch64(a):
return select({
"//mace:arm_linux_aarch64": a,
"//conditions:default": [],
})
def if_arm_linux_armhf(a):
return select({
"//mace:arm_linux_armhf": a,
"//conditions:default": []
})
def if_neon_enabled(a): def if_neon_enabled(a):
return select({ return select({
"//mace:neon_enabled": a, "//mace:neon_enabled": a,
...@@ -81,4 +93,3 @@ def encrypt_opencl_kernel_genrule(): ...@@ -81,4 +93,3 @@ def encrypt_opencl_kernel_genrule():
outs = ["opencl/encrypt_opencl_kernel.cc"], outs = ["opencl/encrypt_opencl_kernel.cc"],
cmd = "cat $(SRCS) > $@;" cmd = "cat $(SRCS) > $@;"
) )
...@@ -233,7 +233,7 @@ void TestNxNS12(const index_t height, const index_t width) { ...@@ -233,7 +233,7 @@ void TestNxNS12(const index_t height, const index_t width) {
auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w, auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w,
Padding type) { Padding type) {
// generate random input // generate random input
static unsigned int seed = time(NULL); // static unsigned int seed = time(NULL);
index_t batch = 1; index_t batch = 1;
index_t channel = 32; index_t channel = 32;
index_t multiplier = 1; index_t multiplier = 1;
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include <algorithm> #include <algorithm>
#include <cmath>
#include "mace/core/operator.h" #include "mace/core/operator.h"
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include "mace/ops/resize_bicubic.h" #include "mace/ops/resize_bicubic.h"
#include <algorithm> #include <algorithm>
#include <cmath>
#include <memory> #include <memory>
#include <vector> #include <vector>
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include <algorithm> #include <algorithm>
#include <cmath>
#include <vector> #include <vector>
#include "mace/core/operator.h" #include "mace/core/operator.h"
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include <algorithm> #include <algorithm>
#include <cmath>
#include <limits> #include <limits>
#include <memory> #include <memory>
#include <vector> #include <vector>
...@@ -106,7 +107,7 @@ class SoftmaxOp<DeviceType::CPU, float> : public Operation { ...@@ -106,7 +107,7 @@ class SoftmaxOp<DeviceType::CPU, float> : public Operation {
float sum = 0; float sum = 0;
for (index_t c = 0; c < class_count; ++c) { for (index_t c = 0; c < class_count; ++c) {
float exp_value = ::exp(input_ptr[c] - max_val); float exp_value = std::exp(input_ptr[c] - max_val);
sum += exp_value; sum += exp_value;
output_ptr[c] = exp_value; output_ptr[c] = exp_value;
} }
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include <algorithm> #include <algorithm>
#include <cmath>
#include <vector> #include <vector>
#include "mace/core/operator.h" #include "mace/core/operator.h"
......
...@@ -16,8 +16,9 @@ ...@@ -16,8 +16,9 @@
#include <arm_neon.h> #include <arm_neon.h>
#endif #endif
#include <vector>
#include <algorithm> #include <algorithm>
#include <cmath>
#include <vector>
#include "mace/core/operator.h" #include "mace/core/operator.h"
#include "mace/ops/transpose.h" #include "mace/ops/transpose.h"
......
...@@ -112,6 +112,8 @@ TFSupportedOps = [ ...@@ -112,6 +112,8 @@ TFSupportedOps = [
TFOpType = Enum('TFOpType', [(op, op) for op in TFSupportedOps], type=str) TFOpType = Enum('TFOpType', [(op, op) for op in TFSupportedOps], type=str)
TFSupportedOps = [six.b(op) for op in TFSupportedOps]
class TensorflowConverter(base_converter.ConverterInterface): class TensorflowConverter(base_converter.ConverterInterface):
"""A class for convert tensorflow frozen model to mace model. """A class for convert tensorflow frozen model to mace model.
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
import datetime import datetime
import os import os
import six
import uuid import uuid
import numpy as np import numpy as np
import hashlib import hashlib
...@@ -34,8 +35,8 @@ class ModelFormat(object): ...@@ -34,8 +35,8 @@ class ModelFormat(object):
def generate_obfuscated_name(namespace, name): def generate_obfuscated_name(namespace, name):
md5 = hashlib.md5() md5 = hashlib.md5()
md5.update(namespace) md5.update(six.b(namespace))
md5.update(name) md5.update(six.b(name))
md5_digest = md5.hexdigest() md5_digest = md5.hexdigest()
name = md5_digest[:8] name = md5_digest[:8]
......
...@@ -15,8 +15,9 @@ ...@@ -15,8 +15,9 @@
#ifndef MACE_UTILS_QUANTIZE_H_ #ifndef MACE_UTILS_QUANTIZE_H_
#define MACE_UTILS_QUANTIZE_H_ #define MACE_UTILS_QUANTIZE_H_
#include <limits>
#include <algorithm> #include <algorithm>
#include <cmath>
#include <limits>
namespace mace { namespace mace {
......
# Partially borrowed from tensorflow tools/bazel.rc # Partially borrowed from tensorflow tools/bazel.rc
# By default, we don't distinct target and host platfroms. # By default, we don't distinct target and host platfroms.
# When doing cross compilation, use --config=cross_compile to distinct them.
build --distinct_host_configuration=false build --distinct_host_configuration=false
build:cross_compile --distinct_host_configuration=true
build --verbose_failures build --verbose_failures
build --copt=-std=c++11 build --copt=-std=c++11
...@@ -17,34 +15,33 @@ build --copt=-DMACE_USE_NNLIB_CAF ...@@ -17,34 +15,33 @@ build --copt=-DMACE_USE_NNLIB_CAF
build:symbol_hidden --copt=-fvisibility=hidden build:symbol_hidden --copt=-fvisibility=hidden
# Usage example: bazel build --config android # Usage example: bazel build --config android
build:android --config=cross_compile build:android --distinct_host_configuration=true
build:android --crosstool_top=//external:android/crosstool build:android --crosstool_top=//external:android/crosstool
build:android --host_crosstool_top=@bazel_tools//tools/cpp:toolchain build:android --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
# Usage example: bazel build --config arm_linux # Usage example: bazel build --config arm_linux_gnueabihf
build:arm_linux --config=cross_compile build:arm_linux_gnueabihf --distinct_host_configuration=true
build:arm_linux --crosstool_top=//tools/arm_compiler:toolchain build:arm_linux_gnueabihf --crosstool_top=//tools/arm_compiler:toolchain
build:arm_linux --host_crosstool_top=@bazel_tools//tools/cpp:toolchain build:arm_linux_gnueabihf --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
build:arm_linux --cpu=armeabi-v7a build:arm_linux_gnueabihf --cpu=armeabi-v7a
build:arm_linux --copt -mfloat-abi=hard build:arm_linux_gnueabihf --copt -mfloat-abi=hard
build:arm_linux --copt -mfpu=neon build:arm_linux_gnueabihf --copt -mfpu=neon
build:arm_linux --copt -Wno-ignored-attributes build:arm_linux_gnueabihf --copt -Wno-ignored-attributes
build:arm_linux --copt -Wno-unused-function build:arm_linux_gnueabihf --copt -Wno-unused-function
build:arm_linux --copt -Wno-sequence-point build:arm_linux_gnueabihf --copt -Wno-sequence-point
build:arm_linux --copt -Wno-implicit-fallthrough build:arm_linux_gnueabihf --copt -Wno-implicit-fallthrough
build:arm_linux --copt -Wno-psabi
# Usage example: bazel build --config aarch64_linux # Usage example: bazel build --config aarch64_linux_gnu
build:aarch64_linux --config=cross_compile build:aarch64_linux_gnu --distinct_host_configuration=true
build:aarch64_linux --crosstool_top=//tools/aarch64_compiler:toolchain build:aarch64_linux_gnu --crosstool_top=//tools/aarch64_compiler:toolchain
build:aarch64_linux --host_crosstool_top=@bazel_tools//tools/cpp:toolchain build:aarch64_linux_gnu --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
build:aarch64_linux --cpu=aarch64 build:aarch64_linux_gnu --cpu=aarch64
build:aarch64_linux --copt -Wno-ignored-attributes build:aarch64_linux_gnu --copt -Wno-ignored-attributes
build:aarch64_linux --copt -Wno-unused-function build:aarch64_linux_gnu --copt -Wno-unused-function
build:aarch64_linux --copt -Wno-sequence-point build:aarch64_linux_gnu --copt -Wno-sequence-point
build:aarch64_linux --copt -Wno-implicit-fallthrough build:aarch64_linux_gnu --copt -Wno-implicit-fallthrough
# Usage example: bazel build --config optimization # Usage example: bazel build --config optimization
build:optimization -c opt build:optimization -c opt
build:optimization --copt=-O3 build:optimization --copt=-O3
build:optimization --linkopt=-Wl,--strip-all build:optimization --linkopt=-Wl,--strip-all
......
...@@ -26,9 +26,9 @@ import sys ...@@ -26,9 +26,9 @@ import sys
import sh_commands import sh_commands
from common import *
def stdout_processor(stdout, device_properties, abi): from device import DeviceWrapper, DeviceManager
pass
def unittest_stdout_processor(stdout, device_properties, abi): def unittest_stdout_processor(stdout, device_properties, abi):
...@@ -39,7 +39,7 @@ def unittest_stdout_processor(stdout, device_properties, abi): ...@@ -39,7 +39,7 @@ def unittest_stdout_processor(stdout, device_properties, abi):
raise Exception("Command failed") raise Exception("Command failed")
def ops_benchmark_stdout_processor(stdout, device_properties, abi): def ops_benchmark_stdout_processor(stdout, dev, abi):
stdout_lines = stdout.split("\n") stdout_lines = stdout.split("\n")
metrics = {} metrics = {}
for line in stdout_lines: for line in stdout_lines:
...@@ -52,13 +52,13 @@ def ops_benchmark_stdout_processor(stdout, device_properties, abi): ...@@ -52,13 +52,13 @@ def ops_benchmark_stdout_processor(stdout, device_properties, abi):
metrics["%s.input_mb_per_sec" % parts[0]] = parts[3] metrics["%s.input_mb_per_sec" % parts[0]] = parts[3]
metrics["%s.gmacc_per_sec" % parts[0]] = parts[4] metrics["%s.gmacc_per_sec" % parts[0]] = parts[4]
platform = device_properties["ro.board.platform"].replace(" ", "-") # platform = dev[YAMLKeyword.target_socs]
model = device_properties["ro.product.model"].replace(" ", "-") # model = dev[YAMLKeyword.device_name]
tags = { # tags = {
"ro.board.platform": platform, # "ro.board.platform": platform,
"ro.product.model": model, # "ro.product.model": model,
"abi": abi # "abi": abi
} # }
# sh_commands.falcon_push_metrics(server, # sh_commands.falcon_push_metrics(server,
# metrics, tags=tags, endpoint="mace_ops_benchmark") # metrics, tags=tags, endpoint="mace_ops_benchmark")
...@@ -87,7 +87,7 @@ def parse_args(): ...@@ -87,7 +87,7 @@ def parse_args():
type=str, type=str,
default="all", default="all",
help="SoCs (ro.board.platform from getprop) to build, " help="SoCs (ro.board.platform from getprop) to build, "
"comma seperated list or all/random") "comma seperated list or all/random")
parser.add_argument( parser.add_argument(
"--target", type=str, default="//...", help="Bazel target to build") "--target", type=str, default="//...", help="Bazel target to build")
parser.add_argument( parser.add_argument(
...@@ -99,7 +99,7 @@ def parse_args(): ...@@ -99,7 +99,7 @@ def parse_args():
parser.add_argument( parser.add_argument(
"--stdout_processor", "--stdout_processor",
type=str, type=str,
default="stdout_processor", default="unittest_stdout_processor",
help="Stdout processing function, default: stdout_processor") help="Stdout processing function, default: stdout_processor")
parser.add_argument( parser.add_argument(
"--enable_neon", "--enable_neon",
...@@ -115,14 +115,22 @@ def parse_args(): ...@@ -115,14 +115,22 @@ def parse_args():
type=str2bool, type=str2bool,
default=False, default=False,
help="Whether to use simpleperf stat") help="Whether to use simpleperf stat")
parser.add_argument(
'--device_yml',
type=str,
default='',
help='embedded linux device config yml file'
)
return parser.parse_known_args() return parser.parse_known_args()
def main(unused_args): def main(unused_args):
target_socs = None target_socs = None
target_devices = DeviceManager.list_devices(FLAGS.device_yml)
if FLAGS.target_socs != "all" and FLAGS.target_socs != "random": if FLAGS.target_socs != "all" and FLAGS.target_socs != "random":
target_socs = set(FLAGS.target_socs.split(',')) target_socs = set(FLAGS.target_socs.split(','))
target_devices = sh_commands.get_target_socs_serialnos(target_socs) target_devices = [dev for dev in target_devices
if dev[YAMLKeyword.target_socs] in target_socs]
if FLAGS.target_socs == "random": if FLAGS.target_socs == "random":
unlocked_devices = \ unlocked_devices = \
[d for d in target_devices if not sh_commands.is_device_locked(d)] [d for d in target_devices if not sh_commands.is_device_locked(d)]
...@@ -136,31 +144,29 @@ def main(unused_args): ...@@ -136,31 +144,29 @@ def main(unused_args):
target_abis = FLAGS.target_abis.split(',') target_abis = FLAGS.target_abis.split(',')
for target_abi in target_abis: for target_abi in target_abis:
toolchain = infer_toolchain(target_abi)
sh_commands.bazel_build(target, abi=target_abi, sh_commands.bazel_build(target, abi=target_abi,
toolchain=toolchain,
enable_neon=FLAGS.enable_neon, enable_neon=FLAGS.enable_neon,
address_sanitizer=FLAGS.address_sanitizer) address_sanitizer=FLAGS.address_sanitizer)
if FLAGS.run_target: if FLAGS.run_target:
for serialno in target_devices: for dev in target_devices:
if target_abi not in set( if target_abi not in dev[YAMLKeyword.target_abis]:
sh_commands.adb_supported_abis(serialno)):
print("Skip device %s which does not support ABI %s" % print("Skip device %s which does not support ABI %s" %
(serialno, target_abi)) (dev, target_abi))
continue continue
stdouts = sh_commands.adb_run( device_wrapper = DeviceWrapper(dev)
stdouts = device_wrapper.run(
target_abi, target_abi,
serialno,
host_bin_path, host_bin_path,
bin_name, bin_name,
args=FLAGS.args, args=FLAGS.args,
opencl_profiling=True, opencl_profiling=True,
vlog_level=0, vlog_level=0,
device_bin_path="/data/local/tmp/mace",
out_of_range_check=True, out_of_range_check=True,
address_sanitizer=FLAGS.address_sanitizer, address_sanitizer=FLAGS.address_sanitizer,
simpleperf=FLAGS.simpleperf) simpleperf=FLAGS.simpleperf)
device_properties = sh_commands.adb_getprop_by_serialno( globals()[FLAGS.stdout_processor](stdouts, dev,
serialno)
globals()[FLAGS.stdout_processor](stdouts, device_properties,
target_abi) target_abi)
......
...@@ -22,6 +22,14 @@ mkdir -p $LIB_DIR/arm64-v8a/cpu_gpu ...@@ -22,6 +22,14 @@ mkdir -p $LIB_DIR/arm64-v8a/cpu_gpu
rm -rf $LIB_DIR/linux-x86-64 rm -rf $LIB_DIR/linux-x86-64
mkdir -p $LIB_DIR/linux-x86-64 mkdir -p $LIB_DIR/linux-x86-64
rm -rf $LIB_DIR/arm_linux_gnueabihf
mkdir -p $LIB_DIR/arm_linux_gnueabihf/cpu_gpu
rm -rf $LIB_DIR/aarch64_linux_gnu
mkdir -p $LIB_DIR/aarch64_linux_gnu/cpu_gpu
# build shared libraries # build shared libraries
echo "build shared lib for armeabi-v7a + cpu_gpu_dsp" echo "build shared lib for armeabi-v7a + cpu_gpu_dsp"
bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --define hexagon=true --define quantize=true --cpu=armeabi-v7a bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --define hexagon=true --define quantize=true --cpu=armeabi-v7a
...@@ -36,6 +44,14 @@ echo "build shared lib for arm64-v8a + cpu_gpu" ...@@ -36,6 +44,14 @@ echo "build shared lib for arm64-v8a + cpu_gpu"
bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --define quantize=true --cpu=arm64-v8a bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --define quantize=true --cpu=arm64-v8a
cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/arm64-v8a/cpu_gpu/ cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/arm64-v8a/cpu_gpu/
echo "build shared lib for arm_linux_gnueabihf + cpu_gpu"
bazel build --config arm_linux_gnueabihf --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --define quantize=true
cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/arm_linux_gnueabihf/cpu_gpu/
echo "build shared lib for aarch64_linux_gnu + cpu_gpu"
bazel build --config aarch64_linux_gnu --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --define quantize=true
cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/aarch64_linux_gnu/cpu_gpu/
if [[ "$OSTYPE" != "darwin"* ]];then if [[ "$OSTYPE" != "darwin"* ]];then
echo "build shared lib for linux-x86-64" echo "build shared lib for linux-x86-64"
bazel build mace/libmace:libmace_dynamic --config optimization --define quantize=true --define openmp=true bazel build mace/libmace:libmace_dynamic --config optimization --define quantize=true --define openmp=true
...@@ -56,6 +72,14 @@ echo "build static lib for arm64-v8a + cpu_gpu" ...@@ -56,6 +72,14 @@ echo "build static lib for arm64-v8a + cpu_gpu"
bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=true --define opencl=true --define quantize=true --cpu=arm64-v8a bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=true --define opencl=true --define quantize=true --cpu=arm64-v8a
cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/arm64-v8a/cpu_gpu/ cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/arm64-v8a/cpu_gpu/
echo "build static lib for arm_linux_gnueabihf + cpu_gpu"
bazel build --config arm_linux_gnueabihf --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=true --define opencl=true --define quantize=true
cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/arm_linux_gnueabihf/cpu_gpu/
echo "build static lib for aarch64_linux_gnu + cpu_gpu"
bazel build --config aarch64_linux_gnu --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=true --define opencl=true --define quantize=true
cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/aarch64_linux_gnu/cpu_gpu/
if [[ "$OSTYPE" != "darwin"* ]];then if [[ "$OSTYPE" != "darwin"* ]];then
echo "build static lib for linux-x86-64" echo "build static lib for linux-x86-64"
bazel build mace/libmace:libmace_static --config optimization --define quantize=true --define openmp=true bazel build mace/libmace:libmace_static --config optimization --define quantize=true --define openmp=true
......
...@@ -13,7 +13,9 @@ ...@@ -13,7 +13,9 @@
# limitations under the License. # limitations under the License.
import enum import enum
import hashlib
import re import re
import os
import six import six
...@@ -135,3 +137,340 @@ def formatted_file_name(input_file_name, input_name): ...@@ -135,3 +137,340 @@ def formatted_file_name(input_file_name, input_name):
for c in input_name: for c in input_name:
res += c if c.isalnum() else '_' res += c if c.isalnum() else '_'
return res return res
def md5sum(s):
md5 = hashlib.md5()
md5.update(s.encode('utf-8'))
return md5.hexdigest()
def get_build_binary_dir(library_name, target_abi):
return "%s/%s/%s/%s" % (
BUILD_OUTPUT_DIR, library_name, BUILD_TMP_DIR_NAME, target_abi)
def get_model_lib_output_path(library_name, abi):
lib_output_path = os.path.join(BUILD_OUTPUT_DIR, library_name,
MODEL_OUTPUT_DIR_NAME, abi,
"%s.a" % library_name)
return lib_output_path
def check_model_converted(library_name, model_name,
model_graph_format, model_data_format,
abi):
model_output_dir = \
'%s/%s/%s' % (BUILD_OUTPUT_DIR, library_name, MODEL_OUTPUT_DIR_NAME)
if model_graph_format == ModelFormat.file:
mace_check(os.path.exists("%s/%s.pb" % (model_output_dir, model_name)),
ModuleName.RUN,
"You should convert model first.")
else:
model_lib_path = get_model_lib_output_path(library_name, abi)
mace_check(os.path.exists(model_lib_path),
ModuleName.RUN,
"You should convert model first.")
if model_data_format == ModelFormat.file:
mace_check(os.path.exists("%s/%s.data" %
(model_output_dir, model_name)),
ModuleName.RUN,
"You should convert model first.")
def parse_device_type(runtime):
device_type = ""
if runtime == RuntimeType.dsp:
device_type = DeviceType.HEXAGON
elif runtime == RuntimeType.gpu:
device_type = DeviceType.GPU
elif runtime == RuntimeType.cpu:
device_type = DeviceType.CPU
return device_type
def sha256_checksum(fname):
hash_func = hashlib.sha256()
with open(fname, "rb") as f:
for chunk in iter(lambda: f.read(4096), b""):
hash_func.update(chunk)
return hash_func.hexdigest()
def get_model_files(model_file_path,
model_sha256_checksum,
model_output_dir,
weight_file_path="",
weight_sha256_checksum=""):
model_file = model_file_path
weight_file = weight_file_path
if model_file_path.startswith("http://") or \
model_file_path.startswith("https://"):
model_file = model_output_dir + "/" + md5sum(model_file_path) + ".pb"
if not os.path.exists(model_file) or \
sha256_checksum(model_file) != model_sha256_checksum:
MaceLogger.info("Downloading model, please wait ...")
six.moves.urllib.request.urlretrieve(model_file_path, model_file)
MaceLogger.info("Model downloaded successfully.")
if sha256_checksum(model_file) != model_sha256_checksum:
MaceLogger.error(ModuleName.MODEL_CONVERTER,
"model file sha256checksum not match")
if weight_file_path.startswith("http://") or \
weight_file_path.startswith("https://"):
weight_file = \
model_output_dir + "/" + md5sum(weight_file_path) + ".caffemodel"
if not os.path.exists(weight_file) or \
sha256_checksum(weight_file) != weight_sha256_checksum:
MaceLogger.info("Downloading model weight, please wait ...")
six.moves.urllib.request.urlretrieve(weight_file_path, weight_file)
MaceLogger.info("Model weight downloaded successfully.")
if weight_file:
if sha256_checksum(weight_file) != weight_sha256_checksum:
MaceLogger.error(ModuleName.MODEL_CONVERTER,
"weight file sha256checksum not match")
return model_file, weight_file
def get_opencl_binary_output_path(library_name, target_abi, device):
target_soc = device.target_socs
device_name = device.device_name
return '%s/%s/%s/%s/%s_%s.%s.%s.bin' % \
(BUILD_OUTPUT_DIR,
library_name,
OUTPUT_OPENCL_BINARY_DIR_NAME,
target_abi,
library_name,
OUTPUT_OPENCL_BINARY_FILE_NAME,
device_name,
target_soc)
def get_opencl_parameter_output_path(library_name, target_abi, device):
target_soc = device.target_socs
device_name = device.device_name
return '%s/%s/%s/%s/%s_%s.%s.%s.bin' % \
(BUILD_OUTPUT_DIR,
library_name,
OUTPUT_OPENCL_BINARY_DIR_NAME,
target_abi,
library_name,
OUTPUT_OPENCL_PARAMETER_FILE_NAME,
device_name,
target_soc)
def get_build_model_dirs(library_name,
model_name,
target_abi,
device,
model_file_path):
device_name = device.device_name
target_socs = device.target_socs
model_path_digest = md5sum(model_file_path)
model_output_base_dir = '{}/{}/{}/{}/{}'.format(
BUILD_OUTPUT_DIR, library_name, BUILD_TMP_DIR_NAME,
model_name, model_path_digest)
if target_abi == ABIType.host:
model_output_dir = '%s/%s' % (model_output_base_dir, target_abi)
elif not target_socs or not device.address:
model_output_dir = '%s/%s/%s' % (model_output_base_dir,
BUILD_TMP_GENERAL_OUTPUT_DIR_NAME,
target_abi)
else:
model_output_dir = '{}/{}_{}/{}'.format(
model_output_base_dir,
device_name,
target_socs,
target_abi
)
mace_model_dir = '{}/{}/{}'.format(
BUILD_OUTPUT_DIR, library_name, MODEL_OUTPUT_DIR_NAME
)
return model_output_base_dir, model_output_dir, mace_model_dir
def abi_to_internal(abi):
if abi in [ABIType.armeabi_v7a, ABIType.arm64_v8a]:
return abi
if abi == ABIType.arm64:
return ABIType.aarch64
if abi == ABIType.armhf:
return ABIType.armeabi_v7a
def infer_toolchain(abi):
if abi in [ABIType.armeabi_v7a, ABIType.arm64_v8a]:
return ToolchainType.android
if abi == ABIType.armhf:
return ToolchainType.arm_linux_gnueabihf
if abi == ABIType.arm64:
return ToolchainType.aarch64_linux_gnu
return ''
################################
# YAML key word
################################
class YAMLKeyword(object):
library_name = 'library_name'
target_abis = 'target_abis'
target_socs = 'target_socs'
model_graph_format = 'model_graph_format'
model_data_format = 'model_data_format'
models = 'models'
platform = 'platform'
device_name = 'device_name'
system = 'system'
address = 'address'
username = 'username'
password = 'password'
model_file_path = 'model_file_path'
model_sha256_checksum = 'model_sha256_checksum'
weight_file_path = 'weight_file_path'
weight_sha256_checksum = 'weight_sha256_checksum'
subgraphs = 'subgraphs'
input_tensors = 'input_tensors'
input_shapes = 'input_shapes'
input_ranges = 'input_ranges'
output_tensors = 'output_tensors'
output_shapes = 'output_shapes'
check_tensors = 'check_tensors'
check_shapes = 'check_shapes'
runtime = 'runtime'
data_type = 'data_type'
input_data_types = 'input_data_types'
input_data_formats = 'input_data_formats'
output_data_formats = 'output_data_formats'
limit_opencl_kernel_time = 'limit_opencl_kernel_time'
nnlib_graph_mode = 'nnlib_graph_mode'
obfuscate = 'obfuscate'
winograd = 'winograd'
quantize = 'quantize'
quantize_range_file = 'quantize_range_file'
change_concat_ranges = 'change_concat_ranges'
validation_inputs_data = 'validation_inputs_data'
validation_threshold = 'validation_threshold'
graph_optimize_options = 'graph_optimize_options' # internal use for now
cl_mem_type = 'cl_mem_type'
################################
# SystemType
################################
class SystemType:
host = 'host'
android = 'android'
arm_linux = 'arm_linux'
################################
# common device str
################################
PHONE_DATA_DIR = '/data/local/tmp/mace_run'
DEVICE_DATA_DIR = '/tmp/data/mace_run'
DEVICE_INTERIOR_DIR = PHONE_DATA_DIR + "/interior"
BUILD_OUTPUT_DIR = 'builds'
BUILD_TMP_DIR_NAME = '_tmp'
BUILD_DOWNLOADS_DIR = BUILD_OUTPUT_DIR + '/downloads'
BUILD_TMP_GENERAL_OUTPUT_DIR_NAME = 'general'
MODEL_OUTPUT_DIR_NAME = 'model'
EXAMPLE_STATIC_NAME = "example_static"
EXAMPLE_DYNAMIC_NAME = "example_dynamic"
EXAMPLE_STATIC_TARGET = "//mace/examples/cli:" + EXAMPLE_STATIC_NAME
EXAMPLE_DYNAMIC_TARGET = "//mace/examples/cli:" + EXAMPLE_DYNAMIC_NAME
MACE_RUN_STATIC_NAME = "mace_run_static"
MACE_RUN_DYNAMIC_NAME = "mace_run_dynamic"
MACE_RUN_STATIC_TARGET = "//mace/tools/validation:" + MACE_RUN_STATIC_NAME
MACE_RUN_DYNAMIC_TARGET = "//mace/tools/validation:" + MACE_RUN_DYNAMIC_NAME
CL_COMPILED_BINARY_FILE_NAME = "mace_cl_compiled_program.bin"
BUILD_TMP_OPENCL_BIN_DIR = 'opencl_bin'
LIBMACE_DYNAMIC_PATH = "bazel-bin/mace/libmace/libmace.so"
CL_TUNED_PARAMETER_FILE_NAME = "mace_run.config"
MODEL_HEADER_DIR_PATH = 'include/mace/public'
OUTPUT_LIBRARY_DIR_NAME = 'lib'
OUTPUT_OPENCL_BINARY_DIR_NAME = 'opencl'
OUTPUT_OPENCL_BINARY_FILE_NAME = 'compiled_opencl_kernel'
OUTPUT_OPENCL_PARAMETER_FILE_NAME = 'tuned_opencl_parameter'
CODEGEN_BASE_DIR = 'mace/codegen'
MODEL_CODEGEN_DIR = CODEGEN_BASE_DIR + '/models'
ENGINE_CODEGEN_DIR = CODEGEN_BASE_DIR + '/engine'
LIB_CODEGEN_DIR = CODEGEN_BASE_DIR + '/lib'
LIBMACE_SO_TARGET = "//mace/libmace:libmace.so"
LIBMACE_STATIC_TARGET = "//mace/libmace:libmace_static"
LIBMACE_STATIC_PATH = "bazel-genfiles/mace/libmace/libmace.a"
MODEL_LIB_TARGET = "//mace/codegen:generated_models"
MODEL_LIB_PATH = "bazel-genfiles/mace/codegen/libgenerated_models.a"
QUANTIZE_STAT_TARGET = "//mace/tools/quantization:quantize_stat"
BM_MODEL_STATIC_NAME = "benchmark_model_static"
BM_MODEL_DYNAMIC_NAME = "benchmark_model_dynamic"
BM_MODEL_STATIC_TARGET = "//mace/benchmark:" + BM_MODEL_STATIC_NAME
BM_MODEL_DYNAMIC_TARGET = "//mace/benchmark:" + BM_MODEL_DYNAMIC_NAME
ALL_SOC_TAG = 'all'
################################
# Model File Format
################################
class ModelFormat(object):
file = 'file'
code = 'code'
################################
# ABI Type
################################
class ABIType(object):
armeabi_v7a = 'armeabi-v7a'
arm64_v8a = 'arm64-v8a'
arm64 = 'arm64'
aarch64 = 'aarch64'
armhf = 'armhf'
host = 'host'
################################
# Module name
################################
class ModuleName(object):
YAML_CONFIG = 'YAML CONFIG'
MODEL_CONVERTER = 'Model Converter'
RUN = 'RUN'
BENCHMARK = 'Benchmark'
#################################
# mace lib type
#################################
class MACELibType(object):
static = 0
dynamic = 1
#################################
# Run time type
#################################
class RuntimeType(object):
cpu = 'cpu'
gpu = 'gpu'
dsp = 'dsp'
cpu_gpu = 'cpu+gpu'
#################################
# Tool chain Type
#################################
class ToolchainType:
android = 'android'
arm_linux_gnueabihf = 'arm_linux_gnueabihf'
aarch64_linux_gnu = 'aarch64_linux_gnu'
This diff is collapsed.
This diff is collapsed.
import argparse import argparse
import os import os
import sys import sys
import six
import tensorflow as tf import tensorflow as tf
# TODO(liyin): use dataset api and estimator with distributed strategy # TODO(liyin): use dataset api and estimator with distributed strategy
...@@ -70,7 +73,7 @@ def images_to_tensors(input_files, image_shape, mean_values=None): ...@@ -70,7 +73,7 @@ def images_to_tensors(input_files, image_shape, mean_values=None):
def main(unused_args): def main(unused_args):
if not os.path.exists(FLAGS.input): if not os.path.exists(FLAGS.input):
print ("input does not exist: %s" % FLAGS.input) print("input does not exist: %s" % FLAGS.input)
sys.exit(-1) sys.exit(-1)
input_files = [] input_files = []
......
import argparse import argparse
import os import os
import sys import sys
import six
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
...@@ -53,7 +56,7 @@ def tensors_to_images(input_files, image_shape): ...@@ -53,7 +56,7 @@ def tensors_to_images(input_files, image_shape):
def main(unused_args): def main(unused_args):
if not os.path.exists(FLAGS.input): if not os.path.exists(FLAGS.input):
print ("input does not exist: %s" % FLAGS.input) print("input does not exist: %s" % FLAGS.input)
sys.exit(-1) sys.exit(-1)
input_files = [] input_files = []
......
This diff is collapsed.