提交 52a379a2 编写于 作者: 卢旭辉

Merge branch 'cmake' into 'master'

Add support for CMake

See merge request !1086
BasedOnStyle: google
MaxEmptyLinesToKeep: 3
DerivePointerAlignment: false
PointerAlignment: Right
BinPackParameters: false
bazel-* bazel-*
tags
.idea/
cmake-build-debug/
*.pyc
mace/codegen/models/
mace/codegen/opencl/
mace/codegen/opencl_bin/
mace/codegen/tuning/
mace/codegen/version/
mace/codegen/engine/
mace/codegen/lib/
build/ build/
cmake-build/
cmake-build-debug/
docs/_build/ docs/_build/
*.a *.a
.idea/
.vscode/ .vscode/
builds/ tags
mace/examples/android/macelibrary/src/main/cpp/mace/
\.project/ \.project/
*swp *swp
*~ *~
*.pyc
.python-version .python-version
mace/examples/android/macelibrary/src/main/cpp/include/mace/public/ mace/codegen/models/
mace/codegen/opencl/
mace/examples/android/macelibrary/src/main/cpp/lib/arm64-v8a/ mace/codegen/opencl_bin/
mace/codegen/tuning/
mace/codegen/version/
mace/codegen/engine/
mace/codegen/lib/
mace/examples/android/macelibrary/src/main/jniLibs/arm64-v8a/ examples/android/macelibrary/src/main/cpp/mace/
examples/android/macelibrary/src/main/cpp/include/
examples/android/macelibrary/src/main/cpp/lib/arm64-v8a/
examples/android/macelibrary/src/main/jniLibs/arm64-v8a/
stages: stages:
- cpplint - linting
- pycodestyle - build
- docs - test
- platform_compatible_tests - extra
- build_libraries
- ndk_versions_compatible_tests
- ops_test_disable_neon
- ops_test
- api_test
- model_tests
- quantization_tests
- build_android_demo
- ops_benchmark
- extra_tests
- so_size_check
- dynamic_link_test
- python3_test
cpplint: cpplint:
stage: cpplint stage: linting
script: script:
- curl -o cpplint.py https://raw.githubusercontent.com/google/styleguide/gh-pages/cpplint/cpplint.py - sh tools/cpplint.sh
- python cpplint.py --linelength=80 --counting=detailed $(find mace -name "*.h" -or -name "*.cc")
pycodestyle: pylint:
stage: pycodestyle stage: linting
script: script:
- pycodestyle $(find -name "*.py") - pycodestyle $(find -name "*.py")
docs: build_docs:
stage: docs stage: build
script: script:
- cd docs - cd docs
- make html - make html
...@@ -46,58 +32,46 @@ docs: ...@@ -46,58 +32,46 @@ docs:
paths: paths:
- docs/_build - docs/_build
platform_compatible_tests: cmake_build_android-armeabi-v7a:
stage: platform_compatible_tests stage: build
script: script:
- bazel build mace/core:core --define openmp=false - sh tools/cmake-build-android-armeabi-v7a-full.sh
- bazel build --config arm_linux_gnueabihf --define openmp=false --define opencl=true --define neon=true //mace/libmace:libmace.so - LIBMACE32_FULL_SIZE=`stat -c%s cmake-build/android-armeabi-v7a-full/install/lib/libmace.so`
- bazel build --config aarch64_linux_gnu --define openmp=false --define opencl=true --define neon=true //mace/libmace:libmace.so - if (( LIBMACE32_FULL_SIZE > 2200000 )) ; then echo "The libmace.so size too large"; exit 1; fi
build_libraries: cmake_build_android-arm64-v8:
stage: build_libraries stage: build
script: script:
- bash tools/build-standalone-lib.sh - sh tools/cmake-build-android-arm64-v8a-full.sh
- LIBMACE64_FULL_SIZE=`stat -c%s cmake-build/android-arm64-v8a-full/install/lib/libmace.so`
- if (( LIBMACE64_FULL_SIZE > 3100000 )) ; then echo "The libmace.so size too large"; exit 1; fi
ndk_versions_compatible_tests: bazel_build:
stage: ndk_versions_compatible_tests stage: build
script: script:
- DEFAULT_NDK_PATH=$ANDROID_NDK_HOME - sh tools/bazel-build-standalone-lib.sh
- prefix_path=${DEFAULT_NDK_PATH%android-ndk-*} only:
- if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi - triggers
- >
if ping -c 1 v9.git.n.xiaomi.com 1>/dev/null 2>&1; then
GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git
DEVICE_CONF_FILE=generic-mobile-devices/devices.yml
fi
- >
for ndk in android-ndk-r15c android-ndk-r16 android-ndk-r17b;
do
new_ndk_path=${prefix_path}${ndk};
if [ "$new_ndk_path" != "$DEFAULT_NDK_PATH" ]; then
export ANDROID_NDK_HOME=$new_ndk_path;
export PATH=$ANDROID_NDK_HOME:$PATH;
echo "ndk path: $ANDROID_NDK_HOME";
python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a --target_socs=$TARGET_SOCS --enable_neon=false --args="--gtest_filter=ActivationOpTest*" || exit 1;
python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a --target_socs=$TARGET_SOCS --args="--gtest_filter=ActivationOpTest*" || exit 1;
python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=arm64-v8a --target_socs=$TARGET_SOCS --args="--gtest_filter=ActivationOpTest*" || exit 1;
fi
done
- export ANDROID_NDK_HOME=$DEFAULT_NDK_PATH
- export PATH=$ANDROID_NDK_HOME:$PATH
ops_test_disable_neon: cmake_build:
stage: ops_test_disable_neon stage: build
script: script:
- if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi - sh tools/cmake-build-standalone-lib.sh
- > only:
if ping -c 1 v9.git.n.xiaomi.com 1>/dev/null 2>&1; then - triggers
GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git
DEVICE_CONF_FILE=generic-mobile-devices/devices.yml
fi
- python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64 --target_socs=$TARGET_SOCS --enable_neon=false || exit 1;
ops_test: build_android_demo:
stage: ops_test stage: build
script:
- ANDROID_NDK_HOME_SAVED=${ANDROID_NDK_HOME}
- export ANDROID_NDK_HOME=/opt/android-ndk-r17b
- pushd examples/android/ && bash build.sh static && bash build.sh dynamic && popd
- export ANDROID_NDK_HOME=${ANDROID_NDK_HOME_SAVED}
only:
- triggers
mace_cc_test:
stage: test
script: script:
- if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi - if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi
- > - >
...@@ -105,26 +79,18 @@ ops_test: ...@@ -105,26 +79,18 @@ ops_test:
GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git
DEVICE_CONF_FILE=generic-mobile-devices/devices.yml DEVICE_CONF_FILE=generic-mobile-devices/devices.yml
fi fi
- python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64 --target_socs=$TARGET_SOCS || exit 1; - python tools/bazel_adb_run.py --target="//test/ccunit:mace_cc_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64 --target_socs=$TARGET_SOCS
api_test: mace_cc_benchmark:
stage: api_test stage: test
script: script:
- if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi - if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi
- python tools/bazel_adb_run.py --target="//mace/test:mace_api_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS || exit 1; - python tools/bazel_adb_run.py --target="//test/ccbenchmark:mace_cc_benchmark" --run_target=True --stdout_processor=ops_benchmark_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS --args="--filter=.*SIGMOID.*"
- python tools/bazel_adb_run.py --target="//mace/test:mace_api_mt_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS || exit 1; only:
- python tools/bazel_adb_run.py --target="//mace/test:mace_api_exception_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS || exit 1; - triggers
- >
if ping -c 1 v9.git.n.xiaomi.com 1>/dev/null 2>&1; then
GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git
DEVICE_CONF_FILE=generic-mobile-devices/devices.yml
fi
- python tools/bazel_adb_run.py --target="//mace/test:mace_api_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=arm64 --target_socs=$TARGET_SOCS || exit 1;
- python tools/bazel_adb_run.py --target="//mace/test:mace_api_mt_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=arm64 --target_socs=$TARGET_SOCS || exit 1;
- python tools/bazel_adb_run.py --target="//mace/test:mace_api_exception_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=arm64 --target_socs=$TARGET_SOCS || exit 1;
model_tests: model_tests:
stage: model_tests stage: test
script: script:
- pwd - pwd
- rm -rf mace-models - rm -rf mace-models
...@@ -137,25 +103,22 @@ model_tests: ...@@ -137,25 +103,22 @@ model_tests:
DEVICE_CONF_FILE=generic-mobile-devices/devices.yml DEVICE_CONF_FILE=generic-mobile-devices/devices.yml
fi fi
- if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi - if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi
- > - python tools/converter.py convert --config=${CONF_FILE} --target_socs=$TARGET_SOCS --model_graph_format=file --model_data_format=file --cl_mem_type=buffer
python tools/converter.py convert --config=${CONF_FILE} --target_socs=$TARGET_SOCS --model_graph_format=file --model_data_format=file --cl_mem_type=buffer || exit 1; - python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --device_yml=${DEVICE_CONF_FILE} --round=1 --target_abis=armeabi-v7a,arm64 --validate --model_graph_format=file --model_data_format=file
python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --device_yml=${DEVICE_CONF_FILE} --round=1 --target_abis=armeabi-v7a,arm64 --validate --model_graph_format=file --model_data_format=file || exit 1; - python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --device_yml=${DEVICE_CONF_FILE} --example --target_abis=armeabi-v7a,arm64 --round=1 --validate --model_graph_format=file --model_data_format=file
python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --device_yml=${DEVICE_CONF_FILE} --example --target_abis=armeabi-v7a,arm64 --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1;
- CONF_FILE=mace-models/mobilenet-v2/mobilenet-v2-host.yml - CONF_FILE=mace-models/mobilenet-v2/mobilenet-v2-host.yml
- > - python tools/converter.py convert --config=${CONF_FILE} --target_socs=$TARGET_SOCS --model_graph_format=file --model_data_format=file
python tools/converter.py convert --config=${CONF_FILE} --target_socs=$TARGET_SOCS --model_graph_format=file --model_data_format=file || exit 1; - python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --round=1 --validate --model_graph_format=file --model_data_format=file --address_sanitizer
python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --round=1 --validate --model_graph_format=file --model_data_format=file --address_sanitizer || exit 1; - python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --example --round=1 --validate --model_graph_format=file --model_data_format=file
python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --example --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1; - python tools/converter.py benchmark --config=${CONF_FILE} --target_socs=$TARGET_SOCS --round=5 --model_graph_format=file --model_data_format=file
python tools/converter.py benchmark --config=${CONF_FILE} --target_socs=$TARGET_SOCS --round=5 --model_graph_format=file --model_data_format=file || exit 1; - python tools/converter.py convert --config=${CONF_FILE} --target_socs=$TARGET_SOCS --model_graph_format=code --model_data_format=file
python tools/converter.py convert --config=${CONF_FILE} --target_socs=$TARGET_SOCS --model_graph_format=code --model_data_format=file || exit 1; - python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --round=1 --validate --model_graph_format=code --model_data_format=file
python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --round=1 --validate --model_graph_format=code --model_data_format=file || exit 1; - python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --example --round=1 --validate --model_graph_format=code --model_data_format=file
python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --example --round=1 --validate --model_graph_format=code --model_data_format=file || exit 1; - python tools/converter.py benchmark --config=${CONF_FILE} --target_socs=$TARGET_SOCS --round=5 --model_graph_format=code --model_data_format=file
python tools/converter.py benchmark --config=${CONF_FILE} --target_socs=$TARGET_SOCS --round=5 --model_graph_format=code --model_data_format=file || exit 1;
- rm -rf mace-models - rm -rf mace-models
quantization_tests: quantization_tests:
stage: quantization_tests stage: test
script: script:
- pwd - pwd
- rm -rf mace-models - rm -rf mace-models
...@@ -174,50 +137,11 @@ quantization_tests: ...@@ -174,50 +137,11 @@ quantization_tests:
python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --device_yml=${DEVICE_CONF_FILE} --example --round=1 --validate --layers=0 --model_graph_format=file --model_data_format=file || exit 1; python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --device_yml=${DEVICE_CONF_FILE} --example --round=1 --validate --layers=0 --model_graph_format=file --model_data_format=file || exit 1;
done done
- rm -rf mace-models - rm -rf mace-models
only:
- triggers
build_android_demo: dynamic_linking_test:
stage: build_android_demo stage: extra
script:
- pushd mace/examples/android/ && bash build.sh static && bash build.sh dynamic && popd
ops_benchmark:
stage: ops_benchmark
script:
- if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi
- python tools/bazel_adb_run.py --target="//mace/ops:ops_benchmark" --run_target=True --stdout_processor=ops_benchmark_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS --args="--filter=.*SIGMOID.*" || exit 1;
extra_tests:
stage: extra_tests
script:
- if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi
- >
if ping -c 1 v9.git.n.xiaomi.com 1>/dev/null 2>&1; then
GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git
DEVICE_CONF_FILE=generic-mobile-devices/devices.yml
fi
- python tools/bazel_adb_run.py --target="//mace/utils:utils_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64 --target_socs=$TARGET_SOCS || exit 1;
- python tools/bazel_adb_run.py --target="//mace/port:port_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64 --target_socs=$TARGET_SOCS || exit 1;
so_size_check:
stage: so_size_check
script:
- DYNAMIC_LIB_PATH="bazel-bin/mace/libmace/libmace.so"
- bazel build -s --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=false --define opencl=false --define quantize=false --cpu=armeabi-v7a
- CURRENT_LIBMACE_SO_SIZE=`ls -l $DYNAMIC_LIB_PATH --block-size=K -s | cut -f 1 -d "K"`
- TARGET_MACE_WORK_DIR=`mktemp -d`
- pushd $TARGET_MACE_WORK_DIR
- GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@github.com:XiaoMi/mace.git
- pushd mace
- bazel build -s --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=false --define opencl=false --define quantize=false --cpu=armeabi-v7a
- TARGET_LIBMACE_SO_SIZE=`ls -l $DYNAMIC_LIB_PATH --block-size=K -s | cut -f 1 -d "K"`
- popd
- popd
- rm -rf $TARGET_MACE_WORK_DIR
- echo "The libmace.so size grows from $TARGET_LIBMACE_SO_SIZE(KB) to $CURRENT_LIBMACE_SO_SIZE(KB)"
- if [ $TARGET_LIBMACE_SO_SIZE -lt `expr \( $CURRENT_LIBMACE_SO_SIZE - $TARGET_LIBMACE_SO_SIZE \) \* 10` ]; then echo "The libmace.so size grows larger than 10%"; exit 1; fi
dynamic_link_test:
stage: dynamic_link_test
script: script:
- pwd - pwd
- rm -rf mace-models - rm -rf mace-models
...@@ -230,21 +154,8 @@ dynamic_link_test: ...@@ -230,21 +154,8 @@ dynamic_link_test:
DEVICE_CONF_FILE=generic-mobile-devices/devices.yml DEVICE_CONF_FILE=generic-mobile-devices/devices.yml
fi fi
- if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi - if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi
- > - python tools/converter.py convert --config=${CONF_FILE} --target_socs=$TARGET_SOCS --model_graph_format=file --model_data_format=file --cl_mem_type=buffer
python tools/converter.py convert --config=${CONF_FILE} --target_socs=$TARGET_SOCS --model_graph_format=file --model_data_format=file --cl_mem_type=buffer || exit 1; - python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --device_yml=${DEVICE_CONF_FILE} --example --mace_lib_type=dynamic --target_abis=armeabi-v7a,arm64 --round=1 --validate --model_graph_format=file --model_data_format=file
python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --device_yml=${DEVICE_CONF_FILE} --example --mace_lib_type=dynamic --target_abis=armeabi-v7a,arm64 --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1;
- rm -rf mace-models - rm -rf mace-models
only:
python3_test: - triggers
stage: python3_test
script:
- pyenv local 3.6.3
- rm -rf mace-models
- GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@github.com:XiaoMi/mace-models.git
- CONF_FILE=mace-models/mobilenet-v1/mobilenet-v1.yml
- if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi
- >
python tools/converter.py convert --config=${CONF_FILE} --target_socs=$TARGET_SOCS --model_graph_format=file --model_data_format=file || exit 1;
python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1;
- python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --run_target=False --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a --target_socs=$TARGET_SOCS --validate || exit 1;
- pyenv local 2.7.14
# Baseline requirement is CMake 3.2. Enabling CUDA raises the requirement
# to 3.8 (Linux/Mac) / 3.9 (Windows) in the MACE_ENABLE_CUDA branch below.
cmake_minimum_required(VERSION 3.2 FATAL_ERROR)
message("CMAKE_VERSION: ${CMAKE_VERSION}")

project(mace C CXX)

# Build feature toggles. All cache options are MACE_-prefixed because the
# CMake cache is global. Everything defaults to OFF except the binary-size
# optimization and literal obfuscation, which default to ON.
option(MACE_ENABLE_NEON "whether to enable NEON support" OFF)
option(MACE_ENABLE_QUANTIZE "whether to enable NEON int8 support" OFF)
option(MACE_ENABLE_OPENCL "whether to enable OpenCL support" OFF)
option(MACE_ENABLE_CUDA "whether to enable CUDA support" OFF)
option(MACE_ENABLE_HEXAGON_DSP "whether to enable Hexagon DSP support" OFF)
option(MACE_ENABLE_HEXAGON_HTA "whether to enable Hexagon HTA support" OFF)
option(MACE_ENABLE_TESTS "whether to build c++ unit tests" OFF)
option(MACE_ENABLE_BENCHMARKS "whether to build c++ micro benchmarks" OFF)
option(MACE_ENABLE_EXAMPLES "whether to build examples" OFF)
option(MACE_ENABLE_OPT_SIZE "whether to build with optimized binary size" ON)
option(MACE_ENABLE_OBFUSCATE "whether to build with code obfuscation" ON)
message("CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}")

# TODO make these flags well defined and organized
# TODO enable sanitizer
#
# Flag accumulators:
#   MACE_CC_FLAGS / MACE_LINKER_FLAGS  - applied to all compiled code
#   MACE_CODE_CC_FLAGS                 - applied only to mace's own code
#                                        (third_party excluded)
set(MACE_CC_FLAGS "${MACE_CC_FLAGS} -fPIC")

if(MACE_ENABLE_OPT_SIZE)
  if(APPLE)
    # Apple's linker uses dead-strip rather than the GNU --gc-sections flags.
    set(MACE_LINKER_FLAGS "${MACE_LINKER_FLAGS} -Wl,-dead_strip -Wl,-dead_strip_dylibs")
  else()
    # Strip all symbols and garbage-collect unreferenced sections; the
    # -ffunction-sections/-fdata-sections compile flags make --gc-sections
    # effective at function granularity.
    set(MACE_LINKER_FLAGS "${MACE_LINKER_FLAGS} -Wl,--strip-all -Wl,--gc-sections")
    set(MACE_CC_FLAGS "${MACE_CC_FLAGS} -ffunction-sections -fdata-sections")
  endif()
  # Hide non-exported symbols to shrink the dynamic symbol table.
  set(MACE_CC_FLAGS "${MACE_CC_FLAGS} -fvisibility=hidden -fvisibility-inlines-hidden")
  # Disable RTTI and exceptions in mace code for size; the -D defines
  # presumably configure protobuf to match — TODO confirm against the
  # protobuf version pinned in third_party.
  set(MACE_CODE_CC_FLAGS "${MACE_CODE_CC_FLAGS} -fno-rtti -fno-exceptions -DGOOGLE_PROTOBUF_NO_RTTI -DPROTOBUF_USE_EXCEPTIONS=0")
endif()

# flags apply only to mace code (third_party excluded)
set(MACE_CODE_CC_FLAGS "${MACE_CODE_CC_FLAGS} -Wall -Werror")
set(MACE_CODE_CC_FLAGS "${MACE_CODE_CC_FLAGS} -std=c++11 -D_GLIBCXX_USE_C99_MATH_TR1")

if(IOS)
  # TODO correct the code (currently 64->32 narrowing warnings are demoted
  # from errors instead of being fixed)
  set(MACE_CODE_CC_FLAGS "${MACE_CODE_CC_FLAGS} -Wno-error=shorten-64-to-32")
endif()
# Per-feature compile definitions and toolchain tweaks. Each option maps
# to a -DMACE_ENABLE_* definition consumed by the sources.

if(MACE_ENABLE_NEON)
  add_definitions(-DMACE_ENABLE_NEON)
  if(ANDROID_ABI STREQUAL "armeabi-v7a")
    # Enable NEON fp16 support: upgrade the NDK's -mfpu=neon to neon-fp16.
    # Quoted so the flags string survives even if it ever contains semicolons.
    string(REPLACE "-mfpu=neon " "-mfpu=neon-fp16 " CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
  endif()
endif()

if(MACE_ENABLE_QUANTIZE)
  add_definitions(-DMACE_ENABLE_QUANTIZE)
  # Route gemmlowp through mace's thread pool instead of its own.
  add_definitions(-DGEMMLOWP_USE_MACE_THREAD_POOL)
  add_definitions(-DMACE_DEPTHWISE_U8_USE_MULTI_THREAD)
endif()

if(MACE_ENABLE_OPENCL)
  if(IOS)
    message(FATAL_ERROR "OpenCL is not supported for iOS")
  endif()
  add_definitions(-DMACE_ENABLE_OPENCL)
endif()

if(MACE_ENABLE_CUDA)
  # new CUDA support requires 3.8 for Linux/Mac, and 3.9 for Windows
  cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
  enable_language(CUDA)
endif()

if(MACE_ENABLE_HEXAGON_DSP OR MACE_ENABLE_HEXAGON_HTA)
  if(ANDROID_ABI STREQUAL "arm64-v8a")
    # Use gold linker to avoid linking check of libcdsprpc.so
    set(MACE_LINKER_FLAGS "${MACE_LINKER_FLAGS} -fuse-ld=gold")
  endif()
endif()

if(MACE_ENABLE_HEXAGON_DSP)
  if(NOT ANDROID)
    message(FATAL_ERROR "Hexagon DSP is only supported on Android")
  endif()
  # TODO rename the definition to -DMACE_ENABLE_HEXAGON_DSP
  add_definitions(-DMACE_ENABLE_HEXAGON)
endif()

if(MACE_ENABLE_HEXAGON_HTA)
  if(NOT ANDROID)
    message(FATAL_ERROR "Hexagon HTA is only supported on Android")
  endif()
  add_definitions(-DMACE_ENABLE_HEXAGON_HTA)
endif()

if(MACE_ENABLE_OBFUSCATE)
  add_definitions(-DMACE_OBFUSCATE_LITERALS)
endif()
# Apply the accumulated flags. These are GCC/Clang-style flags (-fPIC,
# -Wl,... etc.), so they are skipped for MSVC.
if(NOT MSVC)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MACE_CODE_CC_FLAGS} ${MACE_CC_FLAGS}")
  set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${MACE_LINKER_FLAGS}")
  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${MACE_LINKER_FLAGS}")
endif()

# Third-party dependencies (gemmlowp, protobuf, ...).
include(${PROJECT_SOURCE_DIR}/third_party/third_party.cmake)

# NOTE(review): directory-scoped include paths leak to every target below;
# prefer target_include_directories() if/when the targets are restructured.
include_directories("${PROJECT_SOURCE_DIR}")
include_directories("${PROJECT_SOURCE_DIR}/include")
include_directories("${PROJECT_BINARY_DIR}") # proto

add_subdirectory(include)
add_subdirectory(mace)

if(MACE_ENABLE_EXAMPLES)
  add_subdirectory(examples)
endif()

if(MACE_ENABLE_TESTS OR MACE_ENABLE_BENCHMARKS)
  add_subdirectory(test)
endif()
...@@ -18,7 +18,7 @@ ...@@ -18,7 +18,7 @@
[中文](README_zh.md) [中文](README_zh.md)
**Mobile AI Compute Engine** (or **MACE** for short) is a deep learning inference framework optimized for **Mobile AI Compute Engine** (or **MACE** for short) is a deep learning inference framework optimized for
mobile heterogeneous computing on Android, iOS and Linux devices. The design focuses on the following mobile heterogeneous computing on Android, iOS, Linux and Windows devices. The design focuses on the following
targets: targets:
* Performance * Performance
* Runtime is optimized with NEON, OpenCL and Hexagon, and * Runtime is optimized with NEON, OpenCL and Hexagon, and
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
[加入我们](JOBS.md) | [加入我们](JOBS.md) |
[English](README.md) [English](README.md)
**Mobile AI Compute Engine (MACE)** 是一个专为移动端异构计算平台(支持Android, iOS, Linux)优化的神经网络计算框架。 **Mobile AI Compute Engine (MACE)** 是一个专为移动端异构计算平台(支持Android, iOS, Linux, Windows)优化的神经网络计算框架。
主要从以下的角度做了专门的优化: 主要从以下的角度做了专门的优化:
* 性能 * 性能
* 代码经过NEON指令,OpenCL以及Hexagon HVX专门优化,并且采用 * 代码经过NEON指令,OpenCL以及Hexagon HVX专门优化,并且采用
......
...@@ -61,7 +61,6 @@ new_http_archive( ...@@ -61,7 +61,6 @@ new_http_archive(
strip_prefix = "half-code-356-trunk", strip_prefix = "half-code-356-trunk",
urls = [ urls = [
"https://cnbj1.fds.api.xiaomi.com/mace/third-party/half/half-code-356-trunk.zip", "https://cnbj1.fds.api.xiaomi.com/mace/third-party/half/half-code-356-trunk.zip",
"https://sourceforge.net/code-snapshots/svn/h/ha/half/code/half-code-356-trunk.zip",
], ],
) )
......
# CMake toolchain file for cross-compiling to 64-bit ARM (aarch64) Linux.
# Expects CROSSTOOL_ROOT to point at the extracted toolchain directory.
# Tested with gcc-linaro-7.3.1-2018.05-x86_64_aarch64-linux-gnu
# https://releases.linaro.org/components/toolchain/binaries/7.3-2018.05/
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR aarch64)
# Cross compilers taken from the toolchain under CROSSTOOL_ROOT.
set(CMAKE_C_COMPILER "${CROSSTOOL_ROOT}/bin/aarch64-linux-gnu-gcc")
set(CMAKE_CXX_COMPILER "${CROSSTOOL_ROOT}/bin/aarch64-linux-gnu-g++")
# find_*: look up programs on the host only; headers/libraries/packages
# only inside the target root, never in host paths.
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
# Target ARMv8-A; prepended so flags supplied by the user come later.
set(CMAKE_CXX_FLAGS "-march=armv8-a ${CMAKE_CXX_FLAGS}")
# CMake toolchain file for cross-compiling to 32-bit ARM Linux with the
# soft-float ABI (gnueabi). Expects CROSSTOOL_ROOT to point at the
# extracted toolchain directory.
# Tested with gcc-linaro-7.3.1-2018.05-x86_64_arm-linux-gnueabi
# https://releases.linaro.org/components/toolchain/binaries/7.3-2018.05/
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR arm)
# Cross compilers taken from the toolchain under CROSSTOOL_ROOT.
set(CMAKE_C_COMPILER "${CROSSTOOL_ROOT}/bin/arm-linux-gnueabi-gcc")
set(CMAKE_CXX_COMPILER "${CROSSTOOL_ROOT}/bin/arm-linux-gnueabi-g++")
# find_*: look up programs on the host only; headers/libraries/packages
# only inside the target root, never in host paths.
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
# ARMv7-A, softfp float ABI, NEON-VFPv4 FPU; prepended so flags supplied
# by the user come later.
set(CMAKE_CXX_FLAGS "-march=armv7-a -mfloat-abi=softfp -mfpu=neon-vfpv4 ${CMAKE_CXX_FLAGS}")
# CMake toolchain file for cross-compiling to 32-bit ARM Linux with the
# hard-float ABI (gnueabihf). Expects CROSSTOOL_ROOT to point at the
# extracted toolchain directory.
# Tested with gcc-linaro-7.3.1-2018.05-x86_64_arm-linux-gnueabihf
# https://releases.linaro.org/components/toolchain/binaries/7.3-2018.05/
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR arm)
# Cross compilers taken from the toolchain under CROSSTOOL_ROOT.
set(CMAKE_C_COMPILER "${CROSSTOOL_ROOT}/bin/arm-linux-gnueabihf-gcc")
set(CMAKE_CXX_COMPILER "${CROSSTOOL_ROOT}/bin/arm-linux-gnueabihf-g++")
# find_*: look up programs on the host only; headers/libraries/packages
# only inside the target root, never in host paths.
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
# ARMv7-A, hard float ABI, NEON-VFPv4 FPU; prepended so flags supplied
# by the user come later.
set(CMAKE_CXX_FLAGS "-march=armv7-a -mfloat-abi=hard -mfpu=neon-vfpv4 ${CMAKE_CXX_FLAGS}")
此差异已折叠。
...@@ -50,8 +50,8 @@ RUN curl -L https://raw.githubusercontent.com/yyuu/pyenv-installer/master/bin/py ...@@ -50,8 +50,8 @@ RUN curl -L https://raw.githubusercontent.com/yyuu/pyenv-installer/master/bin/py
ENV PATH /root/.pyenv/bin:/root/.pyenv/shims/:${PATH} ENV PATH /root/.pyenv/bin:/root/.pyenv/shims/:${PATH}
RUN eval "$(pyenv init -)" RUN eval "$(pyenv init -)"
RUN eval "$(pyenv virtualenv-init -)" RUN eval "$(pyenv virtualenv-init -)"
RUN pyenv install 2.7.14 && pyenv install 3.6.3 RUN pyenv install 3.6.3
RUN pyenv global 2.7.14 RUN pyenv global 3.6.3
# Setup vim # Setup vim
RUN apt-get install -y --no-install-recommends \ RUN apt-get install -y --no-install-recommends \
...@@ -98,45 +98,46 @@ RUN mkdir -p /opt/sdk && \ ...@@ -98,45 +98,46 @@ RUN mkdir -p /opt/sdk && \
yes | android update sdk --no-ui -a --filter tools,platform-tools,${ANDROID_API_LEVELS},build-tools-${ANDROID_BUILD_TOOLS_VERSION} yes | android update sdk --no-ui -a --filter tools,platform-tools,${ANDROID_API_LEVELS},build-tools-${ANDROID_BUILD_TOOLS_VERSION}
RUN ${ANDROID_HOME}/tools/bin/sdkmanager "cmake;3.6.4111459" RUN ${ANDROID_HOME}/tools/bin/sdkmanager "cmake;3.6.4111459"
# Download NDK # Download NDK 19c
RUN cd /opt/ && \ RUN cd /opt/ && \
wget -q https://dl.google.com/android/repository/android-ndk-r15c-linux-x86_64.zip && \ wget -q https://dl.google.com/android/repository/android-ndk-r19c-linux-x86_64.zip && \
unzip -q android-ndk-r15c-linux-x86_64.zip && \ unzip -q android-ndk-r19c-linux-x86_64.zip && \
rm -f android-ndk-r15c-linux-x86_64.zip rm -f android-ndk-r19c-linux-x86_64.zip
ENV ANDROID_NDK_VERSION r15c ENV ANDROID_NDK_VERSION r19c
ENV ANDROID_NDK_HOME /opt/android-ndk-${ANDROID_NDK_VERSION} ENV ANDROID_NDK_HOME /opt/android-ndk-${ANDROID_NDK_VERSION}
# add to PATH
ENV PATH ${PATH}:${ANDROID_NDK_HOME}
# Install tools # Install tools
RUN apt-get install -y --no-install-recommends \ RUN apt-get install -y --no-install-recommends \
android-tools-adb android-tools-adb
RUN pip install -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com setuptools # fix docker in docker error: `error while loading shared libraries: libltdl.so.7`
RUN pip install -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com \ # refer to: https://github.com/jenkinsci/docker/issues/506
"numpy>=1.14.0" \ RUN apt-get install -y libltdl7
scipy \
jinja2 \ RUN pip install -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com --upgrade pip setuptools
pyyaml \ RUN pip install -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com \
numpy==1.15.4 \
scipy==1.2.0 \
Jinja2==2.10 \
PyYAML==3.13 \
sh==1.12.14 \ sh==1.12.14 \
pycodestyle==2.4.0 \ pycodestyle==2.4.0 \
filelock \ filelock==3.0.10 \
PTable PTable==0.9.2
# Install Tensorflow RUN pip install -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com \
RUN pip install -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com \
tensorflow==1.8.0 tensorflow==1.8.0
# Install pytorch (refer to: https://pytorch.org/get-started/locally/) # Install pytorch (refer to: https://pytorch.org/get-started/locally/)
RUN pip install future==0.17.1 torchvision_nightly RUN pip install -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com \
RUN pip install torch_nightly -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html future==0.17.1 \
Pillow==5.4.1 \
torch==1.1.0 \
torchvision==0.2.2.post3
# Install ONNX RUN pip install -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com \
RUN pip install -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com \
onnx==1.3.0 \ onnx==1.3.0 \
onnx-tf==1.2.0 onnx-tf==1.2.0
# fix docker in docker error: `error while loading shared libraries: libltdl.so.7` RUN pip install -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com \
# refer to: https://github.com/jenkinsci/docker/issues/506 cpplint==1.4.4
RUN apt-get install -y libltdl7
...@@ -9,78 +9,75 @@ RUN apt-get install -y --no-install-recommends \ ...@@ -9,78 +9,75 @@ RUN apt-get install -y --no-install-recommends \
screen \ screen \
htop htop
RUN pip install -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com \
sphinx \
sphinx-autobuild \
sphinx_rtd_theme \
recommonmark
# ============ Install same deps for python3 ============
RUN pyenv global 3.6.3 RUN pyenv global 3.6.3
# Install tools RUN pip install -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com \
RUN pip install -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com setuptools
RUN pip install -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com \
"numpy>=1.14.0" \
scipy \
jinja2 \
pyyaml \
sh==1.12.14 \
pycodestyle==2.4.0 \
filelock \
PTable
# Install Tensorflow
RUN pip install -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com \
tensorflow==1.8.0
# Install pytorch (refer to: https://pytorch.org/get-started/locally/)
RUN pip install future==0.17.1 torchvision_nightly
RUN pip install torch_nightly -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
# Install ONNX
RUN pip install -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com \
onnx==1.3.0 \
onnx-tf==1.2.0
RUN pip install -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com \
sphinx \ sphinx \
sphinx-autobuild \ sphinx-autobuild \
sphinx_rtd_theme \ sphinx_rtd_theme \
recommonmark recommonmark
RUN pyenv global 2.7.14
# ============ End installing same deps for python3 ============
# Customize vim # Customize vim
RUN mkdir -p ~/.vim/autoload ~/.vim/bundle && \ RUN git clone --recursive https://github.com/llhe/vimrc.git && \
curl -LSso ~/.vim/autoload/pathogen.vim https://tpo.pe/pathogen.vim cd vimrc && \
RUN mkdir -p ~/.vim/bundle rm -rf ~/.vim && \
RUN cd ~/.vim/bundle && \ rm -rf ~/.vimrc && \
git clone https://github.com/scrooloose/nerdtree.git && \ ln -s `pwd`/vim ~/.vim && \
git clone https://github.com/vim-syntastic/syntastic.git && \ ln -s `pwd`/vimrc ~/.vimrc
git clone https://github.com/vim-airline/vim-airline.git && \
git clone https://github.com/altercation/vim-colors-solarized.git && \
git clone https://github.com/bazelbuild/vim-ft-bzl.git && \
git clone https://github.com/google/vim-maktaba.git && \
git clone https://github.com/google/vim-codefmt.git
RUN curl -LSso ~/.vimrc https://gist.githubusercontent.com/llhe/71c5802919debd5825c100c0135478a7/raw/16a35020cc65f9c25d0cf8f11a3ba7b345a1271d/.vimrc
# Upgrade CMake # Upgrade CMake
RUN wget https://cmake.org/files/v3.11/cmake-3.11.3-Linux-x86_64.tar.gz -P /tmp/ && \ RUN wget -q https://cmake.org/files/v3.11/cmake-3.11.3-Linux-x86_64.tar.gz -P /tmp/ && \
tar zxf /tmp/cmake-3.11.3-Linux-x86_64.tar.gz --strip-components=1 -C /usr/local/ && \ tar zxf /tmp/cmake-3.11.3-Linux-x86_64.tar.gz --strip-components=1 -C /usr/local/ && \
update-alternatives --install /usr/bin/cmake cmake /usr/local/bin/cmake 1 --force && \ update-alternatives --install /usr/bin/cmake cmake /usr/local/bin/cmake 1 --force && \
rm -f /tmp/cmake-3.11.3-Linux-x86_64.tar.gz rm -f /tmp/cmake-3.11.3-Linux-x86_64.tar.gz
# mace-dev-lite image already included NDK r15c # mace-dev-lite image already included NDK r19c
## Download other NDK r16 ## Download other NDK r15c
# Install Android NDK r15c under /opt/; the zip is removed after extraction
# to keep the image layer small.
RUN cd /opt/ && \
wget -q https://dl.google.com/android/repository/android-ndk-r15c-linux-x86_64.zip && \
unzip -q android-ndk-r15c-linux-x86_64.zip && \
rm -f android-ndk-r15c-linux-x86_64.zip
## Download other NDK r16b
RUN cd /opt/ && \ RUN cd /opt/ && \
wget -q https://dl.google.com/android/repository/android-ndk-r16-linux-x86_64.zip && \ wget -q https://dl.google.com/android/repository/android-ndk-r16b-linux-x86_64.zip && \
unzip -q android-ndk-r16-linux-x86_64.zip && \ unzip -q android-ndk-r16b-linux-x86_64.zip && \
rm -f android-ndk-r16-linux-x86_64.zip rm -f android-ndk-r16b-linux-x86_64.zip
## Download other NDK r17b ## Download other NDK r17b
RUN cd /opt/ && \ RUN cd /opt/ && \
wget -q https://dl.google.com/android/repository/android-ndk-r17b-linux-x86_64.zip && \ wget -q https://dl.google.com/android/repository/android-ndk-r17b-linux-x86_64.zip && \
unzip -q android-ndk-r17b-linux-x86_64.zip && \ unzip -q android-ndk-r17b-linux-x86_64.zip && \
rm -f android-ndk-r17b-linux-x86_64.zip rm -f android-ndk-r17b-linux-x86_64.zip
## Download other NDK r18b
# NOTE(review): several NDK versions are installed side by side — presumably
# for the NDK-version compatibility CI jobs; confirm against the CI config.
RUN cd /opt/ && \
wget -q https://dl.google.com/android/repository/android-ndk-r18b-linux-x86_64.zip && \
unzip -q android-ndk-r18b-linux-x86_64.zip && \
rm -f android-ndk-r18b-linux-x86_64.zip
# Mirror of https://releases.linaro.org/components/toolchain/binaries/7.3-2018.05/arm-linux-gnueabihf/gcc-linaro-7.3.1-2018.05-x86_64_arm-linux-gnueabihf.tar.xz
# Linaro GCC 7.3.1 cross toolchain for 32-bit ARM (arm-linux-gnueabihf),
# fetched from the Xiaomi FDS mirror; its path is exported below as
# LINARO_ARM_LINUX_GNUEABIHF.
RUN cd /opt && \
wget -q https://cnbj1.fds.api.xiaomi.com/mace/third-party/gcc-linaro/gcc-linaro-7.3.1-2018.05-x86_64_arm-linux-gnueabihf.tar.xz && \
tar xf gcc-linaro-7.3.1-2018.05-x86_64_arm-linux-gnueabihf.tar.xz && \
rm -rf gcc-linaro-7.3.1-2018.05-x86_64_arm-linux-gnueabihf.tar.xz
# Mirror of https://releases.linaro.org/components/toolchain/binaries/7.3-2018.05/aarch64-linux-gnu/gcc-linaro-7.3.1-2018.05-x86_64_aarch64-linux-gnu.tar.xz
# Same toolchain version for 64-bit ARM (aarch64-linux-gnu); exported below as
# LINARO_AARCH64_LINUX_GNU.
RUN cd /opt && \
wget -q https://cnbj1.fds.api.xiaomi.com/mace/third-party/gcc-linaro/gcc-linaro-7.3.1-2018.05-x86_64_aarch64-linux-gnu.tar.xz && \
tar xf gcc-linaro-7.3.1-2018.05-x86_64_aarch64-linux-gnu.tar.xz && \
rm -rf gcc-linaro-7.3.1-2018.05-x86_64_aarch64-linux-gnu.tar.xz
# Install protoc 3.6.1 into /opt/protoc-3.6.1; the ENV PATH entry below
# expects exactly this location.
# Fix: "cd opt/" was relative to the current WORKDIR, so protoc could land
# outside /opt and never be found on PATH — use the absolute path.
RUN cd /opt/ && \
wget -q https://github.com/protocolbuffers/protobuf/releases/download/v3.6.1/protoc-3.6.1-linux-x86_64.zip && \
unzip -q protoc-3.6.1-linux-x86_64.zip -d protoc-3.6.1 && \
rm -f protoc-3.6.1-linux-x86_64.zip
# Toolchain locations consumed by the build scripts.
ENV CROSS_TOOLCHAIN_PARENT /opt
# NOTE(review): r19c is not downloaded above (only r15c/r16b/r17b/r18b are) —
# presumably it ships with the base image; confirm.
ENV ANDROID_NDK_VERSION r19c
ENV ANDROID_NDK_HOME /opt/android-ndk-${ANDROID_NDK_VERSION}
ENV LINARO_ARM_LINUX_GNUEABIHF /opt/gcc-linaro-7.3.1-2018.05-x86_64_arm-linux-gnueabihf
ENV LINARO_AARCH64_LINUX_GNU /opt/gcc-linaro-7.3.1-2018.05-x86_64_aarch64-linux-gnu
# Put protoc 3.6.1 first on PATH.
ENV PATH /opt/protoc-3.6.1/bin:${PATH}
...@@ -11,18 +11,16 @@ if [ $BUILD_DIR_NAME != $CURRENT_DIR_NAME ]; then ...@@ -11,18 +11,16 @@ if [ $BUILD_DIR_NAME != $CURRENT_DIR_NAME ]; then
fi fi
# build images # build images
docker build -t registry.cn-hangzhou.aliyuncs.com/xiaomimace/mace-dev-lite ./mace-dev-lite || exit 1 docker build --network host -t registry.cn-hangzhou.aliyuncs.com/xiaomimace/mace-dev-lite ./mace-dev-lite
docker build -t registry.cn-hangzhou.aliyuncs.com/xiaomimace/mace-dev ./mace-dev-lite || exit 1 docker build --network host -t registry.cn-hangzhou.aliyuncs.com/xiaomimace/mace-dev ./mace-dev
docker build -t registry.cn-hangzhou.aliyuncs.com/xiaomimace/gitlab-runner ./gitlab-runner || exit 1 docker build --network host -t registry.cn-hangzhou.aliyuncs.com/xiaomimace/gitlab-runner ./gitlab-runner
if grep -lq registry.cn-hangzhou.aliyuncs.com ~/.docker/config.json; then if grep -lq registry.cn-hangzhou.aliyuncs.com ~/.docker/config.json; then
# update images to repository # update images to repository
docker push registry.cn-hangzhou.aliyuncs.com/xiaomimace/mace-dev-lite || exit 1 docker push registry.cn-hangzhou.aliyuncs.com/xiaomimace/mace-dev-lite
docker push registry.cn-hangzhou.aliyuncs.com/xiaomimace/mace-dev || exit 1 docker push registry.cn-hangzhou.aliyuncs.com/xiaomimace/mace-dev
docker push registry.cn-hangzhou.aliyuncs.com/xiaomimace/gitlab-runner || exit 1 docker push registry.cn-hangzhou.aliyuncs.com/xiaomimace/gitlab-runner
else else
echo "Login docker registry server is needed!" echo "Login docker registry server is needed!"
exit 1 exit 1
fi fi
...@@ -72,7 +72,7 @@ If model's output is suspected to be incorrect, it might be useful to debug your ...@@ -72,7 +72,7 @@ If model's output is suspected to be incorrect, it might be useful to debug your
or use binary search method until suspicious layer is found. or use binary search method until suspicious layer is found.
You can also specify `--layers` after `--validate` to validate all or some of the layers of the model(excluding some layers changed by MACE, e.g., BatchToSpaceND), You can also specify `--layers` after `--validate` to validate all or some of the layers of the model(excluding some layers changed by MACE, e.g., BatchToSpaceND),
it only supports TensorFlow now. You can find validation results in `builds/your_model/model/runtime_in_yaml/log.csv`. it only supports TensorFlow now. You can find validation results in `build/your_model/model/runtime_in_yaml/log.csv`.
For quantized model, if you want to check one layer, you can add `check_tensors` and `check_shapes` like in the yaml above. You can only specify For quantized model, if you want to check one layer, you can add `check_tensors` and `check_shapes` like in the yaml above. You can only specify
MACE op's output. MACE op's output.
...@@ -113,7 +113,7 @@ Debug model conversion ...@@ -113,7 +113,7 @@ Debug model conversion
After model is converted to MACE model, a literal model graph is generated in directory `mace/codegen/models/your_model`. After model is converted to MACE model, a literal model graph is generated in directory `mace/codegen/models/your_model`.
You can refer to it when debugging model conversion. You can refer to it when debugging model conversion.
MACE also provides model visualization HTML generated in `builds` directory, generated after converting model. MACE also provides model visualization HTML generated in `build` directory, generated after converting model.
Debug engine using log Debug engine using log
......
...@@ -12,10 +12,10 @@ Run unit tests ...@@ -12,10 +12,10 @@ Run unit tests
MACE use [gtest](https://github.com/google/googletest) for unit tests. MACE use [gtest](https://github.com/google/googletest) for unit tests.
* Run all unit tests defined in a Bazel target, for example, run `ops_test`: * Run all unit tests defined in a Bazel target, for example, run `mace_cc_test`:
```sh ```sh
python tools/bazel_adb_run.py --target="//mace/ops:ops_test" \ python tools/bazel_adb_run.py --target="//test/ccunit:mace_cc_test" \
--run_target=True --run_target=True
``` ```
...@@ -23,7 +23,7 @@ MACE use [gtest](https://github.com/google/googletest) for unit tests. ...@@ -23,7 +23,7 @@ MACE use [gtest](https://github.com/google/googletest) for unit tests.
for example, run `Conv2dOpTest` unit tests: for example, run `Conv2dOpTest` unit tests:
```sh ```sh
python tools/bazel_adb_run.py --target="//mace/ops:ops_test" \ python tools/bazel_adb_run.py --target="//test/ccunit:mace_cc_test" \
--run_target=True \ --run_target=True \
--args="--gtest_filter=Conv2dOpTest*" --args="--gtest_filter=Conv2dOpTest*"
``` ```
...@@ -34,10 +34,10 @@ Run micro benchmarks ...@@ -34,10 +34,10 @@ Run micro benchmarks
MACE provides a micro benchmark framework for performance tuning. MACE provides a micro benchmark framework for performance tuning.
* Run all micro benchmarks defined in a Bazel target, for example, run all * Run all micro benchmarks defined in a Bazel target, for example, run all
`ops_benchmark` micro benchmarks: `mace_cc_benchmark` micro benchmarks:
```sh ```sh
python tools/bazel_adb_run.py --target="//mace/ops:ops_benchmark" \ python tools/bazel_adb_run.py --target="//test/ccbenchmark:mace_cc_benchmark" \
--run_target=True --run_target=True
``` ```
...@@ -45,7 +45,7 @@ MACE provides a micro benchmark framework for performance tuning. ...@@ -45,7 +45,7 @@ MACE provides a micro benchmark framework for performance tuning.
micro benchmarks: micro benchmarks:
```sh ```sh
python tools/bazel_adb_run.py --target="//mace/ops:ops_benchmark" \ python tools/bazel_adb_run.py --target="//test/ccbenchmark:mace_cc_benchmark" \
--run_target=True \ --run_target=True \
--args="--filter=MACE_BM_CONV_2D_.*_GPU" --args="--filter=MACE_BM_CONV_2D_.*_GPU"
``` ```
...@@ -204,15 +204,15 @@ Convert model(s) to C++ code ...@@ -204,15 +204,15 @@ Convert model(s) to C++ code
python tools/converter.py convert --config=/path/to/model_deployment_file.yml python tools/converter.py convert --config=/path/to/model_deployment_file.yml
The command will generate **${library_name}.a** in **builds/${library_name}/model** directory and The command will generate **${library_name}.a** in **build/${library_name}/model** directory and
** *.h ** in **builds/${library_name}/include** like the following dir-tree. ** *.h ** in **build/${library_name}/include** like the following dir-tree.
.. code:: .. code::
# model_graph_format: code # model_graph_format: code
# model_data_format: file # model_data_format: file
builds build
├── include ├── include
│   └── mace │   └── mace
│   └── public │   └── public
...@@ -225,7 +225,7 @@ Convert model(s) to C++ code ...@@ -225,7 +225,7 @@ Convert model(s) to C++ code
# model_graph_format: code # model_graph_format: code
# model_data_format: code # model_data_format: code
builds build
├── include ├── include
│   └── mace │   └── mace
│   └── public │   └── public
...@@ -305,11 +305,11 @@ Tuning for specific SoC's GPU ...@@ -305,11 +305,11 @@ Tuning for specific SoC's GPU
python tools/converter.py run --config=/path/to/model_deployment_file.yml --validate python tools/converter.py run --config=/path/to/model_deployment_file.yml --validate
The command will generate two files in `builds/${library_name}/opencl`, like the following dir-tree. The command will generate two files in `build/${library_name}/opencl`, like the following dir-tree.
.. code:: .. code::
builds build
└── mobilenet-v2 └── mobilenet-v2
├── model ├── model
│   ├── mobilenet_v2.data │   ├── mobilenet_v2.data
......
...@@ -44,17 +44,17 @@ Here we use the mobilenet-v2 model as an example. ...@@ -44,17 +44,17 @@ Here we use the mobilenet-v2 model as an example.
cd path/to/mace cd path/to/mace
# Build library # Build library
# output lib path: builds/lib # output lib path: build/lib
bash tools/build-standalone-lib.sh bash tools/bazel-build-standalone-lib.sh
.. note:: .. note::
- This step can be skipped if you just want to run a model using ``tools/converter.py``, such as commands in step 5. - This step can be skipped if you just want to run a model using ``tools/converter.py``, such as commands in step 5.
- Libraries in ``builds/lib/armeabi-v7a/cpu_gpu/`` means it can run on ``cpu`` or ``gpu`` devices. - Libraries in ``build/lib/armeabi-v7a/cpu_gpu/`` means it can run on ``cpu`` or ``gpu`` devices.
- The results in ``builds/lib/armeabi-v7a/cpu_gpu_dsp/`` need HVX supported. - The results in ``build/lib/armeabi-v7a/cpu_gpu_dsp/`` need HVX supported.
4. Convert the pre-trained mobilenet-v2 model to MACE format model. 4. Convert the pre-trained mobilenet-v2 model to MACE format model.
...@@ -170,7 +170,7 @@ When the deployment file is ready, you can use MACE converter tool to convert yo ...@@ -170,7 +170,7 @@ When the deployment file is ready, you can use MACE converter tool to convert yo
python tools/converter.py convert --config=/path/to/your/model_deployment_file.yml python tools/converter.py convert --config=/path/to/your/model_deployment_file.yml
This command will download or load your pre-trained model and convert it to a MACE model proto file and weights data file. This command will download or load your pre-trained model and convert it to a MACE model proto file and weights data file.
The generated model files will be stored in ``builds/${library_name}/model`` folder. The generated model files will be stored in ``build/${library_name}/model`` folder.
.. warning:: .. warning::
...@@ -188,10 +188,10 @@ Or use bazel to build MACE source code into a library. ...@@ -188,10 +188,10 @@ Or use bazel to build MACE source code into a library.
cd path/to/mace cd path/to/mace
# Build library # Build library
# output lib path: builds/lib # output lib path: build/lib
bash tools/build-standalone-lib.sh bash tools/bazel-build-standalone-lib.sh
The above command will generate dynamic library ``builds/lib/${ABI}/${DEVICES}/libmace.so`` and static library ``builds/lib/${ABI}/${DEVICES}/libmace.a``. The above command will generate dynamic library ``build/lib/${ABI}/${DEVICES}/libmace.so`` and static library ``build/lib/${ABI}/${DEVICES}/libmace.a``.
.. warning:: .. warning::
...@@ -278,7 +278,7 @@ header files. ...@@ -278,7 +278,7 @@ header files.
.. code:: .. code::
builds build
├── include ├── include
│   └── mace │   └── mace
│   └── public │   └── public
......
...@@ -20,7 +20,7 @@ Usage ...@@ -20,7 +20,7 @@ Usage
.. code:: bash .. code:: bash
python tools/bazel_adb_run.py --target="//mace/ops:ops_benchmark" --run_target=True --args="--filter=.*BM_CONV.*" python tools/bazel_adb_run.py --target="//test/ccbenchmark:mace_cc_benchmark" --run_target=True --args="--filter=.*BM_CONV.*"
====== ======
Output Output
......
# mace_example: command-line demo executable linked against the static MACE
# library and gflags, installed to <prefix>/bin.
# Sources are listed explicitly instead of file(GLOB): globs are expanded at
# configure time and silently miss files added later.
add_executable(mace_example
  cli/example.cc
)
# PRIVATE: nothing links against an executable, so its dependencies need not
# propagate.
target_link_libraries(mace_example PRIVATE
  mace_static
  gflags
)
install(TARGETS mace_example RUNTIME DESTINATION bin)
...@@ -20,12 +20,12 @@ fi ...@@ -20,12 +20,12 @@ fi
MACE_LINK_TYPE=$1 MACE_LINK_TYPE=$1
pushd ../../../ pushd ../..
TARGET_ABI=arm64-v8a TARGET_ABI=arm64-v8a
ANDROID_DEMO_DIR=mace/examples/android/ ANDROID_DEMO_DIR=examples/android/
LIBRARY_DIR=$ANDROID_DEMO_DIR/macelibrary/src/main/cpp/ LIBRARY_DIR=$ANDROID_DEMO_DIR/macelibrary/src/main/cpp/
INCLUDE_DIR=$LIBRARY_DIR/include/mace/public/ INCLUDE_DIR=$LIBRARY_DIR/include
LIBMACE_DIR=$LIBRARY_DIR/lib/$TARGET_ABI/ LIBMACE_DIR=$LIBRARY_DIR/lib/$TARGET_ABI/
LIBGNUSTL_SHARED_SO=libgnustl_shared.so LIBGNUSTL_SHARED_SO=libgnustl_shared.so
LIBCPP_SHARED_SO=libc++_shared.so LIBCPP_SHARED_SO=libc++_shared.so
...@@ -44,20 +44,17 @@ else ...@@ -44,20 +44,17 @@ else
exit 1 exit 1
fi fi
rm -rf $LIBRARY_DIR/include/ python tools/converter.py convert --config=examples/android/mobilenet.yml --target_abis=$TARGET_ABI
mkdir -p $INCLUDE_DIR
rm -rf $LIBRARY_DIR/lib/
mkdir -p $LIBMACE_DIR
rm -rf $INCLUDE_DIR && mkdir -p $INCLUDE_DIR
rm -rf $LIBMACE_DIR && mkdir -p $LIBMACE_DIR
rm -rf $LIBRARY_DIR/model/ rm -rf $LIBRARY_DIR/model/
python tools/converter.py convert --config=mace/examples/android/mobilenet.yml --target_abis=$TARGET_ABI cp -rf include/mace $INCLUDE_DIR
cp -rf builds/mobilenet/include/mace/public/*.h $INCLUDE_DIR cp -rf build/mobilenet/include/mace/public/*.h $INCLUDE_DIR/mace/public/
cp -rf builds/mobilenet/model $LIBRARY_DIR cp -rf build/mobilenet/model $LIBRARY_DIR
bazel build --config android --config optimization $BAZEL_LIBMACE_TARGET --define neon=true --define openmp=true --define opencl=true --define quantize=true --cpu=$TARGET_ABI bazel build --config android --config optimization $BAZEL_LIBMACE_TARGET --define neon=true --define openmp=true --define opencl=true --define quantize=true --cpu=$TARGET_ABI
cp -rf mace/public/*.h $INCLUDE_DIR
cp -rf $BAZEL_GEN_LIBMACE_PATH $LIBMACE_DIR cp -rf $BAZEL_GEN_LIBMACE_PATH $LIBMACE_DIR
if [ $MACE_LINK_TYPE == "dynamic" ]; then if [ $MACE_LINK_TYPE == "dynamic" ]; then
......
...@@ -13,6 +13,7 @@ cmake_minimum_required(VERSION 3.4.1) ...@@ -13,6 +13,7 @@ cmake_minimum_required(VERSION 3.4.1)
#set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/../app/libs/${ANDROID_ABI}) #set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/../app/libs/${ANDROID_ABI})
include_directories(${CMAKE_SOURCE_DIR}/) include_directories(${CMAKE_SOURCE_DIR}/)
include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp)
include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp/include) include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp/include)
file(GLOB LIBMACE "${CMAKE_SOURCE_DIR}/src/main/cpp/lib/arm64-v8a/*") file(GLOB LIBMACE "${CMAKE_SOURCE_DIR}/src/main/cpp/lib/arm64-v8a/*")
set(mace_lib ${LIBMACE}) set(mace_lib ${LIBMACE})
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "src/main/cpp/image_classify.h" #include "image_classify.h"
#include <android/log.h> #include <android/log.h>
#include <jni.h> #include <jni.h>
...@@ -25,9 +25,8 @@ ...@@ -25,9 +25,8 @@
#include <vector> #include <vector>
#include <numeric> #include <numeric>
#include "src/main/cpp/include/mace/public/mace.h"
#include "src/main/cpp/include/mace/public/mace_engine_factory.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
#include "mace/public/mace_engine_factory.h"
namespace { namespace {
......
...@@ -35,8 +35,8 @@ cc_binary( ...@@ -35,8 +35,8 @@ cc_binary(
deps = [ deps = [
"//external:gflags_nothreads", "//external:gflags_nothreads",
"//mace/codegen:generated_mace_engine_factory", "//mace/codegen:generated_mace_engine_factory",
"//mace/codegen:generated_libmace", "//mace/codegen:generated_models",
"//mace/utils:utils_hdrs", "//mace/libmace",
] + if_opencl_enabled([ ] + if_opencl_enabled([
"//mace/codegen:generated_opencl_binary", "//mace/codegen:generated_opencl_binary",
"//mace/codegen:generated_opencl_parameter", "//mace/codegen:generated_opencl_parameter",
...@@ -70,9 +70,9 @@ cc_binary( ...@@ -70,9 +70,9 @@ cc_binary(
linkstatic = 0, linkstatic = 0,
deps = [ deps = [
"//external:gflags_nothreads", "//external:gflags_nothreads",
"//mace/codegen:generated_libmace",
"//mace/codegen:generated_mace_engine_factory", "//mace/codegen:generated_mace_engine_factory",
"//mace/utils:utils_hdrs", "//mace/codegen:generated_models",
"//mace/libmace:libmace_dynamic",
] + if_opencl_enabled([ ] + if_opencl_enabled([
"//mace/codegen:generated_opencl_binary", "//mace/codegen:generated_opencl_binary",
"//mace/codegen:generated_opencl_parameter", "//mace/codegen:generated_opencl_parameter",
......
...@@ -102,17 +102,20 @@ DEFINE_string(model_name, ...@@ -102,17 +102,20 @@ DEFINE_string(model_name,
"", "",
"model name in model deployment file"); "model name in model deployment file");
DEFINE_string(input_node, DEFINE_string(input_node,
"input_node0,input_node1", "",
"input nodes, separated by comma"); "input nodes, separated by comma,"
"example: input_node0,input_node1");
DEFINE_string(input_shape, DEFINE_string(input_shape,
"1,224,224,3:1,1,1,10", "",
"input shapes, separated by colon and comma"); "input shapes, separated by colon and comma, "
"example: 1,224,224,3:1,1,1,10");
DEFINE_string(output_node, DEFINE_string(output_node,
"output_node0,output_node1", "output_node0,output_node1",
"output nodes, separated by comma"); "output nodes, separated by comma");
DEFINE_string(output_shape, DEFINE_string(output_shape,
"1,224,224,2:1,1,1,10", "",
"output shapes, separated by colon and comma"); "output shapes, separated by colon and comma, "
"example: 1,224,224,2:1,1,1,10");
DEFINE_string(input_data_format, DEFINE_string(input_data_format,
"NHWC", "NHWC",
"input data formats, NONE|NHWC|NCHW"); "input data formats, NONE|NHWC|NCHW");
......
# Description:
# MACE public API.
#
package( package(
default_visibility = ["//visibility:public"], default_visibility = ["//visibility:public"],
) )
...@@ -8,12 +5,12 @@ package( ...@@ -8,12 +5,12 @@ package(
licenses(["notice"]) # Apache 2.0 licenses(["notice"]) # Apache 2.0
cc_library( cc_library(
name = "public", name = "public_headers",
hdrs = [ hdrs = glob([
"mace.h", "mace/public/*.h",
], "mace/port/*.h",
srcs = [ "mace/utils/*.h",
"status.cc", ]),
], strip_include_prefix = "",
copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"], copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"],
) )
# Install the "mace" public header tree into <prefix>/include/mace.
install(DIRECTORY mace DESTINATION include)
...@@ -16,11 +16,18 @@ ...@@ -16,11 +16,18 @@
#define MACE_PORT_ENV_H_ #define MACE_PORT_ENV_H_
#include <cstdint> #include <cstdint>
#include <cstdlib>
#include <memory> #include <memory>
#include <sstream> #include <sstream>
#include <string> #include <string>
#include <vector> #include <vector>
#ifdef _WIN32
#include <malloc.h>
#endif
#include <sys/stat.h>
#include "mace/public/mace.h" #include "mace/public/mace.h"
namespace mace { namespace mace {
...@@ -38,6 +45,7 @@ class LogWriter; ...@@ -38,6 +45,7 @@ class LogWriter;
class Env { class Env {
public: public:
virtual int64_t NowMicros() = 0; virtual int64_t NowMicros() = 0;
virtual MaceStatus AdviseFree(void *addr, size_t length);
virtual MaceStatus GetCPUMaxFreq(std::vector<float> *max_freqs); virtual MaceStatus GetCPUMaxFreq(std::vector<float> *max_freqs);
virtual MaceStatus SchedSetAffinity(const std::vector<size_t> &cpu_ids); virtual MaceStatus SchedSetAffinity(const std::vector<size_t> &cpu_ids);
virtual FileSystem *GetFileSystem() = 0; virtual FileSystem *GetFileSystem() = 0;
...@@ -58,6 +66,10 @@ inline int64_t NowMicros() { ...@@ -58,6 +66,10 @@ inline int64_t NowMicros() {
return port::Env::Default()->NowMicros(); return port::Env::Default()->NowMicros();
} }
// Convenience free function: forwards to the default Env's AdviseFree for the
// region [addr, addr + length).
// NOTE(review): the concrete semantics (e.g. madvise-style page release) live
// in the port's Env implementation, which is not visible here — confirm there.
inline MaceStatus AdviseFree(void *addr, size_t length) {
  return port::Env::Default()->AdviseFree(addr, length);
}
inline MaceStatus GetCPUMaxFreq(std::vector<float> *max_freqs) { inline MaceStatus GetCPUMaxFreq(std::vector<float> *max_freqs) {
return port::Env::Default()->GetCPUMaxFreq(max_freqs); return port::Env::Default()->GetCPUMaxFreq(max_freqs);
} }
...@@ -70,6 +82,63 @@ inline port::FileSystem *GetFileSystem() { ...@@ -70,6 +82,63 @@ inline port::FileSystem *GetFileSystem() {
return port::Env::Default()->GetFileSystem(); return port::Env::Default()->GetFileSystem();
} }
// Allocates `size` bytes aligned to `alignment` and stores the pointer in
// *memptr. Returns MACE_OUT_OF_RESOURCES on allocation failure, MACE_SUCCESS
// otherwise.
// Platform selection: Windows uses _aligned_malloc; Android and Hexagon use
// memalign (posix_memalign is not reliably available there); everything else
// uses posix_memalign.
// NOTE: memory from the _WIN32 branch must be released with _aligned_free(),
// not free().
inline MaceStatus Memalign(void **memptr, size_t alignment, size_t size) {
#ifdef _WIN32
  *memptr = _aligned_malloc(size, alignment);
  return (*memptr == nullptr) ? MaceStatus::MACE_OUT_OF_RESOURCES
                              : MaceStatus::MACE_SUCCESS;
#elif defined(__ANDROID__) || defined(__hexagon__)
  *memptr = memalign(alignment, size);
  return (*memptr == nullptr) ? MaceStatus::MACE_OUT_OF_RESOURCES
                              : MaceStatus::MACE_SUCCESS;
#else
  int error = posix_memalign(memptr, alignment, size);
  if (error != 0) {
    // posix_memalign leaves *memptr UNMODIFIED on failure, so it may still
    // hold an uninitialized value from the caller. The previous code called
    // free(*memptr) here, which is undefined behavior on such a pointer.
    // Just clear it and report the failure.
    *memptr = nullptr;
    return MaceStatus::MACE_OUT_OF_RESOURCES;
  }
  return MaceStatus::MACE_SUCCESS;
#endif
}
// Reads environment variable `name` into *value. If the variable is unset,
// *value is left untouched and MACE_SUCCESS is still returned; only an OS
// level failure (Windows _dupenv_s error) yields MACE_RUNTIME_ERROR.
inline MaceStatus GetEnv(const char *name, std::string *value) {
#ifdef _WIN32
  char *buf = nullptr;
  size_t buf_len = 0;
  // _dupenv_s heap-allocates a copy of the value; we own it and must free it.
  if (_dupenv_s(&buf, &buf_len, name) != 0) {
    return MaceStatus::MACE_RUNTIME_ERROR;
  }
  if (buf != nullptr) {
    value->assign(buf);
    free(buf);
  }
  return MaceStatus::MACE_SUCCESS;
#else
  const char *env = getenv(name);
  if (env != nullptr) {
    value->assign(env);
  }
  return MaceStatus::MACE_SUCCESS;
#endif
}
// MSVC's <sys/stat.h> does not provide S_ISREG; define it from the st_mode
// bits (0170000 is the file-type mask, 0100000 marks a regular file).
#if defined(_WIN32) && !defined(S_ISREG)
#define S_ISREG(m) (((m) & 0170000) == (0100000))
#endif
} // namespace mace } // namespace mace
#endif // MACE_PORT_ENV_H_ #endif // MACE_PORT_ENV_H_
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <memory> #include <memory>
#include "mace/public/mace.h" #include "mace/public/mace.h"
#include "mace/utils/macros.h"
namespace mace { namespace mace {
namespace port { namespace port {
...@@ -29,6 +30,8 @@ class ReadOnlyMemoryRegion { ...@@ -29,6 +30,8 @@ class ReadOnlyMemoryRegion {
virtual ~ReadOnlyMemoryRegion() = default; virtual ~ReadOnlyMemoryRegion() = default;
virtual const void *data() const = 0; virtual const void *data() const = 0;
virtual uint64_t length() const = 0; virtual uint64_t length() const = 0;
private:
MACE_DISABLE_COPY_AND_ASSIGN(ReadOnlyMemoryRegion);
}; };
class ReadOnlyBufferMemoryRegion : public ReadOnlyMemoryRegion { class ReadOnlyBufferMemoryRegion : public ReadOnlyMemoryRegion {
...@@ -44,12 +47,25 @@ class ReadOnlyBufferMemoryRegion : public ReadOnlyMemoryRegion { ...@@ -44,12 +47,25 @@ class ReadOnlyBufferMemoryRegion : public ReadOnlyMemoryRegion {
uint64_t length_; uint64_t length_;
}; };
// Abstract handle for sequentially writing a file. Implementations buffer
// appended bytes and push them to the underlying OS file on Flush/Close.
// Instances are neither copyable nor assignable.
class WritableFile {
 public:
  WritableFile() {}
  virtual ~WritableFile();
  // Appends `length` bytes starting at `data` to the end of the file.
  virtual MaceStatus Append(const char *data, size_t length) = 0;
  // Closes the file; no further writes are valid afterwards.
  virtual MaceStatus Close() = 0;
  // Forces buffered data out to the underlying file.
  virtual MaceStatus Flush() = 0;
 private:
  MACE_DISABLE_COPY_AND_ASSIGN(WritableFile);
};
class FileSystem { class FileSystem {
public: public:
FileSystem() = default; FileSystem() = default;
virtual ~FileSystem() = default; virtual ~FileSystem() = default;
virtual MaceStatus NewReadOnlyMemoryRegionFromFile(const char *fname, virtual MaceStatus NewReadOnlyMemoryRegionFromFile(const char *fname,
std::unique_ptr<ReadOnlyMemoryRegion>* result) = 0; std::unique_ptr<ReadOnlyMemoryRegion>* result) = 0;
virtual MaceStatus NewWritableFile(const char *fname,
std::unique_ptr<WritableFile>* result);
}; };
} // namespace port } // namespace port
......
...@@ -25,8 +25,20 @@ ...@@ -25,8 +25,20 @@
#include <vector> #include <vector>
#ifndef MACE_API #ifndef MACE_API
#ifdef _MSC_VER
#define MACE_API
#else
#define MACE_API __attribute__((visibility("default"))) #define MACE_API __attribute__((visibility("default")))
#endif #endif
#endif
#ifndef MACE_DEPRECATED
#ifdef _MSC_VER
#define MACE_DEPRECATED
#else
#define MACE_DEPRECATED __attribute__((deprecated))
#endif
#endif
namespace mace { namespace mace {
...@@ -435,7 +447,8 @@ MACE_API MaceStatus CreateMaceEngineFromProto( ...@@ -435,7 +447,8 @@ MACE_API MaceStatus CreateMaceEngineFromProto(
const std::vector<std::string> &input_nodes, const std::vector<std::string> &input_nodes,
const std::vector<std::string> &output_nodes, const std::vector<std::string> &output_nodes,
const MaceEngineConfig &config, const MaceEngineConfig &config,
std::shared_ptr<MaceEngine> *engine) __attribute__((deprecated)); std::shared_ptr<MaceEngine> *engine) MACE_DEPRECATED;
} // namespace mace } // namespace mace
......
...@@ -75,6 +75,15 @@ T &&CheckNotNull(const char *file, int line, const char *exprtext, T &&t) { ...@@ -75,6 +75,15 @@ T &&CheckNotNull(const char *file, int line, const char *exprtext, T &&t) {
#define MACE_NOT_IMPLEMENTED MACE_CHECK(false, "not implemented") #define MACE_NOT_IMPLEMENTED MACE_CHECK(false, "not implemented")
// Evaluates `stmt` (an expression yielding MaceStatus) and aborts via
// LOG(FATAL) when it is not MACE_SUCCESS, printing the failing statement text
// and the status' diagnostic information.
// Wrapped in do { } while (0) so the macro expands to a single statement:
// the previous bare-block form broke unbraced call sites such as
// `if (cond) MACE_CHECK_SUCCESS(x); else ...`.
#define MACE_CHECK_SUCCESS(stmt)                      \
  do {                                                \
    MaceStatus status = (stmt);                       \
    if (status != MaceStatus::MACE_SUCCESS) {         \
      LOG(FATAL) << #stmt << " failed with error: "   \
                 << status.information();             \
    }                                                 \
  } while (0)
#define MACE_RETURN_IF_ERROR(stmt) \ #define MACE_RETURN_IF_ERROR(stmt) \
{ \ { \
MaceStatus status = (stmt); \ MaceStatus status = (stmt); \
......
# Build-tree layout for the mace/ source root. Each subdirectory provides
# its own CMakeLists.txt. NOTE(review): add_subdirectory order affects
# variable/target visibility at configure time; codegen is listed first,
# presumably because later directories consume its generated targets —
# confirm before reordering.
add_subdirectory(codegen)
add_subdirectory(core)
add_subdirectory(libmace)
add_subdirectory(ops)
add_subdirectory(port)
add_subdirectory(proto)
add_subdirectory(tools)
add_subdirectory(utils)
...@@ -8,26 +8,19 @@ package( ...@@ -8,26 +8,19 @@ package(
load("//mace:mace.bzl", "mace_version_genrule", "encrypt_opencl_kernel_genrule") load("//mace:mace.bzl", "mace_version_genrule", "encrypt_opencl_kernel_genrule")
cc_library( cc_library(
name = "libmodels", name = "generated_models",
srcs = glob(["models/*/*.cc"]), srcs = glob(["models/*/*.cc"]),
hdrs = glob(["models/*/*.h"]), hdrs = glob(["models/*/*.h"]),
copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"], copts = [
"-Werror",
"-Wextra",
"-Wno-missing-field-initializers",
],
deps = [ deps = [
"//mace/core", "//mace/core",
], ],
) )
genrule(
name = "generated_models",
srcs = [
":libmodels",
],
outs = ["libgenerated_models.a"],
cmd = "binary=`echo $(locations :libmodels) | xargs -n 1 | grep libmodels.a`;" +
"$(STRIP) -x $$binary -o $@",
visibility = ["//visibility:public"],
)
mace_version_genrule() mace_version_genrule()
encrypt_opencl_kernel_genrule() encrypt_opencl_kernel_genrule()
...@@ -35,39 +28,52 @@ encrypt_opencl_kernel_genrule() ...@@ -35,39 +28,52 @@ encrypt_opencl_kernel_genrule()
cc_library( cc_library(
name = "generated_opencl", name = "generated_opencl",
srcs = ["opencl/encrypt_opencl_kernel.cc"], srcs = ["opencl/encrypt_opencl_kernel.cc"],
copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"], copts = [
"-Werror",
"-Wextra",
"-Wno-missing-field-initializers",
],
) )
cc_library( cc_library(
name = "generated_opencl_binary", name = "generated_opencl_binary",
srcs = ["opencl/opencl_binary.cc"], srcs = ["opencl/opencl_binary.cc"],
copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"], copts = [
"-Werror",
"-Wextra",
"-Wno-missing-field-initializers",
],
) )
cc_library( cc_library(
name = "generated_opencl_parameter", name = "generated_opencl_parameter",
srcs = ["opencl/opencl_parameter.cc"], srcs = ["opencl/opencl_parameter.cc"],
copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"], copts = [
"-Werror",
"-Wextra",
"-Wno-missing-field-initializers",
],
) )
cc_library( cc_library(
name = "generated_version", name = "generated_version",
srcs = ["version/version.cc"], srcs = ["version/version.cc"],
copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"], copts = [
"-Werror",
"-Wextra",
"-Wno-missing-field-initializers",
],
) )
cc_library( cc_library(
name = "generated_mace_engine_factory", name = "generated_mace_engine_factory",
hdrs = glob(["engine/*.h"]), hdrs = glob(["engine/*.h"]),
copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"], copts = [
"-Werror",
"-Wextra",
"-Wno-missing-field-initializers",
],
deps = [ deps = [
"//mace/public", "//include:public_headers",
], ],
) )
cc_library(
name = "generated_libmace",
srcs = glob(["lib/*"]),
linkstatic = 1,
visibility = ["//visibility:public"],
)
# Generate the MACE version source file (version/version.cc) into the build
# tree and wrap it in a static library target `generated_version`.
#
# Pipeline: add_custom_command declares the generation rule for the OUTPUT
# file; the always-checked `version_src` custom target forces the rule to be
# evaluated on every build; `generated_version` compiles the result.
set(MACE_VERSION_GEN_TOOL "${CMAKE_CURRENT_SOURCE_DIR}/tools/gen_version_source.sh")
set(MACE_VERSION_SRC "${CMAKE_CURRENT_BINARY_DIR}/version/version.cc")
add_custom_command(
  OUTPUT ${MACE_VERSION_SRC}
  COMMAND ${MACE_VERSION_GEN_TOOL} ${MACE_VERSION_SRC}
  # NOTE(review): only the generator script is a declared dependency; if the
  # script derives the version from git state, a changed git HEAD alone will
  # not re-trigger this rule — confirm the intended staleness behavior.
  DEPENDS ${MACE_VERSION_GEN_TOOL}
  COMMENT "Generating version source file"
  # VERBATIM makes argument escaping platform-independent.
  VERBATIM)
add_custom_target(version_src DEPENDS ${MACE_VERSION_SRC}
  COMMENT "Checking if re-generation is required")
add_library(generated_version ${MACE_VERSION_SRC})
# Ensure the source exists before the library is built when the rule lives
# behind the custom target.
add_dependencies(generated_version version_src)
# Encrypt/embed the OpenCL kernel sources (.cl files) into a single C++
# source file and wrap it in a static library target `generated_opencl_kernel`.
set(MACE_OPENCL_KERNELS_GEN_TOOL "${PROJECT_SOURCE_DIR}/mace/python/tools/encrypt_opencl_codegen.py")
set(MACE_OPENCL_KERNELS_DIR "${PROJECT_SOURCE_DIR}/mace/ops/opencl/cl")
set(MACE_OPENCL_KERNELS_SRC "${CMAKE_CURRENT_BINARY_DIR}/opencl/encrypt_opencl_kernel.cc")
# Collect the kernel files so that editing any .cl file re-triggers
# generation; previously only the generator script was a dependency, which
# left the embedded kernels stale after kernel edits.
file(GLOB MACE_OPENCL_KERNEL_FILES "${MACE_OPENCL_KERNELS_DIR}/*")
add_custom_command(
  OUTPUT ${MACE_OPENCL_KERNELS_SRC}
  # NOTE(review): `python` is resolved from PATH; consider find_package(Python3)
  # and ${Python3_EXECUTABLE} for reproducible builds — confirm project policy.
  COMMAND python ${MACE_OPENCL_KERNELS_GEN_TOOL}
          --output_path=${MACE_OPENCL_KERNELS_SRC}
          --cl_kernel_dir=${MACE_OPENCL_KERNELS_DIR}
  DEPENDS ${MACE_OPENCL_KERNELS_GEN_TOOL} ${MACE_OPENCL_KERNEL_FILES}
  COMMENT "Generating OpenCL kernel source file"
  # VERBATIM makes argument escaping platform-independent.
  VERBATIM)
add_custom_target(opencl_kernel_src DEPENDS ${MACE_OPENCL_KERNELS_SRC}
  COMMENT "Checking if re-generation is required")
add_library(generated_opencl_kernel ${MACE_OPENCL_KERNELS_SRC})
# Ensure the generated source exists before the library compiles it.
add_dependencies(generated_opencl_kernel opencl_kernel_src)
# Install the generated static archives into <prefix>/lib.
install(TARGETS
          generated_version
          generated_opencl_kernel
        ARCHIVE DESTINATION lib)
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册