From 51b14100dc00002c7eb5386a0589f36d87929c66 Mon Sep 17 00:00:00 2001 From: liuqi Date: Fri, 30 Nov 2018 14:13:41 +0800 Subject: [PATCH] feature: support arm linux device 1. Abstact android and arm linux to one format 2. Support cross compilation for ARM linux 3. Related issue #36 --- .gitlab-ci.yml | 50 +- docs/installation/env_requirement.rst | 2 +- docs/user_guide/advanced_usage.rst | 65 +- docs/user_guide/basic_usage.rst | 8 +- mace/BUILD | 18 + mace/examples/cli/BUILD | 9 +- mace/mace.bzl | 13 +- mace/ops/depthwise_conv2d_test.cc | 2 +- mace/ops/local_response_norm.cc | 1 + mace/ops/resize_bicubic.cc | 1 + mace/ops/scalar_math.cc | 1 + mace/ops/softmax.cc | 3 +- mace/ops/strided_slice.cc | 1 + mace/ops/transpose.cc | 3 +- .../converter_tool/tensorflow_converter.py | 2 + mace/python/tools/memory_optimizer.py | 350 ++++++ mace/python/tools/model_saver.py | 5 +- mace/utils/quantize.h | 3 +- tools/bazel.rc | 43 +- tools/bazel_adb_run.py | 40 +- tools/build-standalone-lib.sh | 24 + tools/common.py | 339 ++++++ tools/converter.py | 784 +++---------- tools/device.py | 1004 +++++++++++++++++ tools/image/image_to_tensor.py | 5 +- tools/image/tensor_to_image.py | 5 +- tools/sh_commands.py | 262 +---- 27 files changed, 2080 insertions(+), 963 deletions(-) create mode 100644 mace/python/tools/memory_optimizer.py create mode 100644 tools/device.py diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 7ab11760..5e3a22c5 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -47,8 +47,13 @@ ops_test: stage: ops_test script: - if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi - - python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS - - python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS --enable_neon=false + - > 
+ if ping -c 1 v9.git.n.xiaomi.com 1>/dev/null 2>&1; then + GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git + DEVICE_CONF_FILE=generic-mobile-devices/devices.yml + fi + - python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64,armhf --target_socs=$TARGET_SOCS + - python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64,armhf --target_socs=$TARGET_SOCS --enable_neon=false api_test: stage: api_test @@ -68,14 +73,19 @@ extra_tests: stage: extra_tests script: - if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi - - python tools/bazel_adb_run.py --target="//mace/utils:tuner_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS + - > + if ping -c 1 v9.git.n.xiaomi.com 1>/dev/null 2>&1; then + GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git + DEVICE_CONF_FILE=generic-mobile-devices/devices.yml + fi + - python tools/bazel_adb_run.py --target="//mace/utils:tuner_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64 --target_socs=$TARGET_SOCS platform_compatible_tests: stage: platform_compatible_tests script: - bazel build mace/core:core --define openmp=true - - bazel build --config arm_linux --define openmp=true --define opencl=true --define neon=true //mace/libmace:libmace.so - - bazel build --config aarch64_linux --define openmp=true --define opencl=true --define neon=true //mace/libmace:libmace.so + - bazel 
build --config arm_linux_gnueabihf --define openmp=true --define opencl=true --define neon=true //mace/libmace:libmace.so + - bazel build --config aarch64_linux_gnu --define openmp=true --define opencl=true --define neon=true //mace/libmace:libmace.so build_libraries: stage: build_libraries @@ -87,6 +97,11 @@ ndk_versions_compatible_tests: script: - DEFAULT_NDK_PATH=$ANDROID_NDK_HOME - prefix_path=${DEFAULT_NDK_PATH%android-ndk-*} + - > + if ping -c 1 v9.git.n.xiaomi.com 1>/dev/null 2>&1; then + GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git + DEVICE_CONF_FILE=generic-mobile-devices/devices.yml + fi - > for ndk in android-ndk-r12b android-ndk-r15c android-ndk-r16 android-ndk-r17b; do @@ -96,8 +111,8 @@ ndk_versions_compatible_tests: export PATH=$ANDROID_NDK_HOME:$PATH; echo "ndk path: $ANDROID_NDK_HOME"; if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi - python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS --args="--gtest_filter=ActivationOpTest*"; - python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS --enable_neon=false --args="--gtest_filter=ActivationOpTest*"; + python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64 --target_socs=$TARGET_SOCS --args="--gtest_filter=ActivationOpTest*"; + python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64 --target_socs=$TARGET_SOCS --enable_neon=false 
--args="--gtest_filter=ActivationOpTest*"; fi done - export ANDROID_NDK_HOME=$DEFAULT_NDK_PATH @@ -111,16 +126,27 @@ python_tools_tests: - GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@github.com:XiaoMi/mace-models.git - CONF_FILE=mace-models/mobilenet-v2/mobilenet-v2.yml - > - python tools/converter.py convert --config=${CONF_FILE} --target_abis=armeabi-v7a --model_graph_format=file --model_data_format=file || exit 1; - python tools/converter.py run --config=${CONF_FILE} --round=1 --target_abis=armeabi-v7a --validate --model_graph_format=file --model_data_format=file || exit 1; - python tools/converter.py run --config=${CONF_FILE} --example --target_abis=armeabi-v7a --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1; + if ping -c 1 v9.git.n.xiaomi.com 1>/dev/null 2>&1; then + GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git + DEVICE_CONF_FILE=generic-mobile-devices/devices.yml + fi + - > + python tools/converter.py convert --config=${CONF_FILE} --target_abis=armeabi-v7a,arm64 --model_graph_format=file --model_data_format=file || exit 1; + python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --round=1 --target_abis=armeabi-v7a,arm64 --validate --model_graph_format=file --model_data_format=file || exit 1; + python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --example --target_abis=armeabi-v7a,arm64 --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1; model_tests: stage: model_tests script: - pwd - rm -rf mace-models + - rm -rf generic-mobile-devices - GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@github.com:XiaoMi/mace-models.git + - > + if ping -c 1 v9.git.n.xiaomi.com 1>/dev/null 2>&1; then + GIT_SSH_COMMAND="ssh -o 
UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git + DEVICE_CONF_FILE=generic-mobile-devices/devices.yml + fi - > for CONF_FILE in mace-models/mobilenet-v1/mobilenet-v1.yml mace-models/mobilenet-v1/mobilenet-v1-quantize-retrain.yml; do @@ -131,8 +157,8 @@ model_tests: - CONF_FILE=mace-models/mobilenet-v2/mobilenet-v2-host.yml - > python tools/converter.py convert --config=${CONF_FILE} --model_graph_format=file --model_data_format=file || exit 1; - python tools/converter.py run --config=${CONF_FILE} --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1; - python tools/converter.py run --config=${CONF_FILE} --example --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1; + python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1; + python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --example --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1; - rm -rf mace-models build_android_demo: diff --git a/docs/installation/env_requirement.rst b/docs/installation/env_requirement.rst index 17b564ea..12af0e61 100644 --- a/docs/installation/env_requirement.rst +++ b/docs/installation/env_requirement.rst @@ -35,7 +35,7 @@ Required dependencies - Required by model validation * - six - pip install -I six==1.11.0 - - Required for Python 2 and 3 compatibility (TODO) + - Required for Python 2 and 3 compatibility Optional dependencies --------------------- diff --git a/docs/user_guide/advanced_usage.rst b/docs/user_guide/advanced_usage.rst index 1c32b799..44d8b788 100644 --- a/docs/user_guide/advanced_usage.rst +++ b/docs/user_guide/advanced_usage.rst @@ -109,13 +109,75 @@ in one deployment file. 
sha256sum /path/to/your/file + Advanced usage -------------- -There are two common advanced use cases: +There are three common advanced use cases: + - run your model on the embedded device - converting model to C++ code. - tuning GPU kernels for a specific SoC. +Run you model on the embedded device +------------------ + +MACE use ssh to connect embedded device, in this case we recommend you to push ``$HOME/.ssh/id_rsa.pub`` +to your device ``$HOME/.ssh/authorized_keys`` + +.. code:: bash + + cat ~/.ssh/id_rsa.pub | ssh -q {user}@{ip} "cat >> ~/.ssh/authorized_keys" + +This part will show you how to write your own device yaml config file. + +**Device yaml config file** + +The way to run your model on the embedded device is nearly the same as run on android, except you need give a device yaml config file. + +MACE get this yaml config via ``--device_yml`` argument, default config value is ``devices.yml`` +, when the yaml config file is not found. we treat as there is no available arm linux device, give a message +and continue on other device such as plugged android phone. + +* **Example** + + Here is an device yaml config demo. + + .. literalinclude:: devices/demo_device_nanopi.yml + :language: yaml + +* **Configuration** + +.. list-table:: + :header-rows: 1 + + * - Options + - Usage + * - target_abis + - Device supported abis, you can get it via ``dpkg --print-architecture`` and + ``dpkg --print-foreign-architectures`` command, if more than one abi is supported, + separate them by commas. + * - target_socs + - device soc, you can get it from device manual, we haven't found a way to get it in shell. + * - models + - device models full name, you can get via get ``lshw`` command (third party package, install it via your package manager). + see it's product value. + * - address + - Since we use ssh to connect device, ip address is required. + * - username + - login username, required. 
+ * - password + - login password, optional when you can login into device without password + + +.. note:: + + Some command tools: + + .. code:: bash + + # specify device yaml config file via --device_yml argument or put the file under working directory + python tools/converter.py run --config=/path/to/mace-models/mobilenet-v2/mobilenet-v2.yml --device_yml=/path/to/devices.yml + Convert model(s) to C++ code -------------------------------- @@ -403,6 +465,7 @@ Reduce Library Size - It is recommended to use ``version script`` and ``strip`` feature when linking mace static library. The effect is remarkable. * Remove the unused ops. + Remove the registration of the ops unused for your models in the ``mace/ops/ops_register.cc``, which will reduce the library size significantly. the final binary just link the registered ops' code. diff --git a/docs/user_guide/basic_usage.rst b/docs/user_guide/basic_usage.rst index 15a4d516..63b8968b 100644 --- a/docs/user_guide/basic_usage.rst +++ b/docs/user_guide/basic_usage.rst @@ -68,7 +68,8 @@ Here we use the mobilenet-v2 model as an example. .. note:: - If you want to run on device/phone, please plug in at least one device/phone. + If you want to run on phone, please plug in at least one phone. + Or if you want to run on embedded device, please give a :doc:`advanced_usage`. .. code:: sh @@ -245,7 +246,10 @@ to run and validate your model. 
# Test model run time python tools/converter.py run --config=/path/to/your/model_deployment_file.yml --round=100 - # Validate the correctness by comparing the results against the + # If you want to run model on specified arm linux device, you should put device config file in the working directory or run with flag `--device_yml` + python tools/converter.py run --config=/path/to/mace-models/mobilenet-v2/mobilenet-v2.yml --device_yml=/path/to/devices.yml --example + + # Validate the correctness by comparing the results against the # original model and framework, measured with cosine distance for similarity. python tools/converter.py run --config=/path/to/your/model_deployment_file.yml --validate diff --git a/mace/BUILD b/mace/BUILD index cf2e1e2d..4b7da51f 100644 --- a/mace/BUILD +++ b/mace/BUILD @@ -24,6 +24,24 @@ config_setting( visibility = ["//visibility:public"], ) +config_setting( + name = "arm_linux_aarch64", + values = { + "crosstool_top": "//tools/aarch64_compiler:toolchain", + "cpu": "aarch64", + }, + visibility = ["//visibility:public"], +) + +config_setting( + name = "arm_linux_armhf", + values = { + "crosstool_top": "//tools/arm_compiler:toolchain", + "cpu": "armeabi-v7a", + }, + visibility = ["//visibility:public"], +) + config_setting( name = "neon_enabled", define_values = { diff --git a/mace/examples/cli/BUILD b/mace/examples/cli/BUILD index be0de253..b2c2291c 100644 --- a/mace/examples/cli/BUILD +++ b/mace/examples/cli/BUILD @@ -1,10 +1,10 @@ # Examples load( "//mace:mace.bzl", - "if_openmp_enabled", "if_android", "if_hexagon_enabled", "if_opencl_enabled", + "if_openmp_enabled", ) cc_binary( @@ -18,8 +18,9 @@ cc_binary( ]), linkopts = [ "-lm", + "-ldl", ] + if_openmp_enabled([ - "-fopenmp" + "-fopenmp", ]) + if_android([ "-ldl", "-pie", @@ -47,6 +48,7 @@ cc_binary( ]), linkopts = [ "-lm", + "-ldl", ] + if_android([ "-ldl", "-pie", @@ -55,8 +57,7 @@ cc_binary( linkstatic = 0, deps = [ "//external:gflags_nothreads", - 
"//mace/codegen:generated_mace_engine_factory", "//mace/codegen:generated_libmace", + "//mace/codegen:generated_mace_engine_factory", ], ) - diff --git a/mace/mace.bzl b/mace/mace.bzl index a7a6bc89..0215a086 100644 --- a/mace/mace.bzl +++ b/mace/mace.bzl @@ -24,6 +24,18 @@ def if_android_arm64(a): "//conditions:default": [], }) +def if_arm_linux_aarch64(a): + return select({ + "//mace:arm_linux_aarch64": a, + "//conditions:default": [], + }) + +def if_arm_linux_armhf(a): + return select({ + "//mace:arm_linux_armhf": a, + "//conditions:default": [] + }) + def if_neon_enabled(a): return select({ "//mace:neon_enabled": a, @@ -81,4 +93,3 @@ def encrypt_opencl_kernel_genrule(): outs = ["opencl/encrypt_opencl_kernel.cc"], cmd = "cat $(SRCS) > $@;" ) - diff --git a/mace/ops/depthwise_conv2d_test.cc b/mace/ops/depthwise_conv2d_test.cc index d757bf09..d9965658 100644 --- a/mace/ops/depthwise_conv2d_test.cc +++ b/mace/ops/depthwise_conv2d_test.cc @@ -233,7 +233,7 @@ void TestNxNS12(const index_t height, const index_t width) { auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w, Padding type) { // generate random input - static unsigned int seed = time(NULL); + // static unsigned int seed = time(NULL); index_t batch = 1; index_t channel = 32; index_t multiplier = 1; diff --git a/mace/ops/local_response_norm.cc b/mace/ops/local_response_norm.cc index fb0cda7c..ff5dd32b 100644 --- a/mace/ops/local_response_norm.cc +++ b/mace/ops/local_response_norm.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include +#include #include "mace/core/operator.h" diff --git a/mace/ops/resize_bicubic.cc b/mace/ops/resize_bicubic.cc index 3ccff3e6..6646afd0 100644 --- a/mace/ops/resize_bicubic.cc +++ b/mace/ops/resize_bicubic.cc @@ -15,6 +15,7 @@ #include "mace/ops/resize_bicubic.h" #include +#include #include #include diff --git a/mace/ops/scalar_math.cc b/mace/ops/scalar_math.cc index 297dcb33..a0d52192 100644 --- a/mace/ops/scalar_math.cc +++ b/mace/ops/scalar_math.cc @@ -13,6 +13,7 @@ // limitations under the License. #include +#include #include #include "mace/core/operator.h" diff --git a/mace/ops/softmax.cc b/mace/ops/softmax.cc index 2518b407..c4bef3d9 100644 --- a/mace/ops/softmax.cc +++ b/mace/ops/softmax.cc @@ -13,6 +13,7 @@ // limitations under the License. #include +#include #include #include #include @@ -106,7 +107,7 @@ class SoftmaxOp : public Operation { float sum = 0; for (index_t c = 0; c < class_count; ++c) { - float exp_value = ::exp(input_ptr[c] - max_val); + float exp_value = std::exp(input_ptr[c] - max_val); sum += exp_value; output_ptr[c] = exp_value; } diff --git a/mace/ops/strided_slice.cc b/mace/ops/strided_slice.cc index b3b53ec8..89860b79 100644 --- a/mace/ops/strided_slice.cc +++ b/mace/ops/strided_slice.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include +#include #include #include "mace/core/operator.h" diff --git a/mace/ops/transpose.cc b/mace/ops/transpose.cc index 7c25ea4f..7588b8c3 100644 --- a/mace/ops/transpose.cc +++ b/mace/ops/transpose.cc @@ -16,8 +16,9 @@ #include #endif -#include #include +#include +#include #include "mace/core/operator.h" #include "mace/ops/transpose.h" diff --git a/mace/python/tools/converter_tool/tensorflow_converter.py b/mace/python/tools/converter_tool/tensorflow_converter.py index 68e5ccb5..4b48ab9d 100644 --- a/mace/python/tools/converter_tool/tensorflow_converter.py +++ b/mace/python/tools/converter_tool/tensorflow_converter.py @@ -112,6 +112,8 @@ TFSupportedOps = [ TFOpType = Enum('TFOpType', [(op, op) for op in TFSupportedOps], type=str) +TFSupportedOps = [six.b(op) for op in TFSupportedOps] + class TensorflowConverter(base_converter.ConverterInterface): """A class for convert tensorflow frozen model to mace model. diff --git a/mace/python/tools/memory_optimizer.py b/mace/python/tools/memory_optimizer.py new file mode 100644 index 00000000..5b644779 --- /dev/null +++ b/mace/python/tools/memory_optimizer.py @@ -0,0 +1,350 @@ +# Copyright 2018 Xiaomi, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys +import operator + +import six +from six.moves import reduce + +from mace.proto import mace_pb2 + +from mace.python.tools.converter_tool import base_converter as cvt +from mace.python.tools.converter_tool.base_converter import DeviceType +from mace.python.tools.converter_tool.base_converter import ConverterUtil +from mace.python.tools.converter_tool.base_converter import MaceKeyword +from mace.python.tools.convert_util import calculate_image_shape +from mace.python.tools.convert_util import OpenCLBufferType + + +def MemoryTypeToStr(mem_type): + if mem_type == mace_pb2.CPU_BUFFER: + return 'CPU_BUFFER' + elif mem_type == mace_pb2.GPU_BUFFER: + return 'GPU_BUFFER' + elif mem_type == mace_pb2.GPU_IMAGE: + return 'GPU_IMAGE' + else: + return 'UNKNOWN' + + +class MemoryBlock(object): + def __init__(self, mem_type, block): + self._mem_type = mem_type + self._block = block + + @property + def mem_type(self): + return self._mem_type + + @property + def block(self): + return self._block + + +class MemoryOptimizer(object): + def __init__(self, net_def): + self.net_def = net_def + self.idle_mem = set() + self.op_mem = {} # op_name->mem_id + self.mem_block = {} # mem_id->[size] or mem_id->[x, y] + self.total_mem_count = 0 + self.input_ref_counter = {} + self.mem_ref_counter = {} + ocl_mem_type_arg = ConverterUtil.get_arg( + net_def, MaceKeyword.mace_opencl_mem_type) + self.cl_mem_type = ocl_mem_type_arg.i if ocl_mem_type_arg is not None \ + else None + + consumers = {} + for op in net_def.op: + if not self.op_need_optimize_memory(op): + continue + for ipt in op.input: + if ipt not in consumers: + consumers[ipt] = [] + consumers[ipt].append(op) + # only ref op's output tensor + for op in net_def.op: + if not self.op_need_optimize_memory(op): + continue + for output in op.output: + tensor_name = output + if tensor_name in consumers: + self.input_ref_counter[tensor_name] = \ + len(consumers[tensor_name]) + else: + self.input_ref_counter[tensor_name] = 0 + + def 
op_need_optimize_memory(self, op): + return True + + def get_op_mem_block(self, op_type, output_shape, output_type): + data_type_size = 4 + if output_type == mace_pb2.DT_UINT8: + data_type_size = 1 + return MemoryBlock(mace_pb2.CPU_BUFFER, + [reduce(operator.mul, output_shape, 1) * + data_type_size]) + + def mem_size(self, memory_block): + return memory_block.block[0] + + def sub_mem_block(self, mem_block1, mem_block2): + return self.mem_size(mem_block1) - self.mem_size(mem_block2) + + def resize_mem_block(self, old_mem_block, op_mem_block): + return MemoryBlock( + old_mem_block.mem_type, + [max(old_mem_block.block[0], op_mem_block.block[0])]) + + def add_net_mem_blocks(self): + for mem in self.mem_block: + arena = self.net_def.mem_arena + block = arena.mem_block.add() + block.mem_id = mem + block.device_type = DeviceType.CPU.value + block.mem_type = self.mem_block[mem].mem_type + block.x = self.mem_block[mem].block[0] + block.y = 1 + + def get_total_origin_mem_size(self): + origin_mem_size = 0 + for op in self.net_def.op: + if not self.op_need_optimize_memory(op): + continue + origin_mem_size += reduce(operator.mul, + op.output_shape[0].dims, + 1) + return origin_mem_size + + def get_total_optimized_mem_size(self): + optimized_mem_size = 0 + for mem in self.mem_block: + print(mem, MemoryTypeToStr(self.mem_block[mem].mem_type), + self.mem_block[mem].block) + optimized_mem_size += self.mem_size(self.mem_block[mem]) + return optimized_mem_size + + @staticmethod + def is_memory_reuse_op(op): + return op.type == 'Reshape' or op.type == 'Identity' \ + or op.type == 'Squeeze' or op.type == 'ExpandDims' + + def optimize(self): + for op in self.net_def.op: + if not self.op_need_optimize_memory(op): + continue + if not op.output_shape: + six.print_("WARNING: There is no output shape information to " + "do memory optimization. 
%s (%s)" % + (op.name, op.type), file=sys.stderr) + return + if len(op.output_shape) != len(op.output): + six.print_('WARNING: the number of output shape is ' + 'not equal to the number of output.', + file=sys.stderr) + return + for i in range(len(op.output)): + if self.is_memory_reuse_op(op): + # make these ops reuse memory of input tensor + mem_id = self.op_mem.get(op.input[0], -1) + else: + output_type = mace_pb2.DT_FLOAT + for arg in op.arg: + if arg.name == 'T': + output_type = arg.i + if len(op.output_type) > i: + output_type = op.output_type[i] + op_mem_block = self.get_op_mem_block( + op.type, + op.output_shape[i].dims, + output_type) + mem_id = -1 + if len(self.idle_mem) > 0: + best_mem_add_size = six.MAXSIZE + best_mem_waste_size = six.MAXSIZE + for mid in self.idle_mem: + old_mem_block = self.mem_block[mid] + if old_mem_block.mem_type != op_mem_block.mem_type: + continue + new_mem_block = self.resize_mem_block( + old_mem_block, op_mem_block) + add_mem_size = self.sub_mem_block(new_mem_block, + old_mem_block) + waste_mem_size = self.sub_mem_block(new_mem_block, + op_mem_block) + + # minimize add_mem_size; if best_mem_add_size is 0, + # then minimize waste_mem_size + if (best_mem_add_size > 0 and + add_mem_size < best_mem_add_size) \ + or (best_mem_add_size == 0 and + waste_mem_size < best_mem_waste_size): + best_mem_id = mid + best_mem_add_size = add_mem_size + best_mem_waste_size = waste_mem_size + best_mem_block = new_mem_block + + # if add mem size < op mem size, then reuse it + if best_mem_add_size <= self.mem_size(op_mem_block): + self.mem_block[best_mem_id] = best_mem_block + mem_id = best_mem_id + self.idle_mem.remove(mem_id) + + if mem_id == -1: + mem_id = self.total_mem_count + self.total_mem_count += 1 + self.mem_block[mem_id] = op_mem_block + + if mem_id != -1: + op.mem_id.extend([mem_id]) + self.op_mem[op.output[i]] = mem_id + if mem_id not in self.mem_ref_counter: + self.mem_ref_counter[mem_id] = 1 + else: + self.mem_ref_counter[mem_id] += 1 
+ + # de-ref input tensor mem + for idx in six.moves.range(len(op.input)): + ipt = op.input[idx] + if ipt in self.input_ref_counter: + self.input_ref_counter[ipt] -= 1 + if self.input_ref_counter[ipt] == 0 \ + and ipt in self.op_mem: + mem_id = self.op_mem[ipt] + self.mem_ref_counter[mem_id] -= 1 + if self.mem_ref_counter[mem_id] == 0: + self.idle_mem.add(self.op_mem[ipt]) + elif self.input_ref_counter[ipt] < 0: + raise Exception('ref count is less than 0') + + self.add_net_mem_blocks() + + print("total op: %d" % len(self.net_def.op)) + print("origin mem: %d, optimized mem: %d" % ( + self.get_total_origin_mem_size(), + self.get_total_optimized_mem_size())) + + +class GPUMemoryOptimizer(MemoryOptimizer): + def op_need_optimize_memory(self, op): + if op.type == MaceKeyword.mace_buffer_transform: + for arg in op.arg: + if arg.name == 'mode' and arg.i == 0: + return False + return op.type != MaceKeyword.mace_buffer_inverse_transform + + def get_op_image_mem_block(self, op_type, output_shape): + if op_type == 'WinogradTransform' or op_type == 'MatMul': + buffer_shape = list(output_shape) + [1] + mem_block = MemoryBlock( + mace_pb2.GPU_IMAGE, + calculate_image_shape(OpenCLBufferType.IN_OUT_HEIGHT, + buffer_shape)) + elif op_type in ['Shape', + 'InferConv2dShape', + 'StridedSlice', + 'Stack', + 'ScalarMath']: + if len(output_shape) == 1: + mem_block = MemoryBlock(mace_pb2.CPU_BUFFER, + [output_shape[0], 1]) + elif len(output_shape) == 0: + mem_block = MemoryBlock(mace_pb2.CPU_BUFFER, + [1, 1]) + else: + raise Exception('%s output shape dim size is not 0 or 1.' % + op_type) + else: + if len(output_shape) == 2: # only support fc/softmax + buffer_shape = [output_shape[0], output_shape[1]] + elif len(output_shape) == 4: + buffer_shape = output_shape + else: + raise Exception('%s output shape dim size is not 2 or 4.' 
% + op_type) + mem_block = MemoryBlock( + mace_pb2.GPU_IMAGE, + calculate_image_shape(OpenCLBufferType.IN_OUT_CHANNEL, + buffer_shape)) + return mem_block + + def get_op_buffer_mem_block(self, output_shape): + return MemoryBlock(mace_pb2.GPU_BUFFER, + [reduce(operator.mul, output_shape, 1), 1]) + + def get_op_mem_block(self, op_type, output_shape, output_type): + if self.cl_mem_type == mace_pb2.GPU_IMAGE: + return self.get_op_image_mem_block(op_type, output_shape) + else: + return self.get_op_buffer_mem_block(output_shape) + + def mem_size(self, memory_block): + if memory_block.mem_type == mace_pb2.GPU_IMAGE: + return memory_block.block[0] * memory_block.block[1] * 4 + else: + return memory_block.block[0] + + def resize_mem_block(self, old_mem_block, op_mem_block): + resize_mem_block = MemoryBlock( + old_mem_block.mem_type, + [ + max(old_mem_block.block[0], op_mem_block.block[0]), + max(old_mem_block.block[1], op_mem_block.block[1]) + ]) + + return resize_mem_block + + def add_net_mem_blocks(self): + max_image_size_x = 0 + max_image_size_y = 0 + for mem in self.mem_block: + arena = self.net_def.mem_arena + block = arena.mem_block.add() + block.mem_id = mem + block.device_type = DeviceType.GPU.value + block.mem_type = self.mem_block[mem].mem_type + block.x = self.mem_block[mem].block[0] + block.y = self.mem_block[mem].block[1] + if self.mem_block[mem].mem_type == mace_pb2.GPU_IMAGE: + max_image_size_x = max(max_image_size_x, block.x) + max_image_size_y = max(max_image_size_y, block.y) + + if self.cl_mem_type == mace_pb2.GPU_IMAGE: + # Update OpenCL max image size + net_ocl_max_img_size_arg = None + for arg in self.net_def.arg: + if arg.name == cvt.MaceKeyword.mace_opencl_max_image_size: + net_ocl_max_img_size_arg = arg + max_image_size_x = max(arg.ints[0], max_image_size_x) + max_image_size_y = max(arg.ints[1], max_image_size_y) + break + if net_ocl_max_img_size_arg is None: + net_ocl_max_img_size_arg = self.net_def.arg.add() + net_ocl_max_img_size_arg.name = \ + 
cvt.MaceKeyword.mace_opencl_max_image_size + + net_ocl_max_img_size_arg.ints[:] = [max_image_size_x, + max_image_size_y] + + +def optimize_gpu_memory(net_def): + mem_optimizer = GPUMemoryOptimizer(net_def) + mem_optimizer.optimize() + + +def optimize_cpu_memory(net_def): + mem_optimizer = MemoryOptimizer(net_def) + mem_optimizer.optimize() diff --git a/mace/python/tools/model_saver.py b/mace/python/tools/model_saver.py index 95c79657..217b25b6 100644 --- a/mace/python/tools/model_saver.py +++ b/mace/python/tools/model_saver.py @@ -14,6 +14,7 @@ import datetime import os +import six import uuid import numpy as np import hashlib @@ -34,8 +35,8 @@ class ModelFormat(object): def generate_obfuscated_name(namespace, name): md5 = hashlib.md5() - md5.update(namespace) - md5.update(name) + md5.update(six.b(namespace)) + md5.update(six.b(name)) md5_digest = md5.hexdigest() name = md5_digest[:8] diff --git a/mace/utils/quantize.h b/mace/utils/quantize.h index 0755e708..baf07708 100644 --- a/mace/utils/quantize.h +++ b/mace/utils/quantize.h @@ -15,8 +15,9 @@ #ifndef MACE_UTILS_QUANTIZE_H_ #define MACE_UTILS_QUANTIZE_H_ -#include #include +#include +#include namespace mace { diff --git a/tools/bazel.rc b/tools/bazel.rc index 1863738e..15273b31 100644 --- a/tools/bazel.rc +++ b/tools/bazel.rc @@ -21,30 +21,29 @@ build:android --config=cross_compile build:android --crosstool_top=//external:android/crosstool build:android --host_crosstool_top=@bazel_tools//tools/cpp:toolchain -# Usage example: bazel build --config arm_linux -build:arm_linux --config=cross_compile -build:arm_linux --crosstool_top=//tools/arm_compiler:toolchain -build:arm_linux --host_crosstool_top=@bazel_tools//tools/cpp:toolchain -build:arm_linux --cpu=armeabi-v7a -build:arm_linux --copt -mfloat-abi=hard -build:arm_linux --copt -mfpu=neon -build:arm_linux --copt -Wno-ignored-attributes -build:arm_linux --copt -Wno-unused-function -build:arm_linux --copt -Wno-sequence-point -build:arm_linux --copt 
-Wno-implicit-fallthrough -build:arm_linux --copt -Wno-psabi +# Usage example: bazel build --config arm_linux_gnueabihf +build:arm_linux_gnueabihf --config=cross_compile +build:arm_linux_gnueabihf --crosstool_top=//tools/arm_compiler:toolchain +build:arm_linux_gnueabihf --host_crosstool_top=@bazel_tools//tools/cpp:toolchain +build:arm_linux_gnueabihf --cpu=armeabi-v7a +build:arm_linux_gnueabihf --copt -mfloat-abi=hard +build:arm_linux_gnueabihf --copt -mfpu=neon +build:arm_linux_gnueabihf --copt -Wno-ignored-attributes +build:arm_linux_gnueabihf --copt -Wno-unused-function +build:arm_linux_gnueabihf --copt -Wno-sequence-point +build:arm_linux_gnueabihf --copt -Wno-implicit-fallthrough -# Usage example: bazel build --config aarch64_linux -build:aarch64_linux --config=cross_compile -build:aarch64_linux --crosstool_top=//tools/aarch64_compiler:toolchain -build:aarch64_linux --host_crosstool_top=@bazel_tools//tools/cpp:toolchain -build:aarch64_linux --cpu=aarch64 -build:aarch64_linux --copt -Wno-ignored-attributes -build:aarch64_linux --copt -Wno-unused-function -build:aarch64_linux --copt -Wno-sequence-point -build:aarch64_linux --copt -Wno-implicit-fallthrough +# Usage example: bazel build --config aarch64_linux_gnu +build:aarch64_linux_gnu --config=cross_compile +build:aarch64_linux_gnu --crosstool_top=//tools/aarch64_compiler:toolchain +build:aarch64_linux_gnu --host_crosstool_top=@bazel_tools//tools/cpp:toolchain +build:aarch64_linux_gnu --cpu=aarch64 +build:aarch64_linux_gnu --copt -Wno-ignored-attributes +build:aarch64_linux_gnu --copt -Wno-unused-function +build:aarch64_linux_gnu --copt -Wno-sequence-point +build:aarch64_linux_gnu --copt -Wno-implicit-fallthrough -# Usage example: bazel build --config optimization +# Usage example: bazel build --config optimization build:optimization -c opt build:optimization --copt=-O3 build:optimization --linkopt=-Wl,--strip-all diff --git a/tools/bazel_adb_run.py b/tools/bazel_adb_run.py index 12cdd20f..6906015c 100644 --- 
a/tools/bazel_adb_run.py +++ b/tools/bazel_adb_run.py @@ -26,9 +26,9 @@ import sys import sh_commands +from common import * -def stdout_processor(stdout, device_properties, abi): - pass +from device import DeviceWrapper, DeviceManager def unittest_stdout_processor(stdout, device_properties, abi): @@ -39,7 +39,7 @@ def unittest_stdout_processor(stdout, device_properties, abi): raise Exception("Command failed") -def ops_benchmark_stdout_processor(stdout, device_properties, abi): +def ops_benchmark_stdout_processor(stdout, dev, abi): stdout_lines = stdout.split("\n") metrics = {} for line in stdout_lines: @@ -52,8 +52,8 @@ def ops_benchmark_stdout_processor(stdout, device_properties, abi): metrics["%s.input_mb_per_sec" % parts[0]] = parts[3] metrics["%s.gmacc_per_sec" % parts[0]] = parts[4] - platform = device_properties["ro.board.platform"].replace(" ", "-") - model = device_properties["ro.product.model"].replace(" ", "-") + platform = dev[YAMLKeyword.target_socs] + model = dev[YAMLKeyword.models] tags = { "ro.board.platform": platform, "ro.product.model": model, @@ -87,7 +87,7 @@ def parse_args(): type=str, default="all", help="SoCs (ro.board.platform from getprop) to build, " - "comma seperated list or all/random") + "comma seperated list or all/random") parser.add_argument( "--target", type=str, default="//...", help="Bazel target to build") parser.add_argument( @@ -115,14 +115,22 @@ def parse_args(): type=str2bool, default=False, help="Whether to use simpleperf stat") + parser.add_argument( + '--device_yml', + type=str, + default='', + help='embedded linux device config yml file' + ) return parser.parse_known_args() def main(unused_args): target_socs = None + target_devices = DeviceManager.list_devices(FLAGS.device_yml) if FLAGS.target_socs != "all" and FLAGS.target_socs != "random": target_socs = set(FLAGS.target_socs.split(',')) - target_devices = sh_commands.get_target_socs_serialnos(target_socs) + target_devices = [dev for dev in target_devices + if 
dev[YAMLKeyword.target_socs] in target_socs] if FLAGS.target_socs == "random": unlocked_devices = \ [d for d in target_devices if not sh_commands.is_device_locked(d)] @@ -136,31 +144,29 @@ def main(unused_args): target_abis = FLAGS.target_abis.split(',') for target_abi in target_abis: + toolchain = infer_toolchain(target_abi) sh_commands.bazel_build(target, abi=target_abi, + toolchain=toolchain, enable_neon=FLAGS.enable_neon, address_sanitizer=FLAGS.address_sanitizer) if FLAGS.run_target: - for serialno in target_devices: - if target_abi not in set( - sh_commands.adb_supported_abis(serialno)): + for dev in target_devices: + if target_abi not in dev[YAMLKeyword.target_abis]: print("Skip device %s which does not support ABI %s" % - (serialno, target_abi)) + (dev, target_abi)) continue - stdouts = sh_commands.adb_run( + device_wrapper = DeviceWrapper(dev) + stdouts = device_wrapper.run( target_abi, - serialno, host_bin_path, bin_name, args=FLAGS.args, opencl_profiling=True, vlog_level=0, - device_bin_path="/data/local/tmp/mace", out_of_range_check=True, address_sanitizer=FLAGS.address_sanitizer, simpleperf=FLAGS.simpleperf) - device_properties = sh_commands.adb_getprop_by_serialno( - serialno) - globals()[FLAGS.stdout_processor](stdouts, device_properties, + globals()[FLAGS.stdout_processor](stdouts, dev, target_abi) diff --git a/tools/build-standalone-lib.sh b/tools/build-standalone-lib.sh index 34866596..24cba4cf 100755 --- a/tools/build-standalone-lib.sh +++ b/tools/build-standalone-lib.sh @@ -22,6 +22,14 @@ mkdir -p $LIB_DIR/arm64-v8a/cpu_gpu rm -rf $LIB_DIR/linux-x86-64 mkdir -p $LIB_DIR/linux-x86-64 +rm -rf $LIB_DIR/arm_linux_gnueabihf +mkdir -p $LIB_DIR/arm_linux_gnueabihf/cpu_gpu + +rm -rf $LIB_DIR/aarch64_linux_gnu +mkdir -p $LIB_DIR/aarch64_linux_gnu/cpu_gpu + + + # build shared libraries echo "build shared lib for armeabi-v7a + cpu_gpu_dsp" bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define 
openmp=true --define opencl=true --define hexagon=true --define quantize=true --cpu=armeabi-v7a @@ -36,6 +44,14 @@ echo "build shared lib for arm64-v8a + cpu_gpu" bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --define quantize=true --cpu=arm64-v8a cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/arm64-v8a/cpu_gpu/ +echo "build shared lib for arm_linux_gnueabihf + cpu_gpu" +bazel build --config arm_linux_gnueabihf --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true +cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/arm_linux_gnueabihf/cpu_gpu/ + +echo "build shared lib for aarch64_linux_gnu + cpu_gpu" +bazel build --config aarch64_linux_gnu --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true +cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/aarch64_linux_gnu/cpu_gpu/ + if [[ "$OSTYPE" != "darwin"* ]];then echo "build shared lib for linux-x86-64" bazel build mace/libmace:libmace_dynamic --config optimization --define quantize=true --define openmp=true @@ -56,6 +72,14 @@ echo "build static lib for arm64-v8a + cpu_gpu" bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=true --define opencl=true --define quantize=true --cpu=arm64-v8a cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/arm64-v8a/cpu_gpu/ +echo "build static lib for arm_linux_gnueabihf + cpu_gpu" +bazel build --config arm_linux_gnueabihf --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=true --define opencl=true +cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/arm_linux_gnueabihf/cpu_gpu/ + +echo "build static lib for aarch64_linux_gnu + cpu_gpu" +bazel build --config aarch64_linux_gnu --config optimization mace/libmace:libmace_static --config symbol_hidden 
def md5sum(s):
    """Return the hex MD5 digest of string *s* (used for cache-key paths)."""
    md5 = hashlib.md5()
    md5.update(s.encode('utf-8'))
    return md5.hexdigest()


def get_build_binary_dir(library_name, target_abi):
    """Return the temp binary dir for *library_name* built for *target_abi*."""
    return "%s/%s/%s/%s" % (
        BUILD_OUTPUT_DIR, library_name, BUILD_TMP_DIR_NAME, target_abi)


def get_model_lib_output_path(library_name, abi):
    """Return the path of the generated model static library (.a)."""
    lib_output_path = os.path.join(BUILD_OUTPUT_DIR, library_name,
                                   MODEL_OUTPUT_DIR_NAME, abi,
                                   "%s.a" % library_name)
    return lib_output_path


def check_model_converted(library_name, model_name,
                          model_graph_format, model_data_format,
                          abi):
    """Abort (via mace_check) unless the converted model artifacts exist.

    Checks the .pb (file graph format) or the model static lib (code
    format), plus the .data file when the data format is 'file'.
    """
    model_output_dir = \
        '%s/%s/%s' % (BUILD_OUTPUT_DIR, library_name, MODEL_OUTPUT_DIR_NAME)
    if model_graph_format == ModelFormat.file:
        mace_check(os.path.exists("%s/%s.pb" % (model_output_dir, model_name)),
                   ModuleName.RUN,
                   "You should convert model first.")
    else:
        model_lib_path = get_model_lib_output_path(library_name, abi)
        mace_check(os.path.exists(model_lib_path),
                   ModuleName.RUN,
                   "You should convert model first.")
    if model_data_format == ModelFormat.file:
        mace_check(os.path.exists("%s/%s.data" %
                                  (model_output_dir, model_name)),
                   ModuleName.RUN,
                   "You should convert model first.")


def parse_device_type(runtime):
    """Map a RuntimeType string to a DeviceType constant.

    Returns "" for unrecognized runtimes (including 'cpu+gpu', which is
    expanded by callers before reaching here).
    """
    device_type = ""
    if runtime == RuntimeType.dsp:
        device_type = DeviceType.HEXAGON
    elif runtime == RuntimeType.gpu:
        device_type = DeviceType.GPU
    elif runtime == RuntimeType.cpu:
        device_type = DeviceType.CPU
    return device_type


def sha256_checksum(fname):
    """Return the hex SHA-256 digest of file *fname*, read in 4 KiB chunks."""
    hash_func = hashlib.sha256()
    with open(fname, "rb") as f:
        for chunk in iter(lambda: f.read(4096), b""):
            hash_func.update(chunk)
    return hash_func.hexdigest()


def get_model_files(model_file_path,
                    model_sha256_checksum,
                    model_output_dir,
                    weight_file_path="",
                    weight_sha256_checksum=""):
    """Resolve (and download if remote) the model graph and weight files.

    http(s) paths are cached under *model_output_dir* keyed by the URL's
    MD5; local paths are used as-is. Both files are verified against the
    given SHA-256 checksums (errors are reported via MaceLogger.error).

    Returns:
        (model_file, weight_file) local paths; weight_file may be "".
    """
    model_file = model_file_path
    weight_file = weight_file_path

    if model_file_path.startswith("http://") or \
            model_file_path.startswith("https://"):
        # FIX: the download cache dir may not exist on a fresh checkout;
        # urlretrieve does not create parent directories.
        if not os.path.exists(model_output_dir):
            os.makedirs(model_output_dir)
        model_file = model_output_dir + "/" + md5sum(model_file_path) + ".pb"
        if not os.path.exists(model_file) or \
                sha256_checksum(model_file) != model_sha256_checksum:
            MaceLogger.info("Downloading model, please wait ...")
            six.moves.urllib.request.urlretrieve(model_file_path, model_file)
            MaceLogger.info("Model downloaded successfully.")

    if sha256_checksum(model_file) != model_sha256_checksum:
        MaceLogger.error(ModuleName.MODEL_CONVERTER,
                         "model file sha256checksum not match")

    if weight_file_path.startswith("http://") or \
            weight_file_path.startswith("https://"):
        if not os.path.exists(model_output_dir):
            os.makedirs(model_output_dir)
        weight_file = \
            model_output_dir + "/" + md5sum(weight_file_path) + ".caffemodel"
        if not os.path.exists(weight_file) or \
                sha256_checksum(weight_file) != weight_sha256_checksum:
            MaceLogger.info("Downloading model weight, please wait ...")
            six.moves.urllib.request.urlretrieve(weight_file_path, weight_file)
            MaceLogger.info("Model weight downloaded successfully.")

    if weight_file:
        if sha256_checksum(weight_file) != weight_sha256_checksum:
            MaceLogger.error(ModuleName.MODEL_CONVERTER,
                             "weight file sha256checksum not match")

    return model_file, weight_file


def get_opencl_binary_output_path(library_name, target_abi, device):
    """Return the output path of the compiled OpenCL kernel binary
    for *device* (named by its model and SoC)."""
    target_soc = device.target_socs
    device_model = device.models
    return '%s/%s/%s/%s/%s_%s.%s.%s.bin' % \
           (BUILD_OUTPUT_DIR,
            library_name,
            OUTPUT_OPENCL_BINARY_DIR_NAME,
            target_abi,
            library_name,
            OUTPUT_OPENCL_BINARY_FILE_NAME,
            device_model,
            target_soc)


def get_opencl_parameter_output_path(library_name, target_abi, device):
    """Return the output path of the tuned OpenCL parameter file
    for *device* (named by its model and SoC)."""
    target_soc = device.target_socs
    device_model = device.models
    return '%s/%s/%s/%s/%s_%s.%s.%s.bin' % \
           (BUILD_OUTPUT_DIR,
            library_name,
            OUTPUT_OPENCL_BINARY_DIR_NAME,
            target_abi,
            library_name,
            OUTPUT_OPENCL_PARAMETER_FILE_NAME,
            device_model,
            target_soc)


def get_build_model_dirs(library_name,
                         model_name,
                         target_abi,
                         device,
                         model_file_path):
    """Compute the per-model build directories.

    Returns:
        (model_output_base_dir, model_output_dir, mace_model_dir).
        The output dir is host-, general-, or device-specific depending
        on target_abi and whether the device has a SoC and an address.
    """
    models = device.models
    target_socs = device.target_socs
    model_path_digest = md5sum(model_file_path)
    model_output_base_dir = '{}/{}/{}/{}/{}'.format(
        BUILD_OUTPUT_DIR, library_name, BUILD_TMP_DIR_NAME,
        model_name, model_path_digest)

    if target_abi == ABIType.host:
        model_output_dir = '%s/%s' % (model_output_base_dir, target_abi)
    elif not target_socs or not device.address:
        model_output_dir = '%s/%s/%s' % (model_output_base_dir,
                                         BUILD_TMP_GENERAL_OUTPUT_DIR_NAME,
                                         target_abi)
    else:
        model_output_dir = '{}/{}_{}/{}'.format(
            model_output_base_dir,
            models,
            target_socs,
            target_abi
        )

    mace_model_dir = '{}/{}/{}'.format(
        BUILD_OUTPUT_DIR, library_name, MODEL_OUTPUT_DIR_NAME
    )

    return model_output_base_dir, model_output_dir, mace_model_dir


def abi_to_internal(abi):
    """Map a user-facing ABI name to the internal (bazel) ABI name.

    NOTE(review): falls through and returns None for ABIType.host and
    unknown ABIs — callers appear to only pass device ABIs; confirm.
    """
    if abi in [ABIType.armeabi_v7a, ABIType.arm64_v8a]:
        return abi
    if abi == ABIType.arm64:
        return ABIType.aarch64
    if abi == ABIType.armhf:
        return ABIType.armeabi_v7a


def infer_toolchain(abi):
    """Infer the cross-compile toolchain for *abi* ('' for host/unknown)."""
    if abi in [ABIType.armeabi_v7a, ABIType.arm64_v8a]:
        return ToolchainType.android
    if abi == ABIType.armhf:
        return ToolchainType.arm_linux_gnueabihf
    if abi == ABIType.arm64:
        return ToolchainType.aarch64_linux_gnu
    return ''


################################
# YAML key word
################################
class YAMLKeyword(object):
    # String constants for every key recognized in the deployment yml.
    library_name = 'library_name'
    target_abis = 'target_abis'
    target_socs = 'target_socs'
    model_graph_format = 'model_graph_format'
    model_data_format = 'model_data_format'
    models = 'models'
    platform = 'platform'
    device_name = 'device_name'
    system = 'system'
    address = 'address'
    username = 'username'
    password = 'password'
    model_file_path = 'model_file_path'
    model_sha256_checksum = 'model_sha256_checksum'
    weight_file_path = 'weight_file_path'
    weight_sha256_checksum = 'weight_sha256_checksum'
    subgraphs = 'subgraphs'
    input_tensors = 'input_tensors'
    input_shapes = 'input_shapes'
    input_ranges = 'input_ranges'
    output_tensors = 'output_tensors'
    output_shapes = 'output_shapes'
    check_tensors = 'check_tensors'
    check_shapes = 'check_shapes'
    runtime = 'runtime'
    data_type = 'data_type'
    input_data_types = 'input_data_types'
    input_data_formats = 'input_data_formats'
    output_data_formats = 'output_data_formats'
    limit_opencl_kernel_time = 'limit_opencl_kernel_time'
    nnlib_graph_mode = 'nnlib_graph_mode'
    obfuscate = 'obfuscate'
    winograd = 'winograd'
    quantize = 'quantize'
    quantize_range_file = 'quantize_range_file'
    change_concat_ranges = 'change_concat_ranges'
    validation_inputs_data = 'validation_inputs_data'
    validation_threshold = 'validation_threshold'
    graph_optimize_options = 'graph_optimize_options'  # internal use for now
    cl_mem_type = 'cl_mem_type'


################################
# SystemType
################################
class SystemType:
    # Operating system kind of a target device.
    host = 'host'
    android = 'android'
    arm_linux = 'arm_linux'


################################
# common device str
################################

PHONE_DATA_DIR = '/data/local/tmp/mace_run'
DEVICE_DATA_DIR = '/tmp/data/mace_run'
DEVICE_INTERIOR_DIR = PHONE_DATA_DIR + "/interior"
BUILD_OUTPUT_DIR = 'builds'
BUILD_TMP_DIR_NAME = '_tmp'
BUILD_DOWNLOADS_DIR = BUILD_OUTPUT_DIR + '/downloads'
BUILD_TMP_GENERAL_OUTPUT_DIR_NAME = 'general'
MODEL_OUTPUT_DIR_NAME = 'model'
EXAMPLE_STATIC_NAME = "example_static"
EXAMPLE_DYNAMIC_NAME = "example_dynamic"
EXAMPLE_STATIC_TARGET = "//mace/examples/cli:" + EXAMPLE_STATIC_NAME
EXAMPLE_DYNAMIC_TARGET = "//mace/examples/cli:" + EXAMPLE_DYNAMIC_NAME
MACE_RUN_STATIC_NAME = "mace_run_static"
MACE_RUN_DYNAMIC_NAME = "mace_run_dynamic"
MACE_RUN_STATIC_TARGET = "//mace/tools/validation:" + MACE_RUN_STATIC_NAME
MACE_RUN_DYNAMIC_TARGET = "//mace/tools/validation:" + MACE_RUN_DYNAMIC_NAME
CL_COMPILED_BINARY_FILE_NAME = "mace_cl_compiled_program.bin"
BUILD_TMP_OPENCL_BIN_DIR = 'opencl_bin'
LIBMACE_DYNAMIC_PATH = "bazel-bin/mace/libmace/libmace.so"
CL_TUNED_PARAMETER_FILE_NAME = "mace_run.config"
MODEL_HEADER_DIR_PATH = 'include/mace/public'
OUTPUT_LIBRARY_DIR_NAME = 'lib'
OUTPUT_OPENCL_BINARY_DIR_NAME = 'opencl'
OUTPUT_OPENCL_BINARY_FILE_NAME = 'compiled_opencl_kernel'
OUTPUT_OPENCL_PARAMETER_FILE_NAME = 'tuned_opencl_parameter'
CODEGEN_BASE_DIR = 'mace/codegen'
MODEL_CODEGEN_DIR = CODEGEN_BASE_DIR + '/models'
ENGINE_CODEGEN_DIR = CODEGEN_BASE_DIR + '/engine'
LIB_CODEGEN_DIR = CODEGEN_BASE_DIR + '/lib'
LIBMACE_SO_TARGET = "//mace/libmace:libmace.so"
LIBMACE_STATIC_TARGET = "//mace/libmace:libmace_static"
LIBMACE_STATIC_PATH = "bazel-genfiles/mace/libmace/libmace.a"
MODEL_LIB_TARGET = "//mace/codegen:generated_models"
MODEL_LIB_PATH = "bazel-genfiles/mace/codegen/libgenerated_models.a"
QUANTIZE_STAT_TARGET = "//mace/tools/quantization:quantize_stat"
BM_MODEL_STATIC_NAME = "benchmark_model_static"
BM_MODEL_DYNAMIC_NAME = "benchmark_model_dynamic"
BM_MODEL_STATIC_TARGET = "//mace/benchmark:" + BM_MODEL_STATIC_NAME
BM_MODEL_DYNAMIC_TARGET = "//mace/benchmark:" + BM_MODEL_DYNAMIC_NAME
ALL_SOC_TAG = 'all'


################################
# Model File Format
################################
class ModelFormat(object):
    # Whether the converted model is stored as a file or generated code.
    file = 'file'
    code = 'code'


################################
# ABI Type
################################
class ABIType(object):
    armeabi_v7a = 'armeabi-v7a'
    arm64_v8a = 'arm64-v8a'
    arm64 = 'arm64'
    aarch64 = 'aarch64'
    armhf = 'armhf'
    host = 'host'


################################
# Module name
################################
class ModuleName(object):
    # Labels used by mace_check / MaceLogger to attribute errors.
    YAML_CONFIG = 'YAML CONFIG'
    MODEL_CONVERTER = 'Model Converter'
    RUN = 'RUN'
    BENCHMARK = 'Benchmark'


#################################
# mace lib type
#################################
class MACELibType(object):
    static = 0
    dynamic = 1


#################################
# Run time type
#################################
class RuntimeType(object):
    cpu = 'cpu'
    gpu = 'gpu'
    dsp = 'dsp'
    cpu_gpu = 'cpu+gpu'


#################################
# Tool chain Type
#################################
class ToolchainType:
    android = 'android'
    arm_linux_gnueabihf = 'arm_linux_gnueabihf'
    aarch64_linux_gnu = 'aarch64_linux_gnu'
-BUILD_TMP_GENERAL_OUTPUT_DIR_NAME = 'general' -OUTPUT_LIBRARY_DIR_NAME = 'lib' -OUTPUT_OPENCL_BINARY_DIR_NAME = 'opencl' -OUTPUT_OPENCL_BINARY_FILE_NAME = 'compiled_opencl_kernel' -OUTPUT_OPENCL_PARAMETER_FILE_NAME = 'tuned_opencl_parameter' -CL_COMPILED_BINARY_FILE_NAME = "mace_cl_compiled_program.bin" -CL_TUNED_PARAMETER_FILE_NAME = "mace_run.config" -CODEGEN_BASE_DIR = 'mace/codegen' -MODEL_CODEGEN_DIR = CODEGEN_BASE_DIR + '/models' -ENGINE_CODEGEN_DIR = CODEGEN_BASE_DIR + '/engine' -LIB_CODEGEN_DIR = CODEGEN_BASE_DIR + '/lib' -LIBMACE_SO_TARGET = "//mace/libmace:libmace.so" -LIBMACE_STATIC_TARGET = "//mace/libmace:libmace_static" -LIBMACE_STATIC_PATH = "bazel-genfiles/mace/libmace/libmace.a" -LIBMACE_DYNAMIC_PATH = "bazel-bin/mace/libmace/libmace.so" -MODEL_LIB_TARGET = "//mace/codegen:generated_models" -MODEL_LIB_PATH = "bazel-genfiles/mace/codegen/libgenerated_models.a" -MACE_RUN_STATIC_NAME = "mace_run_static" -MACE_RUN_DYNAMIC_NAME = "mace_run_dynamic" -MACE_RUN_STATIC_TARGET = "//mace/tools/validation:" + MACE_RUN_STATIC_NAME -MACE_RUN_DYNAMIC_TARGET = "//mace/tools/validation:" + MACE_RUN_DYNAMIC_NAME -EXAMPLE_STATIC_NAME = "example_static" -EXAMPLE_DYNAMIC_NAME = "example_dynamic" -EXAMPLE_STATIC_TARGET = "//mace/examples/cli:" + EXAMPLE_STATIC_NAME -EXAMPLE_DYNAMIC_TARGET = "//mace/examples/cli:" + EXAMPLE_DYNAMIC_NAME -BM_MODEL_STATIC_NAME = "benchmark_model_static" -BM_MODEL_DYNAMIC_NAME = "benchmark_model_dynamic" -BM_MODEL_STATIC_TARGET = "//mace/benchmark:" + BM_MODEL_STATIC_NAME -BM_MODEL_DYNAMIC_TARGET = "//mace/benchmark:" + BM_MODEL_DYNAMIC_NAME -DEVICE_INTERIOR_DIR = PHONE_DATA_DIR + "/interior" -BUILD_TMP_OPENCL_BIN_DIR = 'opencl_bin' -ALL_SOC_TAG = 'all' ABITypeStrs = [ 'armeabi-v7a', 'arm64-v8a', + 'arm64', + 'armhf', 'host', ] - -class ABIType(object): - armeabi_v7a = 'armeabi-v7a' - arm64_v8a = 'arm64-v8a' - host = 'host' - - ModelFormatStrs = [ "file", "code", ] - -class MACELibType(object): - static = 0 - dynamic = 1 - - 
PlatformTypeStrs = [ "tensorflow", "caffe", @@ -121,14 +66,6 @@ RuntimeTypeStrs = [ "cpu+gpu" ] - -class RuntimeType(object): - cpu = 'cpu' - gpu = 'gpu' - dsp = 'dsp' - cpu_gpu = 'cpu+gpu' - - InputDataTypeStrs = [ "int32", "float32", @@ -174,51 +111,6 @@ class DefaultValues(object): gpu_priority_hint = 3, -class YAMLKeyword(object): - library_name = 'library_name' - target_abis = 'target_abis' - target_socs = 'target_socs' - model_graph_format = 'model_graph_format' - model_data_format = 'model_data_format' - models = 'models' - platform = 'platform' - model_file_path = 'model_file_path' - model_sha256_checksum = 'model_sha256_checksum' - weight_file_path = 'weight_file_path' - weight_sha256_checksum = 'weight_sha256_checksum' - subgraphs = 'subgraphs' - input_tensors = 'input_tensors' - input_shapes = 'input_shapes' - input_ranges = 'input_ranges' - output_tensors = 'output_tensors' - output_shapes = 'output_shapes' - check_tensors = 'check_tensors' - check_shapes = 'check_shapes' - runtime = 'runtime' - data_type = 'data_type' - input_data_types = 'input_data_types' - input_data_formats = 'input_data_formats' - output_data_formats = 'output_data_formats' - limit_opencl_kernel_time = 'limit_opencl_kernel_time' - nnlib_graph_mode = 'nnlib_graph_mode' - obfuscate = 'obfuscate' - winograd = 'winograd' - quantize = 'quantize' - quantize_range_file = 'quantize_range_file' - change_concat_ranges = 'change_concat_ranges' - validation_inputs_data = 'validation_inputs_data' - validation_threshold = 'validation_threshold' - graph_optimize_options = 'graph_optimize_options' # internal use for now - cl_mem_type = 'cl_mem_type' - - -class ModuleName(object): - YAML_CONFIG = 'YAML CONFIG' - MODEL_CONVERTER = 'Model Converter' - RUN = 'RUN' - BENCHMARK = 'Benchmark' - - CPP_KEYWORDS = [ 'alignas', 'alignof', 'and', 'and_eq', 'asm', 'atomic_cancel', 'atomic_commit', 'atomic_noexcept', 'auto', 'bitand', 'bitor', @@ -260,7 +152,7 @@ def parse_device_type(runtime): def 
get_hexagon_mode(configs): runtime_list = [] for model_name in configs[YAMLKeyword.models]: - model_runtime =\ + model_runtime = \ configs[YAMLKeyword.models][model_name].get( YAMLKeyword.runtime, "") runtime_list.append(model_runtime.lower()) @@ -273,7 +165,7 @@ def get_hexagon_mode(configs): def get_opencl_mode(configs): runtime_list = [] for model_name in configs[YAMLKeyword.models]: - model_runtime =\ + model_runtime = \ configs[YAMLKeyword.models][model_name].get( YAMLKeyword.runtime, "") runtime_list.append(model_runtime.lower()) @@ -331,7 +223,7 @@ def format_model_config(flags): target_socs = configs.get(YAMLKeyword.target_socs, "") if flags.target_socs: configs[YAMLKeyword.target_socs] = \ - [soc.lower() for soc in flags.target_socs.split(',')] + [soc.lower() for soc in flags.target_socs.split(',')] elif not target_socs: configs[YAMLKeyword.target_socs] = [] elif not isinstance(target_socs, list): @@ -347,7 +239,9 @@ def format_model_config(flags): if ALL_SOC_TAG in target_socs: mace_check(available_socs, ModuleName.YAML_CONFIG, - "Build for all SOCs plugged in computer, " + "Android abi is listed in config file and " + "build for all SOCs plugged in computer, " + "But no android phone found, " "you at least plug in one phone") else: for soc in target_socs: @@ -412,7 +306,7 @@ def format_model_config(flags): weight_file_path = model_config.get(YAMLKeyword.weight_file_path, "") if weight_file_path: - weight_checksum =\ + weight_checksum = \ model_config.get(YAMLKeyword.weight_sha256_checksum, "") mace_check(weight_checksum != "", ModuleName.YAML_CONFIG, "'%s' is necessary" % @@ -538,14 +432,14 @@ def format_model_config(flags): YAMLKeyword.validation_threshold, {}) if not isinstance(validation_threshold, dict): raise argparse.ArgumentTypeError( - 'similarity threshold must be a dict.') + 'similarity threshold must be a dict.') threshold_dict = { - DeviceType.CPU: 0.999, - DeviceType.GPU: 0.995, - DeviceType.HEXAGON: 0.930, - DeviceType.CPU + "_QUANTIZE": 
0.980, - } + DeviceType.CPU: 0.999, + DeviceType.GPU: 0.995, + DeviceType.HEXAGON: 0.930, + DeviceType.CPU + "_QUANTIZE": 0.980, + } for k, v in six.iteritems(validation_threshold): if k.upper() == 'DSP': k = DeviceType.HEXAGON @@ -554,7 +448,7 @@ def format_model_config(flags): DeviceType.HEXAGON, DeviceType.CPU + "_QUANTIZE"): raise argparse.ArgumentTypeError( - 'Unsupported validation threshold runtime: %s' % k) + 'Unsupported validation threshold runtime: %s' % k) threshold_dict[k.upper()] = v subgraph[YAMLKeyword.validation_threshold] = threshold_dict @@ -573,7 +467,7 @@ def format_model_config(flags): subgraph[YAMLKeyword.input_ranges] = [input_ranges] else: subgraph[YAMLKeyword.input_ranges] = input_ranges - subgraph[YAMLKeyword.input_ranges] =\ + subgraph[YAMLKeyword.input_ranges] = \ [str(v) for v in subgraph[YAMLKeyword.input_ranges]] for key in [YAMLKeyword.limit_opencl_kernel_time, @@ -598,67 +492,6 @@ def format_model_config(flags): return configs -def get_build_binary_dir(library_name, target_abi): - return "%s/%s/%s/%s" % ( - BUILD_OUTPUT_DIR, library_name, BUILD_TMP_DIR_NAME, target_abi) - - -def get_build_model_dirs(library_name, model_name, target_abi, target_soc, - serial_num, model_file_path): - model_path_digest = md5sum(model_file_path) - model_output_base_dir = "%s/%s/%s/%s/%s" % ( - BUILD_OUTPUT_DIR, library_name, BUILD_TMP_DIR_NAME, - model_name, model_path_digest) - - if target_abi == ABIType.host: - model_output_dir = "%s/%s" % (model_output_base_dir, target_abi) - elif not target_soc or not serial_num: - model_output_dir = "%s/%s/%s" % ( - model_output_base_dir, BUILD_TMP_GENERAL_OUTPUT_DIR_NAME, - target_abi) - else: - device_name = \ - sh_commands.adb_get_device_name_by_serialno(serial_num) - model_output_dir = "%s/%s_%s/%s" % ( - model_output_base_dir, device_name, - target_soc, target_abi) - - mace_model_dir = \ - '%s/%s/%s' % (BUILD_OUTPUT_DIR, library_name, MODEL_OUTPUT_DIR_NAME) - - return model_output_base_dir, model_output_dir, 
mace_model_dir - - -def get_opencl_binary_output_path(library_name, target_abi, - target_soc, serial_num): - device_name = \ - sh_commands.adb_get_device_name_by_serialno(serial_num) - return '%s/%s/%s/%s/%s_%s.%s.%s.bin' % \ - (BUILD_OUTPUT_DIR, - library_name, - OUTPUT_OPENCL_BINARY_DIR_NAME, - target_abi, - library_name, - OUTPUT_OPENCL_BINARY_FILE_NAME, - device_name, - target_soc) - - -def get_opencl_parameter_output_path(library_name, target_abi, - target_soc, serial_num): - device_name = \ - sh_commands.adb_get_device_name_by_serialno(serial_num) - return '%s/%s/%s/%s/%s_%s.%s.%s.bin' % \ - (BUILD_OUTPUT_DIR, - library_name, - OUTPUT_OPENCL_BINARY_DIR_NAME, - target_abi, - library_name, - OUTPUT_OPENCL_PARAMETER_FILE_NAME, - device_name, - target_soc) - - def clear_build_dirs(library_name): # make build dir if not os.path.exists(BUILD_OUTPUT_DIR): @@ -676,27 +509,6 @@ def clear_build_dirs(library_name): sh.rm('-rf', lib_output_dir) -def check_model_converted(library_name, model_name, - model_graph_format, model_data_format, - abi): - model_output_dir = \ - '%s/%s/%s' % (BUILD_OUTPUT_DIR, library_name, MODEL_OUTPUT_DIR_NAME) - if model_graph_format == ModelFormat.file: - mace_check(os.path.exists("%s/%s.pb" % (model_output_dir, model_name)), - ModuleName.RUN, - "You should convert model first.") - else: - model_lib_path = get_model_lib_output_path(library_name, abi) - mace_check(os.path.exists(model_lib_path), - ModuleName.RUN, - "You should convert model first.") - if model_data_format == ModelFormat.file: - mace_check(os.path.exists("%s/%s.data" % - (model_output_dir, model_name)), - ModuleName.RUN, - "You should convert model first.") - - ################################ # convert ################################ @@ -883,13 +695,6 @@ def convert_model(configs, cl_mem_type): StringFormatter.block("Model %s converted" % model_name)) -def get_model_lib_output_path(library_name, abi): - lib_output_path = os.path.join(BUILD_OUTPUT_DIR, library_name, - 
def build_quantize_stat(configs):
    """Build the quantize_stat host binary into the library's temp dir.

    Args:
        configs: parsed deployment yml dict (YAMLKeyword keys).
    """
    library_name = configs[YAMLKeyword.library_name]

    # Start from a clean host build dir.
    build_tmp_binary_dir = get_build_binary_dir(library_name, ABIType.host)
    if os.path.exists(build_tmp_binary_dir):
        sh.rm("-rf", build_tmp_binary_dir)
    os.makedirs(build_tmp_binary_dir)

    quantize_stat_target = QUANTIZE_STAT_TARGET
    build_arg = ""
    if configs[YAMLKeyword.model_graph_format] == ModelFormat.code:
        mace_check(os.path.exists(ENGINE_CODEGEN_DIR),
                   ModuleName.RUN,
                   "You should convert model first.")
        build_arg = "--per_file_copt=mace/tools/quantization/quantize_stat.cc@-DMODEL_GRAPH_FORMAT_CODE"  # noqa

    # FIX: original referenced `flags.toolchain`, but `flags` is not in
    # scope here (NameError at call time). Derive the toolchain the same
    # way as the other build helpers; for the host ABI this yields ''.
    sh_commands.bazel_build(
        quantize_stat_target,
        abi=ABIType.host,
        toolchain=infer_toolchain(ABIType.host),
        enable_openmp=True,
        symbol_hidden=True,
        extra_args=build_arg
    )

    quantize_stat_filepath = build_tmp_binary_dir + "/quantize_stat"
    if os.path.exists(quantize_stat_filepath):
        sh.rm("-rf", quantize_stat_filepath)
    sh.cp("-f", "bazel-bin/mace/tools/quantization/quantize_stat",
          build_tmp_binary_dir)
"/".join(sh_commands.bazel_target_to_bin(example_target)) @@ -1092,296 +935,6 @@ def build_example(configs, target_abi, enable_openmp, address_sanitizer, sh.rm("-rf", LIB_CODEGEN_DIR) -def tuning(library_name, model_name, model_config, - model_graph_format, model_data_format, - target_abi, target_soc, serial_num, - mace_lib_type): - six.print_('* Tuning, it may take some time...') - - build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi) - mace_run_name = MACE_RUN_STATIC_NAME - link_dynamic = False - if mace_lib_type == MACELibType.dynamic: - mace_run_name = MACE_RUN_DYNAMIC_NAME - link_dynamic = True - - embed_model_data = model_data_format == ModelFormat.code - - model_output_base_dir, model_output_dir, mace_model_dir = \ - get_build_model_dirs(library_name, model_name, target_abi, - target_soc, serial_num, - model_config[YAMLKeyword.model_file_path]) - - # build for specified soc - sh_commands.clear_phone_data_dir(serial_num, PHONE_DATA_DIR) - - subgraphs = model_config[YAMLKeyword.subgraphs] - # generate input data - sh_commands.gen_random_input( - model_output_dir, - subgraphs[0][YAMLKeyword.input_tensors], - subgraphs[0][YAMLKeyword.input_shapes], - subgraphs[0][YAMLKeyword.validation_inputs_data], - input_ranges=subgraphs[0][YAMLKeyword.input_ranges], - input_data_types=subgraphs[0][YAMLKeyword.input_data_types]) - - sh_commands.tuning_run( - abi=target_abi, - serialno=serial_num, - target_dir=build_tmp_binary_dir, - target_name=mace_run_name, - vlog_level=0, - embed_model_data=embed_model_data, - model_output_dir=model_output_dir, - input_nodes=subgraphs[0][YAMLKeyword.input_tensors], - output_nodes=subgraphs[0][YAMLKeyword.output_tensors], - input_shapes=subgraphs[0][YAMLKeyword.input_shapes], - output_shapes=subgraphs[0][YAMLKeyword.output_shapes], - mace_model_dir=mace_model_dir, - model_tag=model_name, - device_type=DeviceType.GPU, - running_round=0, - restart_round=1, - 
limit_opencl_kernel_time=model_config[YAMLKeyword.limit_opencl_kernel_time], # noqa - tuning=True, - out_of_range_check=False, - phone_data_dir=PHONE_DATA_DIR, - model_graph_format=model_graph_format, - opencl_binary_file="", - opencl_parameter_file="", - libmace_dynamic_library_path=LIBMACE_DYNAMIC_PATH, - link_dynamic=link_dynamic, - ) - # pull opencl binary - sh_commands.pull_file_from_device( - serial_num, - DEVICE_INTERIOR_DIR, - CL_COMPILED_BINARY_FILE_NAME, - "%s/%s" % (model_output_dir, BUILD_TMP_OPENCL_BIN_DIR)) - - # pull opencl parameter - sh_commands.pull_file_from_device( - serial_num, - PHONE_DATA_DIR, - CL_TUNED_PARAMETER_FILE_NAME, - "%s/%s" % (model_output_dir, BUILD_TMP_OPENCL_BIN_DIR)) - - six.print_('Tuning done\n') - - -def run_specific_target(flags, configs, target_abi, - target_soc, serial_num): - library_name = configs[YAMLKeyword.library_name] - mace_lib_type = flags.mace_lib_type - embed_model_data = \ - configs[YAMLKeyword.model_data_format] == ModelFormat.code - build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi) - - # get target name for run - if flags.example: - if mace_lib_type == MACELibType.static: - target_name = EXAMPLE_STATIC_NAME - else: - target_name = EXAMPLE_DYNAMIC_NAME - else: - if mace_lib_type == MACELibType.static: - target_name = MACE_RUN_STATIC_NAME - else: - target_name = MACE_RUN_DYNAMIC_NAME - - link_dynamic = mace_lib_type == MACELibType.dynamic - model_output_dirs = [] - - for model_name in configs[YAMLKeyword.models]: - check_model_converted(library_name, model_name, - configs[YAMLKeyword.model_graph_format], - configs[YAMLKeyword.model_data_format], - target_abi) - if target_abi == ABIType.host: - device_name = ABIType.host - else: - device_name = \ - sh_commands.adb_get_device_name_by_serialno(serial_num) - sh_commands.clear_phone_data_dir(serial_num, PHONE_DATA_DIR) - - MaceLogger.header( - StringFormatter.block( - "Run model %s on %s" % (model_name, device_name))) - - model_config = 
configs[YAMLKeyword.models][model_name] - model_runtime = model_config[YAMLKeyword.runtime] - subgraphs = model_config[YAMLKeyword.subgraphs] - - if not configs[YAMLKeyword.target_socs] or target_abi == ABIType.host: - model_output_base_dir, model_output_dir, mace_model_dir = \ - get_build_model_dirs(library_name, model_name, target_abi, - None, None, - model_config[YAMLKeyword.model_file_path]) - else: - model_output_base_dir, model_output_dir, mace_model_dir = \ - get_build_model_dirs(library_name, model_name, target_abi, - target_soc, serial_num, - model_config[YAMLKeyword.model_file_path]) - # clear temp model output dir - if os.path.exists(model_output_dir): - sh.rm("-rf", model_output_dir) - os.makedirs(model_output_dir) - - is_tuned = False - model_opencl_output_bin_path = "" - model_opencl_parameter_path = "" - # tuning for specified soc - if not flags.address_sanitizer \ - and not flags.example \ - and target_abi != ABIType.host \ - and configs[YAMLKeyword.target_socs] \ - and target_soc \ - and model_runtime in [RuntimeType.gpu, RuntimeType.cpu_gpu] \ - and not flags.disable_tuning: - tuning(library_name, model_name, model_config, - configs[YAMLKeyword.model_graph_format], - configs[YAMLKeyword.model_data_format], - target_abi, target_soc, serial_num, - mace_lib_type) - model_output_dirs.append(model_output_dir) - model_opencl_output_bin_path =\ - "%s/%s/%s" % (model_output_dir, - BUILD_TMP_OPENCL_BIN_DIR, - CL_COMPILED_BINARY_FILE_NAME) - model_opencl_parameter_path = \ - "%s/%s/%s" % (model_output_dir, - BUILD_TMP_OPENCL_BIN_DIR, - CL_TUNED_PARAMETER_FILE_NAME) - sh_commands.clear_phone_data_dir(serial_num, PHONE_DATA_DIR) - is_tuned = True - elif target_abi != ABIType.host and target_soc: - model_opencl_output_bin_path = get_opencl_binary_output_path( - library_name, target_abi, target_soc, serial_num - ) - model_opencl_parameter_path = get_opencl_parameter_output_path( - library_name, target_abi, target_soc, serial_num - ) - - # generate input data - 
sh_commands.gen_random_input( - model_output_dir, - subgraphs[0][YAMLKeyword.input_tensors], - subgraphs[0][YAMLKeyword.input_shapes], - subgraphs[0][YAMLKeyword.validation_inputs_data], - input_ranges=subgraphs[0][YAMLKeyword.input_ranges], - input_data_types=subgraphs[0][YAMLKeyword.input_data_types]) - - runtime_list = [] - if target_abi == ABIType.host: - runtime_list.extend([RuntimeType.cpu]) - elif model_runtime == RuntimeType.cpu_gpu: - runtime_list.extend([RuntimeType.cpu, RuntimeType.gpu]) - else: - runtime_list.extend([model_runtime]) - for runtime in runtime_list: - device_type = parse_device_type(runtime) - # run for specified soc - if not subgraphs[0][YAMLKeyword.check_tensors]: - output_nodes = subgraphs[0][YAMLKeyword.output_tensors] - output_shapes = subgraphs[0][YAMLKeyword.output_shapes] - else: - output_nodes = subgraphs[0][YAMLKeyword.check_tensors] - output_shapes = subgraphs[0][YAMLKeyword.check_shapes] - run_output = sh_commands.tuning_run( - abi=target_abi, - serialno=serial_num, - target_dir=build_tmp_binary_dir, - target_name=target_name, - vlog_level=flags.vlog_level, - embed_model_data=embed_model_data, - model_output_dir=model_output_dir, - input_nodes=subgraphs[0][YAMLKeyword.input_tensors], - output_nodes=output_nodes, - input_shapes=subgraphs[0][YAMLKeyword.input_shapes], - output_shapes=output_shapes, - mace_model_dir=mace_model_dir, - model_tag=model_name, - device_type=device_type, - running_round=flags.round, - restart_round=flags.restart_round, - limit_opencl_kernel_time=model_config[YAMLKeyword.limit_opencl_kernel_time], # noqa - tuning=False, - out_of_range_check=flags.gpu_out_of_range_check, - phone_data_dir=PHONE_DATA_DIR, - model_graph_format=configs[YAMLKeyword.model_graph_format], - omp_num_threads=flags.omp_num_threads, - cpu_affinity_policy=flags.cpu_affinity_policy, - gpu_perf_hint=flags.gpu_perf_hint, - gpu_priority_hint=flags.gpu_priority_hint, - input_dir=flags.input_dir, - output_dir=flags.output_dir, - 
runtime_failure_ratio=flags.runtime_failure_ratio, - address_sanitizer=flags.address_sanitizer, - opencl_binary_file=model_opencl_output_bin_path, - opencl_parameter_file=model_opencl_parameter_path, - libmace_dynamic_library_path=LIBMACE_DYNAMIC_PATH, - link_dynamic=link_dynamic, - quantize_stat=flags.quantize_stat, - ) - if flags.validate: - model_file_path, weight_file_path = get_model_files( - model_config[YAMLKeyword.model_file_path], - model_config[YAMLKeyword.model_sha256_checksum], - BUILD_DOWNLOADS_DIR, - model_config[YAMLKeyword.weight_file_path], - model_config[YAMLKeyword.weight_sha256_checksum]) - - validate_type = device_type - if model_config[YAMLKeyword.quantize] == 1 \ - and device_type == DeviceType.CPU: - validate_type = device_type + "_QUANTIZE" - - sh_commands.validate_model( - abi=target_abi, - serialno=serial_num, - model_file_path=model_file_path, - weight_file_path=weight_file_path, - platform=model_config[YAMLKeyword.platform], - device_type=device_type, - input_nodes=subgraphs[0][YAMLKeyword.input_tensors], - output_nodes=output_nodes, - input_shapes=subgraphs[0][YAMLKeyword.input_shapes], - output_shapes=output_shapes, - model_output_dir=model_output_dir, - phone_data_dir=PHONE_DATA_DIR, - input_data_types=subgraphs[0][YAMLKeyword.input_data_types], # noqa - caffe_env=flags.caffe_env, - validation_threshold=subgraphs[0][YAMLKeyword.validation_threshold][validate_type]) # noqa - if flags.report and flags.round > 0: - tuned = is_tuned and device_type == DeviceType.GPU - report_run_statistics( - run_output, target_abi, serial_num, - model_name, device_type, flags.report_dir, - tuned) - - if model_output_dirs: - opencl_output_bin_path = get_opencl_binary_output_path( - library_name, target_abi, target_soc, serial_num - ) - opencl_parameter_bin_path = get_opencl_parameter_output_path( - library_name, target_abi, target_soc, serial_num - ) - # clear opencl output dir - if os.path.exists(opencl_output_bin_path): - sh.rm('-rf', 
opencl_output_bin_path) - if os.path.exists(opencl_parameter_bin_path): - sh.rm('-rf', opencl_parameter_bin_path) - - # merge all models' OpenCL binaries together - sh_commands.merge_opencl_binaries( - model_output_dirs, CL_COMPILED_BINARY_FILE_NAME, - opencl_output_bin_path) - # merge all models' OpenCL parameters together - sh_commands.merge_opencl_parameters( - model_output_dirs, CL_TUNED_PARAMETER_FILE_NAME, - opencl_parameter_bin_path) - - def print_package_summary(package_path): title = "Library" header = ["key", "value"] @@ -1399,35 +952,38 @@ def run_mace(flags): target_socs = configs[YAMLKeyword.target_socs] if not target_socs or ALL_SOC_TAG in target_socs: - target_socs = sh_commands.adb_get_all_socs() - + device_list = DeviceManager.list_devices(flags.device_yml) + else: + device_list = DeviceManager.list_devices(flags.device_yml) + device_list = [dev for dev in device_list + if dev[YAMLKeyword.target_socs].lower() in target_socs] for target_abi in configs[YAMLKeyword.target_abis]: # build target - if flags.example: - build_example(configs, target_abi, - not flags.disable_openmp, - flags.address_sanitizer, - flags.mace_lib_type) - else: - build_mace_run(configs, target_abi, - not flags.disable_openmp, - flags.address_sanitizer, - flags.mace_lib_type) - - # run - if target_abi == ABIType.host: - run_specific_target(flags, configs, target_abi, None, None) - else: - for target_soc in target_socs: - serial_nums = \ - sh_commands.get_target_socs_serialnos([target_soc]) - mace_check(serial_nums, - ModuleName.RUN, - 'There is no device with soc: ' + target_soc) - for serial_num in serial_nums: - with sh_commands.device_lock(serial_num): - run_specific_target(flags, configs, target_abi, - target_soc, serial_num) + for dev in device_list: + if target_abi in dev[YAMLKeyword.target_abis]: + # get toolchain + toolchain = infer_toolchain(target_abi) + if flags.example: + build_example(configs, + target_abi, + toolchain, + not flags.disable_openmp, + 
flags.mace_lib_type) + else: + build_mace_run(configs, + target_abi, + toolchain, + not flags.disable_openmp, + flags.address_sanitizer, + flags.mace_lib_type) + # run + device = DeviceWrapper(dev) + with device.lock(): + device.run_specify_abi(flags, configs, target_abi) + elif dev[YAMLKeyword.device_name] != SystemType.host: + six.print_('The device with soc %s do not support abi %s' % + (dev[YAMLKeyword.target_socs], target_abi), + file=sys.stderr) # package the output files package_path = sh_commands.packaging_lib(BUILD_OUTPUT_DIR, @@ -1438,7 +994,11 @@ def run_mace(flags): ################################ # benchmark model ################################ -def build_benchmark_model(configs, target_abi, enable_openmp, mace_lib_type): +def build_benchmark_model(configs, + target_abi, + toolchain, + enable_openmp, + mace_lib_type): library_name = configs[YAMLKeyword.library_name] hexagon_mode = get_hexagon_mode(configs) @@ -1459,6 +1019,7 @@ def build_benchmark_model(configs, target_abi, enable_openmp, mace_lib_type): sh_commands.bazel_build(benchmark_target, abi=target_abi, + toolchain=toolchain, enable_openmp=enable_openmp, enable_opencl=get_opencl_mode(configs), enable_quantize=get_quantize_mode(configs), @@ -1475,105 +1036,6 @@ def build_benchmark_model(configs, target_abi, enable_openmp, mace_lib_type): sh.cp("-f", target_bin, build_tmp_binary_dir) -def bm_specific_target(flags, configs, target_abi, target_soc, serial_num): - library_name = configs[YAMLKeyword.library_name] - embed_model_data = \ - configs[YAMLKeyword.model_data_format] == ModelFormat.code - opencl_output_bin_path = "" - opencl_parameter_path = "" - link_dynamic = flags.mace_lib_type == MACELibType.dynamic - - if link_dynamic: - bm_model_binary_name = BM_MODEL_DYNAMIC_NAME - else: - bm_model_binary_name = BM_MODEL_STATIC_NAME - build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi) - - if configs[YAMLKeyword.target_socs] and target_abi != ABIType.host: - opencl_output_bin_path 
= get_opencl_binary_output_path( - library_name, target_abi, target_soc, serial_num - ) - opencl_parameter_path = get_opencl_parameter_output_path( - library_name, target_abi, target_soc, serial_num - ) - - for model_name in configs[YAMLKeyword.models]: - check_model_converted(library_name, model_name, - configs[YAMLKeyword.model_graph_format], - configs[YAMLKeyword.model_data_format], - target_abi) - if target_abi == ABIType.host: - device_name = ABIType.host - else: - device_name = \ - sh_commands.adb_get_device_name_by_serialno(serial_num) - MaceLogger.header( - StringFormatter.block( - "Benchmark model %s on %s" % (model_name, device_name))) - model_config = configs[YAMLKeyword.models][model_name] - model_runtime = model_config[YAMLKeyword.runtime] - subgraphs = model_config[YAMLKeyword.subgraphs] - - if not configs[YAMLKeyword.target_socs] or target_abi == ABIType.host: - model_output_base_dir, model_output_dir, mace_model_dir = \ - get_build_model_dirs(library_name, model_name, target_abi, - None, None, - model_config[YAMLKeyword.model_file_path]) - else: - model_output_base_dir, model_output_dir, mace_model_dir = \ - get_build_model_dirs(library_name, model_name, target_abi, - target_soc, serial_num, - model_config[YAMLKeyword.model_file_path]) - if os.path.exists(model_output_dir): - sh.rm("-rf", model_output_dir) - os.makedirs(model_output_dir) - - if target_abi != ABIType.host: - sh_commands.clear_phone_data_dir(serial_num, PHONE_DATA_DIR) - - sh_commands.gen_random_input( - model_output_dir, - subgraphs[0][YAMLKeyword.input_tensors], - subgraphs[0][YAMLKeyword.input_shapes], - subgraphs[0][YAMLKeyword.validation_inputs_data], - input_ranges=subgraphs[0][YAMLKeyword.input_ranges], - input_data_types=subgraphs[0][YAMLKeyword.input_data_types]) - runtime_list = [] - if target_abi == ABIType.host: - runtime_list.extend([RuntimeType.cpu]) - elif model_runtime == RuntimeType.cpu_gpu: - runtime_list.extend([RuntimeType.cpu, RuntimeType.gpu]) - else: - 
runtime_list.extend([model_runtime]) - for runtime in runtime_list: - device_type = parse_device_type(runtime) - sh_commands.benchmark_model( - abi=target_abi, - serialno=serial_num, - benchmark_binary_dir=build_tmp_binary_dir, - benchmark_binary_name=bm_model_binary_name, - vlog_level=0, - embed_model_data=embed_model_data, - model_output_dir=model_output_dir, - input_nodes=subgraphs[0][YAMLKeyword.input_tensors], - output_nodes=subgraphs[0][YAMLKeyword.output_tensors], - input_shapes=subgraphs[0][YAMLKeyword.input_shapes], - output_shapes=subgraphs[0][YAMLKeyword.output_shapes], - mace_model_dir=mace_model_dir, - model_tag=model_name, - device_type=device_type, - phone_data_dir=PHONE_DATA_DIR, - model_graph_format=configs[YAMLKeyword.model_graph_format], - omp_num_threads=flags.omp_num_threads, - cpu_affinity_policy=flags.cpu_affinity_policy, - gpu_perf_hint=flags.gpu_perf_hint, - gpu_priority_hint=flags.gpu_priority_hint, - opencl_binary_file=opencl_output_bin_path, - opencl_parameter_file=opencl_parameter_path, - libmace_dynamic_library_path=LIBMACE_DYNAMIC_PATH, - link_dynamic=link_dynamic) - - def benchmark_model(flags): configs = format_model_config(flags) @@ -1581,27 +1043,30 @@ def benchmark_model(flags): target_socs = configs[YAMLKeyword.target_socs] if not target_socs or ALL_SOC_TAG in target_socs: - target_socs = sh_commands.adb_get_all_socs() + device_list = DeviceManager.list_devices(flags.device_yml) + # target_socs = sh_commands.adb_get_all_socs() + else: + device_list = DeviceManager.list_devices(flags.device_yml) + device_list = [dev for dev in device_list + if dev[YAMLKeyword.target_socs] in target_socs] for target_abi in configs[YAMLKeyword.target_abis]: # build benchmark_model binary - build_benchmark_model(configs, target_abi, - not flags.disable_openmp, - flags.mace_lib_type) - - if target_abi == ABIType.host: - bm_specific_target(flags, configs, target_abi, None, None) - else: - for target_soc in target_socs: - serial_nums = \ - 
sh_commands.get_target_socs_serialnos([target_soc]) - mace_check(serial_nums, - ModuleName.BENCHMARK, - 'There is no device with soc: ' + target_soc) - for serial_num in serial_nums: - with sh_commands.device_lock(serial_num): - bm_specific_target(flags, configs, target_abi, - target_soc, serial_num) + for dev in device_list: + if target_abi in dev[YAMLKeyword.target_abis]: + toolchain = infer_toolchain(target_abi) + build_benchmark_model(configs, + target_abi, + toolchain, + not flags.disable_openmp, + flags.mace_lib_type) + device = DeviceWrapper(dev) + with device.lock(): + device.bm_specific_target(flags, configs, target_abi) + else: + six.print_('There is no abi %s with soc %s' % + (target_abi, dev[YAMLKeyword.target_socs]), + file=sys.stderr) ################################ @@ -1698,7 +1163,12 @@ def parse_args(): type=int, default=DefaultValues.gpu_priority_hint, help="0:DEFAULT/1:LOW/2:NORMAL/3:HIGH") - + run_bm_parent_parser.add_argument( + "--device_yml", + type=str, + default='', + help='embedded linux device config yml file' + ) parser = argparse.ArgumentParser() subparsers = parser.add_subparsers() convert = subparsers.add_parser( diff --git a/tools/device.py b/tools/device.py new file mode 100644 index 00000000..d04cfa64 --- /dev/null +++ b/tools/device.py @@ -0,0 +1,1004 @@ +# Copyright 2018 Xiaomi, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import sys +import socket +import subprocess +import time + +import six +import sh +import yaml + +import common +from common import * + +import sh_commands + + +class DeviceWrapper: + allow_scheme = ('ssh', 'adb') + + def __init__(self, device_dict): + """ + init device with device dict info + :type device_dict: Device + :param device_dict: a key-value dict that holds the device information, + which attribute has: + target_abis, target_socs, models, system, address + password, username + """ + diff = set(device_dict.keys()) - set(YAMLKeyword.__dict__.keys()) + if len(diff) > 0: + six.print_('Wrong key detected: ') + six.print_(diff) + raise KeyError(str(diff)) + self.__dict__.update(device_dict) + if self.system == SystemType.android: + self.data_dir = PHONE_DATA_DIR + self.interior_dir = self.data_dir + '/interior' + elif self.system == SystemType.arm_linux: + try: + sh.ssh('-q', '{}@{}'.format(self.username, self.address), + 'exit') + except sh.ErrorReturnCode as e: + six.print_('device connect failed, ' + 'please check your authentication') + raise e + self.data_dir = DEVICE_DATA_DIR + self.interior_dir = self.data_dir + '/interior' + + ################## + # internal use # + ################## + + def exec_command(self, command, *args, **kwargs): + if self.system == SystemType.android: + sh.adb('-s', self.address, 'shell', command, *args, **kwargs) + elif self.system == SystemType.arm_linux: + sh.ssh('{}@{}'.format(self.username, self.address), + command, *args, **kwargs) + + ##################### + # public interface # + ##################### + + def is_lock(self): + return sh_commands.is_device_locked(self.address) + + def lock(self): + return sh_commands.device_lock(self.address) + + def clear_data_dir(self): + if self.system == SystemType.android: + sh_commands.clear_phone_data_dir(self.address, PHONE_DATA_DIR) + elif self.system == SystemType.arm_linux: + self.exec_command('rm -rf {}'.format(self.data_dir)) + + def pull_from_data_dir(self, 
filename, dst_path): + if self.system == SystemType.android: + self.pull(PHONE_DATA_DIR, filename, dst_path) + elif self.system == SystemType.arm_linux: + self.pull(DEVICE_DATA_DIR, filename, dst_path) + + def create_internal_storage_dir(self): + internal_storage_dir = '{}/interior/'.format(self.data_dir) + if self.system == SystemType.android: + sh_commands.create_internal_storage_dir(self.address, + internal_storage_dir) + elif self.system == SystemType.arm_linux: + self.exec_command('mkdir -p {}'.format(internal_storage_dir)) + return internal_storage_dir + + def rm(self, file): + if self.system == SystemType.android: + sh.adb('-s', self.address, 'shell', 'rm', '-rf', file, _fg=True) + elif self.system == SystemType.arm_linux: + self.exec_command('rm -rf {}'.format(file), _fg=True) + + def push(self, src_path, dst_path): + mace_check(os.path.exists(src_path), "Device", + '{} not found'.format(src_path)) + if self.system == SystemType.android: + sh_commands.adb_push(src_path, dst_path, self.address) + elif self.system == SystemType.arm_linux: + try: + sh.scp(src_path, '{}@{}:{}'.format(self.username, + self.address, + dst_path)) + except sh.ErrorReturnCode_1 as e: + six.print_('Push Failed !', e, file=sys.stderr) + raise e + + def pull(self, src_path, file_name, dst_path='.'): + if not os.path.exists(dst_path): + sh.mkdir("-p", dst_path) + src_file = "%s/%s" % (src_path, file_name) + dst_file = "%s/%s" % (dst_path, file_name) + if os.path.exists(dst_file): + sh.rm('-f', dst_file) + if self.system == SystemType.android: + sh_commands.adb_pull( + src_file, dst_file, self.address) + elif self.system == SystemType.arm_linux: + try: + sh.scp('-r', '%s@%s:%s' % (self.username, + self.address, + src_file), + dst_file) + print("pull file ", src_path, dst_path) + except sh.ErrorReturnCode_1 as e: + six.print_("Pull Failed !", file=sys.stderr) + raise e + + def tuning_run(self, + abi, + target_dir, + target_name, + vlog_level, + embed_model_data, + model_output_dir, + 
input_nodes, + output_nodes, + input_shapes, + output_shapes, + mace_model_dir, + model_tag, + device_type, + running_round, + restart_round, + limit_opencl_kernel_time, + tuning, + out_of_range_check, + model_graph_format, + opencl_binary_file, + opencl_parameter_file, + libmace_dynamic_library_path, + omp_num_threads=-1, + cpu_affinity_policy=1, + gpu_perf_hint=3, + gpu_priority_hint=3, + input_file_name='model_input', + output_file_name='model_out', + runtime_failure_ratio=0.0, + address_sanitizer=False, + link_dynamic=False + ): + six.print_("* Run '%s' with round=%s, restart_round=%s, tuning=%s, " + "out_of_range_check=%s, omp_num_threads=%s, " + "cpu_affinity_policy=%s, gpu_perf_hint=%s, " + "gpu_priority_hint=%s" % + (model_tag, running_round, restart_round, str(tuning), + str(out_of_range_check), omp_num_threads, + cpu_affinity_policy, gpu_perf_hint, gpu_priority_hint)) + mace_model_path = "" + if model_graph_format == ModelFormat.file: + mace_model_path = "%s/%s.pb" % (mace_model_dir, model_tag) + if self.system == SystemType.host: + libmace_dynamic_lib_path = \ + os.path.dirname(libmace_dynamic_library_path) + p = subprocess.Popen( + [ + "env", + "LD_LIBRARY_PATH=%s" % libmace_dynamic_lib_path, + "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level, + "MACE_RUNTIME_FAILURE_RATIO=%f" % runtime_failure_ratio, + "%s/%s" % (target_dir, target_name), + "--model_name=%s" % model_tag, + "--input_node=%s" % ",".join(input_nodes), + "--output_node=%s" % ",".join(output_nodes), + "--input_shape=%s" % ":".join(input_shapes), + "--output_shape=%s" % ":".join(output_shapes), + "--input_file=%s/%s" % (model_output_dir, + input_file_name), + "--output_file=%s/%s" % (model_output_dir, + output_file_name), + "--model_data_file=%s/%s.data" % (mace_model_dir, + model_tag), + "--device=%s" % device_type, + "--round=%s" % running_round, + "--restart_round=%s" % restart_round, + "--omp_num_threads=%s" % omp_num_threads, + "--cpu_affinity_policy=%s" % cpu_affinity_policy, + 
"--gpu_perf_hint=%s" % gpu_perf_hint, + "--gpu_priority_hint=%s" % gpu_priority_hint, + "--model_file=%s" % mace_model_path, + ], + stderr=subprocess.PIPE, + stdout=subprocess.PIPE) + out, err = p.communicate() + self.stdout = err + out + six.print_(self.stdout) + six.print_("Running finished!\n") + elif self.system in [SystemType.android, SystemType.arm_linux]: + self.rm(self.data_dir) + self.exec_command('mkdir -p {}'.format(self.data_dir)) + internal_storage_dir = self.create_internal_storage_dir() + + for input_name in input_nodes: + formatted_name = common.formatted_file_name(input_file_name, + input_name) + self.push("%s/%s" % (model_output_dir, formatted_name), + self.data_dir) + if self.system == SystemType.android and address_sanitizer: + self.push(sh_commands.find_asan_rt_library(abi), + self.data_dir) + + if not embed_model_data: + model_data_path = "%s/%s.data" % (mace_model_dir, model_tag) + mace_check(os.path.exists(model_data_path), "Device", + 'model data file not found,' + ' please convert model first') + self.push(model_data_path, self.data_dir) + + if device_type == common.DeviceType.GPU: + if os.path.exists(opencl_binary_file): + self.push(opencl_binary_file, self.data_dir) + if os.path.exists(opencl_parameter_file): + self.push(opencl_parameter_file, self.data_dir) + + self.push("third_party/nnlib/libhexagon_controller.so", + self.data_dir) + + mace_model_phone_path = "" + if model_graph_format == ModelFormat.file: + mace_model_phone_path = "%s/%s.pb" % (self.data_dir, + model_tag) + self.push(mace_model_path, + mace_model_phone_path) + if link_dynamic: + self.push(libmace_dynamic_library_path, self.data_dir) + self.push("%s/%s" % (target_dir, target_name), self.data_dir) + + stdout_buff = [] + process_output = sh_commands.make_output_processor(stdout_buff) + cmd = [ + "LD_LIBRARY_PATH=%s" % self.data_dir, + "MACE_TUNING=%s" % int(tuning), + "MACE_OUT_OF_RANGE_CHECK=%s" % int(out_of_range_check), + "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level, + 
"MACE_RUN_PARAMETER_PATH=%s/mace_run.config" % self.data_dir, + "MACE_INTERNAL_STORAGE_PATH=%s" % internal_storage_dir, + "MACE_LIMIT_OPENCL_KERNEL_TIME=%s" % limit_opencl_kernel_time, + "MACE_RUNTIME_FAILURE_RATIO=%f" % runtime_failure_ratio, + ] + if self.system == SystemType.android and address_sanitizer: + cmd.extend([ + "LD_PRELOAD=%s/%s" % + (self.data_dir, + sh_commands.asan_rt_library_names(abi)) + ]) + cmd.extend([ + "%s/%s" % (self.data_dir, target_name), + "--model_name=%s" % model_tag, + "--input_node=%s" % ",".join(input_nodes), + "--output_node=%s" % ",".join(output_nodes), + "--input_shape=%s" % ":".join(input_shapes), + "--output_shape=%s" % ":".join(output_shapes), + "--input_file=%s/%s" % (self.data_dir, input_file_name), + "--output_file=%s/%s" % (self.data_dir, output_file_name), + "--model_data_file=%s/%s.data" % (self.data_dir, model_tag), + "--device=%s" % device_type, + "--round=%s" % running_round, + "--restart_round=%s" % restart_round, + "--omp_num_threads=%s" % omp_num_threads, + "--cpu_affinity_policy=%s" % cpu_affinity_policy, + "--gpu_perf_hint=%s" % gpu_perf_hint, + "--gpu_priority_hint=%s" % gpu_priority_hint, + "--model_file=%s" % mace_model_phone_path, + "--opencl_binary_file=%s/%s" % + (self.data_dir, os.path.basename(opencl_binary_file)), + "--opencl_parameter_file=%s/%s" % + (self.data_dir, os.path.basename(opencl_parameter_file)), + ]) + cmd = ' '.join(cmd) + cmd_file_name = "%s-%s-%s" % ('cmd_file', + model_tag, + str(time.time())) + cmd_file = "%s/%s" % (self.data_dir, cmd_file_name) + tmp_cmd_file = "%s/%s" % ('/tmp', cmd_file_name) + with open(tmp_cmd_file, 'w') as file: + file.write(cmd) + self.push(tmp_cmd_file, cmd_file) + os.remove(tmp_cmd_file) + self.exec_command('sh {}'.format(cmd_file), + _tty_in=True, + _out=process_output, + _err_to_out=True) + self.stdout = "".join(stdout_buff) + if not sh_commands.stdout_success(self.stdout): + common.MaceLogger.error("Mace Run", "Mace run failed.") + + six.print_("Running 
finished!\n") + else: + six.print_('Unsupported system %s' % self.system, file=sys.stderr) + raise Exception('Wrong device') + + return self.stdout + + def tuning(self, library_name, model_name, model_config, + model_graph_format, model_data_format, + target_abi, mace_lib_type): + six.print_('* Tuning, it may take some time') + build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi) + mace_run_name = MACE_RUN_STATIC_NAME + link_dynamic = False + if mace_lib_type == MACELibType.dynamic: + mace_run_name = MACE_RUN_DYNAMIC_NAME + link_dynamic = True + embed_model_data = model_data_format == ModelFormat.code + + # build for specified soc + # device_wrapper = DeviceWrapper(device) + + model_output_base_dir, model_output_dir, mace_model_dir = \ + get_build_model_dirs( + library_name, model_name, target_abi, self, + model_config[YAMLKeyword.model_file_path]) + + self.clear_data_dir() + + subgraphs = model_config[YAMLKeyword.subgraphs] + # generate input data + sh_commands.gen_random_input( + model_output_dir, + subgraphs[0][YAMLKeyword.input_tensors], + subgraphs[0][YAMLKeyword.input_shapes], + subgraphs[0][YAMLKeyword.validation_inputs_data], + input_ranges=subgraphs[0][YAMLKeyword.input_ranges], + input_data_types=subgraphs[0][YAMLKeyword.input_data_types] + ) + + self.tuning_run( + abi=target_abi, + target_dir=build_tmp_binary_dir, + target_name=mace_run_name, + vlog_level=0, + embed_model_data=embed_model_data, + model_output_dir=model_output_dir, + input_nodes=subgraphs[0][YAMLKeyword.input_tensors], + output_nodes=subgraphs[0][YAMLKeyword.output_tensors], + input_shapes=subgraphs[0][YAMLKeyword.input_shapes], + output_shapes=subgraphs[0][YAMLKeyword.output_shapes], + mace_model_dir=mace_model_dir, + model_tag=model_name, + device_type=DeviceType.GPU, + running_round=0, + restart_round=1, + limit_opencl_kernel_time=model_config[ + YAMLKeyword.limit_opencl_kernel_time], + tuning=True, + out_of_range_check=False, + model_graph_format=model_graph_format, + 
opencl_binary_file='', + opencl_parameter_file='', + libmace_dynamic_library_path=LIBMACE_DYNAMIC_PATH, + link_dynamic=link_dynamic, + ) + + # pull opencl library + self.pull(self.interior_dir, CL_COMPILED_BINARY_FILE_NAME, + '{}/{}'.format(model_output_dir, + BUILD_TMP_OPENCL_BIN_DIR)) + + # pull opencl parameter + self.pull_from_data_dir(CL_TUNED_PARAMETER_FILE_NAME, + '{}/{}'.format(model_output_dir, + BUILD_TMP_OPENCL_BIN_DIR)) + + six.print_('Tuning done! \n') + + def run_specify_abi(self, flags, configs, target_abi): + if target_abi not in self.target_abis: + six.print_('There is no device with soc: %s abi: %s' % + (self.target_socs, target_abi)) + return + library_name = configs[YAMLKeyword.library_name] + mace_lib_type = flags.mace_lib_type + embed_model_data = \ + configs[YAMLKeyword.model_data_format] == ModelFormat.code + build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi) + + # get target name for run + if flags.example: + if mace_lib_type == MACELibType.static: + target_name = EXAMPLE_STATIC_NAME + else: + target_name = EXAMPLE_DYNAMIC_NAME + else: + if mace_lib_type == MACELibType.static: + target_name = MACE_RUN_STATIC_NAME + else: + target_name = MACE_RUN_DYNAMIC_NAME + link_dynamic = mace_lib_type == MACELibType.dynamic + model_output_dirs = [] + + for model_name in configs[YAMLKeyword.models]: + check_model_converted(library_name, model_name, + configs[YAMLKeyword.model_graph_format], + configs[YAMLKeyword.model_data_format], + target_abi) + if target_abi == ABIType.host: + device_model = ABIType.host + else: + device_model = self.models + self.clear_data_dir() + MaceLogger.header( + StringFormatter.block( + 'Run model {} on {}'.format(model_name, device_model))) + + model_config = configs[YAMLKeyword.models][model_name] + model_runtime = model_config[YAMLKeyword.runtime] + subgraphs = model_config[YAMLKeyword.subgraphs] + + if not configs[YAMLKeyword.target_socs] \ + or target_abi == ABIType.host: + model_output_base_dir, 
model_output_dir, mace_model_dir = \ + get_build_model_dirs( + library_name, model_name, target_abi, self, + model_config[YAMLKeyword.model_file_path]) + else: + model_output_base_dir, model_output_dir, mace_model_dir = \ + get_build_model_dirs( + library_name, model_name, target_abi, self, + model_config[YAMLKeyword.model_file_path]) + + # clear temp model output dir + if os.path.exists(model_output_dir): + sh.rm('-rf', model_output_dir) + os.makedirs(model_output_dir) + + is_tuned = False + model_opencl_output_bin_path = '' + model_opencl_parameter_path = '' + if not flags.address_sanitizer \ + and not flags.example \ + and target_abi != ABIType.host \ + and configs[YAMLKeyword.target_socs] \ + and self.target_socs \ + and model_runtime in [RuntimeType.gpu, + RuntimeType.cpu_gpu] \ + and not flags.disable_tuning: + self.tuning(library_name, model_name, model_config, + configs[YAMLKeyword.model_graph_format], + configs[YAMLKeyword.model_data_format], + target_abi, mace_lib_type) + model_output_dirs.append(model_output_dir) + model_opencl_output_bin_path = \ + '{}/{}/{}'.format(model_output_dir, + BUILD_TMP_OPENCL_BIN_DIR, + CL_COMPILED_BINARY_FILE_NAME) + model_opencl_parameter_path = \ + '{}/{}/{}'.format(model_output_dir, + BUILD_TMP_OPENCL_BIN_DIR, + CL_TUNED_PARAMETER_FILE_NAME) + self.clear_data_dir() + is_tuned = True + elif target_abi != ABIType.host and self.target_socs: + model_opencl_output_bin_path = get_opencl_binary_output_path( + library_name, target_abi, self + ) + model_opencl_parameter_path = get_opencl_parameter_output_path( + library_name, target_abi, self + ) + sh_commands.gen_random_input( + model_output_dir, + subgraphs[0][YAMLKeyword.input_tensors], + subgraphs[0][YAMLKeyword.input_shapes], + subgraphs[0][YAMLKeyword.validation_inputs_data], + input_ranges=subgraphs[0][YAMLKeyword.input_ranges], + input_data_types=subgraphs[0][YAMLKeyword.input_data_types] + ) + runtime_list = [] + if target_abi == ABIType.host: + 
runtime_list.append(RuntimeType.cpu) + elif model_runtime == RuntimeType.cpu_gpu: + runtime_list.extend([RuntimeType.cpu, RuntimeType.gpu]) + else: + runtime_list.append(model_runtime) + for runtime in runtime_list: + device_type = parse_device_type(runtime) + # run for specified soc + run_output = self.tuning_run( + abi=target_abi, + target_dir=build_tmp_binary_dir, + target_name=target_name, + vlog_level=flags.vlog_level, + embed_model_data=embed_model_data, + model_output_dir=model_output_dir, + input_nodes=subgraphs[0][YAMLKeyword.input_tensors], + output_nodes=subgraphs[0][YAMLKeyword.output_tensors], + input_shapes=subgraphs[0][YAMLKeyword.input_shapes], + output_shapes=subgraphs[0][YAMLKeyword.output_shapes], + mace_model_dir=mace_model_dir, + model_tag=model_name, + device_type=device_type, + running_round=flags.round, + restart_round=flags.restart_round, + limit_opencl_kernel_time=model_config[ + YAMLKeyword.limit_opencl_kernel_time], + tuning=False, + out_of_range_check=flags.gpu_out_of_range_check, + model_graph_format=configs[YAMLKeyword.model_graph_format], + omp_num_threads=flags.omp_num_threads, + cpu_affinity_policy=flags.cpu_affinity_policy, + gpu_perf_hint=flags.gpu_perf_hint, + gpu_priority_hint=flags.gpu_priority_hint, + runtime_failure_ratio=flags.runtime_failure_ratio, + address_sanitizer=flags.address_sanitizer, + opencl_binary_file=model_opencl_output_bin_path, + opencl_parameter_file=model_opencl_parameter_path, + libmace_dynamic_library_path=LIBMACE_DYNAMIC_PATH, + link_dynamic=link_dynamic + ) + if flags.validate: + model_file_path, weight_file_path = get_model_files( + model_config[YAMLKeyword.model_file_path], + model_config[YAMLKeyword.model_sha256_checksum], + BUILD_DOWNLOADS_DIR, + model_config[YAMLKeyword.weight_file_path], + model_config[YAMLKeyword.weight_sha256_checksum] + ) + + validate_type = device_type + if model_config[YAMLKeyword.quantize] == 1: + validate_type = device_type + '_QUANTIZE' + sh_commands.validate_model( + 
abi=target_abi, + device=self, + model_file_path=model_file_path, + weight_file_path=weight_file_path, + platform=model_config[YAMLKeyword.platform], + device_type=device_type, + input_nodes=subgraphs[0][YAMLKeyword.input_tensors], + output_nodes=subgraphs[0][YAMLKeyword.output_tensors], + input_shapes=subgraphs[0][YAMLKeyword.input_shapes], + output_shapes=subgraphs[0][YAMLKeyword.output_shapes], + model_output_dir=model_output_dir, + input_data_types=subgraphs[0][ + YAMLKeyword.input_data_types], + caffe_env=flags.caffe_env, + validation_threshold=subgraphs[0][ + YAMLKeyword.validation_threshold][validate_type] + ) + if flags.report and flags.round > 0: + tuned = is_tuned and device_type == DeviceType.GPU + self.report_run_statistics( + target_abi=target_abi, + model_name=model_name, + device_type=device_type, + output_dir=flags.report_dir, + tuned=tuned + ) + if model_output_dirs: + opencl_output_bin_path = get_opencl_binary_output_path( + library_name, target_abi, self + ) + opencl_parameter_bin_path = get_opencl_parameter_output_path( + library_name, target_abi, self + ) + + # clear opencl output dir + if os.path.exists(opencl_output_bin_path): + sh.rm('-rf', opencl_output_bin_path) + if os.path.exists(opencl_parameter_bin_path): + sh.rm('-rf', opencl_parameter_bin_path) + + # merge all model's opencl binaries together + sh_commands.merge_opencl_binaries( + model_output_dirs, CL_COMPILED_BINARY_FILE_NAME, + opencl_output_bin_path + ) + # merge all model's opencl parameter together + sh_commands.merge_opencl_parameters( + model_output_dirs, CL_TUNED_PARAMETER_FILE_NAME, + opencl_parameter_bin_path + ) + + def report_run_statistics(self, + target_abi, + model_name, + device_type, + output_dir, + tuned): + metrics = [0] * 3 + for line in self.stdout.split('\n'): + line = line.strip() + parts = line.split() + if len(parts) == 4 and parts[0].startswith('time'): + metrics[0] = str(float(parts[1])) + metrics[1] = str(float(parts[2])) + metrics[2] = 
str(float(parts[3])) + break + report_filename = output_dir + '/report.csv' + if not os.path.exists(report_filename): + with open(report_filename, 'w') as f: + f.write('model_name,device_name,soc,abi,runtime,' + 'init(ms),warmup(ms),run_avg(ms),tuned\n') + + data_str = '{model_name},{device_name},{soc},{abi},{device_type},' \ + '{init},{warmup},{run_avg},{tuned}\n'.format( + model_name=model_name, + device_name=self.models, + soc=self.target_socs, + abi=target_abi, + device_type=device_type, + init=metrics[0], + warmup=metrics[1], + run_avg=metrics[2], + tuned=tuned) + with open(report_filename, 'a') as f: + f.write(data_str) + + def benchmark_model(self, + abi, + benchmark_binary_dir, + benchmark_binary_name, + vlog_level, + embed_model_data, + model_output_dir, + mace_model_dir, + input_nodes, + output_nodes, + input_shapes, + output_shapes, + model_tag, + device_type, + model_graph_format, + opencl_binary_file, + opencl_parameter_file, + libmace_dynamic_library_path, + omp_num_threads=-1, + cpu_affinity_policy=1, + gpu_perf_hint=3, + gpu_priority_hint=3, + input_file_name='model_input', + link_dynamic=False): + six.print_('* Benchmark for %s' % model_tag) + + mace_model_path = '' + if model_graph_format == ModelFormat.file: + mace_model_path = '%s/%s.pb' % (mace_model_dir, model_tag) + if abi == 'host': + libmace_dynamic_lib_dir_path = \ + os.path.dirname(libmace_dynamic_library_path) + p = subprocess.Popen( + [ + 'env', + 'LD_LIBRARY_PATH=%s' % libmace_dynamic_lib_dir_path, + 'MACE_CPP_MIN_VLOG_LEVEL=%s' % vlog_level, + '%s/%s' % (benchmark_binary_dir, benchmark_binary_name), + '--model_name=%s' % model_tag, + '--input_node=%s' % ','.join(input_nodes), + '--output_node=%s' % ','.join(output_nodes), + '--input_shape=%s' % ':'.join(input_shapes), + '--output_shapes=%s' % ':'.join(output_shapes), + '--input_file=%s/%s' % (model_output_dir, input_file_name), + '--model_data_file=%s/%s.data' % (mace_model_dir, + model_tag), + '--device=%s' % device_type, + 
'--omp_num_threads=%s' % omp_num_threads, + '--cpu_addinity_policy=%s' % cpu_affinity_policy, + '--gpu_perf_hint=%s' % gpu_perf_hint, + '--gpu_priority_hint=%s' % gpu_priority_hint, + '--model_file=%s' % mace_model_path + ]) + p.wait() + elif self.system in [SystemType.android, SystemType.arm_linux]: + self.exec_command('mkdir -p %s' % self.data_dir) + internal_storage_dir = self.create_internal_storage_dir() + for input_name in input_nodes: + formatted_name = formatted_file_name(input_file_name, + input_name) + self.push('%s/%s' % (model_output_dir, formatted_name), + self.data_dir) + if not embed_model_data: + self.push('%s/%s.data' % (mace_model_dir, model_tag), + self.data_dir) + if device_type == common.DeviceType.GPU: + if os.path.exists(opencl_binary_file): + self.push(opencl_binary_file, self.data_dir) + if os.path.exists(opencl_parameter_file): + self.push(opencl_parameter_file, self.data_dir) + mace_model_device_path = '' + if model_graph_format == ModelFormat.file: + mace_model_device_path = '%s/%s.pb' % \ + (self.data_dir, model_tag) + self.push(mace_model_path, mace_model_device_path) + if link_dynamic: + self.push(libmace_dynamic_library_path, self.data_dir) + self.rm('%s/%s' % (self.data_dir, benchmark_binary_name)) + self.push('%s/%s' % (benchmark_binary_dir, benchmark_binary_name), + self.data_dir) + + cmd = [ + 'LD_LIBRARY_PATH=%s' % self.data_dir, + 'MACE_CPP_MIN_VLOG_LEVEL=%s' % vlog_level, + 'MACE_RUN_PARAMETER_PATH=%s/mace_run.config' % self.data_dir, + 'MACE_INTERNAL_STORAGE_PATH=%s' % internal_storage_dir, + 'MACE_OPENCL_PROFILING=1', + '%s/%s' % (self.data_dir, benchmark_binary_name), + '--model_name=%s' % model_tag, + '--input_node=%s' % ','.join(input_nodes), + '--output_node=%s' % ','.join(output_nodes), + '--input_shape=%s' % ':'.join(input_shapes), + '--output_shape=%s' % ':'.join(output_shapes), + '--input_file=%s/%s' % (self.data_dir, input_file_name), + '--model_data_file=%s/%s.data' % (self.data_dir, model_tag), + '--device=%s' % 
device_type, + '--omp_num_threads=%s' % omp_num_threads, + '--cpu_affinity_policy=%s' % cpu_affinity_policy, + '--gpu_perf_hint=%s' % gpu_perf_hint, + '--gpu_priority_hint=%s' % gpu_priority_hint, + '--model_file=%s' % mace_model_device_path, + '--opencl_binary_file=%s/%s' % + (self.data_dir, os.path.basename(opencl_binary_file)), + '--opencl_parameter_file=%s/%s' % + (self.data_dir, os.path.basename(opencl_parameter_file)) + ] + + cmd = ' '.join(cmd) + cmd_file_name = '%s-%s-%s' % \ + ('cmd_file', model_tag, str(time.time())) + + cmd_file_path = '%s/%s' % (self.data_dir, cmd_file_name) + tmp_cmd_file = '%s/%s' % ('/tmp', cmd_file_name) + with open(tmp_cmd_file, 'w') as f: + f.write(cmd) + self.push(tmp_cmd_file, cmd_file_path) + os.remove(tmp_cmd_file) + + if self.system == SystemType.android: + sh.adb( + '-s', + self.address, + 'shell', + 'sh', + cmd_file_path, + _fg=True + ) + elif self.system == SystemType.arm_linux: + sh.ssh('%s@%s' % (self.username, self.address), + 'sh', + cmd_file_path, + _fg=True) + self.rm(cmd_file_path) + six.print_('Benchmark done! 
\n') + + def bm_specific_target(self, flags, configs, target_abi): + library_name = configs[YAMLKeyword.library_name] + embed_model_data = \ + configs[YAMLKeyword.model_data_format] == ModelFormat.code + opencl_output_bin_path = '' + opencl_parameter_path = '' + link_dynamic = flags.mace_lib_type == MACELibType.dynamic + + if link_dynamic: + bm_model_binary_name = BM_MODEL_DYNAMIC_NAME + else: + bm_model_binary_name = BM_MODEL_STATIC_NAME + build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi) + if configs[YAMLKeyword.target_socs] and target_abi != ABIType.host: + opencl_output_bin_path = get_opencl_binary_output_path( + library_name, target_abi, self + ) + opencl_parameter_path = get_opencl_parameter_output_path( + library_name, target_abi, self + ) + + for model_name in configs[YAMLKeyword.models]: + check_model_converted(library_name, + model_name, + configs[YAMLKeyword.model_graph_format], + configs[YAMLKeyword.model_data_format], + target_abi) + if target_abi == ABIType.host: + device_name = ABIType.host + else: + device_name = self.models + MaceLogger.header( + StringFormatter.block( + 'Benchmark model %s on %s' % (model_name, device_name))) + model_config = configs[YAMLKeyword.models][model_name] + model_runtime = model_config[YAMLKeyword.runtime] + subgraphs = model_config[YAMLKeyword.subgraphs] + + model_output_base_dir, model_output_dir, mace_model_dir = \ + get_build_model_dirs(library_name, model_name, + target_abi, self, + model_config[YAMLKeyword.model_file_path]) + if os.path.exists(model_output_dir): + sh.rm('-rf', model_output_dir) + os.makedirs(model_output_dir) + + if target_abi != ABIType.host: + self.clear_data_dir() + sh_commands.gen_random_input( + model_output_dir, + subgraphs[0][YAMLKeyword.input_tensors], + subgraphs[0][YAMLKeyword.input_shapes], + subgraphs[0][YAMLKeyword.validation_inputs_data], + input_ranges=subgraphs[0][YAMLKeyword.input_ranges], + input_data_types=subgraphs[0][YAMLKeyword.input_data_types] + ) + 
runtime_list = [] + if target_abi == ABIType.host: + runtime_list.append(RuntimeType.cpu) + elif model_runtime == RuntimeType.cpu_gpu: + runtime_list.extend([RuntimeType.cpu, RuntimeType.cpu_gpu]) + else: + runtime_list.append(model_runtime) + for runtime in runtime_list: + device_type = parse_device_type(runtime) + self.benchmark_model( + abi=target_abi, + benchmark_binary_dir=build_tmp_binary_dir, + benchmark_binary_name=bm_model_binary_name, + vlog_level=0, + embed_model_data=embed_model_data, + model_output_dir=model_output_dir, + input_nodes=subgraphs[0][YAMLKeyword.input_tensors], + output_nodes=subgraphs[0][YAMLKeyword.output_tensors], + input_shapes=subgraphs[0][YAMLKeyword.input_shapes], + output_shapes=subgraphs[0][YAMLKeyword.output_shapes], + mace_model_dir=mace_model_dir, + model_tag=model_name, + device_type=device_type, + model_graph_format=configs[YAMLKeyword.model_graph_format], + omp_num_threads=flags.omp_num_threads, + cpu_affinity_policy=flags.cpu_affinity_policy, + gpu_perf_hint=flags.gpu_perf_hint, + gpu_priority_hint=flags.gpu_priority_hint, + opencl_binary_file=opencl_output_bin_path, + opencl_parameter_file=opencl_parameter_path, + libmace_dynamic_library_path=LIBMACE_DYNAMIC_PATH, + link_dynamic=link_dynamic + ) + + def run(self, + abi, + host_bin_path, + bin_name, + args='', + opencl_profiling=True, + vlog_level=0, + out_of_range_check=True, + address_sanitizer=False, + simpleperf=False): + host_bin_full_path = '%s/%s' % (host_bin_path, bin_name) + device_bin_full_path = '%s/%s' % (self.data_dir, bin_name) + print( + '================================================================' + ) + print('Trying to lock device %s' % self.address) + with self.lock(): + print('Run on device: %s, %s, %s' % + (self.address, self.target_socs, self.models)) + self.rm(self.data_dir) + self.exec_command('mkdir -p %s' % self.data_dir) + self.push(host_bin_full_path, device_bin_full_path) + ld_preload = '' + if address_sanitizer: + 
self.push(sh_commands.find_asan_rt_library(abi), + self.data_dir) + ld_preload = 'LD_PRELOAD=%s/%s' % \ + (self.data_dir, + sh_commands.asan_rt_library_names(abi)) + opencl_profiling = 1 if opencl_profiling else 0 + out_of_range_check = 1 if out_of_range_check else 0 + print('Run %s' % device_bin_full_path) + stdout_buf = [] + process_output = sh_commands.make_output_processor(stdout_buf) + + if simpleperf and self.system == SystemType.android: + self.push(sh_commands.find_simpleperf_library(abi), + self.data_dir) + simpleperf_cmd = '%s/simpleperf' % self.data_dir + exec_cmd = [ + ld_preload, + 'MACE_OUT_OF_RANGE_CHECK=%s' % out_of_range_check, + 'MACE_OPENCL_PROFILING=%d' % opencl_profiling, + 'MACE_CPP_MIN_VLOG_LEVEL=%d' % vlog_level, + simpleperf_cmd, + 'stat', + '--group', + 'raw-l1-dcache,raw-l1-dcache-refill', + '--group', + 'raw-l2-dcache,raw-l2-dcache-refill', + '--group', + 'raw-l1-dtlb,raw-l1-dtlb-refill', + '--group', + 'raw-l2-dtlb,raw-l2-dtlb-refill', + device_bin_full_path, + args, + ] + else: + exec_cmd = [ + ld_preload, + 'MACE_OUT_OF_RANGE_CHECK=%d' % out_of_range_check, + 'MACE_OPENCL_PROFILNG=%d' % opencl_profiling, + 'MACE_CPP_MIN_VLOG_LEVEL=%d' % vlog_level, + device_bin_full_path, + args + ] + exec_cmd = ' '.join(exec_cmd) + self.exec_command(exec_cmd, _tty_in=True, + _out=process_output, _err_to_out=True) + return ''.join(stdout_buf) + + +class DeviceManager: + @classmethod + def list_adb_device(cls): + adb_list = sh.adb('devices').stdout.decode('utf-8'). 
\ + strip().split('\n')[1:] + adb_list = [tuple(pair.split('\t')) for pair in adb_list] + devices = [] + for adb in adb_list: + prop = sh_commands.adb_getprop_by_serialno(adb[0]) + android = { + YAMLKeyword.device_name: adb[1], + YAMLKeyword.target_abis: + prop['ro.product.cpu.abilist'].split(','), + YAMLKeyword.target_socs: prop['ro.board.platform'], + YAMLKeyword.models: prop['ro.product.model'].replace(' ', '_'), + YAMLKeyword.system: SystemType.android, + YAMLKeyword.address: adb[0], + YAMLKeyword.username: '', + } + devices.append(android) + return devices + + @classmethod + def list_ssh_device(cls, yml): + with open(yml) as f: + devices = yaml.load(f.read()) + devices = devices['devices'] + device_list = [] + for name, dev in six.iteritems(devices): + dev[YAMLKeyword.device_name] = name + dev[YAMLKeyword.system] = SystemType.arm_linux + dev[YAMLKeyword.models] = dev[YAMLKeyword.models].replace(' ', '_') + device_list.append(dev) + return device_list + + @classmethod + def list_devices(cls, yml): + devices_list = [] + devices_list.extend(cls.list_adb_device()) + if not yml: + if os.path.exists('devices.yml'): + devices_list.extend(cls.list_ssh_device('devices.yml')) + else: + if os.path.exists(yml): + devices_list.extend(cls.list_ssh_device(yml)) + else: + MaceLogger.error(ModuleName.RUN, + 'no ARM linux device config file found') + host = { + YAMLKeyword.device_name: SystemType.host, + YAMLKeyword.target_abis: [ABIType.host], + YAMLKeyword.target_socs: '', + YAMLKeyword.system: SystemType.host, + YAMLKeyword.models: None, + YAMLKeyword.address: None, + + } + devices_list.append(host) + return devices_list + + +if __name__ == '__main__': + pass diff --git a/tools/image/image_to_tensor.py b/tools/image/image_to_tensor.py index d39c07c3..8dabe6db 100644 --- a/tools/image/image_to_tensor.py +++ b/tools/image/image_to_tensor.py @@ -1,6 +1,9 @@ import argparse import os import sys + +import six + import tensorflow as tf # TODO(liyin): use dataset api and estimator 
with distributed strategy @@ -70,7 +73,7 @@ def images_to_tensors(input_files, image_shape, mean_values=None): def main(unused_args): if not os.path.exists(FLAGS.input): - print ("input does not exist: %s" % FLAGS.input) + print("input does not exist: %s" % FLAGS.input) sys.exit(-1) input_files = [] diff --git a/tools/image/tensor_to_image.py b/tools/image/tensor_to_image.py index ce18628e..e04dde7e 100644 --- a/tools/image/tensor_to_image.py +++ b/tools/image/tensor_to_image.py @@ -1,6 +1,9 @@ import argparse import os import sys + +import six + import numpy as np import tensorflow as tf @@ -53,7 +56,7 @@ def tensors_to_images(input_files, image_shape): def main(unused_args): if not os.path.exists(FLAGS.input): - print ("input does not exist: %s" % FLAGS.input) + print("input does not exist: %s" % FLAGS.input) sys.exit(-1) input_files = [] diff --git a/tools/sh_commands.py b/tools/sh_commands.py index 601f5b2c..38f2b60a 100644 --- a/tools/sh_commands.py +++ b/tools/sh_commands.py @@ -23,13 +23,16 @@ import struct import subprocess import sys import time -import urllib import platform -from enum import Enum import six import common +from common import ModelFormat +from common import ABIType +from common import SystemType +from common import YAMLKeyword +from common import abi_to_internal sys.path.insert(0, "mace/python/tools") try: @@ -89,11 +92,6 @@ class BuildType(object): code = 'code' -class ModelFormat(object): - file = 'file' - code = 'code' - - def stdout_success(stdout): stdout_lines = stdout.split("\n") for line in stdout_lines: @@ -190,7 +188,7 @@ def adb_pull(src_path, dst_path, serialno): try: sh.adb("-s", serialno, "pull", src_path, dst_path) except Exception as e: - six.print_("Error msg: %s" % e.stderr) + six.print_("Error msg: %s" % e, file=sys.stderr) def adb_run(abi, @@ -293,7 +291,7 @@ def find_asan_rt_library(abi, asan_rt_path=''): if len(candidates) == 0: common.MaceLogger.error( "Toolchain", - "Can't find AddressSanitizer runtime library in % 
s" % + "Can't find AddressSanitizer runtime library in %s" % find_path) elif len(candidates) > 1: common.MaceLogger.info( @@ -338,6 +336,7 @@ def find_simpleperf_library(abi, simpleperf_path=''): ################################ def bazel_build(target, abi="armeabi-v7a", + toolchain='android', hexagon_mode=False, enable_openmp=True, enable_neon=True, @@ -361,8 +360,8 @@ def bazel_build(target, "build", target, "--config", - "android", - "--cpu=%s" % abi, + toolchain, + "--cpu=%s" % abi_to_internal(abi), "--define", "neon=%s" % str(enable_neon).lower(), "--define", @@ -694,230 +693,20 @@ def push_depended_so_libs(libmace_dynamic_library_path, for dep in split_stdout(dep_so_libs): if dep == "libgnustl_shared.so": adb_push( - "%s/sources/cxx-stl/gnu-libstdc++/4.9/libs/%s/libgnustl_shared.so" # noqa - % (os.environ["ANDROID_NDK_HOME"], abi), - phone_data_dir, - serialno) + "%s/sources/cxx-stl/gnu-libstdc++/4.9/libs/%s/libgnustl_shared.so" # noqa + % (os.environ["ANDROID_NDK_HOME"], abi), + phone_data_dir, + serialno) elif dep == "libc++_shared.so": adb_push( - "%s/sources/cxx-stl/llvm-libc++/libs/%s/libc++_shared.so" # noqa - % (os.environ["ANDROID_NDK_HOME"], abi), - phone_data_dir, - serialno) - - -def tuning_run(abi, - serialno, - target_dir, - target_name, - vlog_level, - embed_model_data, - model_output_dir, - input_nodes, - output_nodes, - input_shapes, - output_shapes, - mace_model_dir, - model_tag, - device_type, - running_round, - restart_round, - limit_opencl_kernel_time, - tuning, - out_of_range_check, - phone_data_dir, - model_graph_format, - opencl_binary_file, - opencl_parameter_file, - libmace_dynamic_library_path, - omp_num_threads=-1, - cpu_affinity_policy=1, - gpu_perf_hint=3, - gpu_priority_hint=3, - input_file_name="model_input", - output_file_name="model_out", - input_dir="", - output_dir="", - runtime_failure_ratio=0.0, - address_sanitizer=False, - link_dynamic=False, - quantize_stat=False): - six.print_("* Run '%s' with round=%s, 
restart_round=%s, tuning=%s, " - "out_of_range_check=%s, omp_num_threads=%s, " - "cpu_affinity_policy=%s, gpu_perf_hint=%s, " - "gpu_priority_hint=%s" % - (model_tag, running_round, restart_round, str(tuning), - str(out_of_range_check), omp_num_threads, cpu_affinity_policy, - gpu_perf_hint, gpu_priority_hint)) - sys.stdout.flush() - - mace_model_path = "" - if model_graph_format == ModelFormat.file: - mace_model_path = "%s/%s.pb" % (mace_model_dir, model_tag) - if abi == "host": - libmace_dynamic_lib_path = \ - os.path.dirname(libmace_dynamic_library_path) - cmd = [ - "env", - "LD_LIBRARY_PATH=%s" % libmace_dynamic_lib_path, - "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level, - "MACE_RUNTIME_FAILURE_RATIO=%f" % runtime_failure_ratio, - ] - if quantize_stat: - cmd.append("MACE_LOG_TENSOR_RANGE=1") - cmd.extend([ - "%s/%s" % (target_dir, target_name), - "--model_name=%s" % model_tag, - "--input_node=%s" % ",".join(input_nodes), - "--output_node=%s" % ",".join(output_nodes), - "--input_shape=%s" % ":".join(input_shapes), - "--output_shape=%s" % ":".join(output_shapes), - "--input_file=%s/%s" % (model_output_dir, input_file_name), - "--output_file=%s/%s" % (model_output_dir, output_file_name), - "--input_dir=%s" % input_dir, - "--output_dir=%s" % output_dir, - "--model_data_file=%s/%s.data" % (mace_model_dir, model_tag), - "--device=%s" % device_type, - "--round=%s" % running_round, - "--restart_round=%s" % restart_round, - "--omp_num_threads=%s" % omp_num_threads, - "--cpu_affinity_policy=%s" % cpu_affinity_policy, - "--gpu_perf_hint=%s" % gpu_perf_hint, - "--gpu_priority_hint=%s" % gpu_priority_hint, - "--model_file=%s" % mace_model_path, - ]) - p = subprocess.Popen( - cmd, - stderr=subprocess.PIPE, - stdout=subprocess.PIPE) - out, err = p.communicate() - stdout = err + out - six.print_(stdout) - six.print_("Running finished!\n") - else: - sh.adb("-s", serialno, "shell", "mkdir", "-p", phone_data_dir) - internal_storage_dir = create_internal_storage_dir( - serialno, 
phone_data_dir) - - for input_name in input_nodes: - formatted_name = common.formatted_file_name(input_file_name, - input_name) - adb_push("%s/%s" % (model_output_dir, formatted_name), - phone_data_dir, serialno) - if address_sanitizer: - adb_push(find_asan_rt_library(abi), phone_data_dir, serialno) - - if not embed_model_data: - adb_push("%s/%s.data" % (mace_model_dir, model_tag), - phone_data_dir, serialno) - - if device_type == common.DeviceType.GPU: - if os.path.exists(opencl_binary_file): - adb_push(opencl_binary_file, phone_data_dir, serialno) - if os.path.exists(opencl_parameter_file): - adb_push(opencl_parameter_file, phone_data_dir, serialno) - - adb_push("third_party/nnlib/libhexagon_controller.so", - phone_data_dir, serialno) - - mace_model_phone_path = "" - if model_graph_format == ModelFormat.file: - mace_model_phone_path = "%s/%s.pb" % (phone_data_dir, model_tag) - adb_push(mace_model_path, - mace_model_phone_path, - serialno) - - if link_dynamic: - adb_push(libmace_dynamic_library_path, phone_data_dir, - serialno) - push_depended_so_libs(libmace_dynamic_library_path, abi, - phone_data_dir, serialno) - - adb_push("%s/%s" % (target_dir, target_name), phone_data_dir, - serialno) - - stdout_buff = [] - process_output = make_output_processor(stdout_buff) - adb_cmd = [ - "LD_LIBRARY_PATH=%s" % phone_data_dir, - "MACE_TUNING=%s" % int(tuning), - "MACE_OUT_OF_RANGE_CHECK=%s" % int(out_of_range_check), - "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level, - "MACE_RUN_PARAMETER_PATH=%s/mace_run.config" % phone_data_dir, - "MACE_INTERNAL_STORAGE_PATH=%s" % internal_storage_dir, - "MACE_LIMIT_OPENCL_KERNEL_TIME=%s" % limit_opencl_kernel_time, - "MACE_RUNTIME_FAILURE_RATIO=%f" % runtime_failure_ratio, - ] - if quantize_stat: - adb_cmd.append("MACE_LOG_TENSOR_RANGE=1") - if address_sanitizer: - adb_cmd.extend([ - "LD_PRELOAD=%s/%s" % (phone_data_dir, - asan_rt_library_names(abi)) - ]) - adb_cmd.extend([ - "%s/%s" % (phone_data_dir, target_name), - "--model_name=%s" % 
model_tag, - "--input_node=%s" % ",".join(input_nodes), - "--output_node=%s" % ",".join(output_nodes), - "--input_shape=%s" % ":".join(input_shapes), - "--output_shape=%s" % ":".join(output_shapes), - "--input_file=%s/%s" % (phone_data_dir, input_file_name), - "--output_file=%s/%s" % (phone_data_dir, output_file_name), - "--input_dir=%s" % input_dir, - "--output_dir=%s" % output_dir, - "--model_data_file=%s/%s.data" % (phone_data_dir, model_tag), - "--device=%s" % device_type, - "--round=%s" % running_round, - "--restart_round=%s" % restart_round, - "--omp_num_threads=%s" % omp_num_threads, - "--cpu_affinity_policy=%s" % cpu_affinity_policy, - "--gpu_perf_hint=%s" % gpu_perf_hint, - "--gpu_priority_hint=%s" % gpu_priority_hint, - "--model_file=%s" % mace_model_phone_path, - "--opencl_binary_file=%s/%s" % - (phone_data_dir, os.path.basename(opencl_binary_file)), - "--opencl_parameter_file=%s/%s" % - (phone_data_dir, os.path.basename(opencl_parameter_file)), - ]) - adb_cmd = ' '.join(adb_cmd) - cmd_file_name = "%s-%s-%s" % ('cmd_file', model_tag, str(time.time())) - adb_cmd_file = "%s/%s" % (phone_data_dir, cmd_file_name) - tmp_cmd_file = "%s/%s" % ('/tmp', cmd_file_name) - with open(tmp_cmd_file, 'w') as cmd_file: - cmd_file.write(adb_cmd) - adb_push(tmp_cmd_file, adb_cmd_file, serialno) - os.remove(tmp_cmd_file) - - sh.adb( - "-s", - serialno, - "shell", - "sh", - adb_cmd_file, - _tty_in=True, - _out=process_output, - _err_to_out=True) - stdout = "".join(stdout_buff) - if not stdout_success(stdout): - common.MaceLogger.error("Mace Run", "Mace run failed.") - - sh.adb( - "-s", - serialno, - "shell", - "rm", - adb_cmd_file, - _fg=True) - - six.print_("Running finished!\n") - - sys.stdout.flush() - return stdout + "%s/sources/cxx-stl/llvm-libc++/libs/%s/libc++_shared.so" # noqa + % (os.environ["ANDROID_NDK_HOME"], abi), + phone_data_dir, + serialno) def validate_model(abi, - serialno, + device, model_file_path, weight_file_path, platform, @@ -927,7 +716,6 @@ def 
validate_model(abi, input_shapes, output_shapes, model_output_dir, - phone_data_dir, input_data_types, caffe_env, input_file_name="model_input", @@ -941,8 +729,7 @@ def validate_model(abi, if os.path.exists("%s/%s" % (model_output_dir, formatted_name)): sh.rm("-rf", "%s/%s" % (model_output_dir, formatted_name)) - adb_pull("%s/%s" % (phone_data_dir, formatted_name), - model_output_dir, serialno) + device.pull_from_data_dir(formatted_name, model_output_dir) if platform == "tensorflow": validate(platform, model_file_path, "", @@ -956,11 +743,10 @@ def validate_model(abi, container_name = "mace_caffe_validator" if caffe_env == common.CaffeEnvType.LOCAL: - import imp try: - imp.find_module('caffe') + import caffe except ImportError: - logger.error('There is no caffe python module.') + logging.error('There is no caffe python module.') validate(platform, model_file_path, weight_file_path, "%s/%s" % (model_output_dir, input_file_name), "%s/%s" % (model_output_dir, output_file_name), @@ -1157,8 +943,8 @@ def benchmark_model(abi, if link_dynamic: adb_push(libmace_dynamic_library_path, phone_data_dir, serialno) - push_depended_so_lib(libmace_dynamic_library_path, abi, - phone_data_dir, serialno) + push_depended_so_libs(libmace_dynamic_library_path, abi, + phone_data_dir, serialno) adb_push("%s/%s" % (benchmark_binary_dir, benchmark_binary_name), phone_data_dir, -- GitLab