提交 3cdf9973 编写于 作者: 叶剑武

Merge branch 'device_support' into 'master'

Device support

See merge request !898
......@@ -47,8 +47,13 @@ ops_test:
stage: ops_test
script:
- if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi
- python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS
- python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS --enable_neon=false
- >
if ping -c 1 v9.git.n.xiaomi.com 1>/dev/null 2>&1; then
GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git
DEVICE_CONF_FILE=generic-mobile-devices/devices.yml
fi
- python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64,armhf --target_socs=$TARGET_SOCS
- python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64,armhf --target_socs=$TARGET_SOCS --enable_neon=false
api_test:
stage: api_test
......@@ -68,14 +73,19 @@ extra_tests:
stage: extra_tests
script:
- if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi
- python tools/bazel_adb_run.py --target="//mace/utils:tuner_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS
- >
if ping -c 1 v9.git.n.xiaomi.com 1>/dev/null 2>&1; then
GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git
DEVICE_CONF_FILE=generic-mobile-devices/devices.yml
fi
- python tools/bazel_adb_run.py --target="//mace/utils:tuner_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64 --target_socs=$TARGET_SOCS
platform_compatible_tests:
stage: platform_compatible_tests
script:
- bazel build mace/core:core --define openmp=true
- bazel build --config arm_linux --define openmp=true --define opencl=true --define neon=true //mace/libmace:libmace.so
- bazel build --config aarch64_linux --define openmp=true --define opencl=true --define neon=true //mace/libmace:libmace.so
- bazel build --config arm_linux_gnueabihf --define openmp=true --define opencl=true --define neon=true //mace/libmace:libmace.so
- bazel build --config aarch64_linux_gnu --define openmp=true --define opencl=true --define neon=true //mace/libmace:libmace.so
build_libraries:
stage: build_libraries
......@@ -87,6 +97,11 @@ ndk_versions_compatible_tests:
script:
- DEFAULT_NDK_PATH=$ANDROID_NDK_HOME
- prefix_path=${DEFAULT_NDK_PATH%android-ndk-*}
- >
if ping -c 1 v9.git.n.xiaomi.com 1>/dev/null 2>&1; then
GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git
DEVICE_CONF_FILE=generic-mobile-devices/devices.yml
fi
- >
for ndk in android-ndk-r12b android-ndk-r15c android-ndk-r16 android-ndk-r17b;
do
......@@ -96,8 +111,8 @@ ndk_versions_compatible_tests:
export PATH=$ANDROID_NDK_HOME:$PATH;
echo "ndk path: $ANDROID_NDK_HOME";
if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi
python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS --args="--gtest_filter=ActivationOpTest*";
python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS --enable_neon=false --args="--gtest_filter=ActivationOpTest*";
python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64 --target_socs=$TARGET_SOCS --args="--gtest_filter=ActivationOpTest*";
python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64 --target_socs=$TARGET_SOCS --enable_neon=false --args="--gtest_filter=ActivationOpTest*";
fi
done
- export ANDROID_NDK_HOME=$DEFAULT_NDK_PATH
......@@ -111,16 +126,27 @@ python_tools_tests:
- GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@github.com:XiaoMi/mace-models.git
- CONF_FILE=mace-models/mobilenet-v2/mobilenet-v2.yml
- >
python tools/converter.py convert --config=${CONF_FILE} --target_abis=armeabi-v7a --model_graph_format=file --model_data_format=file || exit 1;
python tools/converter.py run --config=${CONF_FILE} --round=1 --target_abis=armeabi-v7a --validate --model_graph_format=file --model_data_format=file || exit 1;
python tools/converter.py run --config=${CONF_FILE} --example --target_abis=armeabi-v7a --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1;
if ping -c 1 v9.git.n.xiaomi.com 1>/dev/null 2>&1; then
GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git
DEVICE_CONF_FILE=generic-mobile-devices/devices.yml
fi
- >
python tools/converter.py convert --config=${CONF_FILE} --target_abis=armeabi-v7a,arm64 --model_graph_format=file --model_data_format=file || exit 1;
python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --round=1 --target_abis=armeabi-v7a,arm64 --validate --model_graph_format=file --model_data_format=file || exit 1;
python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --example --target_abis=armeabi-v7a,arm64 --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1;
model_tests:
stage: model_tests
script:
- pwd
- rm -rf mace-models
- rm -rf generic-mobile-devices
- GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@github.com:XiaoMi/mace-models.git
- >
if ping -c 1 v9.git.n.xiaomi.com 1>/dev/null 2>&1; then
GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git
DEVICE_CONF_FILE=generic-mobile-devices/devices.yml
fi
- >
for CONF_FILE in mace-models/mobilenet-v1/mobilenet-v1.yml mace-models/mobilenet-v1/mobilenet-v1-quantize-retrain.yml;
do
......@@ -131,8 +157,8 @@ model_tests:
- CONF_FILE=mace-models/mobilenet-v2/mobilenet-v2-host.yml
- >
python tools/converter.py convert --config=${CONF_FILE} --model_graph_format=file --model_data_format=file || exit 1;
python tools/converter.py run --config=${CONF_FILE} --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1;
python tools/converter.py run --config=${CONF_FILE} --example --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1;
python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1;
python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --example --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1;
- rm -rf mace-models
build_android_demo:
......
......@@ -35,7 +35,7 @@ Required dependencies
- Required by model validation
* - six
- pip install -I six==1.11.0
- Required for Python 2 and 3 compatibility (TODO)
- Required for Python 2 and 3 compatibility
Optional dependencies
---------------------
......
......@@ -109,13 +109,66 @@ in one deployment file.
sha256sum /path/to/your/file
Advanced usage
--------------
There are two common advanced use cases:
There are three common advanced use cases:
- run your model on the embedded device(ARM LINUX)
- converting model to C++ code.
- tuning GPU kernels for a specific SoC.
Run your model on the embedded device (ARM Linux)
-------------------------------------------------
Running your model on ARM Linux is nearly the same as on Android, except that you need to specify a device config file.
.. code:: bash
python tools/converter.py run --config=/path/to/your/model_deployment_file.yml --device_yml=/path/to/devices.yml
There are two steps to complete before running:
1. configure login without password
MACE uses ssh to connect to the embedded device, so you should copy your public key to the embedded device with the command below.
.. code:: bash
cat ~/.ssh/id_rsa.pub | ssh -q {user}@{ip} "cat >> ~/.ssh/authorized_keys"
2. write your own device yaml configuration file.
* **Example**
Here is a demo device YAML config.
.. literalinclude:: devices/demo_device_nanopi.yml
:language: yaml
* **Configuration**
The detailed explanation is listed in the table below.
.. list-table::
:header-rows: 1
* - Options
- Usage
* - target_abis
- Device supported abis, you can get it via ``dpkg --print-architecture`` and
``dpkg --print-foreign-architectures`` command, if more than one abi is supported,
separate them by commas.
* - target_socs
- device soc, you can get it from device manual, we haven't found a way to get it in shell.
* - models
- The device model's full name, which you can get via the ``lshw`` command (third party package, install it via your package manager);
see its product value.
* - address
- Since we use ssh to connect device, ip address is required.
* - username
- login username, required.
Convert model(s) to C++ code
--------------------------------
......@@ -403,6 +456,7 @@ Reduce Library Size
- It is recommended to use ``version script`` and ``strip`` feature when linking mace static library. The effect is remarkable.
* Remove the unused ops.
Remove the registration of the ops unused for your models in the ``mace/ops/ops_register.cc``,
which will reduce the library size significantly. The final binary only links the registered ops' code.
......
......@@ -68,7 +68,8 @@ Here we use the mobilenet-v2 model as an example.
.. note::
If you want to run on device/phone, please plug in at least one device/phone.
If you want to run on phone, please plug in at least one phone.
Or if you want to run on an embedded device, please refer to :doc:`advanced_usage`.
.. code:: sh
......@@ -245,10 +246,14 @@ to run and validate your model.
# Test model run time
python tools/converter.py run --config=/path/to/your/model_deployment_file.yml --round=100
# Validate the correctness by comparing the results against the
# Validate the correctness by comparing the results against the
# original model and framework, measured with cosine distance for similarity.
python tools/converter.py run --config=/path/to/your/model_deployment_file.yml --validate
# If you want to run model on specified arm linux device, you should put device config file in the working directory or run with flag `--device_yml`
python tools/converter.py run --config=/path/to/your/model_deployment_file.yml --device_yml=/path/to/devices.yml
* **benchmark**
benchmark and profile the model.
......
......@@ -12,12 +12,9 @@ devices:
address: 10.0.0.0
# login username
username: user
# login password, is required when you can login into device without password
password: 1234567
raspberry:
target_abis: [armv7l]
target_socs: BCM2837
models: Raspberry Pi 3 Model B Plus Rev 1.3
address: 10.0.0.1
username: user
password: 123456
......@@ -24,6 +24,24 @@ config_setting(
visibility = ["//visibility:public"],
)
# Matches builds that use the //tools/aarch64_compiler:toolchain crosstool
# together with --cpu=aarch64.
config_setting(
    name = "arm_linux_aarch64",
    values = {
        "crosstool_top": "//tools/aarch64_compiler:toolchain",
        "cpu": "aarch64",
    },
    visibility = ["//visibility:public"],
)
# Matches builds that use the //tools/arm_compiler:toolchain crosstool
# together with --cpu=armeabi-v7a.
config_setting(
    name = "arm_linux_armhf",
    values = {
        "crosstool_top": "//tools/arm_compiler:toolchain",
        "cpu": "armeabi-v7a",
    },
    visibility = ["//visibility:public"],
)
config_setting(
name = "neon_enabled",
define_values = {
......
......@@ -42,7 +42,7 @@ struct CPUFreq {
};
namespace {
#if defined(__ANDROID__)
int GetCPUCount() {
int cpu_count = 0;
std::string cpu_sys_conf = "/proc/cpuinfo";
......@@ -69,10 +69,8 @@ int GetCPUCount() {
VLOG(2) << "CPU cores: " << cpu_count;
return cpu_count;
}
#endif
int GetCPUMaxFreq(std::vector<float> *max_freqs) {
#if defined(__ANDROID__)
int cpu_count = GetCPUCount();
for (int cpu_id = 0; cpu_id < cpu_count; ++cpu_id) {
std::string cpuinfo_max_freq_sys_conf = MakeString(
......@@ -94,34 +92,6 @@ int GetCPUMaxFreq(std::vector<float> *max_freqs) {
}
f.close();
}
#else
std::string cpu_sys_conf = "/proc/cpuinfo";
std::ifstream f(cpu_sys_conf);
if (!f.is_open()) {
LOG(ERROR) << "failed to open " << cpu_sys_conf;
return -1;
}
std::string line;
const std::string freq_key = "cpu MHz";
while (std::getline(f, line)) {
if (line.size() >= freq_key.size()
&& line.compare(0, freq_key.size(), freq_key) == 0) {
size_t pos = line.find(":");
if (pos != std::string::npos) {
std::string freq_str = line.substr(pos + 1);
float freq = atof(freq_str.c_str());
max_freqs->push_back(freq);
}
}
}
if (f.bad()) {
LOG(ERROR) << "failed to read " << cpu_sys_conf;
}
if (!f.eof()) {
LOG(ERROR) << "failed to read end of " << cpu_sys_conf;
}
f.close();
#endif
for (float freq : *max_freqs) {
VLOG(2) << "CPU freq: " << freq;
......
# Examples
load(
"//mace:mace.bzl",
"if_openmp_enabled",
"if_android",
"if_hexagon_enabled",
"if_opencl_enabled",
"if_openmp_enabled",
)
cc_binary(
......@@ -18,8 +18,9 @@ cc_binary(
]),
linkopts = [
"-lm",
"-ldl",
] + if_openmp_enabled([
"-fopenmp"
"-fopenmp",
]) + if_android([
"-ldl",
"-pie",
......@@ -47,6 +48,7 @@ cc_binary(
]),
linkopts = [
"-lm",
"-ldl",
] + if_android([
"-ldl",
"-pie",
......@@ -55,8 +57,7 @@ cc_binary(
linkstatic = 0,
deps = [
"//external:gflags_nothreads",
"//mace/codegen:generated_mace_engine_factory",
"//mace/codegen:generated_libmace",
"//mace/codegen:generated_mace_engine_factory",
],
)
......@@ -24,6 +24,18 @@ def if_android_arm64(a):
"//conditions:default": [],
})
def if_arm_linux_aarch64(a):
    """Yields `a` when //mace:arm_linux_aarch64 matches, else an empty list."""
    branches = {
        "//mace:arm_linux_aarch64": a,
        "//conditions:default": [],
    }
    return select(branches)
def if_arm_linux_armhf(a):
    """Yields `a` when //mace:arm_linux_armhf matches, else an empty list."""
    branches = {
        "//mace:arm_linux_armhf": a,
        "//conditions:default": [],
    }
    return select(branches)
def if_neon_enabled(a):
return select({
"//mace:neon_enabled": a,
......@@ -81,4 +93,3 @@ def encrypt_opencl_kernel_genrule():
outs = ["opencl/encrypt_opencl_kernel.cc"],
cmd = "cat $(SRCS) > $@;"
)
......@@ -233,7 +233,7 @@ void TestNxNS12(const index_t height, const index_t width) {
auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w,
Padding type) {
// generate random input
static unsigned int seed = time(NULL);
// static unsigned int seed = time(NULL);
index_t batch = 1;
index_t channel = 32;
index_t multiplier = 1;
......
......@@ -13,6 +13,7 @@
// limitations under the License.
#include <algorithm>
#include <cmath>
#include "mace/core/operator.h"
......
......@@ -15,6 +15,7 @@
#include "mace/ops/resize_bicubic.h"
#include <algorithm>
#include <cmath>
#include <memory>
#include <vector>
......
......@@ -13,6 +13,7 @@
// limitations under the License.
#include <algorithm>
#include <cmath>
#include <vector>
#include "mace/core/operator.h"
......
......@@ -13,6 +13,7 @@
// limitations under the License.
#include <algorithm>
#include <cmath>
#include <limits>
#include <memory>
#include <vector>
......@@ -106,7 +107,7 @@ class SoftmaxOp<DeviceType::CPU, float> : public Operation {
float sum = 0;
for (index_t c = 0; c < class_count; ++c) {
float exp_value = ::exp(input_ptr[c] - max_val);
float exp_value = std::exp(input_ptr[c] - max_val);
sum += exp_value;
output_ptr[c] = exp_value;
}
......
......@@ -13,6 +13,7 @@
// limitations under the License.
#include <algorithm>
#include <cmath>
#include <vector>
#include "mace/core/operator.h"
......
......@@ -16,8 +16,9 @@
#include <arm_neon.h>
#endif
#include <vector>
#include <algorithm>
#include <cmath>
#include <vector>
#include "mace/core/operator.h"
#include "mace/ops/transpose.h"
......
......@@ -112,6 +112,8 @@ TFSupportedOps = [
TFOpType = Enum('TFOpType', [(op, op) for op in TFSupportedOps], type=str)
TFSupportedOps = [six.b(op) for op in TFSupportedOps]
class TensorflowConverter(base_converter.ConverterInterface):
"""A class for convert tensorflow frozen model to mace model.
......
......@@ -14,6 +14,7 @@
import datetime
import os
import six
import uuid
import numpy as np
import hashlib
......@@ -34,8 +35,8 @@ class ModelFormat(object):
def generate_obfuscated_name(namespace, name):
md5 = hashlib.md5()
md5.update(namespace)
md5.update(name)
md5.update(six.b(namespace))
md5.update(six.b(name))
md5_digest = md5.hexdigest()
name = md5_digest[:8]
......
......@@ -15,8 +15,9 @@
#ifndef MACE_UTILS_QUANTIZE_H_
#define MACE_UTILS_QUANTIZE_H_
#include <limits>
#include <algorithm>
#include <cmath>
#include <limits>
namespace mace {
......
# Partially borrowed from tensorflow tools/bazel.rc
# By default, we don't distinguish between target and host platforms.
# When doing cross compilation, use --config=cross_compile to distinguish them.
build --distinct_host_configuration=false
build:cross_compile --distinct_host_configuration=true
build --verbose_failures
build --copt=-std=c++11
......@@ -17,34 +15,33 @@ build --copt=-DMACE_USE_NNLIB_CAF
build:symbol_hidden --copt=-fvisibility=hidden
# Usage example: bazel build --config android
build:android --config=cross_compile
build:android --distinct_host_configuration=true
build:android --crosstool_top=//external:android/crosstool
build:android --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
# Usage example: bazel build --config arm_linux
build:arm_linux --config=cross_compile
build:arm_linux --crosstool_top=//tools/arm_compiler:toolchain
build:arm_linux --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
build:arm_linux --cpu=armeabi-v7a
build:arm_linux --copt -mfloat-abi=hard
build:arm_linux --copt -mfpu=neon
build:arm_linux --copt -Wno-ignored-attributes
build:arm_linux --copt -Wno-unused-function
build:arm_linux --copt -Wno-sequence-point
build:arm_linux --copt -Wno-implicit-fallthrough
build:arm_linux --copt -Wno-psabi
# Usage example: bazel build --config arm_linux_gnueabihf
build:arm_linux_gnueabihf --distinct_host_configuration=true
build:arm_linux_gnueabihf --crosstool_top=//tools/arm_compiler:toolchain
build:arm_linux_gnueabihf --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
build:arm_linux_gnueabihf --cpu=armeabi-v7a
build:arm_linux_gnueabihf --copt -mfloat-abi=hard
build:arm_linux_gnueabihf --copt -mfpu=neon
build:arm_linux_gnueabihf --copt -Wno-ignored-attributes
build:arm_linux_gnueabihf --copt -Wno-unused-function
build:arm_linux_gnueabihf --copt -Wno-sequence-point
build:arm_linux_gnueabihf --copt -Wno-implicit-fallthrough
# Usage example: bazel build --config aarch64_linux
build:aarch64_linux --config=cross_compile
build:aarch64_linux --crosstool_top=//tools/aarch64_compiler:toolchain
build:aarch64_linux --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
build:aarch64_linux --cpu=aarch64
build:aarch64_linux --copt -Wno-ignored-attributes
build:aarch64_linux --copt -Wno-unused-function
build:aarch64_linux --copt -Wno-sequence-point
build:aarch64_linux --copt -Wno-implicit-fallthrough
# Usage example: bazel build --config aarch64_linux_gnu
build:aarch64_linux_gnu --distinct_host_configuration=true
build:aarch64_linux_gnu --crosstool_top=//tools/aarch64_compiler:toolchain
build:aarch64_linux_gnu --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
build:aarch64_linux_gnu --cpu=aarch64
build:aarch64_linux_gnu --copt -Wno-ignored-attributes
build:aarch64_linux_gnu --copt -Wno-unused-function
build:aarch64_linux_gnu --copt -Wno-sequence-point
build:aarch64_linux_gnu --copt -Wno-implicit-fallthrough
# Usage example: bazel build --config optimization
# Usage example: bazel build --config optimization
build:optimization -c opt
build:optimization --copt=-O3
build:optimization --linkopt=-Wl,--strip-all
......
......@@ -26,9 +26,9 @@ import sys
import sh_commands
from common import *
def stdout_processor(stdout, device_properties, abi):
pass
from device import DeviceWrapper, DeviceManager
def unittest_stdout_processor(stdout, device_properties, abi):
......@@ -39,7 +39,7 @@ def unittest_stdout_processor(stdout, device_properties, abi):
raise Exception("Command failed")
def ops_benchmark_stdout_processor(stdout, device_properties, abi):
def ops_benchmark_stdout_processor(stdout, dev, abi):
stdout_lines = stdout.split("\n")
metrics = {}
for line in stdout_lines:
......@@ -52,13 +52,13 @@ def ops_benchmark_stdout_processor(stdout, device_properties, abi):
metrics["%s.input_mb_per_sec" % parts[0]] = parts[3]
metrics["%s.gmacc_per_sec" % parts[0]] = parts[4]
platform = device_properties["ro.board.platform"].replace(" ", "-")
model = device_properties["ro.product.model"].replace(" ", "-")
tags = {
"ro.board.platform": platform,
"ro.product.model": model,
"abi": abi
}
# platform = dev[YAMLKeyword.target_socs]
# model = dev[YAMLKeyword.device_name]
# tags = {
# "ro.board.platform": platform,
# "ro.product.model": model,
# "abi": abi
# }
# sh_commands.falcon_push_metrics(server,
# metrics, tags=tags, endpoint="mace_ops_benchmark")
......@@ -87,7 +87,7 @@ def parse_args():
type=str,
default="all",
help="SoCs (ro.board.platform from getprop) to build, "
"comma seperated list or all/random")
"comma seperated list or all/random")
parser.add_argument(
"--target", type=str, default="//...", help="Bazel target to build")
parser.add_argument(
......@@ -99,7 +99,7 @@ def parse_args():
parser.add_argument(
"--stdout_processor",
type=str,
default="stdout_processor",
default="unittest_stdout_processor",
help="Stdout processing function, default: stdout_processor")
parser.add_argument(
"--enable_neon",
......@@ -115,14 +115,22 @@ def parse_args():
type=str2bool,
default=False,
help="Whether to use simpleperf stat")
parser.add_argument(
'--device_yml',
type=str,
default='',
help='embedded linux device config yml file'
)
return parser.parse_known_args()
def main(unused_args):
target_socs = None
target_devices = DeviceManager.list_devices(FLAGS.device_yml)
if FLAGS.target_socs != "all" and FLAGS.target_socs != "random":
target_socs = set(FLAGS.target_socs.split(','))
target_devices = sh_commands.get_target_socs_serialnos(target_socs)
target_devices = [dev for dev in target_devices
if dev[YAMLKeyword.target_socs] in target_socs]
if FLAGS.target_socs == "random":
unlocked_devices = \
[d for d in target_devices if not sh_commands.is_device_locked(d)]
......@@ -136,31 +144,29 @@ def main(unused_args):
target_abis = FLAGS.target_abis.split(',')
for target_abi in target_abis:
toolchain = infer_toolchain(target_abi)
sh_commands.bazel_build(target, abi=target_abi,
toolchain=toolchain,
enable_neon=FLAGS.enable_neon,
address_sanitizer=FLAGS.address_sanitizer)
if FLAGS.run_target:
for serialno in target_devices:
if target_abi not in set(
sh_commands.adb_supported_abis(serialno)):
for dev in target_devices:
if target_abi not in dev[YAMLKeyword.target_abis]:
print("Skip device %s which does not support ABI %s" %
(serialno, target_abi))
(dev, target_abi))
continue
stdouts = sh_commands.adb_run(
device_wrapper = DeviceWrapper(dev)
stdouts = device_wrapper.run(
target_abi,
serialno,
host_bin_path,
bin_name,
args=FLAGS.args,
opencl_profiling=True,
vlog_level=0,
device_bin_path="/data/local/tmp/mace",
out_of_range_check=True,
address_sanitizer=FLAGS.address_sanitizer,
simpleperf=FLAGS.simpleperf)
device_properties = sh_commands.adb_getprop_by_serialno(
serialno)
globals()[FLAGS.stdout_processor](stdouts, device_properties,
globals()[FLAGS.stdout_processor](stdouts, dev,
target_abi)
......
......@@ -22,6 +22,14 @@ mkdir -p $LIB_DIR/arm64-v8a/cpu_gpu
rm -rf $LIB_DIR/linux-x86-64
mkdir -p $LIB_DIR/linux-x86-64
rm -rf $LIB_DIR/arm_linux_gnueabihf
mkdir -p $LIB_DIR/arm_linux_gnueabihf/cpu_gpu
rm -rf $LIB_DIR/aarch64_linux_gnu
mkdir -p $LIB_DIR/aarch64_linux_gnu/cpu_gpu
# build shared libraries
echo "build shared lib for armeabi-v7a + cpu_gpu_dsp"
bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --define hexagon=true --define quantize=true --cpu=armeabi-v7a
......@@ -36,6 +44,14 @@ echo "build shared lib for arm64-v8a + cpu_gpu"
bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --define quantize=true --cpu=arm64-v8a
cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/arm64-v8a/cpu_gpu/
echo "build shared lib for arm_linux_gnueabihf + cpu_gpu"
bazel build --config arm_linux_gnueabihf --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --define quantize=true
cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/arm_linux_gnueabihf/cpu_gpu/
echo "build shared lib for aarch64_linux_gnu + cpu_gpu"
bazel build --config aarch64_linux_gnu --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --define quantize=true
cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/aarch64_linux_gnu/cpu_gpu/
if [[ "$OSTYPE" != "darwin"* ]];then
echo "build shared lib for linux-x86-64"
bazel build mace/libmace:libmace_dynamic --config optimization --define quantize=true --define openmp=true
......@@ -56,6 +72,14 @@ echo "build static lib for arm64-v8a + cpu_gpu"
bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=true --define opencl=true --define quantize=true --cpu=arm64-v8a
cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/arm64-v8a/cpu_gpu/
echo "build static lib for arm_linux_gnueabihf + cpu_gpu"
bazel build --config arm_linux_gnueabihf --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=true --define opencl=true --define quantize=true
cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/arm_linux_gnueabihf/cpu_gpu/
echo "build static lib for aarch64_linux_gnu + cpu_gpu"
bazel build --config aarch64_linux_gnu --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=true --define opencl=true --define quantize=true
cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/aarch64_linux_gnu/cpu_gpu/
if [[ "$OSTYPE" != "darwin"* ]];then
echo "build static lib for linux-x86-64"
bazel build mace/libmace:libmace_static --config optimization --define quantize=true --define openmp=true
......
......@@ -13,7 +13,9 @@
# limitations under the License.
import enum
import hashlib
import re
import os
import six
......@@ -135,3 +137,340 @@ def formatted_file_name(input_file_name, input_name):
for c in input_name:
res += c if c.isalnum() else '_'
return res
def md5sum(s):
    """Return the hexadecimal MD5 digest of the text ``s``.

    The text is encoded as UTF-8 before hashing.
    """
    return hashlib.md5(s.encode('utf-8')).hexdigest()
def get_build_binary_dir(library_name, target_abi):
    """Build the path of the temporary binary directory for a library/ABI.

    The result has the form
    ``<BUILD_OUTPUT_DIR>/<library_name>/<BUILD_TMP_DIR_NAME>/<target_abi>``.
    """
    path_parts = (
        BUILD_OUTPUT_DIR,
        library_name,
        BUILD_TMP_DIR_NAME,
        target_abi,
    )
    return "%s/%s/%s/%s" % path_parts
def get_model_lib_output_path(library_name, abi):
    """Return the path of the static model library (``<library_name>.a``).

    The archive is located under
    ``<BUILD_OUTPUT_DIR>/<library_name>/<MODEL_OUTPUT_DIR_NAME>/<abi>/``.
    """
    archive_name = "%s.a" % library_name
    return os.path.join(BUILD_OUTPUT_DIR,
                        library_name,
                        MODEL_OUTPUT_DIR_NAME,
                        abi,
                        archive_name)
def check_model_converted(library_name, model_name,
                          model_graph_format, model_data_format,
                          abi):
    """Check via ``mace_check`` that the converted model artifacts exist.

    The graph artifact is ``<model_name>.pb`` in the model output directory
    when ``model_graph_format`` is ``ModelFormat.file``; otherwise it is the
    static model library returned by ``get_model_lib_output_path``.  When
    ``model_data_format`` is ``ModelFormat.file`` the ``<model_name>.data``
    file is checked as well.
    """
    model_output_dir = \
        '%s/%s/%s' % (BUILD_OUTPUT_DIR, library_name, MODEL_OUTPUT_DIR_NAME)
    not_converted_msg = "You should convert model first."

    # Pick the graph artifact that matches the requested graph format.
    if model_graph_format == ModelFormat.file:
        graph_artifact = "%s/%s.pb" % (model_output_dir, model_name)
    else:
        graph_artifact = get_model_lib_output_path(library_name, abi)
    mace_check(os.path.exists(graph_artifact),
               ModuleName.RUN,
               not_converted_msg)

    # The weights file only exists separately for the "file" data format.
    if model_data_format == ModelFormat.file:
        data_artifact = "%s/%s.data" % (model_output_dir, model_name)
        mace_check(os.path.exists(data_artifact),
                   ModuleName.RUN,
                   not_converted_msg)
def parse_device_type(runtime):
    """Map a ``RuntimeType`` value to the matching ``DeviceType`` value.

    Returns an empty string when ``runtime`` is none of dsp, gpu or cpu.
    """
    if runtime == RuntimeType.dsp:
        return DeviceType.HEXAGON
    if runtime == RuntimeType.gpu:
        return DeviceType.GPU
    if runtime == RuntimeType.cpu:
        return DeviceType.CPU
    return ""
def sha256_checksum(fname):
    """Return the hexadecimal SHA-256 digest of the file at ``fname``.

    The file is read in binary mode in 4096-byte blocks.
    """
    digest = hashlib.sha256()
    with open(fname, "rb") as stream:
        while True:
            block = stream.read(4096)
            if block == b"":
                break
            digest.update(block)
    return digest.hexdigest()
def get_model_files(model_file_path,
                    model_sha256_checksum,
                    model_output_dir,
                    weight_file_path="",
                    weight_sha256_checksum=""):
    """Resolve the local model and weight files, downloading them if remote.

    A path starting with ``http://`` or ``https://`` is mapped to a file in
    ``model_output_dir`` named after the MD5 of the URL (``.pb`` for the
    model, ``.caffemodel`` for the weights).  That file is downloaded when
    it is missing or its SHA-256 differs from the expected checksum.
    Checksum mismatches are reported through ``MaceLogger.error``.

    Returns:
        A ``(model_file, weight_file)`` tuple of local paths.
    """
    url_schemes = ("http://", "https://")
    model_file = model_file_path
    weight_file = weight_file_path

    if model_file_path.startswith(url_schemes):
        model_file = model_output_dir + "/" + md5sum(model_file_path) + ".pb"
        model_is_stale = (
            not os.path.exists(model_file) or
            sha256_checksum(model_file) != model_sha256_checksum)
        if model_is_stale:
            MaceLogger.info("Downloading model, please wait ...")
            six.moves.urllib.request.urlretrieve(model_file_path, model_file)
            MaceLogger.info("Model downloaded successfully.")

    # The model checksum is always verified, for local and downloaded files.
    if sha256_checksum(model_file) != model_sha256_checksum:
        MaceLogger.error(ModuleName.MODEL_CONVERTER,
                         "model file sha256checksum not match")

    if weight_file_path.startswith(url_schemes):
        weight_file = \
            model_output_dir + "/" + md5sum(weight_file_path) + ".caffemodel"
        weight_is_stale = (
            not os.path.exists(weight_file) or
            sha256_checksum(weight_file) != weight_sha256_checksum)
        if weight_is_stale:
            MaceLogger.info("Downloading model weight, please wait ...")
            six.moves.urllib.request.urlretrieve(weight_file_path, weight_file)
            MaceLogger.info("Model weight downloaded successfully.")

    # Weights are optional; verify them only when a weight file is given.
    if weight_file and \
            sha256_checksum(weight_file) != weight_sha256_checksum:
        MaceLogger.error(ModuleName.MODEL_CONVERTER,
                         "weight file sha256checksum not match")

    return model_file, weight_file
def get_opencl_binary_output_path(library_name, target_abi, device):
    """Return the output path of the OpenCL binary file for ``device``.

    The file name combines the library name, OUTPUT_OPENCL_BINARY_FILE_NAME,
    the device's ``device_name`` and its ``target_socs``.
    """
    soc = device.target_socs
    name = device.device_name
    path_fields = (BUILD_OUTPUT_DIR,
                   library_name,
                   OUTPUT_OPENCL_BINARY_DIR_NAME,
                   target_abi,
                   library_name,
                   OUTPUT_OPENCL_BINARY_FILE_NAME,
                   name,
                   soc)
    return '%s/%s/%s/%s/%s_%s.%s.%s.bin' % path_fields
def get_opencl_parameter_output_path(library_name, target_abi, device):
    """Return the output path of the OpenCL parameter file for ``device``.

    The file name combines the library name,
    OUTPUT_OPENCL_PARAMETER_FILE_NAME, the device's ``device_name`` and its
    ``target_socs``.
    """
    soc = device.target_socs
    name = device.device_name
    path_fields = (BUILD_OUTPUT_DIR,
                   library_name,
                   OUTPUT_OPENCL_BINARY_DIR_NAME,
                   target_abi,
                   library_name,
                   OUTPUT_OPENCL_PARAMETER_FILE_NAME,
                   name,
                   soc)
    return '%s/%s/%s/%s/%s_%s.%s.%s.bin' % path_fields
def get_build_model_dirs(library_name,
                         model_name,
                         target_abi,
                         device,
                         model_file_path):
    """Compute the directories used when building/running a model.

    Returns:
        A tuple ``(model_output_base_dir, model_output_dir, mace_model_dir)``.
        ``model_output_dir`` is the base directory extended with:
          * ``<target_abi>`` for the host ABI,
          * ``<device_name>_<target_socs>/<target_abi>`` when the device has
            both ``target_socs`` and ``address`` set,
          * ``<BUILD_TMP_GENERAL_OUTPUT_DIR_NAME>/<target_abi>`` otherwise.
    """
    device_name = device.device_name
    target_socs = device.target_socs
    model_output_base_dir = '{}/{}/{}/{}/{}'.format(
        BUILD_OUTPUT_DIR, library_name, BUILD_TMP_DIR_NAME,
        model_name, md5sum(model_file_path))

    if target_abi == ABIType.host:
        model_output_dir = '%s/%s' % (model_output_base_dir, target_abi)
    elif target_socs and device.address:
        # A concrete, addressable device gets its own directory.
        model_output_dir = '{}/{}_{}/{}'.format(
            model_output_base_dir, device_name, target_socs, target_abi)
    else:
        model_output_dir = '%s/%s/%s' % (model_output_base_dir,
                                         BUILD_TMP_GENERAL_OUTPUT_DIR_NAME,
                                         target_abi)

    mace_model_dir = '{}/{}/{}'.format(
        BUILD_OUTPUT_DIR, library_name, MODEL_OUTPUT_DIR_NAME)

    return model_output_base_dir, model_output_dir, mace_model_dir
def abi_to_internal(abi):
    """Translate a user-facing ABI name to the internally used one.

    ``armeabi_v7a`` and ``arm64_v8a`` map to themselves, ``arm64`` maps to
    ``aarch64`` and ``armhf`` maps to ``armeabi_v7a``.  Any other value
    yields ``None``.
    """
    unchanged_abis = (ABIType.armeabi_v7a, ABIType.arm64_v8a)
    if abi in unchanged_abis:
        internal_abi = abi
    elif abi == ABIType.arm64:
        internal_abi = ABIType.aarch64
    elif abi == ABIType.armhf:
        internal_abi = ABIType.armeabi_v7a
    else:
        internal_abi = None
    return internal_abi
def infer_toolchain(abi):
    """Return the ``ToolchainType`` value to use for the given ABI.

    ``armeabi_v7a`` and ``arm64_v8a`` use the android toolchain, ``armhf``
    uses arm_linux_gnueabihf and ``arm64`` uses aarch64_linux_gnu.  Any
    other ABI yields an empty string.
    """
    android_abis = (ABIType.armeabi_v7a, ABIType.arm64_v8a)
    if abi in android_abis:
        toolchain = ToolchainType.android
    elif abi == ABIType.armhf:
        toolchain = ToolchainType.arm_linux_gnueabihf
    elif abi == ABIType.arm64:
        toolchain = ToolchainType.aarch64_linux_gnu
    else:
        toolchain = ''
    return toolchain
################################
# YAML key word
################################
class YAMLKeyword(object):
    """Names of the keys looked up in the YAML configuration files.

    Each attribute's value is the literal key string.
    """

    # ---- library-level keys ----
    library_name = 'library_name'
    target_abis = 'target_abis'
    target_socs = 'target_socs'
    model_graph_format = 'model_graph_format'
    model_data_format = 'model_data_format'
    models = 'models'
    platform = 'platform'

    # ---- device description keys ----
    device_name = 'device_name'
    system = 'system'
    address = 'address'
    username = 'username'
    password = 'password'

    # ---- model and weight file keys ----
    model_file_path = 'model_file_path'
    model_sha256_checksum = 'model_sha256_checksum'
    weight_file_path = 'weight_file_path'
    weight_sha256_checksum = 'weight_sha256_checksum'

    # ---- subgraph tensor keys ----
    subgraphs = 'subgraphs'
    input_tensors = 'input_tensors'
    input_shapes = 'input_shapes'
    input_ranges = 'input_ranges'
    output_tensors = 'output_tensors'
    output_shapes = 'output_shapes'
    check_tensors = 'check_tensors'
    check_shapes = 'check_shapes'

    # ---- runtime and data-type keys ----
    runtime = 'runtime'
    data_type = 'data_type'
    input_data_types = 'input_data_types'
    input_data_formats = 'input_data_formats'
    output_data_formats = 'output_data_formats'

    # ---- conversion / tuning option keys ----
    limit_opencl_kernel_time = 'limit_opencl_kernel_time'
    nnlib_graph_mode = 'nnlib_graph_mode'
    obfuscate = 'obfuscate'
    winograd = 'winograd'
    quantize = 'quantize'
    quantize_range_file = 'quantize_range_file'
    change_concat_ranges = 'change_concat_ranges'

    # ---- validation keys ----
    validation_inputs_data = 'validation_inputs_data'
    validation_threshold = 'validation_threshold'

    # internal use for now
    graph_optimize_options = 'graph_optimize_options'
    cl_mem_type = 'cl_mem_type'
################################
# SystemType
################################
class SystemType(object):
    """Names of the operating-system kinds a device entry can declare.

    Derives from ``object`` like the other constant holders in this module,
    so that it is a new-style class under Python 2 as well.
    """
    host = 'host'
    android = 'android'
    arm_linux = 'arm_linux'
################################
# common device str
################################
# ---- working directories on the target device ----
PHONE_DATA_DIR = '/data/local/tmp/mace_run'
DEVICE_DATA_DIR = '/tmp/data/mace_run'
DEVICE_INTERIOR_DIR = '%s/interior' % PHONE_DATA_DIR

# ---- build output layout on the build machine ----
BUILD_OUTPUT_DIR = 'builds'
BUILD_DOWNLOADS_DIR = '%s/downloads' % BUILD_OUTPUT_DIR
BUILD_TMP_DIR_NAME = '_tmp'
BUILD_TMP_GENERAL_OUTPUT_DIR_NAME = 'general'
BUILD_TMP_OPENCL_BIN_DIR = 'opencl_bin'
MODEL_OUTPUT_DIR_NAME = 'model'
MODEL_HEADER_DIR_PATH = 'include/mace/public'
OUTPUT_LIBRARY_DIR_NAME = 'lib'

# ---- OpenCL artifact names ----
CL_COMPILED_BINARY_FILE_NAME = "mace_cl_compiled_program.bin"
CL_TUNED_PARAMETER_FILE_NAME = "mace_run.config"
OUTPUT_OPENCL_BINARY_DIR_NAME = 'opencl'
OUTPUT_OPENCL_BINARY_FILE_NAME = 'compiled_opencl_kernel'
OUTPUT_OPENCL_PARAMETER_FILE_NAME = 'tuned_opencl_parameter'

# ---- generated-code directories ----
CODEGEN_BASE_DIR = 'mace/codegen'
MODEL_CODEGEN_DIR = '%s/models' % CODEGEN_BASE_DIR
ENGINE_CODEGEN_DIR = '%s/engine' % CODEGEN_BASE_DIR
LIB_CODEGEN_DIR = '%s/lib' % CODEGEN_BASE_DIR

# ---- bazel targets and the paths of what they produce ----
LIBMACE_SO_TARGET = "//mace/libmace:libmace.so"
LIBMACE_DYNAMIC_PATH = "bazel-bin/mace/libmace/libmace.so"
LIBMACE_STATIC_TARGET = "//mace/libmace:libmace_static"
LIBMACE_STATIC_PATH = "bazel-genfiles/mace/libmace/libmace.a"
MODEL_LIB_TARGET = "//mace/codegen:generated_models"
MODEL_LIB_PATH = "bazel-genfiles/mace/codegen/libgenerated_models.a"
QUANTIZE_STAT_TARGET = "//mace/tools/quantization:quantize_stat"

# ---- example binaries ----
EXAMPLE_STATIC_NAME = "example_static"
EXAMPLE_DYNAMIC_NAME = "example_dynamic"
EXAMPLE_STATIC_TARGET = "//mace/examples/cli:%s" % EXAMPLE_STATIC_NAME
EXAMPLE_DYNAMIC_TARGET = "//mace/examples/cli:%s" % EXAMPLE_DYNAMIC_NAME

# ---- mace_run binaries ----
MACE_RUN_STATIC_NAME = "mace_run_static"
MACE_RUN_DYNAMIC_NAME = "mace_run_dynamic"
MACE_RUN_STATIC_TARGET = "//mace/tools/validation:%s" % MACE_RUN_STATIC_NAME
MACE_RUN_DYNAMIC_TARGET = \
    "//mace/tools/validation:%s" % MACE_RUN_DYNAMIC_NAME

# ---- benchmark binaries ----
BM_MODEL_STATIC_NAME = "benchmark_model_static"
BM_MODEL_DYNAMIC_NAME = "benchmark_model_dynamic"
BM_MODEL_STATIC_TARGET = "//mace/benchmark:%s" % BM_MODEL_STATIC_NAME
BM_MODEL_DYNAMIC_TARGET = "//mace/benchmark:%s" % BM_MODEL_DYNAMIC_NAME

# Value of target_socs that selects every available SoC.
ALL_SOC_TAG = 'all'
################################
# Model File Format
################################
class ModelFormat(object):
    """The two values a model graph/data format setting can take."""

    # The artifact is kept as a separate file.
    file = 'file'
    # The artifact is turned into source code.
    code = 'code'
################################
# ABI Type
################################
class ABIType(object):
    """String names of the supported ABIs."""

    # ABIs that infer_toolchain() maps to the android toolchain.
    armeabi_v7a = 'armeabi-v7a'
    arm64_v8a = 'arm64-v8a'

    # User-facing name that abi_to_internal() translates to `aarch64`.
    arm64 = 'arm64'
    aarch64 = 'aarch64'

    # User-facing name that abi_to_internal() translates to `armeabi-v7a`.
    armhf = 'armhf'

    # The build machine itself.
    host = 'host'
################################
# Module name
################################
class ModuleName(object):
    """Labels identifying which part of the tooling a check belongs to."""

    YAML_CONFIG = 'YAML CONFIG'          # configuration parsing
    MODEL_CONVERTER = 'Model Converter'  # model conversion
    RUN = 'RUN'                          # running a model
    BENCHMARK = 'Benchmark'              # benchmarking a model
#################################
# mace lib type
#################################
class MACELibType(object):
    """Integer codes selecting how the MACE library is linked."""

    static = 0   # static library
    dynamic = 1  # dynamic (shared) library
#################################
# Run time type
#################################
class RuntimeType(object):
    """String names of the runtimes a model can be configured with."""

    cpu = 'cpu'
    gpu = 'gpu'
    dsp = 'dsp'
    # Combined setting: both the cpu and the gpu runtime.
    cpu_gpu = 'cpu+gpu'
#################################
# Tool chain Type
#################################
class ToolchainType(object):
    """Names of the toolchains that infer_toolchain() can select.

    Derives from ``object`` like the other constant holders in this module,
    so that it is a new-style class under Python 2 as well.
    """
    android = 'android'
    arm_linux_gnueabihf = 'arm_linux_gnueabihf'
    aarch64_linux_gnu = 'aarch64_linux_gnu'
......@@ -18,7 +18,6 @@ import hashlib
import os
import re
import sh
import subprocess
import sys
import urllib
import yaml
......@@ -27,14 +26,9 @@ from enum import Enum
import six
import sh_commands
from sh_commands import BuildType
from sh_commands import ModelFormat
from common import CaffeEnvType
from common import DeviceType
from common import mace_check
from common import MaceLogger
from common import StringFormatter
from common import *
from device import DeviceWrapper, DeviceManager
################################
# set environment
......@@ -44,69 +38,20 @@ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
################################
# common definitions
################################
# ---- build output layout on the build machine ----
BUILD_OUTPUT_DIR = 'builds'
BUILD_DOWNLOADS_DIR = '%s/downloads' % BUILD_OUTPUT_DIR
BUILD_TMP_DIR_NAME = '_tmp'
BUILD_TMP_GENERAL_OUTPUT_DIR_NAME = 'general'
BUILD_TMP_OPENCL_BIN_DIR = 'opencl_bin'
MODEL_OUTPUT_DIR_NAME = 'model'
MODEL_HEADER_DIR_PATH = 'include/mace/public'
OUTPUT_LIBRARY_DIR_NAME = 'lib'

# ---- working directories on the phone ----
PHONE_DATA_DIR = "/data/local/tmp/mace_run"
DEVICE_INTERIOR_DIR = "%s/interior" % PHONE_DATA_DIR

# ---- OpenCL artifact names ----
OUTPUT_OPENCL_BINARY_DIR_NAME = 'opencl'
OUTPUT_OPENCL_BINARY_FILE_NAME = 'compiled_opencl_kernel'
OUTPUT_OPENCL_PARAMETER_FILE_NAME = 'tuned_opencl_parameter'
CL_COMPILED_BINARY_FILE_NAME = "mace_cl_compiled_program.bin"
CL_TUNED_PARAMETER_FILE_NAME = "mace_run.config"

# ---- generated-code directories ----
CODEGEN_BASE_DIR = 'mace/codegen'
MODEL_CODEGEN_DIR = '%s/models' % CODEGEN_BASE_DIR
ENGINE_CODEGEN_DIR = '%s/engine' % CODEGEN_BASE_DIR
LIB_CODEGEN_DIR = '%s/lib' % CODEGEN_BASE_DIR

# ---- bazel targets and the paths of what they produce ----
LIBMACE_SO_TARGET = "//mace/libmace:libmace.so"
LIBMACE_DYNAMIC_PATH = "bazel-bin/mace/libmace/libmace.so"
LIBMACE_STATIC_TARGET = "//mace/libmace:libmace_static"
LIBMACE_STATIC_PATH = "bazel-genfiles/mace/libmace/libmace.a"
MODEL_LIB_TARGET = "//mace/codegen:generated_models"
MODEL_LIB_PATH = "bazel-genfiles/mace/codegen/libgenerated_models.a"

# ---- mace_run binaries ----
MACE_RUN_STATIC_NAME = "mace_run_static"
MACE_RUN_DYNAMIC_NAME = "mace_run_dynamic"
MACE_RUN_STATIC_TARGET = "//mace/tools/validation:%s" % MACE_RUN_STATIC_NAME
MACE_RUN_DYNAMIC_TARGET = \
    "//mace/tools/validation:%s" % MACE_RUN_DYNAMIC_NAME

# ---- example binaries ----
EXAMPLE_STATIC_NAME = "example_static"
EXAMPLE_DYNAMIC_NAME = "example_dynamic"
EXAMPLE_STATIC_TARGET = "//mace/examples/cli:%s" % EXAMPLE_STATIC_NAME
EXAMPLE_DYNAMIC_TARGET = "//mace/examples/cli:%s" % EXAMPLE_DYNAMIC_NAME

# ---- benchmark binaries ----
BM_MODEL_STATIC_NAME = "benchmark_model_static"
BM_MODEL_DYNAMIC_NAME = "benchmark_model_dynamic"
BM_MODEL_STATIC_TARGET = "//mace/benchmark:%s" % BM_MODEL_STATIC_NAME
BM_MODEL_DYNAMIC_TARGET = "//mace/benchmark:%s" % BM_MODEL_DYNAMIC_NAME

# Value of target_socs that selects every available SoC.
ALL_SOC_TAG = 'all'

# ABI names listed in ABITypeStrs.
ABITypeStrs = ['armeabi-v7a', 'arm64-v8a', 'arm64', 'armhf', 'host']
class ABIType(object):
    """String names of the ABIs known to this script."""

    armeabi_v7a = 'armeabi-v7a'
    arm64_v8a = 'arm64-v8a'
    # The build machine itself.
    host = 'host'
# Model format names listed in ModelFormatStrs.
ModelFormatStrs = ["file", "code"]
class MACELibType(object):
    """Integer codes selecting how the MACE library is linked."""

    static = 0   # static library
    dynamic = 1  # dynamic (shared) library
PlatformTypeStrs = [
"tensorflow",
"caffe",
......@@ -121,14 +66,6 @@ RuntimeTypeStrs = [
"cpu+gpu"
]
class RuntimeType(object):
    """String names of the runtimes a model can be configured with."""

    cpu = 'cpu'
    gpu = 'gpu'
    dsp = 'dsp'
    # Combined setting: both the cpu and the gpu runtime.
    cpu_gpu = 'cpu+gpu'
InputDataTypeStrs = [
"int32",
"float32",
......@@ -174,49 +111,11 @@ class DefaultValues(object):
gpu_priority_hint = 3,
class YAMLKeyword(object):
    """Names of the keys looked up in the YAML model configuration.

    Each attribute's value is the literal key string.
    """

    # ---- library-level keys ----
    library_name = 'library_name'
    target_abis = 'target_abis'
    target_socs = 'target_socs'
    model_graph_format = 'model_graph_format'
    model_data_format = 'model_data_format'
    models = 'models'
    platform = 'platform'

    # ---- model and weight file keys ----
    model_file_path = 'model_file_path'
    model_sha256_checksum = 'model_sha256_checksum'
    weight_file_path = 'weight_file_path'
    weight_sha256_checksum = 'weight_sha256_checksum'

    # ---- subgraph tensor keys ----
    subgraphs = 'subgraphs'
    input_tensors = 'input_tensors'
    input_shapes = 'input_shapes'
    input_ranges = 'input_ranges'
    output_tensors = 'output_tensors'
    output_shapes = 'output_shapes'
    check_tensors = 'check_tensors'
    check_shapes = 'check_shapes'

    # ---- runtime and data-type keys ----
    runtime = 'runtime'
    data_type = 'data_type'
    input_data_types = 'input_data_types'
    input_data_formats = 'input_data_formats'
    output_data_formats = 'output_data_formats'

    # ---- conversion / tuning option keys ----
    limit_opencl_kernel_time = 'limit_opencl_kernel_time'
    nnlib_graph_mode = 'nnlib_graph_mode'
    obfuscate = 'obfuscate'
    winograd = 'winograd'
    quantize = 'quantize'
    quantize_range_file = 'quantize_range_file'
    change_concat_ranges = 'change_concat_ranges'

    # ---- validation keys ----
    validation_inputs_data = 'validation_inputs_data'
    validation_threshold = 'validation_threshold'

    # internal use for now
    graph_optimize_options = 'graph_optimize_options'
    cl_mem_type = 'cl_mem_type'
class ModuleName(object):
    """Labels identifying which part of the tooling a check belongs to."""

    YAML_CONFIG = 'YAML CONFIG'          # configuration parsing
    MODEL_CONVERTER = 'Model Converter'  # model conversion
    RUN = 'RUN'                          # running a model
    BENCHMARK = 'Benchmark'              # benchmarking a model
class ValidationThreshold(object):
    """Default similarity thresholds used when validating model outputs.

    These values seed ``threshold_dict`` in ``format_model_config``; entries
    from the YAML ``validation_threshold`` mapping override them per runtime.

    Each attribute must be a plain float. Writing a trailing comma after the
    literal (``0.999,``) would turn it into a one-element tuple, which cannot
    be compared meaningfully with a similarity score.
    """
    cpu_threshold = 0.999
    gpu_threshold = 0.995
    hexagon_threshold = 0.930
    cpu_quantize_threshold = 0.980
CPP_KEYWORDS = [
......@@ -260,7 +159,7 @@ def parse_device_type(runtime):
def get_hexagon_mode(configs):
runtime_list = []
for model_name in configs[YAMLKeyword.models]:
model_runtime =\
model_runtime = \
configs[YAMLKeyword.models][model_name].get(
YAMLKeyword.runtime, "")
runtime_list.append(model_runtime.lower())
......@@ -273,7 +172,7 @@ def get_hexagon_mode(configs):
def get_opencl_mode(configs):
runtime_list = []
for model_name in configs[YAMLKeyword.models]:
model_runtime =\
model_runtime = \
configs[YAMLKeyword.models][model_name].get(
YAMLKeyword.runtime, "")
runtime_list.append(model_runtime.lower())
......@@ -331,7 +230,7 @@ def format_model_config(flags):
target_socs = configs.get(YAMLKeyword.target_socs, "")
if flags.target_socs:
configs[YAMLKeyword.target_socs] = \
[soc.lower() for soc in flags.target_socs.split(',')]
[soc.lower() for soc in flags.target_socs.split(',')]
elif not target_socs:
configs[YAMLKeyword.target_socs] = []
elif not isinstance(target_socs, list):
......@@ -347,7 +246,9 @@ def format_model_config(flags):
if ALL_SOC_TAG in target_socs:
mace_check(available_socs,
ModuleName.YAML_CONFIG,
"Build for all SOCs plugged in computer, "
"Android abi is listed in config file and "
"build for all SOCs plugged in computer, "
"But no android phone found, "
"you at least plug in one phone")
else:
for soc in target_socs:
......@@ -412,7 +313,7 @@ def format_model_config(flags):
weight_file_path = model_config.get(YAMLKeyword.weight_file_path, "")
if weight_file_path:
weight_checksum =\
weight_checksum = \
model_config.get(YAMLKeyword.weight_sha256_checksum, "")
mace_check(weight_checksum != "", ModuleName.YAML_CONFIG,
"'%s' is necessary" %
......@@ -538,14 +439,15 @@ def format_model_config(flags):
YAMLKeyword.validation_threshold, {})
if not isinstance(validation_threshold, dict):
raise argparse.ArgumentTypeError(
'similarity threshold must be a dict.')
'similarity threshold must be a dict.')
threshold_dict = {
DeviceType.CPU: 0.999,
DeviceType.GPU: 0.995,
DeviceType.HEXAGON: 0.930,
DeviceType.CPU + "_QUANTIZE": 0.980,
}
DeviceType.CPU: ValidationThreshold.cpu_threshold,
DeviceType.GPU: ValidationThreshold.gpu_threshold,
DeviceType.HEXAGON: ValidationThreshold.hexagon_threshold,
DeviceType.CPU + "_QUANTIZE":
ValidationThreshold.cpu_quantize_threshold,
}
for k, v in six.iteritems(validation_threshold):
if k.upper() == 'DSP':
k = DeviceType.HEXAGON
......@@ -554,7 +456,7 @@ def format_model_config(flags):
DeviceType.HEXAGON,
DeviceType.CPU + "_QUANTIZE"):
raise argparse.ArgumentTypeError(
'Unsupported validation threshold runtime: %s' % k)
'Unsupported validation threshold runtime: %s' % k)
threshold_dict[k.upper()] = v
subgraph[YAMLKeyword.validation_threshold] = threshold_dict
......@@ -573,7 +475,7 @@ def format_model_config(flags):
subgraph[YAMLKeyword.input_ranges] = [input_ranges]
else:
subgraph[YAMLKeyword.input_ranges] = input_ranges
subgraph[YAMLKeyword.input_ranges] =\
subgraph[YAMLKeyword.input_ranges] = \
[str(v) for v in subgraph[YAMLKeyword.input_ranges]]
for key in [YAMLKeyword.limit_opencl_kernel_time,
......@@ -598,67 +500,6 @@ def format_model_config(flags):
return configs
def get_build_binary_dir(library_name, target_abi):
    """Return the temporary directory holding built binaries for an ABI.

    :param library_name: library name (directory below the build root).
    :param target_abi: ABI string used as the innermost directory.
    :return: ``<BUILD_OUTPUT_DIR>/<library>/<BUILD_TMP_DIR_NAME>/<abi>``.
    """
    components = (BUILD_OUTPUT_DIR, library_name,
                  BUILD_TMP_DIR_NAME, target_abi)
    return "%s/%s/%s/%s" % components
def get_build_model_dirs(library_name, model_name, target_abi, target_soc,
                         serial_num, model_file_path):
    """Compute the build directories used for one model on one target.

    :param library_name: library name (first directory below the build root).
    :param model_name: name of the model.
    :param target_abi: ABI string; ``ABIType.host`` selects the host layout.
    :param target_soc: SoC name, or a falsy value when none is selected.
    :param serial_num: device serial number, or a falsy value when none.
    :param model_file_path: passed to ``md5sum`` to derive a digest directory.
    :return: tuple ``(model_output_base_dir, model_output_dir,
        mace_model_dir)``.
    """
    digest = md5sum(model_file_path)
    base_dir = "%s/%s/%s/%s/%s" % (
        BUILD_OUTPUT_DIR, library_name, BUILD_TMP_DIR_NAME,
        model_name, digest)

    if target_abi == ABIType.host:
        output_dir = "%s/%s" % (base_dir, target_abi)
    elif target_soc and serial_num:
        # A concrete device is known: the device name is looked up by
        # serial number and used in a device/SoC specific directory.
        name = sh_commands.adb_get_device_name_by_serialno(serial_num)
        output_dir = "%s/%s_%s/%s" % (base_dir, name, target_soc, target_abi)
    else:
        # No SoC or no serial number: fall back to the general directory.
        output_dir = "%s/%s/%s" % (
            base_dir, BUILD_TMP_GENERAL_OUTPUT_DIR_NAME, target_abi)

    model_dir = '%s/%s/%s' % (
        BUILD_OUTPUT_DIR, library_name, MODEL_OUTPUT_DIR_NAME)
    return base_dir, output_dir, model_dir
def get_opencl_binary_output_path(library_name, target_abi,
                                  target_soc, serial_num):
    """Return the path of the compiled OpenCL kernel binary for a device.

    The device name is looked up from ``serial_num``; the result has the form
    ``<BUILD_OUTPUT_DIR>/<library>/<opencl dir>/<abi>/
    <library>_<binary file name>.<device name>.<target_soc>.bin``.

    :param library_name: library name, used both as a directory and as the
        file-name prefix.
    :param target_abi: ABI string used as the innermost directory.
    :param target_soc: SoC name, used as the last file-name component.
    :param serial_num: serial number of the device.
    :return: the formatted path string.
    """
    name = sh_commands.adb_get_device_name_by_serialno(serial_num)
    directory = '%s/%s/%s/%s' % (BUILD_OUTPUT_DIR,
                                 library_name,
                                 OUTPUT_OPENCL_BINARY_DIR_NAME,
                                 target_abi)
    file_name = '%s_%s.%s.%s.bin' % (library_name,
                                     OUTPUT_OPENCL_BINARY_FILE_NAME,
                                     name,
                                     target_soc)
    return '%s/%s' % (directory, file_name)
def get_opencl_parameter_output_path(library_name, target_abi,
                                     target_soc, serial_num):
    """Return the path of the tuned OpenCL parameter file for a device.

    The device name is looked up from ``serial_num``; the result has the form
    ``<BUILD_OUTPUT_DIR>/<library>/<opencl dir>/<abi>/
    <library>_<parameter file name>.<device name>.<target_soc>.bin``.

    :param library_name: library name, used both as a directory and as the
        file-name prefix.
    :param target_abi: ABI string used as the innermost directory.
    :param target_soc: SoC name, used as the last file-name component.
    :param serial_num: serial number of the device.
    :return: the formatted path string.
    """
    name = sh_commands.adb_get_device_name_by_serialno(serial_num)
    directory = '%s/%s/%s/%s' % (BUILD_OUTPUT_DIR,
                                 library_name,
                                 OUTPUT_OPENCL_BINARY_DIR_NAME,
                                 target_abi)
    file_name = '%s_%s.%s.%s.bin' % (library_name,
                                     OUTPUT_OPENCL_PARAMETER_FILE_NAME,
                                     name,
                                     target_soc)
    return '%s/%s' % (directory, file_name)
def clear_build_dirs(library_name):
# make build dir
if not os.path.exists(BUILD_OUTPUT_DIR):
......@@ -676,27 +517,6 @@ def clear_build_dirs(library_name):
sh.rm('-rf', lib_output_dir)
def check_model_converted(library_name, model_name,
                          model_graph_format, model_data_format,
                          abi):
    """Check that the converted artifacts of a model exist on disk.

    For a ``file`` graph format the ``<model_name>.pb`` file must exist in
    the model output directory; otherwise the model library returned by
    ``get_model_lib_output_path`` must exist. For a ``file`` data format the
    ``<model_name>.data`` file must exist as well. Each check is reported
    through ``mace_check`` under ``ModuleName.RUN``.

    :param library_name: library name (directory below the build root).
    :param model_name: base name of the model artifacts.
    :param model_graph_format: ``ModelFormat`` value for the graph.
    :param model_data_format: ``ModelFormat`` value for the weights.
    :param abi: ABI used to locate the model library.
    """
    not_converted = "You should convert model first."
    model_dir = '%s/%s/%s' % (
        BUILD_OUTPUT_DIR, library_name, MODEL_OUTPUT_DIR_NAME)

    if model_graph_format == ModelFormat.file:
        graph_artifact = "%s/%s.pb" % (model_dir, model_name)
    else:
        graph_artifact = get_model_lib_output_path(library_name, abi)
    mace_check(os.path.exists(graph_artifact),
               ModuleName.RUN,
               not_converted)

    if model_data_format == ModelFormat.file:
        data_artifact = "%s/%s.data" % (model_dir, model_name)
        mace_check(os.path.exists(data_artifact),
                   ModuleName.RUN,
                   not_converted)
################################
# convert
################################
......@@ -883,13 +703,6 @@ def convert_model(configs, cl_mem_type):
StringFormatter.block("Model %s converted" % model_name))
def get_model_lib_output_path(library_name, abi):
    """Return the path of the static model library for an ABI.

    :param library_name: library name; also the archive's base name.
    :param abi: ABI string used as the directory holding the archive.
    :return: ``<BUILD_OUTPUT_DIR>/<library>/<model dir>/<abi>/<library>.a``
        joined with ``os.path.join``.
    """
    archive_name = "%s.a" % library_name
    return os.path.join(BUILD_OUTPUT_DIR, library_name,
                        MODEL_OUTPUT_DIR_NAME, abi, archive_name)
def build_model_lib(configs, address_sanitizer):
MaceLogger.header(StringFormatter.block("Building model library"))
......@@ -902,10 +715,11 @@ def build_model_lib(configs, address_sanitizer):
library_out_dir = os.path.dirname(model_lib_output_path)
if not os.path.exists(library_out_dir):
os.makedirs(library_out_dir)
toolchain = infer_toolchain(target_abi)
sh_commands.bazel_build(
MODEL_LIB_TARGET,
abi=target_abi,
toolchain=toolchain,
hexagon_mode=hexagon_mode,
enable_opencl=get_opencl_mode(configs),
enable_quantize=get_quantize_mode(configs),
......@@ -994,8 +808,8 @@ def report_run_statistics(stdout,
f.write(data_str)
def build_mace_run(configs, target_abi, enable_openmp, address_sanitizer,
mace_lib_type):
def build_mace_run(configs, target_abi, toolchain, enable_openmp,
address_sanitizer, mace_lib_type):
library_name = configs[YAMLKeyword.library_name]
hexagon_mode = get_hexagon_mode(configs)
......@@ -1019,6 +833,7 @@ def build_mace_run(configs, target_abi, enable_openmp, address_sanitizer,
sh_commands.bazel_build(
mace_run_target,
abi=target_abi,
toolchain=toolchain,
hexagon_mode=hexagon_mode,
enable_openmp=enable_openmp,
enable_opencl=get_opencl_mode(configs),
......@@ -1031,8 +846,8 @@ def build_mace_run(configs, target_abi, enable_openmp, address_sanitizer,
mace_lib_type == MACELibType.dynamic)
def build_example(configs, target_abi, enable_openmp, address_sanitizer,
mace_lib_type):
def build_example(configs, target_abi, toolchain,
enable_openmp, mace_lib_type):
library_name = configs[YAMLKeyword.library_name]
hexagon_mode = get_hexagon_mode(configs)
......@@ -1042,6 +857,7 @@ def build_example(configs, target_abi, enable_openmp, address_sanitizer,
os.makedirs(build_tmp_binary_dir)
symbol_hidden = True
libmace_target = LIBMACE_STATIC_TARGET
if mace_lib_type == MACELibType.dynamic:
symbol_hidden = False
......@@ -1049,11 +865,12 @@ def build_example(configs, target_abi, enable_openmp, address_sanitizer,
sh_commands.bazel_build(libmace_target,
abi=target_abi,
toolchain=toolchain,
enable_openmp=enable_openmp,
enable_opencl=get_opencl_mode(configs),
enable_quantize=get_quantize_mode(configs),
hexagon_mode=hexagon_mode,
address_sanitizer=address_sanitizer,
address_sanitizer=flags.address_sanitizer,
symbol_hidden=symbol_hidden)
if os.path.exists(LIB_CODEGEN_DIR):
......@@ -1079,11 +896,12 @@ def build_example(configs, target_abi, enable_openmp, address_sanitizer,
sh_commands.bazel_build(example_target,
abi=target_abi,
toolchain=toolchain,
enable_openmp=enable_openmp,
enable_opencl=get_opencl_mode(configs),
enable_quantize=get_quantize_mode(configs),
hexagon_mode=hexagon_mode,
address_sanitizer=address_sanitizer,
address_sanitizer=flags.address_sanitizer,
extra_args=build_arg)
target_bin = "/".join(sh_commands.bazel_target_to_bin(example_target))
......@@ -1092,296 +910,6 @@ def build_example(configs, target_abi, enable_openmp, address_sanitizer,
sh.rm("-rf", LIB_CODEGEN_DIR)
def tuning(library_name, model_name, model_config,
           model_graph_format, model_data_format,
           target_abi, target_soc, serial_num,
           mace_lib_type):
    """Run a tuning pass for one model on one device and pull the results.

    The device data directory is cleared, random input is generated for the
    first subgraph, ``sh_commands.tuning_run`` is invoked with
    ``tuning=True`` on the GPU device type, and the compiled OpenCL binary
    and the tuned parameter file are pulled into
    ``<model_output_dir>/<BUILD_TMP_OPENCL_BIN_DIR>``.

    :param library_name: library name from the configuration.
    :param model_name: name of the model to tune.
    :param model_config: configuration mapping of that model.
    :param model_graph_format: graph format, forwarded to the run.
    :param model_data_format: data format; ``ModelFormat.code`` means the
        model data is embedded.
    :param target_abi: ABI the binaries were built for.
    :param target_soc: SoC name, used to derive the output directories.
    :param serial_num: serial number of the device to run on.
    :param mace_lib_type: ``MACELibType`` value selecting the static or the
        dynamic ``mace_run`` binary.
    """
    six.print_('* Tuning, it may take some time...')

    build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi)
    # Default to the static binary; switch when a dynamic library is used.
    mace_run_name = MACE_RUN_STATIC_NAME
    link_dynamic = False
    if mace_lib_type == MACELibType.dynamic:
        mace_run_name = MACE_RUN_DYNAMIC_NAME
        link_dynamic = True
    embed_model_data = model_data_format == ModelFormat.code
    model_output_base_dir, model_output_dir, mace_model_dir = \
        get_build_model_dirs(library_name, model_name, target_abi,
                             target_soc, serial_num,
                             model_config[YAMLKeyword.model_file_path])

    # build for specified soc
    sh_commands.clear_phone_data_dir(serial_num, PHONE_DATA_DIR)

    # Only the first subgraph is used below.
    subgraphs = model_config[YAMLKeyword.subgraphs]
    # generate input data
    sh_commands.gen_random_input(
        model_output_dir,
        subgraphs[0][YAMLKeyword.input_tensors],
        subgraphs[0][YAMLKeyword.input_shapes],
        subgraphs[0][YAMLKeyword.validation_inputs_data],
        input_ranges=subgraphs[0][YAMLKeyword.input_ranges],
        input_data_types=subgraphs[0][YAMLKeyword.input_data_types])

    # Tuning run: no previously produced OpenCL binary or parameter file is
    # passed in (both are empty strings).
    sh_commands.tuning_run(
        abi=target_abi,
        serialno=serial_num,
        target_dir=build_tmp_binary_dir,
        target_name=mace_run_name,
        vlog_level=0,
        embed_model_data=embed_model_data,
        model_output_dir=model_output_dir,
        input_nodes=subgraphs[0][YAMLKeyword.input_tensors],
        output_nodes=subgraphs[0][YAMLKeyword.output_tensors],
        input_shapes=subgraphs[0][YAMLKeyword.input_shapes],
        output_shapes=subgraphs[0][YAMLKeyword.output_shapes],
        mace_model_dir=mace_model_dir,
        model_tag=model_name,
        device_type=DeviceType.GPU,
        running_round=0,
        restart_round=1,
        limit_opencl_kernel_time=model_config[YAMLKeyword.limit_opencl_kernel_time],  # noqa
        tuning=True,
        out_of_range_check=False,
        phone_data_dir=PHONE_DATA_DIR,
        model_graph_format=model_graph_format,
        opencl_binary_file="",
        opencl_parameter_file="",
        libmace_dynamic_library_path=LIBMACE_DYNAMIC_PATH,
        link_dynamic=link_dynamic,
    )

    # pull opencl binary
    sh_commands.pull_file_from_device(
        serial_num,
        DEVICE_INTERIOR_DIR,
        CL_COMPILED_BINARY_FILE_NAME,
        "%s/%s" % (model_output_dir, BUILD_TMP_OPENCL_BIN_DIR))

    # pull opencl parameter
    sh_commands.pull_file_from_device(
        serial_num,
        PHONE_DATA_DIR,
        CL_TUNED_PARAMETER_FILE_NAME,
        "%s/%s" % (model_output_dir, BUILD_TMP_OPENCL_BIN_DIR))

    six.print_('Tuning done\n')
def run_specific_target(flags, configs, target_abi,
                        target_soc, serial_num):
    """Run every configured model on one target and collect the results.

    For each model in ``configs[YAMLKeyword.models]`` this checks that the
    model was converted, recreates its output directory, optionally tunes it,
    generates random input and runs it once per runtime; validation and
    report generation follow when the corresponding flags are set. When at
    least one model was tuned, the per-model OpenCL binaries and parameters
    are merged into the library-level output files afterwards.

    :param flags: parsed command-line options (attributes such as
        ``example``, ``mace_lib_type``, ``validate`` and ``report`` are read).
    :param configs: formatted configuration mapping.
    :param target_abi: ABI to run; ``ABIType.host`` runs on the build machine.
    :param target_soc: SoC name of the device, or a falsy value.
    :param serial_num: serial number of the device, or a falsy value.
    """
    library_name = configs[YAMLKeyword.library_name]
    mace_lib_type = flags.mace_lib_type
    embed_model_data = \
        configs[YAMLKeyword.model_data_format] == ModelFormat.code
    build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi)

    # get target name for run
    if flags.example:
        if mace_lib_type == MACELibType.static:
            target_name = EXAMPLE_STATIC_NAME
        else:
            target_name = EXAMPLE_DYNAMIC_NAME
    else:
        if mace_lib_type == MACELibType.static:
            target_name = MACE_RUN_STATIC_NAME
        else:
            target_name = MACE_RUN_DYNAMIC_NAME
    link_dynamic = mace_lib_type == MACELibType.dynamic
    # Output directories of the models that were tuned in this call.
    model_output_dirs = []

    for model_name in configs[YAMLKeyword.models]:
        check_model_converted(library_name, model_name,
                              configs[YAMLKeyword.model_graph_format],
                              configs[YAMLKeyword.model_data_format],
                              target_abi)
        if target_abi == ABIType.host:
            device_name = ABIType.host
        else:
            device_name = \
                sh_commands.adb_get_device_name_by_serialno(serial_num)
            sh_commands.clear_phone_data_dir(serial_num, PHONE_DATA_DIR)
        MaceLogger.header(
            StringFormatter.block(
                "Run model %s on %s" % (model_name, device_name)))

        model_config = configs[YAMLKeyword.models][model_name]
        model_runtime = model_config[YAMLKeyword.runtime]
        # Only the first subgraph is used below.
        subgraphs = model_config[YAMLKeyword.subgraphs]

        # Without configured SoCs, or on the host, the SoC and serial number
        # are not passed on when computing the model directories.
        if not configs[YAMLKeyword.target_socs] or target_abi == ABIType.host:
            model_output_base_dir, model_output_dir, mace_model_dir = \
                get_build_model_dirs(library_name, model_name, target_abi,
                                     None, None,
                                     model_config[YAMLKeyword.model_file_path])
        else:
            model_output_base_dir, model_output_dir, mace_model_dir = \
                get_build_model_dirs(library_name, model_name, target_abi,
                                     target_soc, serial_num,
                                     model_config[YAMLKeyword.model_file_path])

        # clear temp model output dir
        if os.path.exists(model_output_dir):
            sh.rm("-rf", model_output_dir)
        os.makedirs(model_output_dir)

        is_tuned = False
        model_opencl_output_bin_path = ""
        model_opencl_parameter_path = ""
        # tuning for specified soc
        if not flags.address_sanitizer \
                and not flags.example \
                and target_abi != ABIType.host \
                and configs[YAMLKeyword.target_socs] \
                and target_soc \
                and model_runtime in [RuntimeType.gpu, RuntimeType.cpu_gpu] \
                and not flags.disable_tuning:
            tuning(library_name, model_name, model_config,
                   configs[YAMLKeyword.model_graph_format],
                   configs[YAMLKeyword.model_data_format],
                   target_abi, target_soc, serial_num,
                   mace_lib_type)
            model_output_dirs.append(model_output_dir)
            # Use the files the tuning pass just pulled for this model.
            model_opencl_output_bin_path =\
                "%s/%s/%s" % (model_output_dir,
                              BUILD_TMP_OPENCL_BIN_DIR,
                              CL_COMPILED_BINARY_FILE_NAME)
            model_opencl_parameter_path = \
                "%s/%s/%s" % (model_output_dir,
                              BUILD_TMP_OPENCL_BIN_DIR,
                              CL_TUNED_PARAMETER_FILE_NAME)
            sh_commands.clear_phone_data_dir(serial_num, PHONE_DATA_DIR)
            is_tuned = True
        elif target_abi != ABIType.host and target_soc:
            # Not tuned now: point at the library-level OpenCL output paths.
            model_opencl_output_bin_path = get_opencl_binary_output_path(
                library_name, target_abi, target_soc, serial_num
            )
            model_opencl_parameter_path = get_opencl_parameter_output_path(
                library_name, target_abi, target_soc, serial_num
            )

        # generate input data
        sh_commands.gen_random_input(
            model_output_dir,
            subgraphs[0][YAMLKeyword.input_tensors],
            subgraphs[0][YAMLKeyword.input_shapes],
            subgraphs[0][YAMLKeyword.validation_inputs_data],
            input_ranges=subgraphs[0][YAMLKeyword.input_ranges],
            input_data_types=subgraphs[0][YAMLKeyword.input_data_types])

        # The host always runs on cpu only; 'cpu+gpu' expands to two runs.
        runtime_list = []
        if target_abi == ABIType.host:
            runtime_list.extend([RuntimeType.cpu])
        elif model_runtime == RuntimeType.cpu_gpu:
            runtime_list.extend([RuntimeType.cpu, RuntimeType.gpu])
        else:
            runtime_list.extend([model_runtime])
        for runtime in runtime_list:
            device_type = parse_device_type(runtime)
            # run for specified soc
            # check_tensors, when present, replace the output tensors.
            if not subgraphs[0][YAMLKeyword.check_tensors]:
                output_nodes = subgraphs[0][YAMLKeyword.output_tensors]
                output_shapes = subgraphs[0][YAMLKeyword.output_shapes]
            else:
                output_nodes = subgraphs[0][YAMLKeyword.check_tensors]
                output_shapes = subgraphs[0][YAMLKeyword.check_shapes]
            run_output = sh_commands.tuning_run(
                abi=target_abi,
                serialno=serial_num,
                target_dir=build_tmp_binary_dir,
                target_name=target_name,
                vlog_level=flags.vlog_level,
                embed_model_data=embed_model_data,
                model_output_dir=model_output_dir,
                input_nodes=subgraphs[0][YAMLKeyword.input_tensors],
                output_nodes=output_nodes,
                input_shapes=subgraphs[0][YAMLKeyword.input_shapes],
                output_shapes=output_shapes,
                mace_model_dir=mace_model_dir,
                model_tag=model_name,
                device_type=device_type,
                running_round=flags.round,
                restart_round=flags.restart_round,
                limit_opencl_kernel_time=model_config[YAMLKeyword.limit_opencl_kernel_time],  # noqa
                tuning=False,
                out_of_range_check=flags.gpu_out_of_range_check,
                phone_data_dir=PHONE_DATA_DIR,
                model_graph_format=configs[YAMLKeyword.model_graph_format],
                omp_num_threads=flags.omp_num_threads,
                cpu_affinity_policy=flags.cpu_affinity_policy,
                gpu_perf_hint=flags.gpu_perf_hint,
                gpu_priority_hint=flags.gpu_priority_hint,
                input_dir=flags.input_dir,
                output_dir=flags.output_dir,
                runtime_failure_ratio=flags.runtime_failure_ratio,
                address_sanitizer=flags.address_sanitizer,
                opencl_binary_file=model_opencl_output_bin_path,
                opencl_parameter_file=model_opencl_parameter_path,
                libmace_dynamic_library_path=LIBMACE_DYNAMIC_PATH,
                link_dynamic=link_dynamic,
                quantize_stat=flags.quantize_stat,
            )
            if flags.validate:
                model_file_path, weight_file_path = get_model_files(
                    model_config[YAMLKeyword.model_file_path],
                    model_config[YAMLKeyword.model_sha256_checksum],
                    BUILD_DOWNLOADS_DIR,
                    model_config[YAMLKeyword.weight_file_path],
                    model_config[YAMLKeyword.weight_sha256_checksum])

                # Quantized CPU runs use the separate "<CPU>_QUANTIZE"
                # threshold entry.
                validate_type = device_type
                if model_config[YAMLKeyword.quantize] == 1 \
                        and device_type == DeviceType.CPU:
                    validate_type = device_type + "_QUANTIZE"

                sh_commands.validate_model(
                    abi=target_abi,
                    serialno=serial_num,
                    model_file_path=model_file_path,
                    weight_file_path=weight_file_path,
                    platform=model_config[YAMLKeyword.platform],
                    device_type=device_type,
                    input_nodes=subgraphs[0][YAMLKeyword.input_tensors],
                    output_nodes=output_nodes,
                    input_shapes=subgraphs[0][YAMLKeyword.input_shapes],
                    output_shapes=output_shapes,
                    model_output_dir=model_output_dir,
                    phone_data_dir=PHONE_DATA_DIR,
                    input_data_types=subgraphs[0][YAMLKeyword.input_data_types],  # noqa
                    caffe_env=flags.caffe_env,
                    validation_threshold=subgraphs[0][YAMLKeyword.validation_threshold][validate_type])  # noqa
            if flags.report and flags.round > 0:
                # A run is reported as tuned only for the GPU device type.
                tuned = is_tuned and device_type == DeviceType.GPU
                report_run_statistics(
                    run_output, target_abi, serial_num,
                    model_name, device_type, flags.report_dir,
                    tuned)

    # Non-empty only when at least one model went through tuning above.
    if model_output_dirs:
        opencl_output_bin_path = get_opencl_binary_output_path(
            library_name, target_abi, target_soc, serial_num
        )
        opencl_parameter_bin_path = get_opencl_parameter_output_path(
            library_name, target_abi, target_soc, serial_num
        )

        # clear opencl output dir
        if os.path.exists(opencl_output_bin_path):
            sh.rm('-rf', opencl_output_bin_path)
        if os.path.exists(opencl_parameter_bin_path):
            sh.rm('-rf', opencl_parameter_bin_path)

        # merge all models' OpenCL binaries together
        sh_commands.merge_opencl_binaries(
            model_output_dirs, CL_COMPILED_BINARY_FILE_NAME,
            opencl_output_bin_path)
        # merge all models' OpenCL parameters together
        sh_commands.merge_opencl_parameters(
            model_output_dirs, CL_TUNED_PARAMETER_FILE_NAME,
            opencl_parameter_bin_path)
def print_package_summary(package_path):
title = "Library"
header = ["key", "value"]
......@@ -1398,36 +926,37 @@ def run_mace(flags):
clear_build_dirs(configs[YAMLKeyword.library_name])
target_socs = configs[YAMLKeyword.target_socs]
if not target_socs or ALL_SOC_TAG in target_socs:
target_socs = sh_commands.adb_get_all_socs()
device_list = DeviceManager.list_devices(flags.device_yml)
if target_socs and ALL_SOC_TAG not in target_socs:
device_list = [dev for dev in device_list
if dev[YAMLKeyword.target_socs].lower() in target_socs]
for target_abi in configs[YAMLKeyword.target_abis]:
# build target
if flags.example:
build_example(configs, target_abi,
not flags.disable_openmp,
flags.address_sanitizer,
flags.mace_lib_type)
else:
build_mace_run(configs, target_abi,
not flags.disable_openmp,
flags.address_sanitizer,
flags.mace_lib_type)
# run
if target_abi == ABIType.host:
run_specific_target(flags, configs, target_abi, None, None)
else:
for target_soc in target_socs:
serial_nums = \
sh_commands.get_target_socs_serialnos([target_soc])
mace_check(serial_nums,
ModuleName.RUN,
'There is no device with soc: ' + target_soc)
for serial_num in serial_nums:
with sh_commands.device_lock(serial_num):
run_specific_target(flags, configs, target_abi,
target_soc, serial_num)
for dev in device_list:
if target_abi in dev[YAMLKeyword.target_abis]:
# get toolchain
toolchain = infer_toolchain(target_abi)
if flags.example:
build_example(configs,
target_abi,
toolchain,
not flags.disable_openmp,
flags.mace_lib_type)
else:
build_mace_run(configs,
target_abi,
toolchain,
not flags.disable_openmp,
flags.address_sanitizer,
flags.mace_lib_type)
# run
device = DeviceWrapper(dev)
with device.lock():
device.run_specify_abi(flags, configs, target_abi)
elif dev[YAMLKeyword.device_name] != SystemType.host:
six.print_('The device with soc %s do not support abi %s' %
(dev[YAMLKeyword.target_socs], target_abi),
file=sys.stderr)
# package the output files
package_path = sh_commands.packaging_lib(BUILD_OUTPUT_DIR,
......@@ -1438,7 +967,11 @@ def run_mace(flags):
################################
# benchmark model
################################
def build_benchmark_model(configs, target_abi, enable_openmp, mace_lib_type):
def build_benchmark_model(configs,
target_abi,
toolchain,
enable_openmp,
mace_lib_type):
library_name = configs[YAMLKeyword.library_name]
hexagon_mode = get_hexagon_mode(configs)
......@@ -1459,6 +992,7 @@ def build_benchmark_model(configs, target_abi, enable_openmp, mace_lib_type):
sh_commands.bazel_build(benchmark_target,
abi=target_abi,
toolchain=toolchain,
enable_openmp=enable_openmp,
enable_opencl=get_opencl_mode(configs),
enable_quantize=get_quantize_mode(configs),
......@@ -1475,133 +1009,34 @@ def build_benchmark_model(configs, target_abi, enable_openmp, mace_lib_type):
sh.cp("-f", target_bin, build_tmp_binary_dir)
# Benchmark every model listed in configs[YAMLKeyword.models] for one
# (target_abi, target_soc, serial_num) combination.
#
# flags:      parsed command-line flags; this function reads mace_lib_type,
#             omp_num_threads, cpu_affinity_policy, gpu_perf_hint and
#             gpu_priority_hint from it.
# configs:    model configuration dict, indexed with YAMLKeyword members.
# target_abi: ABI to benchmark; ABIType.host is special-cased below.
# target_soc, serial_num: forwarded to the path helpers and to
#             sh_commands; callers visible in this file pass None for both
#             when target_abi is ABIType.host.
#
# NOTE(review): the indentation of this listing has been lost, so the exact
# nesting of the statements below cannot be confirmed from this view.
def bm_specific_target(flags, configs, target_abi, target_soc, serial_num):
library_name = configs[YAMLKeyword.library_name]
embed_model_data = \
configs[YAMLKeyword.model_data_format] == ModelFormat.code
opencl_output_bin_path = ""
opencl_parameter_path = ""
link_dynamic = flags.mace_lib_type == MACELibType.dynamic
# The benchmark binary name depends on how libmace is linked.
if link_dynamic:
bm_model_binary_name = BM_MODEL_DYNAMIC_NAME
else:
bm_model_binary_name = BM_MODEL_STATIC_NAME
build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi)
# OpenCL binary/parameter paths are only resolved for non-host targets
# when target_socs is configured; otherwise they stay empty strings.
if configs[YAMLKeyword.target_socs] and target_abi != ABIType.host:
opencl_output_bin_path = get_opencl_binary_output_path(
library_name, target_abi, target_soc, serial_num
)
opencl_parameter_path = get_opencl_parameter_output_path(
library_name, target_abi, target_soc, serial_num
)
for model_name in configs[YAMLKeyword.models]:
check_model_converted(library_name, model_name,
configs[YAMLKeyword.model_graph_format],
configs[YAMLKeyword.model_data_format],
target_abi)
if target_abi == ABIType.host:
device_name = ABIType.host
else:
device_name = \
sh_commands.adb_get_device_name_by_serialno(serial_num)
MaceLogger.header(
StringFormatter.block(
"Benchmark model %s on %s" % (model_name, device_name)))
model_config = configs[YAMLKeyword.models][model_name]
model_runtime = model_config[YAMLKeyword.runtime]
subgraphs = model_config[YAMLKeyword.subgraphs]
# Without target_socs, or on host, the model dirs are looked up with
# no soc/serial number.
if not configs[YAMLKeyword.target_socs] or target_abi == ABIType.host:
model_output_base_dir, model_output_dir, mace_model_dir = \
get_build_model_dirs(library_name, model_name, target_abi,
None, None,
model_config[YAMLKeyword.model_file_path])
else:
model_output_base_dir, model_output_dir, mace_model_dir = \
get_build_model_dirs(library_name, model_name, target_abi,
target_soc, serial_num,
model_config[YAMLKeyword.model_file_path])
# Start from an empty local output directory for this model.
if os.path.exists(model_output_dir):
sh.rm("-rf", model_output_dir)
os.makedirs(model_output_dir)
if target_abi != ABIType.host:
sh_commands.clear_phone_data_dir(serial_num, PHONE_DATA_DIR)
# Only the first subgraph is used for inputs/outputs.
sh_commands.gen_random_input(
model_output_dir,
subgraphs[0][YAMLKeyword.input_tensors],
subgraphs[0][YAMLKeyword.input_shapes],
subgraphs[0][YAMLKeyword.validation_inputs_data],
input_ranges=subgraphs[0][YAMLKeyword.input_ranges],
input_data_types=subgraphs[0][YAMLKeyword.input_data_types])
# Host always benchmarks on cpu; a cpu_gpu model is benchmarked once
# per runtime (cpu, then gpu).
runtime_list = []
if target_abi == ABIType.host:
runtime_list.extend([RuntimeType.cpu])
elif model_runtime == RuntimeType.cpu_gpu:
runtime_list.extend([RuntimeType.cpu, RuntimeType.gpu])
else:
runtime_list.extend([model_runtime])
for runtime in runtime_list:
device_type = parse_device_type(runtime)
sh_commands.benchmark_model(
abi=target_abi,
serialno=serial_num,
benchmark_binary_dir=build_tmp_binary_dir,
benchmark_binary_name=bm_model_binary_name,
vlog_level=0,
embed_model_data=embed_model_data,
model_output_dir=model_output_dir,
input_nodes=subgraphs[0][YAMLKeyword.input_tensors],
output_nodes=subgraphs[0][YAMLKeyword.output_tensors],
input_shapes=subgraphs[0][YAMLKeyword.input_shapes],
output_shapes=subgraphs[0][YAMLKeyword.output_shapes],
mace_model_dir=mace_model_dir,
model_tag=model_name,
device_type=device_type,
phone_data_dir=PHONE_DATA_DIR,
model_graph_format=configs[YAMLKeyword.model_graph_format],
omp_num_threads=flags.omp_num_threads,
cpu_affinity_policy=flags.cpu_affinity_policy,
gpu_perf_hint=flags.gpu_perf_hint,
gpu_priority_hint=flags.gpu_priority_hint,
opencl_binary_file=opencl_output_bin_path,
opencl_parameter_file=opencl_parameter_path,
libmace_dynamic_library_path=LIBMACE_DYNAMIC_PATH,
link_dynamic=link_dynamic)
def benchmark_model(flags):
configs = format_model_config(flags)
clear_build_dirs(configs[YAMLKeyword.library_name])
target_socs = configs[YAMLKeyword.target_socs]
if not target_socs or ALL_SOC_TAG in target_socs:
target_socs = sh_commands.adb_get_all_socs()
device_list = DeviceManager.list_devices(flags.device_yml)
if target_socs and ALL_SOC_TAG not in target_socs:
device_list = [dev for dev in device_list
if dev[YAMLKeyword.target_socs].lower() in target_socs]
for target_abi in configs[YAMLKeyword.target_abis]:
# build benchmark_model binary
build_benchmark_model(configs, target_abi,
not flags.disable_openmp,
flags.mace_lib_type)
if target_abi == ABIType.host:
bm_specific_target(flags, configs, target_abi, None, None)
else:
for target_soc in target_socs:
serial_nums = \
sh_commands.get_target_socs_serialnos([target_soc])
mace_check(serial_nums,
ModuleName.BENCHMARK,
'There is no device with soc: ' + target_soc)
for serial_num in serial_nums:
with sh_commands.device_lock(serial_num):
bm_specific_target(flags, configs, target_abi,
target_soc, serial_num)
for dev in device_list:
if target_abi in dev[YAMLKeyword.target_abis]:
toolchain = infer_toolchain(target_abi)
build_benchmark_model(configs,
target_abi,
toolchain,
not flags.disable_openmp,
flags.mace_lib_type)
device = DeviceWrapper(dev)
with device.lock():
device.bm_specific_target(flags, configs, target_abi)
else:
six.print_('There is no abi %s with soc %s' %
(target_abi, dev[YAMLKeyword.target_socs]),
file=sys.stderr)
################################
......@@ -1698,7 +1133,12 @@ def parse_args():
type=int,
default=DefaultValues.gpu_priority_hint,
help="0:DEFAULT/1:LOW/2:NORMAL/3:HIGH")
run_bm_parent_parser.add_argument(
"--device_yml",
type=str,
default='',
help='embedded linux device config yml file'
)
parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers()
convert = subparsers.add_parser(
......
# Copyright 2018 Xiaomi, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import socket
import subprocess
import time
import six
import sh
import yaml
import common
from common import *
import sh_commands
class DeviceWrapper:
allow_scheme = ('ssh', 'adb')
def __init__(self, device_dict):
    """
    Build a wrapper around one device description.

    :type device_dict: Device
    :param device_dict: a key-value dict that holds the device information;
        expected attributes are device_name, target_abis, target_socs,
        system, address and username. Every key must be the name of an
        attribute of ``YAMLKeyword``; each entry becomes an attribute of
        this object.
    :raises KeyError: when ``device_dict`` contains a key that is not an
        attribute name of ``YAMLKeyword``.
    :raises sh.ErrorReturnCode: when the ssh connectivity probe to an
        arm_linux device fails.
    """
    unknown_keys = set(device_dict.keys()).difference(
        YAMLKeyword.__dict__.keys())
    if unknown_keys:
        six.print_('Wrong key detected: ')
        six.print_(unknown_keys)
        raise KeyError(str(unknown_keys))

    self.__dict__.update(device_dict)

    system = self.system
    if system == SystemType.android:
        data_dir = PHONE_DATA_DIR
    elif system == SystemType.arm_linux:
        # Probe the ssh connection up front so authentication problems
        # surface at construction time.
        login = '{}@{}'.format(self.username, self.address)
        try:
            sh.ssh('-q', login, 'exit')
        except sh.ErrorReturnCode as e:
            six.print_('device connect failed, '
                       'please check your authentication')
            raise e
        data_dir = DEVICE_DATA_DIR
    else:
        # Any other system gets no data_dir / interior_dir attributes.
        return

    self.data_dir = data_dir
    self.interior_dir = data_dir + '/interior'
##################
# internal use #
##################
def exec_command(self, command, *args, **kwargs):
    """
    Run ``command`` on the device: through ``adb shell`` for android,
    through ``ssh`` for arm_linux. Extra positional and keyword arguments
    are forwarded to the underlying ``sh`` call. Nothing is returned, and
    any other system type is silently ignored.
    """
    system = self.system
    if system == SystemType.android:
        sh.adb('-s', self.address, 'shell', command, *args, **kwargs)
        return
    if system == SystemType.arm_linux:
        login = '{}@{}'.format(self.username, self.address)
        sh.ssh(login, command, *args, **kwargs)
#####################
# public interface #
#####################
def is_lock(self):
    """Return what ``sh_commands.is_device_locked`` reports for this
    device's address."""
    locked = sh_commands.is_device_locked(self.address)
    return locked
def lock(self):
    """Return the object produced by ``sh_commands.device_lock`` for this
    device's address; callers in this file use it as a context manager
    (``with device.lock():``)."""
    device_lock = sh_commands.device_lock(self.address)
    return device_lock
def clear_data_dir(self):
    """
    Clear the device-side working directory: via
    ``sh_commands.clear_phone_data_dir`` on android, via ``rm -rf`` of
    ``self.data_dir`` on arm_linux. Other system types are a no-op.
    """
    system = self.system
    if system == SystemType.android:
        sh_commands.clear_phone_data_dir(self.address, PHONE_DATA_DIR)
        return
    if system == SystemType.arm_linux:
        remove_cmd = 'rm -rf {}'.format(self.data_dir)
        self.exec_command(remove_cmd)
def pull_from_data_dir(self, filename, dst_path):
    """
    Copy ``filename`` from the device data directory into the local
    directory ``dst_path``. The remote directory is ``PHONE_DATA_DIR`` on
    android and ``DEVICE_DATA_DIR`` on arm_linux; other system types are a
    no-op.
    """
    if self.system == SystemType.android:
        remote_dir = PHONE_DATA_DIR
    elif self.system == SystemType.arm_linux:
        remote_dir = DEVICE_DATA_DIR
    else:
        return
    self.pull(remote_dir, filename, dst_path)
def create_internal_storage_dir(self):
    """
    Create ``<data_dir>/interior/`` on the device and return that path.

    The path is returned even when the system type is neither android nor
    arm_linux, in which case nothing is created.
    """
    storage_dir = '{}/interior/'.format(self.data_dir)
    system = self.system
    if system == SystemType.android:
        sh_commands.create_internal_storage_dir(self.address, storage_dir)
    elif system == SystemType.arm_linux:
        self.exec_command('mkdir -p {}'.format(storage_dir))
    return storage_dir
def rm(self, file):
    """
    Recursively force-remove ``file`` on the device, running the command
    in the foreground (``_fg=True``). Other system types are a no-op.
    """
    system = self.system
    if system == SystemType.android:
        sh.adb('-s', self.address, 'shell', 'rm', '-rf', file, _fg=True)
        return
    if system == SystemType.arm_linux:
        remove_cmd = 'rm -rf {}'.format(file)
        self.exec_command(remove_cmd, _fg=True)
def push(self, src_path, dst_path):
    """
    Copy the local ``src_path`` to ``dst_path`` on the device.

    ``mace_check`` is used to verify that ``src_path`` exists locally.
    Android devices use ``sh_commands.adb_push``; arm_linux devices use
    ``scp``. An scp failure with exit status 1 is reported on stderr and
    re-raised.
    """
    source_exists = os.path.exists(src_path)
    mace_check(source_exists, "Device", '{} not found'.format(src_path))
    six.print_("Push %s to %s" % (src_path, dst_path))

    system = self.system
    if system == SystemType.android:
        sh_commands.adb_push(src_path, dst_path, self.address)
        return
    if system == SystemType.arm_linux:
        try:
            remote_target = '{}@{}:{}'.format(self.username,
                                              self.address,
                                              dst_path)
            sh.scp(src_path, remote_target)
        except sh.ErrorReturnCode_1 as e:
            six.print_('Push Failed !', e, file=sys.stderr)
            raise e
def pull(self, src_path, file_name, dst_path='.'):
    """
    Copy ``<src_path>/<file_name>`` from the device to
    ``<dst_path>/<file_name>`` on the local machine.

    ``dst_path`` is created when missing and an existing local copy is
    removed first. Android devices use ``sh_commands.adb_pull``; arm_linux
    devices use ``scp -r``. An scp failure with exit status 1 is reported
    on stderr and re-raised.
    """
    if not os.path.exists(dst_path):
        sh.mkdir("-p", dst_path)

    remote_file = "%s/%s" % (src_path, file_name)
    local_file = "%s/%s" % (dst_path, file_name)
    # Drop any stale local copy before fetching the new one.
    if os.path.exists(local_file):
        sh.rm('-f', local_file)

    six.print_("Pull %s to %s" % (src_path, dst_path))
    system = self.system
    if system == SystemType.android:
        sh_commands.adb_pull(remote_file, local_file, self.address)
        return
    if system == SystemType.arm_linux:
        try:
            remote_source = '%s@%s:%s' % (self.username,
                                          self.address,
                                          remote_file)
            sh.scp('-r', remote_source, local_file)
        except sh.ErrorReturnCode_1 as e:
            six.print_("Pull Failed !", file=sys.stderr)
            raise e
# Run the model binary `<target_dir>/<target_name>` once for `model_tag`,
# either locally (host system) or on the wrapped device (android /
# arm_linux), and return the captured output, which is also stored in
# self.stdout.
#
# Host: the binary is started through `env` with LD_LIBRARY_PATH,
#   MACE_CPP_MIN_VLOG_LEVEL and MACE_RUNTIME_FAILURE_RATIO set; stderr and
#   stdout are captured and concatenated (stderr first).
# Android / arm_linux: the device data dir is removed and recreated, the
#   inputs, model files and binaries are pushed, the full command line is
#   written to a command file that is pushed and executed with `sh`, and
#   the output is collected through sh_commands.make_output_processor.
# Any other system type prints a message on stderr and raises
#   Exception('Wrong device').
#
# NOTE(review): the indentation of this listing has been lost, so the exact
# nesting of some statements (for example whether the
# libhexagon_controller.so push belongs to the GPU branch) cannot be
# confirmed from this view.
def tuning_run(self,
abi,
target_dir,
target_name,
vlog_level,
embed_model_data,
model_output_dir,
input_nodes,
output_nodes,
input_shapes,
output_shapes,
mace_model_dir,
model_tag,
device_type,
running_round,
restart_round,
limit_opencl_kernel_time,
tuning,
out_of_range_check,
model_graph_format,
opencl_binary_file,
opencl_parameter_file,
libmace_dynamic_library_path,
omp_num_threads=-1,
cpu_affinity_policy=1,
gpu_perf_hint=3,
gpu_priority_hint=3,
input_file_name='model_input',
output_file_name='model_out',
runtime_failure_ratio=0.0,
address_sanitizer=False,
link_dynamic=False
):
six.print_("* Run '%s' with round=%s, restart_round=%s, tuning=%s, "
"out_of_range_check=%s, omp_num_threads=%s, "
"cpu_affinity_policy=%s, gpu_perf_hint=%s, "
"gpu_priority_hint=%s" %
(model_tag, running_round, restart_round, str(tuning),
str(out_of_range_check), omp_num_threads,
cpu_affinity_policy, gpu_perf_hint, gpu_priority_hint))
# The local model graph path stays empty unless the graph is shipped as
# a file.
mace_model_path = ""
if model_graph_format == ModelFormat.file:
mace_model_path = "%s/%s.pb" % (mace_model_dir, model_tag)
if self.system == SystemType.host:
# LD_LIBRARY_PATH is the directory that contains the dynamic libmace.
libmace_dynamic_lib_path = \
os.path.dirname(libmace_dynamic_library_path)
p = subprocess.Popen(
[
"env",
"LD_LIBRARY_PATH=%s" % libmace_dynamic_lib_path,
"MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
"MACE_RUNTIME_FAILURE_RATIO=%f" % runtime_failure_ratio,
"%s/%s" % (target_dir, target_name),
"--model_name=%s" % model_tag,
"--input_node=%s" % ",".join(input_nodes),
"--output_node=%s" % ",".join(output_nodes),
"--input_shape=%s" % ":".join(input_shapes),
"--output_shape=%s" % ":".join(output_shapes),
"--input_file=%s/%s" % (model_output_dir,
input_file_name),
"--output_file=%s/%s" % (model_output_dir,
output_file_name),
"--model_data_file=%s/%s.data" % (mace_model_dir,
model_tag),
"--device=%s" % device_type,
"--round=%s" % running_round,
"--restart_round=%s" % restart_round,
"--omp_num_threads=%s" % omp_num_threads,
"--cpu_affinity_policy=%s" % cpu_affinity_policy,
"--gpu_perf_hint=%s" % gpu_perf_hint,
"--gpu_priority_hint=%s" % gpu_priority_hint,
"--model_file=%s" % mace_model_path,
],
stderr=subprocess.PIPE,
stdout=subprocess.PIPE)
out, err = p.communicate()
# stderr is placed before stdout in the stored output.
self.stdout = err + out
six.print_(self.stdout)
six.print_("Running finished!\n")
elif self.system in [SystemType.android, SystemType.arm_linux]:
# Start from a fresh data directory on the device.
self.rm(self.data_dir)
self.exec_command('mkdir -p {}'.format(self.data_dir))
internal_storage_dir = self.create_internal_storage_dir()
# One input file per input node is pushed.
for input_name in input_nodes:
formatted_name = common.formatted_file_name(input_file_name,
input_name)
self.push("%s/%s" % (model_output_dir, formatted_name),
self.data_dir)
if self.system == SystemType.android and address_sanitizer:
self.push(sh_commands.find_asan_rt_library(abi),
self.data_dir)
# The weights file is only pushed when it is not embedded in the code.
if not embed_model_data:
model_data_path = "%s/%s.data" % (mace_model_dir, model_tag)
mace_check(os.path.exists(model_data_path), "Device",
'model data file not found,'
' please convert model first')
self.push(model_data_path, self.data_dir)
if device_type == common.DeviceType.GPU:
if os.path.exists(opencl_binary_file):
self.push(opencl_binary_file, self.data_dir)
if os.path.exists(opencl_parameter_file):
self.push(opencl_parameter_file, self.data_dir)
self.push("third_party/nnlib/libhexagon_controller.so",
self.data_dir)
mace_model_phone_path = ""
if model_graph_format == ModelFormat.file:
mace_model_phone_path = "%s/%s.pb" % (self.data_dir,
model_tag)
self.push(mace_model_path, mace_model_phone_path)
if link_dynamic:
self.push(libmace_dynamic_library_path, self.data_dir)
if self.system == SystemType.android:
sh_commands.push_depended_so_libs(
libmace_dynamic_library_path, abi, self.data_dir,
self.address)
self.push("%s/%s" % (target_dir, target_name), self.data_dir)
stdout_buff = []
process_output = sh_commands.make_output_processor(stdout_buff)
# Environment assignments come first, then the binary and its flags;
# everything is joined into a single shell command line further down.
cmd = [
"LD_LIBRARY_PATH=%s" % self.data_dir,
"MACE_TUNING=%s" % int(tuning),
"MACE_OUT_OF_RANGE_CHECK=%s" % int(out_of_range_check),
"MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
"MACE_RUN_PARAMETER_PATH=%s/mace_run.config" % self.data_dir,
"MACE_INTERNAL_STORAGE_PATH=%s" % internal_storage_dir,
"MACE_LIMIT_OPENCL_KERNEL_TIME=%s" % limit_opencl_kernel_time,
"MACE_RUNTIME_FAILURE_RATIO=%f" % runtime_failure_ratio,
]
if self.system == SystemType.android and address_sanitizer:
cmd.extend([
"LD_PRELOAD=%s/%s" %
(self.data_dir,
sh_commands.asan_rt_library_names(abi))
])
cmd.extend([
"%s/%s" % (self.data_dir, target_name),
"--model_name=%s" % model_tag,
"--input_node=%s" % ",".join(input_nodes),
"--output_node=%s" % ",".join(output_nodes),
"--input_shape=%s" % ":".join(input_shapes),
"--output_shape=%s" % ":".join(output_shapes),
"--input_file=%s/%s" % (self.data_dir, input_file_name),
"--output_file=%s/%s" % (self.data_dir, output_file_name),
"--model_data_file=%s/%s.data" % (self.data_dir, model_tag),
"--device=%s" % device_type,
"--round=%s" % running_round,
"--restart_round=%s" % restart_round,
"--omp_num_threads=%s" % omp_num_threads,
"--cpu_affinity_policy=%s" % cpu_affinity_policy,
"--gpu_perf_hint=%s" % gpu_perf_hint,
"--gpu_priority_hint=%s" % gpu_priority_hint,
"--model_file=%s" % mace_model_phone_path,
"--opencl_binary_file=%s/%s" %
(self.data_dir, os.path.basename(opencl_binary_file)),
"--opencl_parameter_file=%s/%s" %
(self.data_dir, os.path.basename(opencl_parameter_file)),
])
cmd = ' '.join(cmd)
# The command line is written to a local file under /tmp (named with the
# model tag and the current time), pushed to the device and executed
# with `sh`; the local copy is removed after the push.
cmd_file_name = "%s-%s-%s" % ('cmd_file',
model_tag,
str(time.time()))
cmd_file = "%s/%s" % (self.data_dir, cmd_file_name)
tmp_cmd_file = "%s/%s" % ('/tmp', cmd_file_name)
with open(tmp_cmd_file, 'w') as file:
file.write(cmd)
self.push(tmp_cmd_file, cmd_file)
os.remove(tmp_cmd_file)
self.exec_command('sh {}'.format(cmd_file),
_tty_in=True,
_out=process_output,
_err_to_out=True)
self.stdout = "".join(stdout_buff)
# A failed run is only logged here; no exception is raised by this check
# itself. NOTE(review): whether MaceLogger.error aborts is not visible
# from this file.
if not sh_commands.stdout_success(self.stdout):
common.MaceLogger.error("Mace Run", "Mace run failed.")
six.print_("Running finished!\n")
else:
six.print_('Unsupported system %s' % self.system, file=sys.stderr)
raise Exception('Wrong device')
return self.stdout
def tuning(self, library_name, model_name, model_config,
           model_graph_format, model_data_format,
           target_abi, mace_lib_type):
    """
    Run one tuning pass of ``model_name`` on this device and fetch the
    resulting OpenCL files.

    The device data dir is cleared, random inputs are generated from the
    first subgraph, and ``tuning_run`` is invoked with ``tuning=True`` on
    ``DeviceType.GPU`` (``running_round=0``, ``restart_round=1``).
    Afterwards ``CL_COMPILED_BINARY_FILE_NAME`` is pulled from
    ``self.interior_dir`` and ``CL_TUNED_PARAMETER_FILE_NAME`` from the
    device data dir, both into
    ``<model_output_dir>/<BUILD_TMP_OPENCL_BIN_DIR>``.
    """
    six.print_('* Tuning, it may take some time')

    binary_dir = get_build_binary_dir(library_name, target_abi)
    # The runner binary name follows the libmace linkage type.
    if mace_lib_type == MACELibType.dynamic:
        runner_name, use_dynamic_lib = MACE_RUN_DYNAMIC_NAME, True
    else:
        runner_name, use_dynamic_lib = MACE_RUN_STATIC_NAME, False
    data_embedded = model_data_format == ModelFormat.code

    # build for specified soc
    _, model_output_dir, mace_model_dir = get_build_model_dirs(
        library_name, model_name, target_abi, self,
        model_config[YAMLKeyword.model_file_path])

    self.clear_data_dir()

    subgraphs = model_config[YAMLKeyword.subgraphs]
    # generate input data
    sh_commands.gen_random_input(
        model_output_dir,
        subgraphs[0][YAMLKeyword.input_tensors],
        subgraphs[0][YAMLKeyword.input_shapes],
        subgraphs[0][YAMLKeyword.validation_inputs_data],
        input_ranges=subgraphs[0][YAMLKeyword.input_ranges],
        input_data_types=subgraphs[0][YAMLKeyword.input_data_types])

    self.tuning_run(
        abi=target_abi,
        target_dir=binary_dir,
        target_name=runner_name,
        vlog_level=0,
        embed_model_data=data_embedded,
        model_output_dir=model_output_dir,
        input_nodes=subgraphs[0][YAMLKeyword.input_tensors],
        output_nodes=subgraphs[0][YAMLKeyword.output_tensors],
        input_shapes=subgraphs[0][YAMLKeyword.input_shapes],
        output_shapes=subgraphs[0][YAMLKeyword.output_shapes],
        mace_model_dir=mace_model_dir,
        model_tag=model_name,
        device_type=DeviceType.GPU,
        running_round=0,
        restart_round=1,
        limit_opencl_kernel_time=model_config[
            YAMLKeyword.limit_opencl_kernel_time],
        tuning=True,
        out_of_range_check=False,
        model_graph_format=model_graph_format,
        opencl_binary_file='',
        opencl_parameter_file='',
        libmace_dynamic_library_path=LIBMACE_DYNAMIC_PATH,
        link_dynamic=use_dynamic_lib,
    )

    # pull opencl library
    self.pull(self.interior_dir, CL_COMPILED_BINARY_FILE_NAME,
              '{}/{}'.format(model_output_dir, BUILD_TMP_OPENCL_BIN_DIR))
    # pull opencl parameter
    self.pull_from_data_dir(
        CL_TUNED_PARAMETER_FILE_NAME,
        '{}/{}'.format(model_output_dir, BUILD_TMP_OPENCL_BIN_DIR))

    six.print_('Tuning done! \n')
# Run every model in configs[YAMLKeyword.models] on this device for one ABI.
#
# Per model: check it has been converted, recreate its local output
# directory, optionally tune it (self.tuning), generate random inputs, run
# it once per runtime through self.tuning_run, then optionally validate the
# result (flags.validate) and append a report row (flags.report with
# flags.round > 0). When at least one model was tuned, the per-model OpenCL
# binaries and tuned parameters are merged at the end.
#
# Returns None; returns early with a message when target_abi is not listed
# in self.target_abis.
#
# NOTE(review): the indentation of this listing has been lost, so the exact
# nesting of the validate/report steps and of the final merge step cannot
# be confirmed from this view.
def run_specify_abi(self, flags, configs, target_abi):
if target_abi not in self.target_abis:
six.print_('There is no device with soc: %s abi: %s' %
(self.target_socs, target_abi))
return
library_name = configs[YAMLKeyword.library_name]
mace_lib_type = flags.mace_lib_type
embed_model_data = \
configs[YAMLKeyword.model_data_format] == ModelFormat.code
build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi)
# get target name for run
if flags.example:
if mace_lib_type == MACELibType.static:
target_name = EXAMPLE_STATIC_NAME
else:
target_name = EXAMPLE_DYNAMIC_NAME
else:
if mace_lib_type == MACELibType.static:
target_name = MACE_RUN_STATIC_NAME
else:
target_name = MACE_RUN_DYNAMIC_NAME
link_dynamic = mace_lib_type == MACELibType.dynamic
# Output dirs of the models that were tuned in this call; they feed the
# merge step at the end.
model_output_dirs = []
for model_name in configs[YAMLKeyword.models]:
check_model_converted(library_name, model_name,
configs[YAMLKeyword.model_graph_format],
configs[YAMLKeyword.model_data_format],
target_abi)
if target_abi != ABIType.host:
self.clear_data_dir()
MaceLogger.header(
StringFormatter.block(
'Run model {} on {}'.format(model_name, self.device_name)))
model_config = configs[YAMLKeyword.models][model_name]
model_runtime = model_config[YAMLKeyword.runtime]
subgraphs = model_config[YAMLKeyword.subgraphs]
# NOTE(review): both branches below make the same call with the same
# arguments, so the condition currently has no effect.
if not configs[YAMLKeyword.target_socs] \
or target_abi == ABIType.host:
model_output_base_dir, model_output_dir, mace_model_dir = \
get_build_model_dirs(
library_name, model_name, target_abi, self,
model_config[YAMLKeyword.model_file_path])
else:
model_output_base_dir, model_output_dir, mace_model_dir = \
get_build_model_dirs(
library_name, model_name, target_abi, self,
model_config[YAMLKeyword.model_file_path])
# clear temp model output dir
if os.path.exists(model_output_dir):
sh.rm('-rf', model_output_dir)
os.makedirs(model_output_dir)
is_tuned = False
model_opencl_output_bin_path = ''
model_opencl_parameter_path = ''
# Tuning only happens for a non-host, gpu-capable model when
# target_socs is configured and neither address sanitizer, example mode
# nor disable_tuning is requested.
if not flags.address_sanitizer \
and not flags.example \
and target_abi != ABIType.host \
and configs[YAMLKeyword.target_socs] \
and self.target_socs \
and model_runtime in [RuntimeType.gpu,
RuntimeType.cpu_gpu] \
and not flags.disable_tuning:
self.tuning(library_name, model_name, model_config,
configs[YAMLKeyword.model_graph_format],
configs[YAMLKeyword.model_data_format],
target_abi, mace_lib_type)
model_output_dirs.append(model_output_dir)
model_opencl_output_bin_path = \
'{}/{}/{}'.format(model_output_dir,
BUILD_TMP_OPENCL_BIN_DIR,
CL_COMPILED_BINARY_FILE_NAME)
model_opencl_parameter_path = \
'{}/{}/{}'.format(model_output_dir,
BUILD_TMP_OPENCL_BIN_DIR,
CL_TUNED_PARAMETER_FILE_NAME)
self.clear_data_dir()
is_tuned = True
# Without tuning, the library-level OpenCL output paths are used instead
# of the per-model ones.
elif target_abi != ABIType.host and self.target_socs:
model_opencl_output_bin_path = get_opencl_binary_output_path(
library_name, target_abi, self
)
model_opencl_parameter_path = get_opencl_parameter_output_path(
library_name, target_abi, self
)
sh_commands.gen_random_input(
model_output_dir,
subgraphs[0][YAMLKeyword.input_tensors],
subgraphs[0][YAMLKeyword.input_shapes],
subgraphs[0][YAMLKeyword.validation_inputs_data],
input_ranges=subgraphs[0][YAMLKeyword.input_ranges],
input_data_types=subgraphs[0][YAMLKeyword.input_data_types]
)
# Host always runs on cpu; a cpu_gpu model is run once per runtime
# (cpu, then gpu).
runtime_list = []
if target_abi == ABIType.host:
runtime_list.append(RuntimeType.cpu)
elif model_runtime == RuntimeType.cpu_gpu:
runtime_list.extend([RuntimeType.cpu, RuntimeType.gpu])
else:
runtime_list.append(model_runtime)
for runtime in runtime_list:
device_type = parse_device_type(runtime)
# run for specified soc
run_output = self.tuning_run(
abi=target_abi,
target_dir=build_tmp_binary_dir,
target_name=target_name,
vlog_level=flags.vlog_level,
embed_model_data=embed_model_data,
model_output_dir=model_output_dir,
input_nodes=subgraphs[0][YAMLKeyword.input_tensors],
output_nodes=subgraphs[0][YAMLKeyword.output_tensors],
input_shapes=subgraphs[0][YAMLKeyword.input_shapes],
output_shapes=subgraphs[0][YAMLKeyword.output_shapes],
mace_model_dir=mace_model_dir,
model_tag=model_name,
device_type=device_type,
running_round=flags.round,
restart_round=flags.restart_round,
limit_opencl_kernel_time=model_config[
YAMLKeyword.limit_opencl_kernel_time],
tuning=False,
out_of_range_check=flags.gpu_out_of_range_check,
model_graph_format=configs[YAMLKeyword.model_graph_format],
omp_num_threads=flags.omp_num_threads,
cpu_affinity_policy=flags.cpu_affinity_policy,
gpu_perf_hint=flags.gpu_perf_hint,
gpu_priority_hint=flags.gpu_priority_hint,
runtime_failure_ratio=flags.runtime_failure_ratio,
address_sanitizer=flags.address_sanitizer,
opencl_binary_file=model_opencl_output_bin_path,
opencl_parameter_file=model_opencl_parameter_path,
libmace_dynamic_library_path=LIBMACE_DYNAMIC_PATH,
link_dynamic=link_dynamic
)
if flags.validate:
model_file_path, weight_file_path = get_model_files(
model_config[YAMLKeyword.model_file_path],
model_config[YAMLKeyword.model_sha256_checksum],
BUILD_DOWNLOADS_DIR,
model_config[YAMLKeyword.weight_file_path],
model_config[YAMLKeyword.weight_sha256_checksum]
)
# Quantized models look up their validation threshold under a
# '<device_type>_QUANTIZE' key.
validate_type = device_type
if model_config[YAMLKeyword.quantize] == 1:
validate_type = device_type + '_QUANTIZE'
sh_commands.validate_model(
abi=target_abi,
device=self,
model_file_path=model_file_path,
weight_file_path=weight_file_path,
platform=model_config[YAMLKeyword.platform],
device_type=device_type,
input_nodes=subgraphs[0][YAMLKeyword.input_tensors],
output_nodes=subgraphs[0][YAMLKeyword.output_tensors],
input_shapes=subgraphs[0][YAMLKeyword.input_shapes],
output_shapes=subgraphs[0][YAMLKeyword.output_shapes],
model_output_dir=model_output_dir,
input_data_types=subgraphs[0][
YAMLKeyword.input_data_types],
caffe_env=flags.caffe_env,
validation_threshold=subgraphs[0][
YAMLKeyword.validation_threshold][validate_type]
)
if flags.report and flags.round > 0:
# A run is only reported as tuned when it was the GPU run of a tuned
# model.
tuned = is_tuned and device_type == DeviceType.GPU
self.report_run_statistics(
target_abi=target_abi,
model_name=model_name,
device_type=device_type,
output_dir=flags.report_dir,
tuned=tuned
)
if model_output_dirs:
opencl_output_bin_path = get_opencl_binary_output_path(
library_name, target_abi, self
)
opencl_parameter_bin_path = get_opencl_parameter_output_path(
library_name, target_abi, self
)
# clear opencl output dir
if os.path.exists(opencl_output_bin_path):
sh.rm('-rf', opencl_output_bin_path)
if os.path.exists(opencl_parameter_bin_path):
sh.rm('-rf', opencl_parameter_bin_path)
# merge all model's opencl binaries together
sh_commands.merge_opencl_binaries(
model_output_dirs, CL_COMPILED_BINARY_FILE_NAME,
opencl_output_bin_path
)
# merge all model's opencl parameter together
sh_commands.merge_opencl_parameters(
model_output_dirs, CL_TUNED_PARAMETER_FILE_NAME,
opencl_parameter_bin_path
)
def report_run_statistics(self,
target_abi,
model_name,
device_type,
output_dir,
tuned):
metrics = [0] * 3
for line in self.stdout.split('\n'):
line = line.strip()
parts = line.split()
if len(parts) == 4 and parts[0].startswith('time'):
metrics[0] = str(float(parts[1]))
metrics[1] = str(float(parts[2]))
metrics[2] = str(float(parts[3]))
break
report_filename = output_dir + '/report.csv'
if not os.path.exists(report_filename):
with open(report_filename, 'w') as f:
f.write('model_name,device_name,soc,abi,runtime,'
'init(ms),warmup(ms),run_avg(ms),tuned\n')
data_str = '{model_name},{device_name},{soc},{abi},{device_type},' \
'{init},{warmup},{run_avg},{tuned}\n'.format(
model_name=model_name,
device_name=self.device_name,
soc=self.target_socs,
abi=target_abi,
device_type=device_type,
init=metrics[0],
warmup=metrics[1],
run_avg=metrics[2],
tuned=tuned)
with open(report_filename, 'a') as f:
f.write(data_str)
def benchmark_model(self,
                    abi,
                    benchmark_binary_dir,
                    benchmark_binary_name,
                    vlog_level,
                    embed_model_data,
                    model_output_dir,
                    mace_model_dir,
                    input_nodes,
                    output_nodes,
                    input_shapes,
                    output_shapes,
                    model_tag,
                    device_type,
                    model_graph_format,
                    opencl_binary_file,
                    opencl_parameter_file,
                    libmace_dynamic_library_path,
                    omp_num_threads=-1,
                    cpu_affinity_policy=1,
                    gpu_perf_hint=3,
                    gpu_priority_hint=3,
                    input_file_name='model_input',
                    link_dynamic=False):
    """
    Run the benchmark binary for ``model_tag`` on the host or on the
    wrapped device.

    Host (``abi == ABIType.host``): the binary is started locally through
    ``env`` and waited for.
    Android / arm_linux: inputs, model files, optional OpenCL files, the
    optional dynamic libmace and the benchmark binary are pushed to
    ``self.data_dir``; the command line is written to a command file that
    is pushed, executed with ``sh`` in the foreground and removed from the
    device afterwards.

    Both branches pass the same option names to the binary
    (``--output_shape``, ``--cpu_affinity_policy``); the host branch used
    to misspell them as ``--output_shapes`` and ``--cpu_addinity_policy``.
    """
    six.print_('* Benchmark for %s' % model_tag)

    # The local model graph path stays empty unless the graph is shipped
    # as a file.
    mace_model_path = ''
    if model_graph_format == ModelFormat.file:
        mace_model_path = '%s/%s.pb' % (mace_model_dir, model_tag)

    if abi == ABIType.host:
        libmace_dynamic_lib_dir_path = \
            os.path.dirname(libmace_dynamic_library_path)
        p = subprocess.Popen(
            [
                'env',
                'LD_LIBRARY_PATH=%s' % libmace_dynamic_lib_dir_path,
                'MACE_CPP_MIN_VLOG_LEVEL=%s' % vlog_level,
                '%s/%s' % (benchmark_binary_dir, benchmark_binary_name),
                '--model_name=%s' % model_tag,
                '--input_node=%s' % ','.join(input_nodes),
                '--output_node=%s' % ','.join(output_nodes),
                '--input_shape=%s' % ':'.join(input_shapes),
                # Fixed: was '--output_shapes'.
                '--output_shape=%s' % ':'.join(output_shapes),
                '--input_file=%s/%s' % (model_output_dir, input_file_name),
                '--model_data_file=%s/%s.data' % (mace_model_dir,
                                                  model_tag),
                '--device=%s' % device_type,
                '--omp_num_threads=%s' % omp_num_threads,
                # Fixed: was '--cpu_addinity_policy'.
                '--cpu_affinity_policy=%s' % cpu_affinity_policy,
                '--gpu_perf_hint=%s' % gpu_perf_hint,
                '--gpu_priority_hint=%s' % gpu_priority_hint,
                '--model_file=%s' % mace_model_path
            ])
        p.wait()
    elif self.system in [SystemType.android, SystemType.arm_linux]:
        self.exec_command('mkdir -p %s' % self.data_dir)
        internal_storage_dir = self.create_internal_storage_dir()

        # One input file per input node is pushed.
        for input_name in input_nodes:
            formatted_name = formatted_file_name(input_file_name,
                                                 input_name)
            self.push('%s/%s' % (model_output_dir, formatted_name),
                      self.data_dir)
        # The weights file is only pushed when it is not embedded in code.
        if not embed_model_data:
            self.push('%s/%s.data' % (mace_model_dir, model_tag),
                      self.data_dir)
        if device_type == common.DeviceType.GPU:
            if os.path.exists(opencl_binary_file):
                self.push(opencl_binary_file, self.data_dir)
            if os.path.exists(opencl_parameter_file):
                self.push(opencl_parameter_file, self.data_dir)

        mace_model_device_path = ''
        if model_graph_format == ModelFormat.file:
            mace_model_device_path = '%s/%s.pb' % \
                (self.data_dir, model_tag)
            self.push(mace_model_path, mace_model_device_path)
        if link_dynamic:
            self.push(libmace_dynamic_library_path, self.data_dir)
            if self.system == SystemType.android:
                sh_commands.push_depended_so_libs(
                    libmace_dynamic_library_path, abi, self.data_dir,
                    self.address)
        # Replace any benchmark binary left over from an earlier run.
        self.rm('%s/%s' % (self.data_dir, benchmark_binary_name))
        self.push('%s/%s' % (benchmark_binary_dir, benchmark_binary_name),
                  self.data_dir)

        # Environment assignments first, then the binary and its flags.
        cmd = [
            'LD_LIBRARY_PATH=%s' % self.data_dir,
            'MACE_CPP_MIN_VLOG_LEVEL=%s' % vlog_level,
            'MACE_RUN_PARAMETER_PATH=%s/mace_run.config' % self.data_dir,
            'MACE_INTERNAL_STORAGE_PATH=%s' % internal_storage_dir,
            'MACE_OPENCL_PROFILING=1',
            '%s/%s' % (self.data_dir, benchmark_binary_name),
            '--model_name=%s' % model_tag,
            '--input_node=%s' % ','.join(input_nodes),
            '--output_node=%s' % ','.join(output_nodes),
            '--input_shape=%s' % ':'.join(input_shapes),
            '--output_shape=%s' % ':'.join(output_shapes),
            '--input_file=%s/%s' % (self.data_dir, input_file_name),
            '--model_data_file=%s/%s.data' % (self.data_dir, model_tag),
            '--device=%s' % device_type,
            '--omp_num_threads=%s' % omp_num_threads,
            '--cpu_affinity_policy=%s' % cpu_affinity_policy,
            '--gpu_perf_hint=%s' % gpu_perf_hint,
            '--gpu_priority_hint=%s' % gpu_priority_hint,
            '--model_file=%s' % mace_model_device_path,
            '--opencl_binary_file=%s/%s' %
            (self.data_dir, os.path.basename(opencl_binary_file)),
            '--opencl_parameter_file=%s/%s' %
            (self.data_dir, os.path.basename(opencl_parameter_file))
        ]
        cmd = ' '.join(cmd)

        cmd_file_name = '%s-%s-%s' % \
            ('cmd_file', model_tag, str(time.time()))
        cmd_file_path = '%s/%s' % (self.data_dir, cmd_file_name)
        tmp_cmd_file = '%s/%s' % ('/tmp', cmd_file_name)
        with open(tmp_cmd_file, 'w') as f:
            f.write(cmd)
        try:
            self.push(tmp_cmd_file, cmd_file_path)
        finally:
            # Do not leave the local command file behind when the push
            # fails.
            os.remove(tmp_cmd_file)

        if self.system == SystemType.android:
            sh.adb('-s', self.address, 'shell', 'sh', cmd_file_path,
                   _fg=True)
        elif self.system == SystemType.arm_linux:
            sh.ssh('%s@%s' % (self.username, self.address),
                   'sh', cmd_file_path, _fg=True)
        self.rm(cmd_file_path)

    six.print_('Benchmark done! \n')
def bm_specific_target(self, flags, configs, target_abi):
    """
    Benchmark every model in ``configs[YAMLKeyword.models]`` on this
    device for ``target_abi``.

    Per model: check that it has been converted, recreate its local output
    directory, clear the device data dir (non-host only), generate random
    inputs from the first subgraph and call ``self.benchmark_model`` once
    per runtime. A ``cpu_gpu`` model is benchmarked on cpu and then on
    gpu; host targets are always benchmarked on cpu only.

    :param flags: parsed command-line flags; reads mace_lib_type,
        omp_num_threads, cpu_affinity_policy, gpu_perf_hint and
        gpu_priority_hint.
    :param configs: model configuration dict indexed with YAMLKeyword
        members.
    :param target_abi: ABI to benchmark.
    """
    library_name = configs[YAMLKeyword.library_name]
    embed_model_data = \
        configs[YAMLKeyword.model_data_format] == ModelFormat.code
    opencl_output_bin_path = ''
    opencl_parameter_path = ''
    link_dynamic = flags.mace_lib_type == MACELibType.dynamic

    # The benchmark binary name depends on how libmace is linked.
    if link_dynamic:
        bm_model_binary_name = BM_MODEL_DYNAMIC_NAME
    else:
        bm_model_binary_name = BM_MODEL_STATIC_NAME
    build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi)

    # OpenCL files are only looked up for non-host targets when
    # target_socs is configured; otherwise the paths stay empty.
    if configs[YAMLKeyword.target_socs] and target_abi != ABIType.host:
        opencl_output_bin_path = get_opencl_binary_output_path(
            library_name, target_abi, self
        )
        opencl_parameter_path = get_opencl_parameter_output_path(
            library_name, target_abi, self
        )

    for model_name in configs[YAMLKeyword.models]:
        check_model_converted(library_name,
                              model_name,
                              configs[YAMLKeyword.model_graph_format],
                              configs[YAMLKeyword.model_data_format],
                              target_abi)
        MaceLogger.header(
            StringFormatter.block(
                'Benchmark model %s on %s' % (model_name,
                                              self.device_name)))
        model_config = configs[YAMLKeyword.models][model_name]
        model_runtime = model_config[YAMLKeyword.runtime]
        subgraphs = model_config[YAMLKeyword.subgraphs]

        model_output_base_dir, model_output_dir, mace_model_dir = \
            get_build_model_dirs(library_name, model_name,
                                 target_abi, self,
                                 model_config[YAMLKeyword.model_file_path])
        # Start from an empty local output directory for this model.
        if os.path.exists(model_output_dir):
            sh.rm('-rf', model_output_dir)
        os.makedirs(model_output_dir)

        if target_abi != ABIType.host:
            self.clear_data_dir()
        sh_commands.gen_random_input(
            model_output_dir,
            subgraphs[0][YAMLKeyword.input_tensors],
            subgraphs[0][YAMLKeyword.input_shapes],
            subgraphs[0][YAMLKeyword.validation_inputs_data],
            input_ranges=subgraphs[0][YAMLKeyword.input_ranges],
            input_data_types=subgraphs[0][YAMLKeyword.input_data_types]
        )

        runtime_list = []
        if target_abi == ABIType.host:
            runtime_list.append(RuntimeType.cpu)
        elif model_runtime == RuntimeType.cpu_gpu:
            # Fixed: the second entry used to be RuntimeType.cpu_gpu, so
            # the gpu pass was never requested as a gpu runtime. The
            # expansion now matches run_specify_abi (cpu, then gpu).
            runtime_list.extend([RuntimeType.cpu, RuntimeType.gpu])
        else:
            runtime_list.append(model_runtime)

        for runtime in runtime_list:
            device_type = parse_device_type(runtime)
            self.benchmark_model(
                abi=target_abi,
                benchmark_binary_dir=build_tmp_binary_dir,
                benchmark_binary_name=bm_model_binary_name,
                vlog_level=0,
                embed_model_data=embed_model_data,
                model_output_dir=model_output_dir,
                input_nodes=subgraphs[0][YAMLKeyword.input_tensors],
                output_nodes=subgraphs[0][YAMLKeyword.output_tensors],
                input_shapes=subgraphs[0][YAMLKeyword.input_shapes],
                output_shapes=subgraphs[0][YAMLKeyword.output_shapes],
                mace_model_dir=mace_model_dir,
                model_tag=model_name,
                device_type=device_type,
                model_graph_format=configs[YAMLKeyword.model_graph_format],
                omp_num_threads=flags.omp_num_threads,
                cpu_affinity_policy=flags.cpu_affinity_policy,
                gpu_perf_hint=flags.gpu_perf_hint,
                gpu_priority_hint=flags.gpu_priority_hint,
                opencl_binary_file=opencl_output_bin_path,
                opencl_parameter_file=opencl_parameter_path,
                libmace_dynamic_library_path=LIBMACE_DYNAMIC_PATH,
                link_dynamic=link_dynamic
            )
def run(self,
        abi,
        host_bin_path,
        bin_name,
        args='',
        opencl_profiling=True,
        vlog_level=0,
        out_of_range_check=True,
        address_sanitizer=False,
        simpleperf=False):
    """Push a binary to this device, execute it and return its output.

    The device is locked for the whole operation. The device data directory
    is wiped and recreated before ``<host_bin_path>/<bin_name>`` is pushed
    into it.

    Args:
        abi: target ABI, used to locate the ASan runtime / simpleperf binary.
        host_bin_path: host directory that contains the binary.
        bin_name: file name of the binary.
        args: extra command line appended verbatim after the binary path.
        opencl_profiling: exported as ``MACE_OPENCL_PROFILING`` (1 or 0).
        vlog_level: exported as ``MACE_CPP_MIN_VLOG_LEVEL``.
        out_of_range_check: exported as ``MACE_OUT_OF_RANGE_CHECK`` (1 or 0).
        address_sanitizer: push the ASan runtime and ``LD_PRELOAD`` it.
        simpleperf: on Android devices, wrap the binary in ``simpleperf stat``.

    Returns:
        Everything the command wrote to stdout/stderr, as one string.
    """
    host_bin_full_path = '%s/%s' % (host_bin_path, bin_name)
    device_bin_full_path = '%s/%s' % (self.data_dir, bin_name)
    print(
        '================================================================'
    )
    print('Trying to lock device %s' % self.address)
    with self.lock():
        print('Run on device: %s, %s, %s' %
              (self.address, self.target_socs, self.device_name))
        self.rm(self.data_dir)
        self.exec_command('mkdir -p %s' % self.data_dir)
        self.push(host_bin_full_path, device_bin_full_path)
        ld_preload = ''
        if address_sanitizer:
            self.push(sh_commands.find_asan_rt_library(abi),
                      self.data_dir)
            ld_preload = 'LD_PRELOAD=%s/%s' % \
                         (self.data_dir,
                          sh_commands.asan_rt_library_names(abi))
        opencl_profiling = 1 if opencl_profiling else 0
        out_of_range_check = 1 if out_of_range_check else 0
        print('Run %s' % device_bin_full_path)
        stdout_buf = []
        process_output = sh_commands.make_output_processor(stdout_buf)

        # Environment prefix shared by both launch modes. The variable name
        # was previously misspelled ('MACE_OPENCL_PROFILNG') in the plain
        # launch mode, so the profiling switch was not exported there.
        exec_cmd = [
            ld_preload,
            'MACE_OUT_OF_RANGE_CHECK=%d' % out_of_range_check,
            'MACE_OPENCL_PROFILING=%d' % opencl_profiling,
            'MACE_CPP_MIN_VLOG_LEVEL=%d' % vlog_level,
        ]
        if simpleperf and self.system == SystemType.android:
            self.push(sh_commands.find_simpleperf_library(abi),
                      self.data_dir)
            simpleperf_cmd = '%s/simpleperf' % self.data_dir
            exec_cmd.extend([
                simpleperf_cmd,
                'stat',
                '--group',
                'raw-l1-dcache,raw-l1-dcache-refill',
                '--group',
                'raw-l2-dcache,raw-l2-dcache-refill',
                '--group',
                'raw-l1-dtlb,raw-l1-dtlb-refill',
                '--group',
                'raw-l2-dtlb,raw-l2-dtlb-refill',
            ])
        exec_cmd.extend([device_bin_full_path, args])

        exec_cmd = ' '.join(exec_cmd)
        self.exec_command(exec_cmd, _tty_in=True,
                          _out=process_output, _err_to_out=True)
        return ''.join(stdout_buf)
class DeviceManager:
    """Collects descriptions of the devices that can be used as run targets.

    Every device is described by a dict keyed with ``YAMLKeyword`` constants
    (device name, target ABIs, target SoC, system type, address, ...).
    """

    @classmethod
    def list_adb_device(cls):
        """Return one description dict per entry reported by ``adb devices``."""
        # The first output line is the "List of devices attached" header;
        # the remaining lines are "<serial>\t<state>" pairs.
        adb_list = sh.adb('devices').stdout.decode('utf-8'). \
            strip().split('\n')[1:]
        adb_list = [tuple(pair.split('\t')) for pair in adb_list]
        devices = []
        for adb in adb_list:
            prop = sh_commands.adb_getprop_by_serialno(adb[0])
            android = {
                YAMLKeyword.device_name:
                    prop['ro.product.model'].replace(' ', ''),
                YAMLKeyword.target_abis:
                    prop['ro.product.cpu.abilist'].split(','),
                YAMLKeyword.target_socs: prop['ro.board.platform'],
                YAMLKeyword.system: SystemType.android,
                YAMLKeyword.address: adb[0],
                YAMLKeyword.username: '',
            }
            devices.append(android)
        return devices

    @classmethod
    def list_ssh_device(cls, yml):
        """Return the device dicts listed under ``devices`` in file ``yml``.

        Each entry gets a device name derived from its ``models`` field
        (spaces removed) and is tagged as an ARM Linux system.
        """
        with open(yml) as f:
            # NOTE(review): this used to be yaml.load() without a Loader,
            # which can construct arbitrary Python objects on older PyYAML
            # and raises TypeError on PyYAML >= 6. A device list only needs
            # plain mappings/scalars, so the safe loader is sufficient.
            devices = yaml.safe_load(f.read())
        devices = devices['devices']
        device_list = []
        for _name, dev in six.iteritems(devices):
            dev[YAMLKeyword.device_name] = \
                dev[YAMLKeyword.models].replace(' ', '')
            dev[YAMLKeyword.system] = SystemType.arm_linux
            device_list.append(dev)
        return device_list

    @classmethod
    def list_devices(cls, yml):
        """Return adb devices, then configured SSH devices, then the host.

        Args:
            yml: path of the ARM Linux device config file. When empty,
                ``devices.yml`` in the working directory is used if it
                exists. When given but missing, an error is reported through
                ``MaceLogger.error``.
        """
        devices_list = []
        devices_list.extend(cls.list_adb_device())
        if not yml:
            if os.path.exists('devices.yml'):
                devices_list.extend(cls.list_ssh_device('devices.yml'))
        else:
            if os.path.exists(yml):
                devices_list.extend(cls.list_ssh_device(yml))
            else:
                MaceLogger.error(ModuleName.RUN,
                                 'no ARM linux device config file found')
        # The local machine is always offered as the last target.
        host = {
            YAMLKeyword.device_name: SystemType.host,
            YAMLKeyword.target_abis: [ABIType.host],
            YAMLKeyword.target_socs: '',
            YAMLKeyword.system: SystemType.host,
            YAMLKeyword.address: None,
        }
        devices_list.append(host)
        return devices_list
# No script entry point: running this file directly does nothing.
if __name__ == '__main__':
pass
import argparse
import os
import sys
import six
import tensorflow as tf
# TODO(liyin): use dataset api and estimator with distributed strategy
......@@ -70,7 +73,7 @@ def images_to_tensors(input_files, image_shape, mean_values=None):
def main(unused_args):
if not os.path.exists(FLAGS.input):
print ("input does not exist: %s" % FLAGS.input)
print("input does not exist: %s" % FLAGS.input)
sys.exit(-1)
input_files = []
......
import argparse
import os
import sys
import six
import numpy as np
import tensorflow as tf
......@@ -53,7 +56,7 @@ def tensors_to_images(input_files, image_shape):
def main(unused_args):
if not os.path.exists(FLAGS.input):
print ("input does not exist: %s" % FLAGS.input)
print("input does not exist: %s" % FLAGS.input)
sys.exit(-1)
input_files = []
......
......@@ -20,16 +20,14 @@ import os
import re
import sh
import struct
import subprocess
import sys
import time
import urllib
import platform
from enum import Enum
import six
import common
from common import abi_to_internal
sys.path.insert(0, "mace/python/tools")
try:
......@@ -89,11 +87,6 @@ class BuildType(object):
code = 'code'
class ModelFormat(object):
    """String constants naming the two model formats: 'file' and 'code'."""
    file, code = 'file', 'code'
def stdout_success(stdout):
stdout_lines = stdout.split("\n")
for line in stdout_lines:
......@@ -181,97 +174,14 @@ def adb_get_all_socs():
def adb_push(src_path, dst_path, serialno):
    """Announce and perform ``adb -s <serialno> push <src_path> <dst_path>``."""
    announcement = "Push %s to %s" % (src_path, dst_path)
    six.print_(announcement)
    push_args = ("-s", serialno, "push", src_path, dst_path)
    sh.adb(*push_args)
def adb_pull(src_path, dst_path, serialno):
    """Copy ``src_path`` from device ``serialno`` to ``dst_path`` on the host.

    The pull is best-effort: a failure is reported on stderr and not
    re-raised.
    """
    six.print_("Pull %s to %s" % (src_path, dst_path))
    try:
        sh.adb("-s", serialno, "pull", src_path, dst_path)
    except Exception as e:
        # Format the exception itself: a generic Exception has no `.stderr`
        # attribute, so reading it would raise AttributeError here and hide
        # the original failure.
        six.print_("Error msg: %s" % e, file=sys.stderr)
def adb_run(abi,
            serialno,
            host_bin_path,
            bin_name,
            args="",
            opencl_profiling=True,
            vlog_level=0,
            device_bin_path="/data/local/tmp/mace",
            out_of_range_check=True,
            address_sanitizer=False,
            simpleperf=False):
    """Push a binary to an adb device, run it and return its output.

    The device is locked for the whole operation. ``device_bin_path`` is
    wiped and recreated before ``<host_bin_path>/<bin_name>`` is pushed.

    Args:
        abi: target ABI, used to locate the ASan runtime / simpleperf binary.
        serialno: adb serial number of the device.
        host_bin_path: host directory that contains the binary.
        bin_name: file name of the binary.
        args: extra command line passed after the binary path.
        opencl_profiling: exported as ``MACE_OPENCL_PROFILING`` (1 or 0).
        vlog_level: exported as ``MACE_CPP_MIN_VLOG_LEVEL``.
        device_bin_path: working directory on the device.
        out_of_range_check: exported as ``MACE_OUT_OF_RANGE_CHECK`` (1 or 0).
        address_sanitizer: push the ASan runtime and ``LD_PRELOAD`` it.
        simpleperf: wrap the binary in ``simpleperf stat``.

    Returns:
        Everything the command wrote to stdout/stderr, as one string.
    """
    host_bin_full_path = "%s/%s" % (host_bin_path, bin_name)
    device_bin_full_path = "%s/%s" % (device_bin_path, bin_name)
    props = adb_getprop_by_serialno(serialno)
    six.print_(
        "====================================================================="
    )
    six.print_("Trying to lock device %s" % serialno)
    with device_lock(serialno):
        six.print_("Run on device: %s, %s, %s" %
                   (serialno, props["ro.board.platform"],
                    props["ro.product.model"]))
        sh.adb("-s", serialno, "shell", "rm -rf %s" % device_bin_path)
        sh.adb("-s", serialno, "shell", "mkdir -p %s" % device_bin_path)
        adb_push(host_bin_full_path, device_bin_full_path, serialno)
        ld_preload = ""
        if address_sanitizer:
            adb_push(find_asan_rt_library(abi), device_bin_path, serialno)
            # A stray trailing comma used to turn this value into a 1-tuple;
            # keep it a plain string like the non-ASan default above.
            ld_preload = "LD_PRELOAD=%s/%s" % (device_bin_path,
                                               asan_rt_library_names(abi))
        opencl_profiling = 1 if opencl_profiling else 0
        out_of_range_check = 1 if out_of_range_check else 0
        six.print_("Run %s" % device_bin_full_path)
        stdout_buff = []
        process_output = make_output_processor(stdout_buff)

        # Arguments shared by both launch modes.
        adb_args = [
            "-s",
            serialno,
            "shell",
            ld_preload,
            "MACE_OUT_OF_RANGE_CHECK=%d" % out_of_range_check,
            "MACE_OPENCL_PROFILING=%d" % opencl_profiling,
            "MACE_CPP_MIN_VLOG_LEVEL=%d" % vlog_level,
        ]
        if simpleperf:
            adb_push(find_simpleperf_library(abi), device_bin_path, serialno)
            simpleperf_cmd = "%s/simpleperf" % device_bin_path
            adb_args.extend([
                simpleperf_cmd,
                "stat",
                "--group",
                "raw-l1-dcache,raw-l1-dcache-refill",
                "--group",
                "raw-l2-dcache,raw-l2-dcache-refill",
                "--group",
                "raw-l1-dtlb,raw-l1-dtlb-refill",
                "--group",
                "raw-l2-dtlb,raw-l2-dtlb-refill",
            ])
        adb_args.extend([device_bin_full_path, args])

        sh.adb(*adb_args,
               _tty_in=True,
               _out=process_output,
               _err_to_out=True)
        return "".join(stdout_buff)
six.print_("Error msg: %s" % e, file=sys.stderr)
################################
......@@ -293,7 +203,7 @@ def find_asan_rt_library(abi, asan_rt_path=''):
if len(candidates) == 0:
common.MaceLogger.error(
"Toolchain",
"Can't find AddressSanitizer runtime library in % s" %
"Can't find AddressSanitizer runtime library in %s" %
find_path)
elif len(candidates) > 1:
common.MaceLogger.info(
......@@ -338,6 +248,7 @@ def find_simpleperf_library(abi, simpleperf_path=''):
################################
def bazel_build(target,
abi="armeabi-v7a",
toolchain='android',
hexagon_mode=False,
enable_openmp=True,
enable_neon=True,
......@@ -361,8 +272,8 @@ def bazel_build(target,
"build",
target,
"--config",
"android",
"--cpu=%s" % abi,
toolchain,
"--cpu=%s" % abi_to_internal(abi),
"--define",
"neon=%s" % str(enable_neon).lower(),
"--define",
......@@ -434,15 +345,6 @@ def gen_mace_engine_factory_source(model_tags,
six.print_("Generate mace engine creator source done!\n")
def pull_file_from_device(serial_num, file_path, file_name, output_dir):
    """Pull ``<file_path>/<file_name>`` from a device into ``output_dir``.

    Args:
        serial_num: adb serial number of the device.
        file_path: directory on the device that contains the file.
        file_name: name of the file to pull.
        output_dir: host directory to pull into; created when missing.
    """
    if not os.path.exists(output_dir):
        sh.mkdir("-p", output_dir)
    # Drop a stale copy of the file about to be pulled. The check previously
    # joined output_dir with the device directory (file_path), which never
    # names the pulled file. Guard against an empty name so the output
    # directory itself is never removed.
    if file_name:
        output_path = "%s/%s" % (output_dir, file_name)
        if os.path.exists(output_path):
            sh.rm('-rf', output_path)
    adb_pull(file_path + '/' + file_name, output_dir, serial_num)
def merge_opencl_binaries(binaries_dirs,
cl_compiled_program_file_name,
output_file_path):
......@@ -691,233 +593,21 @@ def push_depended_so_libs(libmace_dynamic_library_path,
abi, phone_data_dir, serialno):
dep_so_libs = sh.bash(os.environ["ANDROID_NDK_HOME"] + "/ndk-depends",
libmace_dynamic_library_path)
src_file = ""
for dep in split_stdout(dep_so_libs):
if dep == "libgnustl_shared.so":
adb_push(
"%s/sources/cxx-stl/gnu-libstdc++/4.9/libs/%s/libgnustl_shared.so" # noqa
% (os.environ["ANDROID_NDK_HOME"], abi),
phone_data_dir,
serialno)
src_file = "%s/sources/cxx-stl/gnu-libstdc++/4.9/libs/" \
"%s/libgnustl_shared.so"\
% (os.environ["ANDROID_NDK_HOME"], abi)
elif dep == "libc++_shared.so":
adb_push(
"%s/sources/cxx-stl/llvm-libc++/libs/%s/libc++_shared.so" # noqa
% (os.environ["ANDROID_NDK_HOME"], abi),
phone_data_dir,
serialno)
# Run the binary `target_name` for model `model_tag`, either locally
# (abi == "host") or on an Android device through adb, and return the
# captured output.
#
# Host path: starts `<target_dir>/<target_name>` with subprocess.Popen under
# `env`, passing the MACE_* variables and the --flags assembled below, then
# prints stderr followed by stdout.
# Device path: creates `phone_data_dir`, pushes the input files, optional
# model data, OpenCL files, the model graph, optional dynamic libraries and
# the binary, writes the full command line to a script file, pushes that
# script, runs it with `sh` on the device and removes it afterwards.
#
# NOTE(review): leading indentation is missing in this copy of the file, so
# the exact nesting of the statements is not restated in these comments —
# confirm against the real source before relying on it.
def tuning_run(abi,
serialno,
target_dir,
target_name,
vlog_level,
embed_model_data,
model_output_dir,
input_nodes,
output_nodes,
input_shapes,
output_shapes,
mace_model_dir,
model_tag,
device_type,
running_round,
restart_round,
limit_opencl_kernel_time,
tuning,
out_of_range_check,
phone_data_dir,
model_graph_format,
opencl_binary_file,
opencl_parameter_file,
libmace_dynamic_library_path,
omp_num_threads=-1,
cpu_affinity_policy=1,
gpu_perf_hint=3,
gpu_priority_hint=3,
input_file_name="model_input",
output_file_name="model_out",
input_dir="",
output_dir="",
runtime_failure_ratio=0.0,
address_sanitizer=False,
link_dynamic=False,
quantize_stat=False):
# Echo the run configuration before doing any work.
six.print_("* Run '%s' with round=%s, restart_round=%s, tuning=%s, "
"out_of_range_check=%s, omp_num_threads=%s, "
"cpu_affinity_policy=%s, gpu_perf_hint=%s, "
"gpu_priority_hint=%s" %
(model_tag, running_round, restart_round, str(tuning),
str(out_of_range_check), omp_num_threads, cpu_affinity_policy,
gpu_perf_hint, gpu_priority_hint))
sys.stdout.flush()
# The graph file path is only filled in for the 'file' model format;
# otherwise an empty --model_file value is passed.
mace_model_path = ""
if model_graph_format == ModelFormat.file:
mace_model_path = "%s/%s.pb" % (mace_model_dir, model_tag)
if abi == "host":
libmace_dynamic_lib_path = \
os.path.dirname(libmace_dynamic_library_path)
# `env VAR=... binary --flags` so the variables apply to the child only.
cmd = [
"env",
"LD_LIBRARY_PATH=%s" % libmace_dynamic_lib_path,
"MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
"MACE_RUNTIME_FAILURE_RATIO=%f" % runtime_failure_ratio,
]
# quantize_stat adds MACE_LOG_TENSOR_RANGE=1 to the environment.
if quantize_stat:
cmd.append("MACE_LOG_TENSOR_RANGE=1")
cmd.extend([
"%s/%s" % (target_dir, target_name),
"--model_name=%s" % model_tag,
"--input_node=%s" % ",".join(input_nodes),
"--output_node=%s" % ",".join(output_nodes),
"--input_shape=%s" % ":".join(input_shapes),
"--output_shape=%s" % ":".join(output_shapes),
"--input_file=%s/%s" % (model_output_dir, input_file_name),
"--output_file=%s/%s" % (model_output_dir, output_file_name),
"--input_dir=%s" % input_dir,
"--output_dir=%s" % output_dir,
"--model_data_file=%s/%s.data" % (mace_model_dir, model_tag),
"--device=%s" % device_type,
"--round=%s" % running_round,
"--restart_round=%s" % restart_round,
"--omp_num_threads=%s" % omp_num_threads,
"--cpu_affinity_policy=%s" % cpu_affinity_policy,
"--gpu_perf_hint=%s" % gpu_perf_hint,
"--gpu_priority_hint=%s" % gpu_priority_hint,
"--model_file=%s" % mace_model_path,
])
p = subprocess.Popen(
cmd,
stderr=subprocess.PIPE,
stdout=subprocess.PIPE)
out, err = p.communicate()
# stderr is placed ahead of stdout in the combined output.
stdout = err + out
six.print_(stdout)
six.print_("Running finished!\n")
else:
sh.adb("-s", serialno, "shell", "mkdir", "-p", phone_data_dir)
internal_storage_dir = create_internal_storage_dir(
serialno, phone_data_dir)
# One input file per input node, named via common.formatted_file_name().
for input_name in input_nodes:
formatted_name = common.formatted_file_name(input_file_name,
input_name)
adb_push("%s/%s" % (model_output_dir, formatted_name),
phone_data_dir, serialno)
if address_sanitizer:
adb_push(find_asan_rt_library(abi), phone_data_dir, serialno)
if not embed_model_data:
adb_push("%s/%s.data" % (mace_model_dir, model_tag),
phone_data_dir, serialno)
if device_type == common.DeviceType.GPU:
if os.path.exists(opencl_binary_file):
adb_push(opencl_binary_file, phone_data_dir, serialno)
if os.path.exists(opencl_parameter_file):
adb_push(opencl_parameter_file, phone_data_dir, serialno)
adb_push("third_party/nnlib/libhexagon_controller.so",
phone_data_dir, serialno)
mace_model_phone_path = ""
if model_graph_format == ModelFormat.file:
mace_model_phone_path = "%s/%s.pb" % (phone_data_dir, model_tag)
adb_push(mace_model_path,
mace_model_phone_path,
serialno)
if link_dynamic:
adb_push(libmace_dynamic_library_path, phone_data_dir,
serialno)
push_depended_so_libs(libmace_dynamic_library_path, abi,
phone_data_dir, serialno)
adb_push("%s/%s" % (target_dir, target_name), phone_data_dir,
serialno)
stdout_buff = []
process_output = make_output_processor(stdout_buff)
# Environment assignments first, then the binary and its flags; the list
# is joined into a single shell command line further down.
adb_cmd = [
"LD_LIBRARY_PATH=%s" % phone_data_dir,
"MACE_TUNING=%s" % int(tuning),
"MACE_OUT_OF_RANGE_CHECK=%s" % int(out_of_range_check),
"MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
"MACE_RUN_PARAMETER_PATH=%s/mace_run.config" % phone_data_dir,
"MACE_INTERNAL_STORAGE_PATH=%s" % internal_storage_dir,
"MACE_LIMIT_OPENCL_KERNEL_TIME=%s" % limit_opencl_kernel_time,
"MACE_RUNTIME_FAILURE_RATIO=%f" % runtime_failure_ratio,
]
if quantize_stat:
adb_cmd.append("MACE_LOG_TENSOR_RANGE=1")
if address_sanitizer:
adb_cmd.extend([
"LD_PRELOAD=%s/%s" % (phone_data_dir,
asan_rt_library_names(abi))
])
adb_cmd.extend([
"%s/%s" % (phone_data_dir, target_name),
"--model_name=%s" % model_tag,
"--input_node=%s" % ",".join(input_nodes),
"--output_node=%s" % ",".join(output_nodes),
"--input_shape=%s" % ":".join(input_shapes),
"--output_shape=%s" % ":".join(output_shapes),
"--input_file=%s/%s" % (phone_data_dir, input_file_name),
"--output_file=%s/%s" % (phone_data_dir, output_file_name),
"--input_dir=%s" % input_dir,
"--output_dir=%s" % output_dir,
"--model_data_file=%s/%s.data" % (phone_data_dir, model_tag),
"--device=%s" % device_type,
"--round=%s" % running_round,
"--restart_round=%s" % restart_round,
"--omp_num_threads=%s" % omp_num_threads,
"--cpu_affinity_policy=%s" % cpu_affinity_policy,
"--gpu_perf_hint=%s" % gpu_perf_hint,
"--gpu_priority_hint=%s" % gpu_priority_hint,
"--model_file=%s" % mace_model_phone_path,
"--opencl_binary_file=%s/%s" %
(phone_data_dir, os.path.basename(opencl_binary_file)),
"--opencl_parameter_file=%s/%s" %
(phone_data_dir, os.path.basename(opencl_parameter_file)),
])
adb_cmd = ' '.join(adb_cmd)
# The command line is written to a local script whose name carries the
# model tag and a timestamp, pushed to the device and executed with `sh`.
# NOTE(review): presumably done to avoid adb shell quoting/length limits —
# confirm.
cmd_file_name = "%s-%s-%s" % ('cmd_file', model_tag, str(time.time()))
adb_cmd_file = "%s/%s" % (phone_data_dir, cmd_file_name)
tmp_cmd_file = "%s/%s" % ('/tmp', cmd_file_name)
with open(tmp_cmd_file, 'w') as cmd_file:
cmd_file.write(adb_cmd)
adb_push(tmp_cmd_file, adb_cmd_file, serialno)
os.remove(tmp_cmd_file)
sh.adb(
"-s",
serialno,
"shell",
"sh",
adb_cmd_file,
_tty_in=True,
_out=process_output,
_err_to_out=True)
stdout = "".join(stdout_buff)
# Report a failed run when stdout_success() rejects the captured output.
if not stdout_success(stdout):
common.MaceLogger.error("Mace Run", "Mace run failed.")
# Remove the pushed command script from the device.
sh.adb(
"-s",
serialno,
"shell",
"rm",
adb_cmd_file,
_fg=True)
six.print_("Running finished!\n")
sys.stdout.flush()
return stdout
src_file = "%s/sources/cxx-stl/llvm-libc++/libs/" \
"%s/libc++_shared.so" % (os.environ["ANDROID_NDK_HOME"], abi)
print("push %s to %s" % (src_file, phone_data_dir))
adb_push(src_file, phone_data_dir, serialno)
def validate_model(abi,
serialno,
device,
model_file_path,
weight_file_path,
platform,
......@@ -927,7 +617,6 @@ def validate_model(abi,
input_shapes,
output_shapes,
model_output_dir,
phone_data_dir,
input_data_types,
caffe_env,
input_file_name="model_input",
......@@ -941,8 +630,7 @@ def validate_model(abi,
if os.path.exists("%s/%s" % (model_output_dir,
formatted_name)):
sh.rm("-rf", "%s/%s" % (model_output_dir, formatted_name))
adb_pull("%s/%s" % (phone_data_dir, formatted_name),
model_output_dir, serialno)
device.pull_from_data_dir(formatted_name, model_output_dir)
if platform == "tensorflow":
validate(platform, model_file_path, "",
......@@ -956,11 +644,10 @@ def validate_model(abi,
container_name = "mace_caffe_validator"
if caffe_env == common.CaffeEnvType.LOCAL:
import imp
try:
imp.find_module('caffe')
import caffe
except ImportError:
logger.error('There is no caffe python module.')
logging.error('There is no caffe python module.')
validate(platform, model_file_path, weight_file_path,
"%s/%s" % (model_output_dir, input_file_name),
"%s/%s" % (model_output_dir, output_file_name),
......@@ -1075,149 +762,6 @@ def packaging_lib(libmace_output_dir, project_name):
################################
# benchmark
################################
def benchmark_model(abi,
                    serialno,
                    benchmark_binary_dir,
                    benchmark_binary_name,
                    vlog_level,
                    embed_model_data,
                    model_output_dir,
                    mace_model_dir,
                    input_nodes,
                    output_nodes,
                    input_shapes,
                    output_shapes,
                    model_tag,
                    device_type,
                    phone_data_dir,
                    model_graph_format,
                    opencl_binary_file,
                    opencl_parameter_file,
                    libmace_dynamic_library_path,
                    omp_num_threads=-1,
                    cpu_affinity_policy=1,
                    gpu_perf_hint=3,
                    gpu_priority_hint=3,
                    input_file_name="model_input",
                    link_dynamic=False):
    """Run the benchmark binary for one model on the host or an adb device.

    With ``abi == "host"`` the binary is started locally through
    ``subprocess.Popen`` and waited for. Otherwise the inputs, optional model
    data, OpenCL files, model graph, optional dynamic libraries and the
    binary are pushed to ``phone_data_dir`` on device ``serialno``; the
    command line is written to a script file that is pushed, executed with
    ``sh`` and removed again.

    Output is not captured; nothing is returned.
    """
    six.print_("* Benchmark for %s" % model_tag)
    # The graph file path is only filled in for the 'file' model format.
    mace_model_path = ""
    if model_graph_format == ModelFormat.file:
        mace_model_path = "%s/%s.pb" % (mace_model_dir, model_tag)
    if abi == "host":
        libmace_dynamic_lib_dir_path = \
            os.path.dirname(libmace_dynamic_library_path)
        p = subprocess.Popen(
            [
                "env",
                "LD_LIBRARY_PATH=%s" % libmace_dynamic_lib_dir_path,
                "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
                "%s/%s" % (benchmark_binary_dir, benchmark_binary_name),
                "--model_name=%s" % model_tag,
                "--input_node=%s" % ",".join(input_nodes),
                "--output_node=%s" % ",".join(output_nodes),
                "--input_shape=%s" % ":".join(input_shapes),
                "--output_shape=%s" % ":".join(output_shapes),
                "--input_file=%s/%s" % (model_output_dir, input_file_name),
                "--model_data_file=%s/%s.data" % (mace_model_dir, model_tag),
                "--device=%s" % device_type,
                "--omp_num_threads=%s" % omp_num_threads,
                "--cpu_affinity_policy=%s" % cpu_affinity_policy,
                "--gpu_perf_hint=%s" % gpu_perf_hint,
                "--gpu_priority_hint=%s" % gpu_priority_hint,
                "--model_file=%s" % mace_model_path,
            ])
        p.wait()
    else:
        sh.adb("-s", serialno, "shell", "mkdir", "-p", phone_data_dir)
        internal_storage_dir = create_internal_storage_dir(
            serialno, phone_data_dir)
        # One input file per input node.
        for input_name in input_nodes:
            formatted_name = common.formatted_file_name(input_file_name,
                                                        input_name)
            adb_push("%s/%s" % (model_output_dir, formatted_name),
                     phone_data_dir, serialno)
        if not embed_model_data:
            adb_push("%s/%s.data" % (mace_model_dir, model_tag),
                     phone_data_dir, serialno)
        if device_type == common.DeviceType.GPU:
            if os.path.exists(opencl_binary_file):
                adb_push(opencl_binary_file, phone_data_dir, serialno)
            if os.path.exists(opencl_parameter_file):
                adb_push(opencl_parameter_file, phone_data_dir, serialno)
        mace_model_phone_path = ""
        if model_graph_format == ModelFormat.file:
            mace_model_phone_path = "%s/%s.pb" % (phone_data_dir, model_tag)
            adb_push(mace_model_path,
                     mace_model_phone_path,
                     serialno)
        if link_dynamic:
            adb_push(libmace_dynamic_library_path, phone_data_dir,
                     serialno)
            # Was `push_depended_so_lib` (missing trailing "s"), an undefined
            # name that raised NameError whenever link_dynamic was set.
            push_depended_so_libs(libmace_dynamic_library_path, abi,
                                  phone_data_dir, serialno)
        adb_push("%s/%s" % (benchmark_binary_dir, benchmark_binary_name),
                 phone_data_dir,
                 serialno)
        adb_cmd = [
            "LD_LIBRARY_PATH=%s" % phone_data_dir,
            "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
            "MACE_RUN_PARAMETER_PATH=%s/mace_run.config" %
            phone_data_dir,
            "MACE_INTERNAL_STORAGE_PATH=%s" % internal_storage_dir,
            "MACE_OPENCL_PROFILING=1",
            "%s/%s" % (phone_data_dir, benchmark_binary_name),
            "--model_name=%s" % model_tag,
            "--input_node=%s" % ",".join(input_nodes),
            "--output_node=%s" % ",".join(output_nodes),
            "--input_shape=%s" % ":".join(input_shapes),
            "--output_shape=%s" % ":".join(output_shapes),
            "--input_file=%s/%s" % (phone_data_dir, input_file_name),
            "--model_data_file=%s/%s.data" % (phone_data_dir, model_tag),
            "--device=%s" % device_type,
            "--omp_num_threads=%s" % omp_num_threads,
            "--cpu_affinity_policy=%s" % cpu_affinity_policy,
            "--gpu_perf_hint=%s" % gpu_perf_hint,
            "--gpu_priority_hint=%s" % gpu_priority_hint,
            "--model_file=%s" % mace_model_phone_path,
            "--opencl_binary_file=%s/%s" %
            (phone_data_dir, os.path.basename(opencl_binary_file)),
            "--opencl_parameter_file=%s/%s" %
            (phone_data_dir, os.path.basename(opencl_parameter_file)),
        ]
        adb_cmd = ' '.join(adb_cmd)
        # The command line is shipped as a script named after the model tag
        # and a timestamp, then executed on the device with `sh`.
        cmd_file_name = "%s-%s-%s" % ('cmd_file', model_tag, str(time.time()))
        adb_cmd_file = "%s/%s" % (phone_data_dir, cmd_file_name)
        tmp_cmd_file = "%s/%s" % ('/tmp', cmd_file_name)
        with open(tmp_cmd_file, 'w') as cmd_file:
            cmd_file.write(adb_cmd)
        try:
            adb_push(tmp_cmd_file, adb_cmd_file, serialno)
        finally:
            # Do not leave the local script behind when the push fails.
            os.remove(tmp_cmd_file)
        sh.adb(
            "-s",
            serialno,
            "shell",
            "sh",
            adb_cmd_file,
            _fg=True)
        sh.adb(
            "-s",
            serialno,
            "shell",
            "rm",
            adb_cmd_file,
            _fg=True)
    six.print_("Benchmark done!\n")
def build_run_throughput_test(abi,
serialno,
vlog_level,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册