提交 3cdf9973 编写于 作者: 叶剑武

Merge branch 'device_support' into 'master'

Device support

See merge request !898
...@@ -47,8 +47,13 @@ ops_test: ...@@ -47,8 +47,13 @@ ops_test:
stage: ops_test stage: ops_test
script: script:
- if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi - if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi
- python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS - >
- python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS --enable_neon=false if ping -c 1 v9.git.n.xiaomi.com 1>/dev/null 2>&1; then
GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git
DEVICE_CONF_FILE=generic-mobile-devices/devices.yml
fi
- python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64,armhf --target_socs=$TARGET_SOCS
- python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64,armhf --target_socs=$TARGET_SOCS --enable_neon=false
api_test: api_test:
stage: api_test stage: api_test
...@@ -68,14 +73,19 @@ extra_tests: ...@@ -68,14 +73,19 @@ extra_tests:
stage: extra_tests stage: extra_tests
script: script:
- if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi - if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi
- python tools/bazel_adb_run.py --target="//mace/utils:tuner_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS - >
if ping -c 1 v9.git.n.xiaomi.com 1>/dev/null 2>&1; then
GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git
DEVICE_CONF_FILE=generic-mobile-devices/devices.yml
fi
- python tools/bazel_adb_run.py --target="//mace/utils:tuner_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64 --target_socs=$TARGET_SOCS
platform_compatible_tests: platform_compatible_tests:
stage: platform_compatible_tests stage: platform_compatible_tests
script: script:
- bazel build mace/core:core --define openmp=true - bazel build mace/core:core --define openmp=true
- bazel build --config arm_linux --define openmp=true --define opencl=true --define neon=true //mace/libmace:libmace.so - bazel build --config arm_linux_gnueabihf --define openmp=true --define opencl=true --define neon=true //mace/libmace:libmace.so
- bazel build --config aarch64_linux --define openmp=true --define opencl=true --define neon=true //mace/libmace:libmace.so - bazel build --config aarch64_linux_gnu --define openmp=true --define opencl=true --define neon=true //mace/libmace:libmace.so
build_libraries: build_libraries:
stage: build_libraries stage: build_libraries
...@@ -87,6 +97,11 @@ ndk_versions_compatible_tests: ...@@ -87,6 +97,11 @@ ndk_versions_compatible_tests:
script: script:
- DEFAULT_NDK_PATH=$ANDROID_NDK_HOME - DEFAULT_NDK_PATH=$ANDROID_NDK_HOME
- prefix_path=${DEFAULT_NDK_PATH%android-ndk-*} - prefix_path=${DEFAULT_NDK_PATH%android-ndk-*}
- >
if ping -c 1 v9.git.n.xiaomi.com 1>/dev/null 2>&1; then
GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git
DEVICE_CONF_FILE=generic-mobile-devices/devices.yml
fi
- > - >
for ndk in android-ndk-r12b android-ndk-r15c android-ndk-r16 android-ndk-r17b; for ndk in android-ndk-r12b android-ndk-r15c android-ndk-r16 android-ndk-r17b;
do do
...@@ -96,8 +111,8 @@ ndk_versions_compatible_tests: ...@@ -96,8 +111,8 @@ ndk_versions_compatible_tests:
export PATH=$ANDROID_NDK_HOME:$PATH; export PATH=$ANDROID_NDK_HOME:$PATH;
echo "ndk path: $ANDROID_NDK_HOME"; echo "ndk path: $ANDROID_NDK_HOME";
if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi
python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS --args="--gtest_filter=ActivationOpTest*"; python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64 --target_socs=$TARGET_SOCS --args="--gtest_filter=ActivationOpTest*";
python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS --enable_neon=false --args="--gtest_filter=ActivationOpTest*"; python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64 --target_socs=$TARGET_SOCS --enable_neon=false --args="--gtest_filter=ActivationOpTest*";
fi fi
done done
- export ANDROID_NDK_HOME=$DEFAULT_NDK_PATH - export ANDROID_NDK_HOME=$DEFAULT_NDK_PATH
...@@ -111,16 +126,27 @@ python_tools_tests: ...@@ -111,16 +126,27 @@ python_tools_tests:
- GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@github.com:XiaoMi/mace-models.git - GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@github.com:XiaoMi/mace-models.git
- CONF_FILE=mace-models/mobilenet-v2/mobilenet-v2.yml - CONF_FILE=mace-models/mobilenet-v2/mobilenet-v2.yml
- > - >
python tools/converter.py convert --config=${CONF_FILE} --target_abis=armeabi-v7a --model_graph_format=file --model_data_format=file || exit 1; if ping -c 1 v9.git.n.xiaomi.com 1>/dev/null 2>&1; then
python tools/converter.py run --config=${CONF_FILE} --round=1 --target_abis=armeabi-v7a --validate --model_graph_format=file --model_data_format=file || exit 1; GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git
python tools/converter.py run --config=${CONF_FILE} --example --target_abis=armeabi-v7a --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1; DEVICE_CONF_FILE=generic-mobile-devices/devices.yml
fi
- >
python tools/converter.py convert --config=${CONF_FILE} --target_abis=armeabi-v7a,arm64 --model_graph_format=file --model_data_format=file || exit 1;
python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --round=1 --target_abis=armeabi-v7a,arm64 --validate --model_graph_format=file --model_data_format=file || exit 1;
python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --example --target_abis=armeabi-v7a,arm64 --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1;
model_tests: model_tests:
stage: model_tests stage: model_tests
script: script:
- pwd - pwd
- rm -rf mace-models - rm -rf mace-models
- rm -rf generic-mobile-devices
- GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@github.com:XiaoMi/mace-models.git - GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@github.com:XiaoMi/mace-models.git
- >
if ping -c 1 v9.git.n.xiaomi.com 1>/dev/null 2>&1; then
GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git
DEVICE_CONF_FILE=generic-mobile-devices/devices.yml
fi
- > - >
for CONF_FILE in mace-models/mobilenet-v1/mobilenet-v1.yml mace-models/mobilenet-v1/mobilenet-v1-quantize-retrain.yml; for CONF_FILE in mace-models/mobilenet-v1/mobilenet-v1.yml mace-models/mobilenet-v1/mobilenet-v1-quantize-retrain.yml;
do do
...@@ -131,8 +157,8 @@ model_tests: ...@@ -131,8 +157,8 @@ model_tests:
- CONF_FILE=mace-models/mobilenet-v2/mobilenet-v2-host.yml - CONF_FILE=mace-models/mobilenet-v2/mobilenet-v2-host.yml
- > - >
python tools/converter.py convert --config=${CONF_FILE} --model_graph_format=file --model_data_format=file || exit 1; python tools/converter.py convert --config=${CONF_FILE} --model_graph_format=file --model_data_format=file || exit 1;
python tools/converter.py run --config=${CONF_FILE} --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1; python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1;
python tools/converter.py run --config=${CONF_FILE} --example --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1; python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --example --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1;
- rm -rf mace-models - rm -rf mace-models
build_android_demo: build_android_demo:
......
...@@ -35,7 +35,7 @@ Required dependencies ...@@ -35,7 +35,7 @@ Required dependencies
- Required by model validation - Required by model validation
* - six * - six
- pip install -I six==1.11.0 - pip install -I six==1.11.0
- Required for Python 2 and 3 compatibility (TODO) - Required for Python 2 and 3 compatibility
Optional dependencies Optional dependencies
--------------------- ---------------------
......
...@@ -109,13 +109,66 @@ in one deployment file. ...@@ -109,13 +109,66 @@ in one deployment file.
sha256sum /path/to/your/file sha256sum /path/to/your/file
Advanced usage Advanced usage
-------------- --------------
There are two common advanced use cases: There are three common advanced use cases:
- running your model on an embedded device (ARM Linux).
- converting model to C++ code. - converting model to C++ code.
- tuning GPU kernels for a specific SoC. - tuning GPU kernels for a specific SoC.
Run your model on the embedded device (ARM Linux)
--------------------------------------------------
Running your model on ARM Linux is nearly the same as on Android, except that you need to specify a device config file.
.. code:: bash
python tools/converter.py run --config=/path/to/your/model_deployment_file.yml --device_yml=/path/to/devices.yml
There are two steps to complete before running:
1. configure login without password
MACE uses ssh to connect to the embedded device, so you should copy your public key to the embedded device with the command below.
.. code:: bash
cat ~/.ssh/id_rsa.pub | ssh -q {user}@{ip} "cat >> ~/.ssh/authorized_keys"
2. write your own device yaml configuration file.
* **Example**
Here is a device yaml config demo.
.. literalinclude:: devices/demo_device_nanopi.yml
:language: yaml
* **Configuration**
The detailed explanation is listed in the table below.
.. list-table::
:header-rows: 1
* - Options
- Usage
* - target_abis
- Device supported abis, you can get it via ``dpkg --print-architecture`` and
``dpkg --print-foreign-architectures`` command, if more than one abi is supported,
separate them by commas.
* - target_socs
- device soc, you can get it from device manual, we haven't found a way to get it in shell.
* - models
- device model's full name, you can get it via the ``lshw`` command (third party package, install it via your package manager);
see its product value.
* - address
- Since we use ssh to connect device, ip address is required.
* - username
- login username, required.
Convert model(s) to C++ code Convert model(s) to C++ code
-------------------------------- --------------------------------
...@@ -403,6 +456,7 @@ Reduce Library Size ...@@ -403,6 +456,7 @@ Reduce Library Size
- It is recommended to use ``version script`` and ``strip`` feature when linking mace static library. The effect is remarkable. - It is recommended to use ``version script`` and ``strip`` feature when linking mace static library. The effect is remarkable.
* Remove the unused ops. * Remove the unused ops.
Remove the registration of the ops unused for your models in the ``mace/ops/ops_register.cc``, Remove the registration of the ops unused for your models in the ``mace/ops/ops_register.cc``,
which will reduce the library size significantly. the final binary just link the registered ops' code. which will reduce the library size significantly. the final binary just link the registered ops' code.
......
...@@ -68,7 +68,8 @@ Here we use the mobilenet-v2 model as an example. ...@@ -68,7 +68,8 @@ Here we use the mobilenet-v2 model as an example.
.. note:: .. note::
If you want to run on device/phone, please plug in at least one device/phone. If you want to run on phone, please plug in at least one phone.
Or if you want to run on an embedded device, please refer to :doc:`advanced_usage`.
.. code:: sh .. code:: sh
...@@ -245,10 +246,14 @@ to run and validate your model. ...@@ -245,10 +246,14 @@ to run and validate your model.
# Test model run time # Test model run time
python tools/converter.py run --config=/path/to/your/model_deployment_file.yml --round=100 python tools/converter.py run --config=/path/to/your/model_deployment_file.yml --round=100
# Validate the correctness by comparing the results against the # Validate the correctness by comparing the results against the
# original model and framework, measured with cosine distance for similarity. # original model and framework, measured with cosine distance for similarity.
python tools/converter.py run --config=/path/to/your/model_deployment_file.yml --validate python tools/converter.py run --config=/path/to/your/model_deployment_file.yml --validate
# If you want to run model on specified arm linux device, you should put device config file in the working directory or run with flag `--device_yml`
python tools/converter.py run --config=/path/to/your/model_deployment_file.yml --device_yml=/path/to/devices.yml
* **benchmark** * **benchmark**
benchmark and profile the model. benchmark and profile the model.
......
...@@ -12,12 +12,9 @@ devices: ...@@ -12,12 +12,9 @@ devices:
address: 10.0.0.0 address: 10.0.0.0
# login username # login username
username: user username: user
# login password, is required when you can login into device without password
password: 1234567
raspberry: raspberry:
target_abis: [armv7l] target_abis: [armv7l]
target_socs: BCM2837 target_socs: BCM2837
models: Raspberry Pi 3 Model B Plus Rev 1.3 models: Raspberry Pi 3 Model B Plus Rev 1.3
address: 10.0.0.1 address: 10.0.0.1
username: user username: user
password: 123456
...@@ -24,6 +24,24 @@ config_setting( ...@@ -24,6 +24,24 @@ config_setting(
visibility = ["//visibility:public"], visibility = ["//visibility:public"],
) )
# Config setting "arm_linux_aarch64": matches builds whose crosstool_top is
# //tools/aarch64_compiler:toolchain and whose cpu is aarch64.
# Publicly visible so that other packages can reference it in select().
config_setting(
name = "arm_linux_aarch64",
values = {
"crosstool_top": "//tools/aarch64_compiler:toolchain",
"cpu": "aarch64",
},
visibility = ["//visibility:public"],
)
# Config setting "arm_linux_armhf": matches builds whose crosstool_top is
# //tools/arm_compiler:toolchain and whose cpu is armeabi-v7a.
# Publicly visible so that other packages can reference it in select().
config_setting(
name = "arm_linux_armhf",
values = {
"crosstool_top": "//tools/arm_compiler:toolchain",
"cpu": "armeabi-v7a",
},
visibility = ["//visibility:public"],
)
config_setting( config_setting(
name = "neon_enabled", name = "neon_enabled",
define_values = { define_values = {
......
...@@ -42,7 +42,7 @@ struct CPUFreq { ...@@ -42,7 +42,7 @@ struct CPUFreq {
}; };
namespace { namespace {
#if defined(__ANDROID__)
int GetCPUCount() { int GetCPUCount() {
int cpu_count = 0; int cpu_count = 0;
std::string cpu_sys_conf = "/proc/cpuinfo"; std::string cpu_sys_conf = "/proc/cpuinfo";
...@@ -69,10 +69,8 @@ int GetCPUCount() { ...@@ -69,10 +69,8 @@ int GetCPUCount() {
VLOG(2) << "CPU cores: " << cpu_count; VLOG(2) << "CPU cores: " << cpu_count;
return cpu_count; return cpu_count;
} }
#endif
int GetCPUMaxFreq(std::vector<float> *max_freqs) { int GetCPUMaxFreq(std::vector<float> *max_freqs) {
#if defined(__ANDROID__)
int cpu_count = GetCPUCount(); int cpu_count = GetCPUCount();
for (int cpu_id = 0; cpu_id < cpu_count; ++cpu_id) { for (int cpu_id = 0; cpu_id < cpu_count; ++cpu_id) {
std::string cpuinfo_max_freq_sys_conf = MakeString( std::string cpuinfo_max_freq_sys_conf = MakeString(
...@@ -94,34 +92,6 @@ int GetCPUMaxFreq(std::vector<float> *max_freqs) { ...@@ -94,34 +92,6 @@ int GetCPUMaxFreq(std::vector<float> *max_freqs) {
} }
f.close(); f.close();
} }
#else
std::string cpu_sys_conf = "/proc/cpuinfo";
std::ifstream f(cpu_sys_conf);
if (!f.is_open()) {
LOG(ERROR) << "failed to open " << cpu_sys_conf;
return -1;
}
std::string line;
const std::string freq_key = "cpu MHz";
while (std::getline(f, line)) {
if (line.size() >= freq_key.size()
&& line.compare(0, freq_key.size(), freq_key) == 0) {
size_t pos = line.find(":");
if (pos != std::string::npos) {
std::string freq_str = line.substr(pos + 1);
float freq = atof(freq_str.c_str());
max_freqs->push_back(freq);
}
}
}
if (f.bad()) {
LOG(ERROR) << "failed to read " << cpu_sys_conf;
}
if (!f.eof()) {
LOG(ERROR) << "failed to read end of " << cpu_sys_conf;
}
f.close();
#endif
for (float freq : *max_freqs) { for (float freq : *max_freqs) {
VLOG(2) << "CPU freq: " << freq; VLOG(2) << "CPU freq: " << freq;
......
# Examples # Examples
load( load(
"//mace:mace.bzl", "//mace:mace.bzl",
"if_openmp_enabled",
"if_android", "if_android",
"if_hexagon_enabled", "if_hexagon_enabled",
"if_opencl_enabled", "if_opencl_enabled",
"if_openmp_enabled",
) )
cc_binary( cc_binary(
...@@ -18,8 +18,9 @@ cc_binary( ...@@ -18,8 +18,9 @@ cc_binary(
]), ]),
linkopts = [ linkopts = [
"-lm", "-lm",
"-ldl",
] + if_openmp_enabled([ ] + if_openmp_enabled([
"-fopenmp" "-fopenmp",
]) + if_android([ ]) + if_android([
"-ldl", "-ldl",
"-pie", "-pie",
...@@ -47,6 +48,7 @@ cc_binary( ...@@ -47,6 +48,7 @@ cc_binary(
]), ]),
linkopts = [ linkopts = [
"-lm", "-lm",
"-ldl",
] + if_android([ ] + if_android([
"-ldl", "-ldl",
"-pie", "-pie",
...@@ -55,8 +57,7 @@ cc_binary( ...@@ -55,8 +57,7 @@ cc_binary(
linkstatic = 0, linkstatic = 0,
deps = [ deps = [
"//external:gflags_nothreads", "//external:gflags_nothreads",
"//mace/codegen:generated_mace_engine_factory",
"//mace/codegen:generated_libmace", "//mace/codegen:generated_libmace",
"//mace/codegen:generated_mace_engine_factory",
], ],
) )
...@@ -24,6 +24,18 @@ def if_android_arm64(a): ...@@ -24,6 +24,18 @@ def if_android_arm64(a):
"//conditions:default": [], "//conditions:default": [],
}) })
def if_arm_linux_aarch64(a):
    """Builds a select() that yields `a` under `//mace:arm_linux_aarch64`.

    Args:
      a: value used when the `//mace:arm_linux_aarch64` condition matches.

    Returns:
      A select() resolving to `a` for that condition and to an empty list
      for `//conditions:default`.
    """
    branches = {
        "//mace:arm_linux_aarch64": a,
        "//conditions:default": [],
    }
    return select(branches)
def if_arm_linux_armhf(a):
    """Builds a select() that yields `a` under `//mace:arm_linux_armhf`.

    Args:
      a: value used when the `//mace:arm_linux_armhf` condition matches.

    Returns:
      A select() resolving to `a` for that condition and to an empty list
      for `//conditions:default`.
    """
    branches = {
        "//mace:arm_linux_armhf": a,
        "//conditions:default": [],
    }
    return select(branches)
def if_neon_enabled(a): def if_neon_enabled(a):
return select({ return select({
"//mace:neon_enabled": a, "//mace:neon_enabled": a,
...@@ -81,4 +93,3 @@ def encrypt_opencl_kernel_genrule(): ...@@ -81,4 +93,3 @@ def encrypt_opencl_kernel_genrule():
outs = ["opencl/encrypt_opencl_kernel.cc"], outs = ["opencl/encrypt_opencl_kernel.cc"],
cmd = "cat $(SRCS) > $@;" cmd = "cat $(SRCS) > $@;"
) )
...@@ -233,7 +233,7 @@ void TestNxNS12(const index_t height, const index_t width) { ...@@ -233,7 +233,7 @@ void TestNxNS12(const index_t height, const index_t width) {
auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w, auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w,
Padding type) { Padding type) {
// generate random input // generate random input
static unsigned int seed = time(NULL); // static unsigned int seed = time(NULL);
index_t batch = 1; index_t batch = 1;
index_t channel = 32; index_t channel = 32;
index_t multiplier = 1; index_t multiplier = 1;
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include <algorithm> #include <algorithm>
#include <cmath>
#include "mace/core/operator.h" #include "mace/core/operator.h"
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include "mace/ops/resize_bicubic.h" #include "mace/ops/resize_bicubic.h"
#include <algorithm> #include <algorithm>
#include <cmath>
#include <memory> #include <memory>
#include <vector> #include <vector>
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include <algorithm> #include <algorithm>
#include <cmath>
#include <vector> #include <vector>
#include "mace/core/operator.h" #include "mace/core/operator.h"
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include <algorithm> #include <algorithm>
#include <cmath>
#include <limits> #include <limits>
#include <memory> #include <memory>
#include <vector> #include <vector>
...@@ -106,7 +107,7 @@ class SoftmaxOp<DeviceType::CPU, float> : public Operation { ...@@ -106,7 +107,7 @@ class SoftmaxOp<DeviceType::CPU, float> : public Operation {
float sum = 0; float sum = 0;
for (index_t c = 0; c < class_count; ++c) { for (index_t c = 0; c < class_count; ++c) {
float exp_value = ::exp(input_ptr[c] - max_val); float exp_value = std::exp(input_ptr[c] - max_val);
sum += exp_value; sum += exp_value;
output_ptr[c] = exp_value; output_ptr[c] = exp_value;
} }
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include <algorithm> #include <algorithm>
#include <cmath>
#include <vector> #include <vector>
#include "mace/core/operator.h" #include "mace/core/operator.h"
......
...@@ -16,8 +16,9 @@ ...@@ -16,8 +16,9 @@
#include <arm_neon.h> #include <arm_neon.h>
#endif #endif
#include <vector>
#include <algorithm> #include <algorithm>
#include <cmath>
#include <vector>
#include "mace/core/operator.h" #include "mace/core/operator.h"
#include "mace/ops/transpose.h" #include "mace/ops/transpose.h"
......
...@@ -112,6 +112,8 @@ TFSupportedOps = [ ...@@ -112,6 +112,8 @@ TFSupportedOps = [
TFOpType = Enum('TFOpType', [(op, op) for op in TFSupportedOps], type=str) TFOpType = Enum('TFOpType', [(op, op) for op in TFSupportedOps], type=str)
TFSupportedOps = [six.b(op) for op in TFSupportedOps]
class TensorflowConverter(base_converter.ConverterInterface): class TensorflowConverter(base_converter.ConverterInterface):
"""A class for convert tensorflow frozen model to mace model. """A class for convert tensorflow frozen model to mace model.
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
import datetime import datetime
import os import os
import six
import uuid import uuid
import numpy as np import numpy as np
import hashlib import hashlib
...@@ -34,8 +35,8 @@ class ModelFormat(object): ...@@ -34,8 +35,8 @@ class ModelFormat(object):
def generate_obfuscated_name(namespace, name): def generate_obfuscated_name(namespace, name):
md5 = hashlib.md5() md5 = hashlib.md5()
md5.update(namespace) md5.update(six.b(namespace))
md5.update(name) md5.update(six.b(name))
md5_digest = md5.hexdigest() md5_digest = md5.hexdigest()
name = md5_digest[:8] name = md5_digest[:8]
......
...@@ -15,8 +15,9 @@ ...@@ -15,8 +15,9 @@
#ifndef MACE_UTILS_QUANTIZE_H_ #ifndef MACE_UTILS_QUANTIZE_H_
#define MACE_UTILS_QUANTIZE_H_ #define MACE_UTILS_QUANTIZE_H_
#include <limits>
#include <algorithm> #include <algorithm>
#include <cmath>
#include <limits>
namespace mace { namespace mace {
......
# Partially borrowed from tensorflow tools/bazel.rc # Partially borrowed from tensorflow tools/bazel.rc
# By default, we don't distinguish target and host platforms. # By default, we don't distinguish target and host platforms.
# When doing cross compilation, use --config=cross_compile to distinct them.
build --distinct_host_configuration=false build --distinct_host_configuration=false
build:cross_compile --distinct_host_configuration=true
build --verbose_failures build --verbose_failures
build --copt=-std=c++11 build --copt=-std=c++11
...@@ -17,34 +15,33 @@ build --copt=-DMACE_USE_NNLIB_CAF ...@@ -17,34 +15,33 @@ build --copt=-DMACE_USE_NNLIB_CAF
build:symbol_hidden --copt=-fvisibility=hidden build:symbol_hidden --copt=-fvisibility=hidden
# Usage example: bazel build --config android # Usage example: bazel build --config android
build:android --config=cross_compile build:android --distinct_host_configuration=true
build:android --crosstool_top=//external:android/crosstool build:android --crosstool_top=//external:android/crosstool
build:android --host_crosstool_top=@bazel_tools//tools/cpp:toolchain build:android --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
# Usage example: bazel build --config arm_linux # Usage example: bazel build --config arm_linux_gnueabihf
build:arm_linux --config=cross_compile build:arm_linux_gnueabihf --distinct_host_configuration=true
build:arm_linux --crosstool_top=//tools/arm_compiler:toolchain build:arm_linux_gnueabihf --crosstool_top=//tools/arm_compiler:toolchain
build:arm_linux --host_crosstool_top=@bazel_tools//tools/cpp:toolchain build:arm_linux_gnueabihf --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
build:arm_linux --cpu=armeabi-v7a build:arm_linux_gnueabihf --cpu=armeabi-v7a
build:arm_linux --copt -mfloat-abi=hard build:arm_linux_gnueabihf --copt -mfloat-abi=hard
build:arm_linux --copt -mfpu=neon build:arm_linux_gnueabihf --copt -mfpu=neon
build:arm_linux --copt -Wno-ignored-attributes build:arm_linux_gnueabihf --copt -Wno-ignored-attributes
build:arm_linux --copt -Wno-unused-function build:arm_linux_gnueabihf --copt -Wno-unused-function
build:arm_linux --copt -Wno-sequence-point build:arm_linux_gnueabihf --copt -Wno-sequence-point
build:arm_linux --copt -Wno-implicit-fallthrough build:arm_linux_gnueabihf --copt -Wno-implicit-fallthrough
build:arm_linux --copt -Wno-psabi
# Usage example: bazel build --config aarch64_linux # Usage example: bazel build --config aarch64_linux_gnu
build:aarch64_linux --config=cross_compile build:aarch64_linux_gnu --distinct_host_configuration=true
build:aarch64_linux --crosstool_top=//tools/aarch64_compiler:toolchain build:aarch64_linux_gnu --crosstool_top=//tools/aarch64_compiler:toolchain
build:aarch64_linux --host_crosstool_top=@bazel_tools//tools/cpp:toolchain build:aarch64_linux_gnu --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
build:aarch64_linux --cpu=aarch64 build:aarch64_linux_gnu --cpu=aarch64
build:aarch64_linux --copt -Wno-ignored-attributes build:aarch64_linux_gnu --copt -Wno-ignored-attributes
build:aarch64_linux --copt -Wno-unused-function build:aarch64_linux_gnu --copt -Wno-unused-function
build:aarch64_linux --copt -Wno-sequence-point build:aarch64_linux_gnu --copt -Wno-sequence-point
build:aarch64_linux --copt -Wno-implicit-fallthrough build:aarch64_linux_gnu --copt -Wno-implicit-fallthrough
# Usage example: bazel build --config optimization # Usage example: bazel build --config optimization
build:optimization -c opt build:optimization -c opt
build:optimization --copt=-O3 build:optimization --copt=-O3
build:optimization --linkopt=-Wl,--strip-all build:optimization --linkopt=-Wl,--strip-all
......
...@@ -26,9 +26,9 @@ import sys ...@@ -26,9 +26,9 @@ import sys
import sh_commands import sh_commands
from common import *
def stdout_processor(stdout, device_properties, abi): from device import DeviceWrapper, DeviceManager
pass
def unittest_stdout_processor(stdout, device_properties, abi): def unittest_stdout_processor(stdout, device_properties, abi):
...@@ -39,7 +39,7 @@ def unittest_stdout_processor(stdout, device_properties, abi): ...@@ -39,7 +39,7 @@ def unittest_stdout_processor(stdout, device_properties, abi):
raise Exception("Command failed") raise Exception("Command failed")
def ops_benchmark_stdout_processor(stdout, device_properties, abi): def ops_benchmark_stdout_processor(stdout, dev, abi):
stdout_lines = stdout.split("\n") stdout_lines = stdout.split("\n")
metrics = {} metrics = {}
for line in stdout_lines: for line in stdout_lines:
...@@ -52,13 +52,13 @@ def ops_benchmark_stdout_processor(stdout, device_properties, abi): ...@@ -52,13 +52,13 @@ def ops_benchmark_stdout_processor(stdout, device_properties, abi):
metrics["%s.input_mb_per_sec" % parts[0]] = parts[3] metrics["%s.input_mb_per_sec" % parts[0]] = parts[3]
metrics["%s.gmacc_per_sec" % parts[0]] = parts[4] metrics["%s.gmacc_per_sec" % parts[0]] = parts[4]
platform = device_properties["ro.board.platform"].replace(" ", "-") # platform = dev[YAMLKeyword.target_socs]
model = device_properties["ro.product.model"].replace(" ", "-") # model = dev[YAMLKeyword.device_name]
tags = { # tags = {
"ro.board.platform": platform, # "ro.board.platform": platform,
"ro.product.model": model, # "ro.product.model": model,
"abi": abi # "abi": abi
} # }
# sh_commands.falcon_push_metrics(server, # sh_commands.falcon_push_metrics(server,
# metrics, tags=tags, endpoint="mace_ops_benchmark") # metrics, tags=tags, endpoint="mace_ops_benchmark")
...@@ -87,7 +87,7 @@ def parse_args(): ...@@ -87,7 +87,7 @@ def parse_args():
type=str, type=str,
default="all", default="all",
help="SoCs (ro.board.platform from getprop) to build, " help="SoCs (ro.board.platform from getprop) to build, "
"comma seperated list or all/random") "comma seperated list or all/random")
parser.add_argument( parser.add_argument(
"--target", type=str, default="//...", help="Bazel target to build") "--target", type=str, default="//...", help="Bazel target to build")
parser.add_argument( parser.add_argument(
...@@ -99,7 +99,7 @@ def parse_args(): ...@@ -99,7 +99,7 @@ def parse_args():
parser.add_argument( parser.add_argument(
"--stdout_processor", "--stdout_processor",
type=str, type=str,
default="stdout_processor", default="unittest_stdout_processor",
help="Stdout processing function, default: stdout_processor") help="Stdout processing function, default: stdout_processor")
parser.add_argument( parser.add_argument(
"--enable_neon", "--enable_neon",
...@@ -115,14 +115,22 @@ def parse_args(): ...@@ -115,14 +115,22 @@ def parse_args():
type=str2bool, type=str2bool,
default=False, default=False,
help="Whether to use simpleperf stat") help="Whether to use simpleperf stat")
parser.add_argument(
'--device_yml',
type=str,
default='',
help='embedded linux device config yml file'
)
return parser.parse_known_args() return parser.parse_known_args()
def main(unused_args): def main(unused_args):
target_socs = None target_socs = None
target_devices = DeviceManager.list_devices(FLAGS.device_yml)
if FLAGS.target_socs != "all" and FLAGS.target_socs != "random": if FLAGS.target_socs != "all" and FLAGS.target_socs != "random":
target_socs = set(FLAGS.target_socs.split(',')) target_socs = set(FLAGS.target_socs.split(','))
target_devices = sh_commands.get_target_socs_serialnos(target_socs) target_devices = [dev for dev in target_devices
if dev[YAMLKeyword.target_socs] in target_socs]
if FLAGS.target_socs == "random": if FLAGS.target_socs == "random":
unlocked_devices = \ unlocked_devices = \
[d for d in target_devices if not sh_commands.is_device_locked(d)] [d for d in target_devices if not sh_commands.is_device_locked(d)]
...@@ -136,31 +144,29 @@ def main(unused_args): ...@@ -136,31 +144,29 @@ def main(unused_args):
target_abis = FLAGS.target_abis.split(',') target_abis = FLAGS.target_abis.split(',')
for target_abi in target_abis: for target_abi in target_abis:
toolchain = infer_toolchain(target_abi)
sh_commands.bazel_build(target, abi=target_abi, sh_commands.bazel_build(target, abi=target_abi,
toolchain=toolchain,
enable_neon=FLAGS.enable_neon, enable_neon=FLAGS.enable_neon,
address_sanitizer=FLAGS.address_sanitizer) address_sanitizer=FLAGS.address_sanitizer)
if FLAGS.run_target: if FLAGS.run_target:
for serialno in target_devices: for dev in target_devices:
if target_abi not in set( if target_abi not in dev[YAMLKeyword.target_abis]:
sh_commands.adb_supported_abis(serialno)):
print("Skip device %s which does not support ABI %s" % print("Skip device %s which does not support ABI %s" %
(serialno, target_abi)) (dev, target_abi))
continue continue
stdouts = sh_commands.adb_run( device_wrapper = DeviceWrapper(dev)
stdouts = device_wrapper.run(
target_abi, target_abi,
serialno,
host_bin_path, host_bin_path,
bin_name, bin_name,
args=FLAGS.args, args=FLAGS.args,
opencl_profiling=True, opencl_profiling=True,
vlog_level=0, vlog_level=0,
device_bin_path="/data/local/tmp/mace",
out_of_range_check=True, out_of_range_check=True,
address_sanitizer=FLAGS.address_sanitizer, address_sanitizer=FLAGS.address_sanitizer,
simpleperf=FLAGS.simpleperf) simpleperf=FLAGS.simpleperf)
device_properties = sh_commands.adb_getprop_by_serialno( globals()[FLAGS.stdout_processor](stdouts, dev,
serialno)
globals()[FLAGS.stdout_processor](stdouts, device_properties,
target_abi) target_abi)
......
...@@ -22,6 +22,14 @@ mkdir -p $LIB_DIR/arm64-v8a/cpu_gpu ...@@ -22,6 +22,14 @@ mkdir -p $LIB_DIR/arm64-v8a/cpu_gpu
rm -rf $LIB_DIR/linux-x86-64 rm -rf $LIB_DIR/linux-x86-64
mkdir -p $LIB_DIR/linux-x86-64 mkdir -p $LIB_DIR/linux-x86-64
rm -rf $LIB_DIR/arm_linux_gnueabihf
mkdir -p $LIB_DIR/arm_linux_gnueabihf/cpu_gpu
rm -rf $LIB_DIR/aarch64_linux_gnu
mkdir -p $LIB_DIR/aarch64_linux_gnu/cpu_gpu
# build shared libraries # build shared libraries
echo "build shared lib for armeabi-v7a + cpu_gpu_dsp" echo "build shared lib for armeabi-v7a + cpu_gpu_dsp"
bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --define hexagon=true --define quantize=true --cpu=armeabi-v7a bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --define hexagon=true --define quantize=true --cpu=armeabi-v7a
...@@ -36,6 +44,14 @@ echo "build shared lib for arm64-v8a + cpu_gpu" ...@@ -36,6 +44,14 @@ echo "build shared lib for arm64-v8a + cpu_gpu"
bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --define quantize=true --cpu=arm64-v8a bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --define quantize=true --cpu=arm64-v8a
cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/arm64-v8a/cpu_gpu/ cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/arm64-v8a/cpu_gpu/
echo "build shared lib for arm_linux_gnueabihf + cpu_gpu"
bazel build --config arm_linux_gnueabihf --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --define quantize=true
cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/arm_linux_gnueabihf/cpu_gpu/
echo "build shared lib for aarch64_linux_gnu + cpu_gpu"
bazel build --config aarch64_linux_gnu --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --define quantize=true
cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/aarch64_linux_gnu/cpu_gpu/
if [[ "$OSTYPE" != "darwin"* ]];then if [[ "$OSTYPE" != "darwin"* ]];then
echo "build shared lib for linux-x86-64" echo "build shared lib for linux-x86-64"
bazel build mace/libmace:libmace_dynamic --config optimization --define quantize=true --define openmp=true bazel build mace/libmace:libmace_dynamic --config optimization --define quantize=true --define openmp=true
...@@ -56,6 +72,14 @@ echo "build static lib for arm64-v8a + cpu_gpu" ...@@ -56,6 +72,14 @@ echo "build static lib for arm64-v8a + cpu_gpu"
bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=true --define opencl=true --define quantize=true --cpu=arm64-v8a bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=true --define opencl=true --define quantize=true --cpu=arm64-v8a
cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/arm64-v8a/cpu_gpu/ cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/arm64-v8a/cpu_gpu/
echo "build static lib for arm_linux_gnueabihf + cpu_gpu"
bazel build --config arm_linux_gnueabihf --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=true --define opencl=true --define quantize=true
cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/arm_linux_gnueabihf/cpu_gpu/
echo "build static lib for aarch64_linux_gnu + cpu_gpu"
bazel build --config aarch64_linux_gnu --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=true --define opencl=true --define quantize=true
cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/aarch64_linux_gnu/cpu_gpu/
if [[ "$OSTYPE" != "darwin"* ]];then if [[ "$OSTYPE" != "darwin"* ]];then
echo "build static lib for linux-x86-64" echo "build static lib for linux-x86-64"
bazel build mace/libmace:libmace_static --config optimization --define quantize=true --define openmp=true bazel build mace/libmace:libmace_static --config optimization --define quantize=true --define openmp=true
......
...@@ -13,7 +13,9 @@ ...@@ -13,7 +13,9 @@
# limitations under the License. # limitations under the License.
import enum import enum
import hashlib
import re import re
import os
import six import six
...@@ -135,3 +137,340 @@ def formatted_file_name(input_file_name, input_name): ...@@ -135,3 +137,340 @@ def formatted_file_name(input_file_name, input_name):
for c in input_name: for c in input_name:
res += c if c.isalnum() else '_' res += c if c.isalnum() else '_'
return res return res
def md5sum(s):
    """Return the hexadecimal MD5 digest of the text ``s``.

    ``s`` is encoded as UTF-8 before hashing, so it must provide ``encode``
    (i.e. be a text string).
    """
    return hashlib.md5(s.encode('utf-8')).hexdigest()
def get_build_binary_dir(library_name, target_abi):
    """Return the temporary build directory for one library/ABI pair.

    The result has the shape
    ``BUILD_OUTPUT_DIR/<library_name>/BUILD_TMP_DIR_NAME/<target_abi>`` and is
    always joined with forward slashes.
    """
    path_parts = (BUILD_OUTPUT_DIR, library_name,
                  BUILD_TMP_DIR_NAME, target_abi)
    return "%s/%s/%s/%s" % path_parts
def get_model_lib_output_path(library_name, abi):
    """Return the path of the ``<library_name>.a`` archive for ``abi``.

    The archive lives under
    ``BUILD_OUTPUT_DIR/<library_name>/MODEL_OUTPUT_DIR_NAME/<abi>/``.
    """
    archive_name = "%s.a" % library_name
    return os.path.join(BUILD_OUTPUT_DIR, library_name,
                        MODEL_OUTPUT_DIR_NAME, abi, archive_name)
# Check that the converted artifacts of a model exist on disk.
#
# Every check goes through mace_check(condition, ModuleName.RUN,
# "You should convert model first."). The files are looked up under
# BUILD_OUTPUT_DIR/<library_name>/MODEL_OUTPUT_DIR_NAME.
#
#   library_name        library the model belongs to (path component)
#   model_name          base name of the expected .pb / .data files
#   model_graph_format  compared against ModelFormat.file
#   model_data_format   compared against ModelFormat.file
#   abi                 forwarded to get_model_lib_output_path()
#
# NOTE(review): leading indentation is missing from this listing, so whether
# the model_data_format check is nested inside the `else` branch cannot be
# read off the text; it presumably runs for both graph formats - TODO confirm.
def check_model_converted(library_name, model_name,
model_graph_format, model_data_format,
abi):
model_output_dir = \
'%s/%s/%s' % (BUILD_OUTPUT_DIR, library_name, MODEL_OUTPUT_DIR_NAME)
# Graph kept as a file: expect <model_name>.pb in the model output dir.
if model_graph_format == ModelFormat.file:
mace_check(os.path.exists("%s/%s.pb" % (model_output_dir, model_name)),
ModuleName.RUN,
"You should convert model first.")
# Any other graph format: expect the per-ABI model library instead.
else:
model_lib_path = get_model_lib_output_path(library_name, abi)
mace_check(os.path.exists(model_lib_path),
ModuleName.RUN,
"You should convert model first.")
# Model data kept as a file: expect <model_name>.data in the same dir.
if model_data_format == ModelFormat.file:
mace_check(os.path.exists("%s/%s.data" %
(model_output_dir, model_name)),
ModuleName.RUN,
"You should convert model first.")
def parse_device_type(runtime):
    """Translate a runtime name into the matching ``DeviceType`` constant.

    ``RuntimeType.dsp`` maps to ``DeviceType.HEXAGON``, ``RuntimeType.gpu`` to
    ``DeviceType.GPU`` and ``RuntimeType.cpu`` to ``DeviceType.CPU``. Any other
    value (including ``RuntimeType.cpu_gpu``) yields an empty string.
    """
    if runtime == RuntimeType.dsp:
        return DeviceType.HEXAGON
    if runtime == RuntimeType.gpu:
        return DeviceType.GPU
    if runtime == RuntimeType.cpu:
        return DeviceType.CPU
    # No single device type corresponds to this runtime.
    return ""
def sha256_checksum(fname):
    """Return the hexadecimal SHA-256 digest of the file at ``fname``.

    The file is opened in binary mode and consumed in 4096-byte reads so the
    whole content never has to be held in memory at once.
    """
    digest = hashlib.sha256()
    with open(fname, "rb") as stream:
        while True:
            block = stream.read(4096)
            if block == b"":
                # An empty read marks end of file.
                break
            digest.update(block)
    return digest.hexdigest()
# Resolve the local model file and weight file for a model, downloading
# them first when their paths are http:// or https:// URLs.
#
#   model_file_path         local path or URL of the model
#   model_sha256_checksum   expected SHA-256 hex digest of the model file
#   model_output_dir        directory that receives downloaded files
#   weight_file_path        optional local path or URL of the weights
#   weight_sha256_checksum  expected SHA-256 hex digest of the weight file
#
# Returns the tuple (model_file, weight_file). For local inputs these are the
# paths passed in unchanged; for URLs they are the download locations.
# Checksum mismatches are reported through MaceLogger.error with
# ModuleName.MODEL_CONVERTER.
#
# NOTE(review): leading indentation is missing from this listing; the
# checksum comparisons at the end of each half presumably run for local
# files as well as downloaded ones - TODO confirm against the real file.
def get_model_files(model_file_path,
model_sha256_checksum,
model_output_dir,
weight_file_path="",
weight_sha256_checksum=""):
model_file = model_file_path
weight_file = weight_file_path
# Remote model: the local name is md5(url) + ".pb" inside model_output_dir.
# The download is skipped when that file already exists with the expected
# checksum.
if model_file_path.startswith("http://") or \
model_file_path.startswith("https://"):
model_file = model_output_dir + "/" + md5sum(model_file_path) + ".pb"
if not os.path.exists(model_file) or \
sha256_checksum(model_file) != model_sha256_checksum:
MaceLogger.info("Downloading model, please wait ...")
six.moves.urllib.request.urlretrieve(model_file_path, model_file)
MaceLogger.info("Model downloaded successfully.")
# Verify the model file against the expected checksum.
if sha256_checksum(model_file) != model_sha256_checksum:
MaceLogger.error(ModuleName.MODEL_CONVERTER,
"model file sha256checksum not match")
# Remote weights: same caching scheme, stored as md5(url) + ".caffemodel".
if weight_file_path.startswith("http://") or \
weight_file_path.startswith("https://"):
weight_file = \
model_output_dir + "/" + md5sum(weight_file_path) + ".caffemodel"
if not os.path.exists(weight_file) or \
sha256_checksum(weight_file) != weight_sha256_checksum:
MaceLogger.info("Downloading model weight, please wait ...")
six.moves.urllib.request.urlretrieve(weight_file_path, weight_file)
MaceLogger.info("Model weight downloaded successfully.")
# Weights are optional: only verify the checksum when a weight file is set.
if weight_file:
if sha256_checksum(weight_file) != weight_sha256_checksum:
MaceLogger.error(ModuleName.MODEL_CONVERTER,
"weight file sha256checksum not match")
return model_file, weight_file
def get_opencl_binary_output_path(library_name, target_abi, device):
    """Return the output path of the compiled OpenCL kernel binary.

    The path is
    ``BUILD_OUTPUT_DIR/<library_name>/OUTPUT_OPENCL_BINARY_DIR_NAME/<target_abi>/``
    followed by
    ``<library_name>_OUTPUT_OPENCL_BINARY_FILE_NAME.<device_name>.<target_socs>.bin``.
    ``device`` must expose ``target_socs`` and ``device_name`` attributes.
    """
    soc = device.target_socs
    name = device.device_name
    directory = '%s/%s/%s/%s' % (BUILD_OUTPUT_DIR,
                                 library_name,
                                 OUTPUT_OPENCL_BINARY_DIR_NAME,
                                 target_abi)
    file_name = '%s_%s.%s.%s.bin' % (library_name,
                                     OUTPUT_OPENCL_BINARY_FILE_NAME,
                                     name,
                                     soc)
    return directory + '/' + file_name
def get_opencl_parameter_output_path(library_name, target_abi, device):
    """Return the output path of the tuned OpenCL parameter file.

    The path is
    ``BUILD_OUTPUT_DIR/<library_name>/OUTPUT_OPENCL_BINARY_DIR_NAME/<target_abi>/``
    followed by
    ``<library_name>_OUTPUT_OPENCL_PARAMETER_FILE_NAME.<device_name>.<target_socs>.bin``.
    ``device`` must expose ``target_socs`` and ``device_name`` attributes.
    """
    soc = device.target_socs
    name = device.device_name
    directory = '%s/%s/%s/%s' % (BUILD_OUTPUT_DIR,
                                 library_name,
                                 OUTPUT_OPENCL_BINARY_DIR_NAME,
                                 target_abi)
    file_name = '%s_%s.%s.%s.bin' % (library_name,
                                     OUTPUT_OPENCL_PARAMETER_FILE_NAME,
                                     name,
                                     soc)
    return directory + '/' + file_name
def get_build_model_dirs(library_name,
                         model_name,
                         target_abi,
                         device,
                         model_file_path):
    """Compute the build directories used for one model.

    Returns the tuple ``(model_output_base_dir, model_output_dir,
    mace_model_dir)``:

    * the base dir is ``BUILD_OUTPUT_DIR/<library_name>/BUILD_TMP_DIR_NAME/
      <model_name>/<md5 of model_file_path>``;
    * the output dir is a sub-directory of the base dir chosen by ABI and
      device: ``<abi>`` for the host ABI, ``<device_name>_<target_socs>/<abi>``
      when the device has both ``target_socs`` and ``address`` set, and
      ``BUILD_TMP_GENERAL_OUTPUT_DIR_NAME/<abi>`` otherwise;
    * the model dir is ``BUILD_OUTPUT_DIR/<library_name>/MODEL_OUTPUT_DIR_NAME``.

    ``device`` must expose ``device_name``, ``target_socs`` and ``address``.
    """
    name = device.device_name
    socs = device.target_socs
    digest = md5sum(model_file_path)
    base_dir = '%s/%s/%s/%s/%s' % (BUILD_OUTPUT_DIR, library_name,
                                   BUILD_TMP_DIR_NAME, model_name, digest)

    if target_abi == ABIType.host:
        output_dir = '%s/%s' % (base_dir, target_abi)
    elif socs and device.address:
        # A concrete, addressable device gets its own directory.
        output_dir = '%s/%s_%s/%s' % (base_dir, name, socs, target_abi)
    else:
        output_dir = '%s/%s/%s' % (base_dir,
                                   BUILD_TMP_GENERAL_OUTPUT_DIR_NAME,
                                   target_abi)

    model_dir = '%s/%s/%s' % (BUILD_OUTPUT_DIR, library_name,
                              MODEL_OUTPUT_DIR_NAME)
    return base_dir, output_dir, model_dir
def abi_to_internal(abi):
    """Map a user-facing ABI name to the ABI name used internally.

    ``armeabi-v7a`` and ``arm64-v8a`` are returned unchanged, ``arm64`` maps
    to ``ABIType.aarch64`` and ``armhf`` maps to ``ABIType.armeabi_v7a``.
    Every other value (for example ``ABIType.host``) yields ``None``.
    """
    if abi == ABIType.armeabi_v7a or abi == ABIType.arm64_v8a:
        return abi
    elif abi == ABIType.arm64:
        return ABIType.aarch64
    elif abi == ABIType.armhf:
        return ABIType.armeabi_v7a
    # Unrecognised ABI: there is no internal equivalent.
    return None
def infer_toolchain(abi):
    """Return the toolchain name to build with for ``abi``.

    ``armeabi-v7a`` and ``arm64-v8a`` use ``ToolchainType.android``, ``armhf``
    uses ``ToolchainType.arm_linux_gnueabihf`` and ``arm64`` uses
    ``ToolchainType.aarch64_linux_gnu``. Any other ABI yields an empty string.
    """
    toolchain = ''
    if abi == ABIType.armeabi_v7a or abi == ABIType.arm64_v8a:
        toolchain = ToolchainType.android
    elif abi == ABIType.armhf:
        toolchain = ToolchainType.arm_linux_gnueabihf
    elif abi == ABIType.arm64:
        toolchain = ToolchainType.aarch64_linux_gnu
    return toolchain
################################
# YAML keywords
################################
class YAMLKeyword(object):
    """String keys looked up in the YAML configuration dictionaries.

    Every attribute's value is identical to its own name, so the class acts as
    a namespace of spelled-out dictionary keys.
    """
    # Library-level settings.
    library_name = 'library_name'
    target_abis = 'target_abis'
    target_socs = 'target_socs'
    model_graph_format = 'model_graph_format'
    model_data_format = 'model_data_format'
    models = 'models'
    platform = 'platform'

    # Device description keys (presumably read from the device yml file;
    # verify against the device module).
    device_name = 'device_name'
    system = 'system'
    address = 'address'
    username = 'username'
    password = 'password'

    # Model and weight sources.
    model_file_path = 'model_file_path'
    model_sha256_checksum = 'model_sha256_checksum'
    weight_file_path = 'weight_file_path'
    weight_sha256_checksum = 'weight_sha256_checksum'

    # Subgraph tensors and shapes.
    subgraphs = 'subgraphs'
    input_tensors = 'input_tensors'
    input_shapes = 'input_shapes'
    input_ranges = 'input_ranges'
    output_tensors = 'output_tensors'
    output_shapes = 'output_shapes'
    check_tensors = 'check_tensors'
    check_shapes = 'check_shapes'

    # Runtime and data layout options.
    runtime = 'runtime'
    data_type = 'data_type'
    input_data_types = 'input_data_types'
    input_data_formats = 'input_data_formats'
    output_data_formats = 'output_data_formats'
    limit_opencl_kernel_time = 'limit_opencl_kernel_time'
    nnlib_graph_mode = 'nnlib_graph_mode'
    obfuscate = 'obfuscate'
    winograd = 'winograd'

    # Quantization and validation options.
    quantize = 'quantize'
    quantize_range_file = 'quantize_range_file'
    change_concat_ranges = 'change_concat_ranges'
    validation_inputs_data = 'validation_inputs_data'
    validation_threshold = 'validation_threshold'
    graph_optimize_options = 'graph_optimize_options'  # internal use for now
    cl_mem_type = 'cl_mem_type'
################################
# SystemType
################################
class SystemType(object):
    """Names of the systems a device can run.

    Declared as a new-style class (explicit ``object`` base) so it behaves the
    same on Python 2 and Python 3 and matches the other constant holders in
    this module.
    """
    host = 'host'
    android = 'android'
    arm_linux = 'arm_linux'
################################
# Common device and build strings
################################
# Data directories on the target (the two roots differ per kind of target;
# which system uses which is decided by the callers).
PHONE_DATA_DIR = '/data/local/tmp/mace_run'
DEVICE_DATA_DIR = '/tmp/data/mace_run'
DEVICE_INTERIOR_DIR = PHONE_DATA_DIR + "/interior"

# Layout of the local build output tree.
BUILD_OUTPUT_DIR = 'builds'
BUILD_DOWNLOADS_DIR = BUILD_OUTPUT_DIR + '/downloads'
BUILD_TMP_DIR_NAME = '_tmp'
BUILD_TMP_GENERAL_OUTPUT_DIR_NAME = 'general'
BUILD_TMP_OPENCL_BIN_DIR = 'opencl_bin'
MODEL_OUTPUT_DIR_NAME = 'model'
MODEL_HEADER_DIR_PATH = 'include/mace/public'
OUTPUT_LIBRARY_DIR_NAME = 'lib'

# OpenCL artifacts.
OUTPUT_OPENCL_BINARY_DIR_NAME = 'opencl'
OUTPUT_OPENCL_BINARY_FILE_NAME = 'compiled_opencl_kernel'
OUTPUT_OPENCL_PARAMETER_FILE_NAME = 'tuned_opencl_parameter'
CL_COMPILED_BINARY_FILE_NAME = "mace_cl_compiled_program.bin"
CL_TUNED_PARAMETER_FILE_NAME = "mace_run.config"

# Generated-code directories.
CODEGEN_BASE_DIR = 'mace/codegen'
MODEL_CODEGEN_DIR = CODEGEN_BASE_DIR + '/models'
ENGINE_CODEGEN_DIR = CODEGEN_BASE_DIR + '/engine'
LIB_CODEGEN_DIR = CODEGEN_BASE_DIR + '/lib'

# Bazel targets and the paths of the libraries they produce.
LIBMACE_SO_TARGET = "//mace/libmace:libmace.so"
LIBMACE_STATIC_TARGET = "//mace/libmace:libmace_static"
LIBMACE_STATIC_PATH = "bazel-genfiles/mace/libmace/libmace.a"
LIBMACE_DYNAMIC_PATH = "bazel-bin/mace/libmace/libmace.so"
MODEL_LIB_TARGET = "//mace/codegen:generated_models"
MODEL_LIB_PATH = "bazel-genfiles/mace/codegen/libgenerated_models.a"
QUANTIZE_STAT_TARGET = "//mace/tools/quantization:quantize_stat"

# Executable names and their Bazel targets.
EXAMPLE_STATIC_NAME = "example_static"
EXAMPLE_DYNAMIC_NAME = "example_dynamic"
EXAMPLE_STATIC_TARGET = "//mace/examples/cli:" + EXAMPLE_STATIC_NAME
EXAMPLE_DYNAMIC_TARGET = "//mace/examples/cli:" + EXAMPLE_DYNAMIC_NAME
MACE_RUN_STATIC_NAME = "mace_run_static"
MACE_RUN_DYNAMIC_NAME = "mace_run_dynamic"
MACE_RUN_STATIC_TARGET = "//mace/tools/validation:" + MACE_RUN_STATIC_NAME
MACE_RUN_DYNAMIC_TARGET = "//mace/tools/validation:" + MACE_RUN_DYNAMIC_NAME
BM_MODEL_STATIC_NAME = "benchmark_model_static"
BM_MODEL_DYNAMIC_NAME = "benchmark_model_dynamic"
BM_MODEL_STATIC_TARGET = "//mace/benchmark:" + BM_MODEL_STATIC_NAME
BM_MODEL_DYNAMIC_TARGET = "//mace/benchmark:" + BM_MODEL_DYNAMIC_NAME

# Tag that selects every SoC.
ALL_SOC_TAG = 'all'
################################
# Model File Format
################################
class ModelFormat(object):
    """Ways a converted model can be stored: as a file or as code."""
    file = 'file'
    code = 'code'
################################
# ABI Type
################################
class ABIType(object):
    """Names of the supported target ABIs."""
    # ABIs built with the android toolchain (see infer_toolchain).
    armeabi_v7a = 'armeabi-v7a'
    arm64_v8a = 'arm64-v8a'

    # ABIs built with the ARM Linux toolchains; 'aarch64' is the internal
    # name that 'arm64' is translated to (see abi_to_internal).
    arm64 = 'arm64'
    aarch64 = 'aarch64'
    armhf = 'armhf'

    host = 'host'
################################
# Module name
################################
class ModuleName(object):
    """Module labels passed to ``mace_check`` and ``MaceLogger.error``."""
    YAML_CONFIG = 'YAML CONFIG'
    MODEL_CONVERTER = 'Model Converter'
    RUN = 'RUN'
    BENCHMARK = 'Benchmark'
#################################
# mace lib type
#################################
class MACELibType(object):
    """Integer codes for the two ways of linking the MACE library."""
    static = 0
    dynamic = 1
#################################
# Runtime type
#################################
class RuntimeType(object):
    """Runtime names accepted for a model (compare ``parse_device_type``)."""
    cpu = 'cpu'
    gpu = 'gpu'
    dsp = 'dsp'
    # Combined runtime; parse_device_type maps it to no single device type.
    cpu_gpu = 'cpu+gpu'
#################################
# Toolchain type
#################################
class ToolchainType(object):
    """Toolchain names returned by ``infer_toolchain``.

    Declared as a new-style class (explicit ``object`` base) so it behaves the
    same on Python 2 and Python 3 and matches the other constant holders in
    this module.
    """
    android = 'android'
    arm_linux_gnueabihf = 'arm_linux_gnueabihf'
    aarch64_linux_gnu = 'aarch64_linux_gnu'
...@@ -18,7 +18,6 @@ import hashlib ...@@ -18,7 +18,6 @@ import hashlib
import os import os
import re import re
import sh import sh
import subprocess
import sys import sys
import urllib import urllib
import yaml import yaml
...@@ -27,14 +26,9 @@ from enum import Enum ...@@ -27,14 +26,9 @@ from enum import Enum
import six import six
import sh_commands import sh_commands
from sh_commands import BuildType
from sh_commands import ModelFormat
from common import CaffeEnvType from common import *
from common import DeviceType from device import DeviceWrapper, DeviceManager
from common import mace_check
from common import MaceLogger
from common import StringFormatter
################################ ################################
# set environment # set environment
...@@ -44,69 +38,20 @@ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' ...@@ -44,69 +38,20 @@ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
################################ ################################
# common definitions # common definitions
################################ ################################
BUILD_OUTPUT_DIR = 'builds'
BUILD_DOWNLOADS_DIR = BUILD_OUTPUT_DIR + '/downloads'
PHONE_DATA_DIR = "/data/local/tmp/mace_run"
MODEL_OUTPUT_DIR_NAME = 'model'
MODEL_HEADER_DIR_PATH = 'include/mace/public'
BUILD_TMP_DIR_NAME = '_tmp'
BUILD_TMP_GENERAL_OUTPUT_DIR_NAME = 'general'
OUTPUT_LIBRARY_DIR_NAME = 'lib'
OUTPUT_OPENCL_BINARY_DIR_NAME = 'opencl'
OUTPUT_OPENCL_BINARY_FILE_NAME = 'compiled_opencl_kernel'
OUTPUT_OPENCL_PARAMETER_FILE_NAME = 'tuned_opencl_parameter'
CL_COMPILED_BINARY_FILE_NAME = "mace_cl_compiled_program.bin"
CL_TUNED_PARAMETER_FILE_NAME = "mace_run.config"
CODEGEN_BASE_DIR = 'mace/codegen'
MODEL_CODEGEN_DIR = CODEGEN_BASE_DIR + '/models'
ENGINE_CODEGEN_DIR = CODEGEN_BASE_DIR + '/engine'
LIB_CODEGEN_DIR = CODEGEN_BASE_DIR + '/lib'
LIBMACE_SO_TARGET = "//mace/libmace:libmace.so"
LIBMACE_STATIC_TARGET = "//mace/libmace:libmace_static"
LIBMACE_STATIC_PATH = "bazel-genfiles/mace/libmace/libmace.a"
LIBMACE_DYNAMIC_PATH = "bazel-bin/mace/libmace/libmace.so"
MODEL_LIB_TARGET = "//mace/codegen:generated_models"
MODEL_LIB_PATH = "bazel-genfiles/mace/codegen/libgenerated_models.a"
MACE_RUN_STATIC_NAME = "mace_run_static"
MACE_RUN_DYNAMIC_NAME = "mace_run_dynamic"
MACE_RUN_STATIC_TARGET = "//mace/tools/validation:" + MACE_RUN_STATIC_NAME
MACE_RUN_DYNAMIC_TARGET = "//mace/tools/validation:" + MACE_RUN_DYNAMIC_NAME
EXAMPLE_STATIC_NAME = "example_static"
EXAMPLE_DYNAMIC_NAME = "example_dynamic"
EXAMPLE_STATIC_TARGET = "//mace/examples/cli:" + EXAMPLE_STATIC_NAME
EXAMPLE_DYNAMIC_TARGET = "//mace/examples/cli:" + EXAMPLE_DYNAMIC_NAME
BM_MODEL_STATIC_NAME = "benchmark_model_static"
BM_MODEL_DYNAMIC_NAME = "benchmark_model_dynamic"
BM_MODEL_STATIC_TARGET = "//mace/benchmark:" + BM_MODEL_STATIC_NAME
BM_MODEL_DYNAMIC_TARGET = "//mace/benchmark:" + BM_MODEL_DYNAMIC_NAME
DEVICE_INTERIOR_DIR = PHONE_DATA_DIR + "/interior"
BUILD_TMP_OPENCL_BIN_DIR = 'opencl_bin'
ALL_SOC_TAG = 'all'
ABITypeStrs = [ ABITypeStrs = [
'armeabi-v7a', 'armeabi-v7a',
'arm64-v8a', 'arm64-v8a',
'arm64',
'armhf',
'host', 'host',
] ]
class ABIType(object):
armeabi_v7a = 'armeabi-v7a'
arm64_v8a = 'arm64-v8a'
host = 'host'
ModelFormatStrs = [ ModelFormatStrs = [
"file", "file",
"code", "code",
] ]
class MACELibType(object):
static = 0
dynamic = 1
PlatformTypeStrs = [ PlatformTypeStrs = [
"tensorflow", "tensorflow",
"caffe", "caffe",
...@@ -121,14 +66,6 @@ RuntimeTypeStrs = [ ...@@ -121,14 +66,6 @@ RuntimeTypeStrs = [
"cpu+gpu" "cpu+gpu"
] ]
class RuntimeType(object):
cpu = 'cpu'
gpu = 'gpu'
dsp = 'dsp'
cpu_gpu = 'cpu+gpu'
InputDataTypeStrs = [ InputDataTypeStrs = [
"int32", "int32",
"float32", "float32",
...@@ -174,49 +111,11 @@ class DefaultValues(object): ...@@ -174,49 +111,11 @@ class DefaultValues(object):
gpu_priority_hint = 3, gpu_priority_hint = 3,
class YAMLKeyword(object): class ValidationThreshold(object):
library_name = 'library_name' cpu_threshold = 0.999,
target_abis = 'target_abis' gpu_threshold = 0.995,
target_socs = 'target_socs' hexagon_threshold = 0.930,
model_graph_format = 'model_graph_format' cpu_quantize_threshold = 0.980,
model_data_format = 'model_data_format'
models = 'models'
platform = 'platform'
model_file_path = 'model_file_path'
model_sha256_checksum = 'model_sha256_checksum'
weight_file_path = 'weight_file_path'
weight_sha256_checksum = 'weight_sha256_checksum'
subgraphs = 'subgraphs'
input_tensors = 'input_tensors'
input_shapes = 'input_shapes'
input_ranges = 'input_ranges'
output_tensors = 'output_tensors'
output_shapes = 'output_shapes'
check_tensors = 'check_tensors'
check_shapes = 'check_shapes'
runtime = 'runtime'
data_type = 'data_type'
input_data_types = 'input_data_types'
input_data_formats = 'input_data_formats'
output_data_formats = 'output_data_formats'
limit_opencl_kernel_time = 'limit_opencl_kernel_time'
nnlib_graph_mode = 'nnlib_graph_mode'
obfuscate = 'obfuscate'
winograd = 'winograd'
quantize = 'quantize'
quantize_range_file = 'quantize_range_file'
change_concat_ranges = 'change_concat_ranges'
validation_inputs_data = 'validation_inputs_data'
validation_threshold = 'validation_threshold'
graph_optimize_options = 'graph_optimize_options' # internal use for now
cl_mem_type = 'cl_mem_type'
class ModuleName(object):
YAML_CONFIG = 'YAML CONFIG'
MODEL_CONVERTER = 'Model Converter'
RUN = 'RUN'
BENCHMARK = 'Benchmark'
CPP_KEYWORDS = [ CPP_KEYWORDS = [
...@@ -260,7 +159,7 @@ def parse_device_type(runtime): ...@@ -260,7 +159,7 @@ def parse_device_type(runtime):
def get_hexagon_mode(configs): def get_hexagon_mode(configs):
runtime_list = [] runtime_list = []
for model_name in configs[YAMLKeyword.models]: for model_name in configs[YAMLKeyword.models]:
model_runtime =\ model_runtime = \
configs[YAMLKeyword.models][model_name].get( configs[YAMLKeyword.models][model_name].get(
YAMLKeyword.runtime, "") YAMLKeyword.runtime, "")
runtime_list.append(model_runtime.lower()) runtime_list.append(model_runtime.lower())
...@@ -273,7 +172,7 @@ def get_hexagon_mode(configs): ...@@ -273,7 +172,7 @@ def get_hexagon_mode(configs):
def get_opencl_mode(configs): def get_opencl_mode(configs):
runtime_list = [] runtime_list = []
for model_name in configs[YAMLKeyword.models]: for model_name in configs[YAMLKeyword.models]:
model_runtime =\ model_runtime = \
configs[YAMLKeyword.models][model_name].get( configs[YAMLKeyword.models][model_name].get(
YAMLKeyword.runtime, "") YAMLKeyword.runtime, "")
runtime_list.append(model_runtime.lower()) runtime_list.append(model_runtime.lower())
...@@ -331,7 +230,7 @@ def format_model_config(flags): ...@@ -331,7 +230,7 @@ def format_model_config(flags):
target_socs = configs.get(YAMLKeyword.target_socs, "") target_socs = configs.get(YAMLKeyword.target_socs, "")
if flags.target_socs: if flags.target_socs:
configs[YAMLKeyword.target_socs] = \ configs[YAMLKeyword.target_socs] = \
[soc.lower() for soc in flags.target_socs.split(',')] [soc.lower() for soc in flags.target_socs.split(',')]
elif not target_socs: elif not target_socs:
configs[YAMLKeyword.target_socs] = [] configs[YAMLKeyword.target_socs] = []
elif not isinstance(target_socs, list): elif not isinstance(target_socs, list):
...@@ -347,7 +246,9 @@ def format_model_config(flags): ...@@ -347,7 +246,9 @@ def format_model_config(flags):
if ALL_SOC_TAG in target_socs: if ALL_SOC_TAG in target_socs:
mace_check(available_socs, mace_check(available_socs,
ModuleName.YAML_CONFIG, ModuleName.YAML_CONFIG,
"Build for all SOCs plugged in computer, " "Android abi is listed in config file and "
"build for all SOCs plugged in computer, "
"But no android phone found, "
"you at least plug in one phone") "you at least plug in one phone")
else: else:
for soc in target_socs: for soc in target_socs:
...@@ -412,7 +313,7 @@ def format_model_config(flags): ...@@ -412,7 +313,7 @@ def format_model_config(flags):
weight_file_path = model_config.get(YAMLKeyword.weight_file_path, "") weight_file_path = model_config.get(YAMLKeyword.weight_file_path, "")
if weight_file_path: if weight_file_path:
weight_checksum =\ weight_checksum = \
model_config.get(YAMLKeyword.weight_sha256_checksum, "") model_config.get(YAMLKeyword.weight_sha256_checksum, "")
mace_check(weight_checksum != "", ModuleName.YAML_CONFIG, mace_check(weight_checksum != "", ModuleName.YAML_CONFIG,
"'%s' is necessary" % "'%s' is necessary" %
...@@ -538,14 +439,15 @@ def format_model_config(flags): ...@@ -538,14 +439,15 @@ def format_model_config(flags):
YAMLKeyword.validation_threshold, {}) YAMLKeyword.validation_threshold, {})
if not isinstance(validation_threshold, dict): if not isinstance(validation_threshold, dict):
raise argparse.ArgumentTypeError( raise argparse.ArgumentTypeError(
'similarity threshold must be a dict.') 'similarity threshold must be a dict.')
threshold_dict = { threshold_dict = {
DeviceType.CPU: 0.999, DeviceType.CPU: ValidationThreshold.cpu_threshold,
DeviceType.GPU: 0.995, DeviceType.GPU: ValidationThreshold.gpu_threshold,
DeviceType.HEXAGON: 0.930, DeviceType.HEXAGON: ValidationThreshold.hexagon_threshold,
DeviceType.CPU + "_QUANTIZE": 0.980, DeviceType.CPU + "_QUANTIZE":
} ValidationThreshold.cpu_quantize_threshold,
}
for k, v in six.iteritems(validation_threshold): for k, v in six.iteritems(validation_threshold):
if k.upper() == 'DSP': if k.upper() == 'DSP':
k = DeviceType.HEXAGON k = DeviceType.HEXAGON
...@@ -554,7 +456,7 @@ def format_model_config(flags): ...@@ -554,7 +456,7 @@ def format_model_config(flags):
DeviceType.HEXAGON, DeviceType.HEXAGON,
DeviceType.CPU + "_QUANTIZE"): DeviceType.CPU + "_QUANTIZE"):
raise argparse.ArgumentTypeError( raise argparse.ArgumentTypeError(
'Unsupported validation threshold runtime: %s' % k) 'Unsupported validation threshold runtime: %s' % k)
threshold_dict[k.upper()] = v threshold_dict[k.upper()] = v
subgraph[YAMLKeyword.validation_threshold] = threshold_dict subgraph[YAMLKeyword.validation_threshold] = threshold_dict
...@@ -573,7 +475,7 @@ def format_model_config(flags): ...@@ -573,7 +475,7 @@ def format_model_config(flags):
subgraph[YAMLKeyword.input_ranges] = [input_ranges] subgraph[YAMLKeyword.input_ranges] = [input_ranges]
else: else:
subgraph[YAMLKeyword.input_ranges] = input_ranges subgraph[YAMLKeyword.input_ranges] = input_ranges
subgraph[YAMLKeyword.input_ranges] =\ subgraph[YAMLKeyword.input_ranges] = \
[str(v) for v in subgraph[YAMLKeyword.input_ranges]] [str(v) for v in subgraph[YAMLKeyword.input_ranges]]
for key in [YAMLKeyword.limit_opencl_kernel_time, for key in [YAMLKeyword.limit_opencl_kernel_time,
...@@ -598,67 +500,6 @@ def format_model_config(flags): ...@@ -598,67 +500,6 @@ def format_model_config(flags):
return configs return configs
def get_build_binary_dir(library_name, target_abi):
return "%s/%s/%s/%s" % (
BUILD_OUTPUT_DIR, library_name, BUILD_TMP_DIR_NAME, target_abi)
def get_build_model_dirs(library_name, model_name, target_abi, target_soc,
serial_num, model_file_path):
model_path_digest = md5sum(model_file_path)
model_output_base_dir = "%s/%s/%s/%s/%s" % (
BUILD_OUTPUT_DIR, library_name, BUILD_TMP_DIR_NAME,
model_name, model_path_digest)
if target_abi == ABIType.host:
model_output_dir = "%s/%s" % (model_output_base_dir, target_abi)
elif not target_soc or not serial_num:
model_output_dir = "%s/%s/%s" % (
model_output_base_dir, BUILD_TMP_GENERAL_OUTPUT_DIR_NAME,
target_abi)
else:
device_name = \
sh_commands.adb_get_device_name_by_serialno(serial_num)
model_output_dir = "%s/%s_%s/%s" % (
model_output_base_dir, device_name,
target_soc, target_abi)
mace_model_dir = \
'%s/%s/%s' % (BUILD_OUTPUT_DIR, library_name, MODEL_OUTPUT_DIR_NAME)
return model_output_base_dir, model_output_dir, mace_model_dir
def get_opencl_binary_output_path(library_name, target_abi,
target_soc, serial_num):
device_name = \
sh_commands.adb_get_device_name_by_serialno(serial_num)
return '%s/%s/%s/%s/%s_%s.%s.%s.bin' % \
(BUILD_OUTPUT_DIR,
library_name,
OUTPUT_OPENCL_BINARY_DIR_NAME,
target_abi,
library_name,
OUTPUT_OPENCL_BINARY_FILE_NAME,
device_name,
target_soc)
def get_opencl_parameter_output_path(library_name, target_abi,
target_soc, serial_num):
device_name = \
sh_commands.adb_get_device_name_by_serialno(serial_num)
return '%s/%s/%s/%s/%s_%s.%s.%s.bin' % \
(BUILD_OUTPUT_DIR,
library_name,
OUTPUT_OPENCL_BINARY_DIR_NAME,
target_abi,
library_name,
OUTPUT_OPENCL_PARAMETER_FILE_NAME,
device_name,
target_soc)
def clear_build_dirs(library_name): def clear_build_dirs(library_name):
# make build dir # make build dir
if not os.path.exists(BUILD_OUTPUT_DIR): if not os.path.exists(BUILD_OUTPUT_DIR):
...@@ -676,27 +517,6 @@ def clear_build_dirs(library_name): ...@@ -676,27 +517,6 @@ def clear_build_dirs(library_name):
sh.rm('-rf', lib_output_dir) sh.rm('-rf', lib_output_dir)
def check_model_converted(library_name, model_name,
model_graph_format, model_data_format,
abi):
model_output_dir = \
'%s/%s/%s' % (BUILD_OUTPUT_DIR, library_name, MODEL_OUTPUT_DIR_NAME)
if model_graph_format == ModelFormat.file:
mace_check(os.path.exists("%s/%s.pb" % (model_output_dir, model_name)),
ModuleName.RUN,
"You should convert model first.")
else:
model_lib_path = get_model_lib_output_path(library_name, abi)
mace_check(os.path.exists(model_lib_path),
ModuleName.RUN,
"You should convert model first.")
if model_data_format == ModelFormat.file:
mace_check(os.path.exists("%s/%s.data" %
(model_output_dir, model_name)),
ModuleName.RUN,
"You should convert model first.")
################################ ################################
# convert # convert
################################ ################################
...@@ -883,13 +703,6 @@ def convert_model(configs, cl_mem_type): ...@@ -883,13 +703,6 @@ def convert_model(configs, cl_mem_type):
StringFormatter.block("Model %s converted" % model_name)) StringFormatter.block("Model %s converted" % model_name))
def get_model_lib_output_path(library_name, abi):
lib_output_path = os.path.join(BUILD_OUTPUT_DIR, library_name,
MODEL_OUTPUT_DIR_NAME, abi,
"%s.a" % library_name)
return lib_output_path
def build_model_lib(configs, address_sanitizer): def build_model_lib(configs, address_sanitizer):
MaceLogger.header(StringFormatter.block("Building model library")) MaceLogger.header(StringFormatter.block("Building model library"))
...@@ -902,10 +715,11 @@ def build_model_lib(configs, address_sanitizer): ...@@ -902,10 +715,11 @@ def build_model_lib(configs, address_sanitizer):
library_out_dir = os.path.dirname(model_lib_output_path) library_out_dir = os.path.dirname(model_lib_output_path)
if not os.path.exists(library_out_dir): if not os.path.exists(library_out_dir):
os.makedirs(library_out_dir) os.makedirs(library_out_dir)
toolchain = infer_toolchain(target_abi)
sh_commands.bazel_build( sh_commands.bazel_build(
MODEL_LIB_TARGET, MODEL_LIB_TARGET,
abi=target_abi, abi=target_abi,
toolchain=toolchain,
hexagon_mode=hexagon_mode, hexagon_mode=hexagon_mode,
enable_opencl=get_opencl_mode(configs), enable_opencl=get_opencl_mode(configs),
enable_quantize=get_quantize_mode(configs), enable_quantize=get_quantize_mode(configs),
...@@ -994,8 +808,8 @@ def report_run_statistics(stdout, ...@@ -994,8 +808,8 @@ def report_run_statistics(stdout,
f.write(data_str) f.write(data_str)
def build_mace_run(configs, target_abi, enable_openmp, address_sanitizer, def build_mace_run(configs, target_abi, toolchain, enable_openmp,
mace_lib_type): address_sanitizer, mace_lib_type):
library_name = configs[YAMLKeyword.library_name] library_name = configs[YAMLKeyword.library_name]
hexagon_mode = get_hexagon_mode(configs) hexagon_mode = get_hexagon_mode(configs)
...@@ -1019,6 +833,7 @@ def build_mace_run(configs, target_abi, enable_openmp, address_sanitizer, ...@@ -1019,6 +833,7 @@ def build_mace_run(configs, target_abi, enable_openmp, address_sanitizer,
sh_commands.bazel_build( sh_commands.bazel_build(
mace_run_target, mace_run_target,
abi=target_abi, abi=target_abi,
toolchain=toolchain,
hexagon_mode=hexagon_mode, hexagon_mode=hexagon_mode,
enable_openmp=enable_openmp, enable_openmp=enable_openmp,
enable_opencl=get_opencl_mode(configs), enable_opencl=get_opencl_mode(configs),
...@@ -1031,8 +846,8 @@ def build_mace_run(configs, target_abi, enable_openmp, address_sanitizer, ...@@ -1031,8 +846,8 @@ def build_mace_run(configs, target_abi, enable_openmp, address_sanitizer,
mace_lib_type == MACELibType.dynamic) mace_lib_type == MACELibType.dynamic)
def build_example(configs, target_abi, enable_openmp, address_sanitizer, def build_example(configs, target_abi, toolchain,
mace_lib_type): enable_openmp, mace_lib_type):
library_name = configs[YAMLKeyword.library_name] library_name = configs[YAMLKeyword.library_name]
hexagon_mode = get_hexagon_mode(configs) hexagon_mode = get_hexagon_mode(configs)
...@@ -1042,6 +857,7 @@ def build_example(configs, target_abi, enable_openmp, address_sanitizer, ...@@ -1042,6 +857,7 @@ def build_example(configs, target_abi, enable_openmp, address_sanitizer,
os.makedirs(build_tmp_binary_dir) os.makedirs(build_tmp_binary_dir)
symbol_hidden = True symbol_hidden = True
libmace_target = LIBMACE_STATIC_TARGET libmace_target = LIBMACE_STATIC_TARGET
if mace_lib_type == MACELibType.dynamic: if mace_lib_type == MACELibType.dynamic:
symbol_hidden = False symbol_hidden = False
...@@ -1049,11 +865,12 @@ def build_example(configs, target_abi, enable_openmp, address_sanitizer, ...@@ -1049,11 +865,12 @@ def build_example(configs, target_abi, enable_openmp, address_sanitizer,
sh_commands.bazel_build(libmace_target, sh_commands.bazel_build(libmace_target,
abi=target_abi, abi=target_abi,
toolchain=toolchain,
enable_openmp=enable_openmp, enable_openmp=enable_openmp,
enable_opencl=get_opencl_mode(configs), enable_opencl=get_opencl_mode(configs),
enable_quantize=get_quantize_mode(configs), enable_quantize=get_quantize_mode(configs),
hexagon_mode=hexagon_mode, hexagon_mode=hexagon_mode,
address_sanitizer=address_sanitizer, address_sanitizer=flags.address_sanitizer,
symbol_hidden=symbol_hidden) symbol_hidden=symbol_hidden)
if os.path.exists(LIB_CODEGEN_DIR): if os.path.exists(LIB_CODEGEN_DIR):
...@@ -1079,11 +896,12 @@ def build_example(configs, target_abi, enable_openmp, address_sanitizer, ...@@ -1079,11 +896,12 @@ def build_example(configs, target_abi, enable_openmp, address_sanitizer,
sh_commands.bazel_build(example_target, sh_commands.bazel_build(example_target,
abi=target_abi, abi=target_abi,
toolchain=toolchain,
enable_openmp=enable_openmp, enable_openmp=enable_openmp,
enable_opencl=get_opencl_mode(configs), enable_opencl=get_opencl_mode(configs),
enable_quantize=get_quantize_mode(configs), enable_quantize=get_quantize_mode(configs),
hexagon_mode=hexagon_mode, hexagon_mode=hexagon_mode,
address_sanitizer=address_sanitizer, address_sanitizer=flags.address_sanitizer,
extra_args=build_arg) extra_args=build_arg)
target_bin = "/".join(sh_commands.bazel_target_to_bin(example_target)) target_bin = "/".join(sh_commands.bazel_target_to_bin(example_target))
...@@ -1092,296 +910,6 @@ def build_example(configs, target_abi, enable_openmp, address_sanitizer, ...@@ -1092,296 +910,6 @@ def build_example(configs, target_abi, enable_openmp, address_sanitizer,
sh.rm("-rf", LIB_CODEGEN_DIR) sh.rm("-rf", LIB_CODEGEN_DIR)
def tuning(library_name, model_name, model_config,
           model_graph_format, model_data_format,
           target_abi, target_soc, serial_num,
           mace_lib_type):
    """Run one tuning pass of a model on a device and pull the results.

    The function clears the device data directory, generates random input
    for the first subgraph, calls sh_commands.tuning_run with tuning=True
    on DeviceType.GPU, and then pulls the compiled OpenCL binary and the
    tuned parameter file into the model output directory.

    library_name, model_name, target_abi, target_soc and serial_num are
    forwarded to the path helpers and to the sh_commands calls;
    model_config is indexed with YAMLKeyword keys.
    """
    six.print_('* Tuning, it may take some time...')

    binary_dir = get_build_binary_dir(library_name, target_abi)
    # Pick the mace_run flavour that matches the requested library type.
    if mace_lib_type == MACELibType.dynamic:
        runner_name, use_dynamic_lib = MACE_RUN_DYNAMIC_NAME, True
    else:
        runner_name, use_dynamic_lib = MACE_RUN_STATIC_NAME, False
    model_data_embedded = model_data_format == ModelFormat.code

    _, output_dir, model_dir = get_build_model_dirs(
        library_name, model_name, target_abi,
        target_soc, serial_num,
        model_config[YAMLKeyword.model_file_path])

    # Start from an empty data directory on the device.
    sh_commands.clear_phone_data_dir(serial_num, PHONE_DATA_DIR)

    # Only the first subgraph is used for tuning.
    graph = model_config[YAMLKeyword.subgraphs][0]

    # Random input data for the run.
    sh_commands.gen_random_input(
        output_dir,
        graph[YAMLKeyword.input_tensors],
        graph[YAMLKeyword.input_shapes],
        graph[YAMLKeyword.validation_inputs_data],
        input_ranges=graph[YAMLKeyword.input_ranges],
        input_data_types=graph[YAMLKeyword.input_data_types])

    kernel_time_limit = model_config[YAMLKeyword.limit_opencl_kernel_time]
    sh_commands.tuning_run(
        abi=target_abi,
        serialno=serial_num,
        target_dir=binary_dir,
        target_name=runner_name,
        vlog_level=0,
        embed_model_data=model_data_embedded,
        model_output_dir=output_dir,
        input_nodes=graph[YAMLKeyword.input_tensors],
        output_nodes=graph[YAMLKeyword.output_tensors],
        input_shapes=graph[YAMLKeyword.input_shapes],
        output_shapes=graph[YAMLKeyword.output_shapes],
        mace_model_dir=model_dir,
        model_tag=model_name,
        device_type=DeviceType.GPU,
        running_round=0,
        restart_round=1,
        limit_opencl_kernel_time=kernel_time_limit,
        tuning=True,
        out_of_range_check=False,
        phone_data_dir=PHONE_DATA_DIR,
        model_graph_format=model_graph_format,
        opencl_binary_file="",
        opencl_parameter_file="",
        libmace_dynamic_library_path=LIBMACE_DYNAMIC_PATH,
        link_dynamic=use_dynamic_lib,
    )

    # Pull the OpenCL binary first, then the tuned parameters, into the
    # same local directory.
    opencl_out_dir = "%s/%s" % (output_dir, BUILD_TMP_OPENCL_BIN_DIR)
    pulled_files = (
        (DEVICE_INTERIOR_DIR, CL_COMPILED_BINARY_FILE_NAME),
        (PHONE_DATA_DIR, CL_TUNED_PARAMETER_FILE_NAME),
    )
    for remote_dir, remote_name in pulled_files:
        sh_commands.pull_file_from_device(
            serial_num, remote_dir, remote_name, opencl_out_dir)

    six.print_('Tuning done\n')
def run_specific_target(flags, configs, target_abi,
target_soc, serial_num):
# Run every model listed in configs[YAMLKeyword.models] for one ABI on one
# target (the host, or the device identified by serial_num).
# For each model this checks that the model was converted, recreates its
# output directory, optionally tunes it, generates random input and then
# calls sh_commands.tuning_run once per runtime, followed by optional
# validation and reporting. OpenCL outputs of tuned models are merged last.
#
# flags: parsed command-line options; fields read here include
#   mace_lib_type, example, address_sanitizer, disable_tuning, validate,
#   report, round and report_dir.
# configs: model configuration, indexed with YAMLKeyword keys.
# target_abi: ABI to run; ABIType.host selects the host code paths.
# target_soc, serial_num: forwarded to the path helpers and sh_commands.
#
# NOTE(review): indentation is not preserved in this view, so the nesting
# described in these comments is inferred from statement order - confirm
# against the real file.
library_name = configs[YAMLKeyword.library_name]
mace_lib_type = flags.mace_lib_type
embed_model_data = \
configs[YAMLKeyword.model_data_format] == ModelFormat.code
build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi)
# get target name for run
# (example vs. mace_run binary, each in a static and a dynamic flavour)
if flags.example:
if mace_lib_type == MACELibType.static:
target_name = EXAMPLE_STATIC_NAME
else:
target_name = EXAMPLE_DYNAMIC_NAME
else:
if mace_lib_type == MACELibType.static:
target_name = MACE_RUN_STATIC_NAME
else:
target_name = MACE_RUN_DYNAMIC_NAME
link_dynamic = mace_lib_type == MACELibType.dynamic
# Output dirs of the models that were tuned; used for the final merge.
model_output_dirs = []
for model_name in configs[YAMLKeyword.models]:
check_model_converted(library_name, model_name,
configs[YAMLKeyword.model_graph_format],
configs[YAMLKeyword.model_data_format],
target_abi)
if target_abi == ABIType.host:
device_name = ABIType.host
else:
device_name = \
sh_commands.adb_get_device_name_by_serialno(serial_num)
sh_commands.clear_phone_data_dir(serial_num, PHONE_DATA_DIR)
MaceLogger.header(
StringFormatter.block(
"Run model %s on %s" % (model_name, device_name)))
model_config = configs[YAMLKeyword.models][model_name]
model_runtime = model_config[YAMLKeyword.runtime]
subgraphs = model_config[YAMLKeyword.subgraphs]
# Without configured target_socs, or on the host, the model dirs are
# looked up without soc / serial number.
if not configs[YAMLKeyword.target_socs] or target_abi == ABIType.host:
model_output_base_dir, model_output_dir, mace_model_dir = \
get_build_model_dirs(library_name, model_name, target_abi,
None, None,
model_config[YAMLKeyword.model_file_path])
else:
model_output_base_dir, model_output_dir, mace_model_dir = \
get_build_model_dirs(library_name, model_name, target_abi,
target_soc, serial_num,
model_config[YAMLKeyword.model_file_path])
# clear temp model output dir
if os.path.exists(model_output_dir):
sh.rm("-rf", model_output_dir)
os.makedirs(model_output_dir)
is_tuned = False
model_opencl_output_bin_path = ""
model_opencl_parameter_path = ""
# tuning for specified soc
# Tuning is skipped for address-sanitizer runs, example runs, the host,
# models whose runtime is not gpu / cpu_gpu, and when disabled by flag.
if not flags.address_sanitizer \
and not flags.example \
and target_abi != ABIType.host \
and configs[YAMLKeyword.target_socs] \
and target_soc \
and model_runtime in [RuntimeType.gpu, RuntimeType.cpu_gpu] \
and not flags.disable_tuning:
tuning(library_name, model_name, model_config,
configs[YAMLKeyword.model_graph_format],
configs[YAMLKeyword.model_data_format],
target_abi, target_soc, serial_num,
mace_lib_type)
model_output_dirs.append(model_output_dir)
model_opencl_output_bin_path =\
"%s/%s/%s" % (model_output_dir,
BUILD_TMP_OPENCL_BIN_DIR,
CL_COMPILED_BINARY_FILE_NAME)
model_opencl_parameter_path = \
"%s/%s/%s" % (model_output_dir,
BUILD_TMP_OPENCL_BIN_DIR,
CL_TUNED_PARAMETER_FILE_NAME)
sh_commands.clear_phone_data_dir(serial_num, PHONE_DATA_DIR)
is_tuned = True
elif target_abi != ABIType.host and target_soc:
# Not tuned in this run: use the OpenCL output paths derived from the
# library, ABI, soc and serial number instead.
model_opencl_output_bin_path = get_opencl_binary_output_path(
library_name, target_abi, target_soc, serial_num
)
model_opencl_parameter_path = get_opencl_parameter_output_path(
library_name, target_abi, target_soc, serial_num
)
# generate input data
sh_commands.gen_random_input(
model_output_dir,
subgraphs[0][YAMLKeyword.input_tensors],
subgraphs[0][YAMLKeyword.input_shapes],
subgraphs[0][YAMLKeyword.validation_inputs_data],
input_ranges=subgraphs[0][YAMLKeyword.input_ranges],
input_data_types=subgraphs[0][YAMLKeyword.input_data_types])
# The host only runs on cpu; a cpu_gpu model is run once per runtime.
runtime_list = []
if target_abi == ABIType.host:
runtime_list.extend([RuntimeType.cpu])
elif model_runtime == RuntimeType.cpu_gpu:
runtime_list.extend([RuntimeType.cpu, RuntimeType.gpu])
else:
runtime_list.extend([model_runtime])
for runtime in runtime_list:
device_type = parse_device_type(runtime)
# run for specified soc
# check_tensors / check_shapes, when set, replace the model's regular
# outputs for both the run and the validation.
if not subgraphs[0][YAMLKeyword.check_tensors]:
output_nodes = subgraphs[0][YAMLKeyword.output_tensors]
output_shapes = subgraphs[0][YAMLKeyword.output_shapes]
else:
output_nodes = subgraphs[0][YAMLKeyword.check_tensors]
output_shapes = subgraphs[0][YAMLKeyword.check_shapes]
run_output = sh_commands.tuning_run(
abi=target_abi,
serialno=serial_num,
target_dir=build_tmp_binary_dir,
target_name=target_name,
vlog_level=flags.vlog_level,
embed_model_data=embed_model_data,
model_output_dir=model_output_dir,
input_nodes=subgraphs[0][YAMLKeyword.input_tensors],
output_nodes=output_nodes,
input_shapes=subgraphs[0][YAMLKeyword.input_shapes],
output_shapes=output_shapes,
mace_model_dir=mace_model_dir,
model_tag=model_name,
device_type=device_type,
running_round=flags.round,
restart_round=flags.restart_round,
limit_opencl_kernel_time=model_config[YAMLKeyword.limit_opencl_kernel_time], # noqa
tuning=False,
out_of_range_check=flags.gpu_out_of_range_check,
phone_data_dir=PHONE_DATA_DIR,
model_graph_format=configs[YAMLKeyword.model_graph_format],
omp_num_threads=flags.omp_num_threads,
cpu_affinity_policy=flags.cpu_affinity_policy,
gpu_perf_hint=flags.gpu_perf_hint,
gpu_priority_hint=flags.gpu_priority_hint,
input_dir=flags.input_dir,
output_dir=flags.output_dir,
runtime_failure_ratio=flags.runtime_failure_ratio,
address_sanitizer=flags.address_sanitizer,
opencl_binary_file=model_opencl_output_bin_path,
opencl_parameter_file=model_opencl_parameter_path,
libmace_dynamic_library_path=LIBMACE_DYNAMIC_PATH,
link_dynamic=link_dynamic,
quantize_stat=flags.quantize_stat,
)
if flags.validate:
model_file_path, weight_file_path = get_model_files(
model_config[YAMLKeyword.model_file_path],
model_config[YAMLKeyword.model_sha256_checksum],
BUILD_DOWNLOADS_DIR,
model_config[YAMLKeyword.weight_file_path],
model_config[YAMLKeyword.weight_sha256_checksum])
# Quantized models on CPU use the "<device_type>_QUANTIZE" entry of the
# validation threshold table.
validate_type = device_type
if model_config[YAMLKeyword.quantize] == 1 \
and device_type == DeviceType.CPU:
validate_type = device_type + "_QUANTIZE"
sh_commands.validate_model(
abi=target_abi,
serialno=serial_num,
model_file_path=model_file_path,
weight_file_path=weight_file_path,
platform=model_config[YAMLKeyword.platform],
device_type=device_type,
input_nodes=subgraphs[0][YAMLKeyword.input_tensors],
output_nodes=output_nodes,
input_shapes=subgraphs[0][YAMLKeyword.input_shapes],
output_shapes=output_shapes,
model_output_dir=model_output_dir,
phone_data_dir=PHONE_DATA_DIR,
input_data_types=subgraphs[0][YAMLKeyword.input_data_types], # noqa
caffe_env=flags.caffe_env,
validation_threshold=subgraphs[0][YAMLKeyword.validation_threshold][validate_type]) # noqa
if flags.report and flags.round > 0:
# A run is reported as tuned only for the GPU device type.
tuned = is_tuned and device_type == DeviceType.GPU
report_run_statistics(
run_output, target_abi, serial_num,
model_name, device_type, flags.report_dir,
tuned)
# Only entered when at least one model was tuned above.
if model_output_dirs:
opencl_output_bin_path = get_opencl_binary_output_path(
library_name, target_abi, target_soc, serial_num
)
opencl_parameter_bin_path = get_opencl_parameter_output_path(
library_name, target_abi, target_soc, serial_num
)
# clear opencl output dir
if os.path.exists(opencl_output_bin_path):
sh.rm('-rf', opencl_output_bin_path)
if os.path.exists(opencl_parameter_bin_path):
sh.rm('-rf', opencl_parameter_bin_path)
# merge all models' OpenCL binaries together
sh_commands.merge_opencl_binaries(
model_output_dirs, CL_COMPILED_BINARY_FILE_NAME,
opencl_output_bin_path)
# merge all models' OpenCL parameters together
sh_commands.merge_opencl_parameters(
model_output_dirs, CL_TUNED_PARAMETER_FILE_NAME,
opencl_parameter_bin_path)
def print_package_summary(package_path): def print_package_summary(package_path):
title = "Library" title = "Library"
header = ["key", "value"] header = ["key", "value"]
...@@ -1398,36 +926,37 @@ def run_mace(flags): ...@@ -1398,36 +926,37 @@ def run_mace(flags):
clear_build_dirs(configs[YAMLKeyword.library_name]) clear_build_dirs(configs[YAMLKeyword.library_name])
target_socs = configs[YAMLKeyword.target_socs] target_socs = configs[YAMLKeyword.target_socs]
if not target_socs or ALL_SOC_TAG in target_socs: device_list = DeviceManager.list_devices(flags.device_yml)
target_socs = sh_commands.adb_get_all_socs() if target_socs and ALL_SOC_TAG not in target_socs:
device_list = [dev for dev in device_list
if dev[YAMLKeyword.target_socs].lower() in target_socs]
for target_abi in configs[YAMLKeyword.target_abis]: for target_abi in configs[YAMLKeyword.target_abis]:
# build target # build target
if flags.example: for dev in device_list:
build_example(configs, target_abi, if target_abi in dev[YAMLKeyword.target_abis]:
not flags.disable_openmp, # get toolchain
flags.address_sanitizer, toolchain = infer_toolchain(target_abi)
flags.mace_lib_type) if flags.example:
else: build_example(configs,
build_mace_run(configs, target_abi, target_abi,
not flags.disable_openmp, toolchain,
flags.address_sanitizer, not flags.disable_openmp,
flags.mace_lib_type) flags.mace_lib_type)
else:
# run build_mace_run(configs,
if target_abi == ABIType.host: target_abi,
run_specific_target(flags, configs, target_abi, None, None) toolchain,
else: not flags.disable_openmp,
for target_soc in target_socs: flags.address_sanitizer,
serial_nums = \ flags.mace_lib_type)
sh_commands.get_target_socs_serialnos([target_soc]) # run
mace_check(serial_nums, device = DeviceWrapper(dev)
ModuleName.RUN, with device.lock():
'There is no device with soc: ' + target_soc) device.run_specify_abi(flags, configs, target_abi)
for serial_num in serial_nums: elif dev[YAMLKeyword.device_name] != SystemType.host:
with sh_commands.device_lock(serial_num): six.print_('The device with soc %s do not support abi %s' %
run_specific_target(flags, configs, target_abi, (dev[YAMLKeyword.target_socs], target_abi),
target_soc, serial_num) file=sys.stderr)
# package the output files # package the output files
package_path = sh_commands.packaging_lib(BUILD_OUTPUT_DIR, package_path = sh_commands.packaging_lib(BUILD_OUTPUT_DIR,
...@@ -1438,7 +967,11 @@ def run_mace(flags): ...@@ -1438,7 +967,11 @@ def run_mace(flags):
################################ ################################
# benchmark model # benchmark model
################################ ################################
def build_benchmark_model(configs, target_abi, enable_openmp, mace_lib_type): def build_benchmark_model(configs,
target_abi,
toolchain,
enable_openmp,
mace_lib_type):
library_name = configs[YAMLKeyword.library_name] library_name = configs[YAMLKeyword.library_name]
hexagon_mode = get_hexagon_mode(configs) hexagon_mode = get_hexagon_mode(configs)
...@@ -1459,6 +992,7 @@ def build_benchmark_model(configs, target_abi, enable_openmp, mace_lib_type): ...@@ -1459,6 +992,7 @@ def build_benchmark_model(configs, target_abi, enable_openmp, mace_lib_type):
sh_commands.bazel_build(benchmark_target, sh_commands.bazel_build(benchmark_target,
abi=target_abi, abi=target_abi,
toolchain=toolchain,
enable_openmp=enable_openmp, enable_openmp=enable_openmp,
enable_opencl=get_opencl_mode(configs), enable_opencl=get_opencl_mode(configs),
enable_quantize=get_quantize_mode(configs), enable_quantize=get_quantize_mode(configs),
...@@ -1475,133 +1009,34 @@ def build_benchmark_model(configs, target_abi, enable_openmp, mace_lib_type): ...@@ -1475,133 +1009,34 @@ def build_benchmark_model(configs, target_abi, enable_openmp, mace_lib_type):
sh.cp("-f", target_bin, build_tmp_binary_dir) sh.cp("-f", target_bin, build_tmp_binary_dir)
def bm_specific_target(flags, configs, target_abi, target_soc, serial_num):
# Benchmark every model listed in configs[YAMLKeyword.models] for one ABI
# on one target (the host, or the device identified by serial_num).
# For each model this checks that the model was converted, recreates its
# output directory, generates random input and calls
# sh_commands.benchmark_model once per runtime.
#
# flags: parsed command-line options; fields read here are mace_lib_type,
#   omp_num_threads, cpu_affinity_policy, gpu_perf_hint, gpu_priority_hint.
# configs: model configuration, indexed with YAMLKeyword keys.
# target_abi: ABI to benchmark; ABIType.host selects the host code paths.
# target_soc, serial_num: forwarded to the path helpers and sh_commands.
#
# NOTE(review): indentation is not preserved in this view, so the nesting
# described in these comments is inferred from statement order - confirm
# against the real file.
library_name = configs[YAMLKeyword.library_name]
embed_model_data = \
configs[YAMLKeyword.model_data_format] == ModelFormat.code
opencl_output_bin_path = ""
opencl_parameter_path = ""
link_dynamic = flags.mace_lib_type == MACELibType.dynamic
# The benchmark binary exists in a dynamic and a static flavour.
if link_dynamic:
bm_model_binary_name = BM_MODEL_DYNAMIC_NAME
else:
bm_model_binary_name = BM_MODEL_STATIC_NAME
build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi)
# OpenCL paths stay empty on the host or when no target_socs are set.
if configs[YAMLKeyword.target_socs] and target_abi != ABIType.host:
opencl_output_bin_path = get_opencl_binary_output_path(
library_name, target_abi, target_soc, serial_num
)
opencl_parameter_path = get_opencl_parameter_output_path(
library_name, target_abi, target_soc, serial_num
)
for model_name in configs[YAMLKeyword.models]:
check_model_converted(library_name, model_name,
configs[YAMLKeyword.model_graph_format],
configs[YAMLKeyword.model_data_format],
target_abi)
if target_abi == ABIType.host:
device_name = ABIType.host
else:
device_name = \
sh_commands.adb_get_device_name_by_serialno(serial_num)
MaceLogger.header(
StringFormatter.block(
"Benchmark model %s on %s" % (model_name, device_name)))
model_config = configs[YAMLKeyword.models][model_name]
model_runtime = model_config[YAMLKeyword.runtime]
subgraphs = model_config[YAMLKeyword.subgraphs]
# Without configured target_socs, or on the host, the model dirs are
# looked up without soc / serial number.
if not configs[YAMLKeyword.target_socs] or target_abi == ABIType.host:
model_output_base_dir, model_output_dir, mace_model_dir = \
get_build_model_dirs(library_name, model_name, target_abi,
None, None,
model_config[YAMLKeyword.model_file_path])
else:
model_output_base_dir, model_output_dir, mace_model_dir = \
get_build_model_dirs(library_name, model_name, target_abi,
target_soc, serial_num,
model_config[YAMLKeyword.model_file_path])
# Recreate the model output directory from scratch.
if os.path.exists(model_output_dir):
sh.rm("-rf", model_output_dir)
os.makedirs(model_output_dir)
if target_abi != ABIType.host:
sh_commands.clear_phone_data_dir(serial_num, PHONE_DATA_DIR)
# Random input data for the first subgraph.
sh_commands.gen_random_input(
model_output_dir,
subgraphs[0][YAMLKeyword.input_tensors],
subgraphs[0][YAMLKeyword.input_shapes],
subgraphs[0][YAMLKeyword.validation_inputs_data],
input_ranges=subgraphs[0][YAMLKeyword.input_ranges],
input_data_types=subgraphs[0][YAMLKeyword.input_data_types])
# The host only runs on cpu; a cpu_gpu model is run once per runtime.
runtime_list = []
if target_abi == ABIType.host:
runtime_list.extend([RuntimeType.cpu])
elif model_runtime == RuntimeType.cpu_gpu:
runtime_list.extend([RuntimeType.cpu, RuntimeType.gpu])
else:
runtime_list.extend([model_runtime])
for runtime in runtime_list:
device_type = parse_device_type(runtime)
sh_commands.benchmark_model(
abi=target_abi,
serialno=serial_num,
benchmark_binary_dir=build_tmp_binary_dir,
benchmark_binary_name=bm_model_binary_name,
vlog_level=0,
embed_model_data=embed_model_data,
model_output_dir=model_output_dir,
input_nodes=subgraphs[0][YAMLKeyword.input_tensors],
output_nodes=subgraphs[0][YAMLKeyword.output_tensors],
input_shapes=subgraphs[0][YAMLKeyword.input_shapes],
output_shapes=subgraphs[0][YAMLKeyword.output_shapes],
mace_model_dir=mace_model_dir,
model_tag=model_name,
device_type=device_type,
phone_data_dir=PHONE_DATA_DIR,
model_graph_format=configs[YAMLKeyword.model_graph_format],
omp_num_threads=flags.omp_num_threads,
cpu_affinity_policy=flags.cpu_affinity_policy,
gpu_perf_hint=flags.gpu_perf_hint,
gpu_priority_hint=flags.gpu_priority_hint,
opencl_binary_file=opencl_output_bin_path,
opencl_parameter_file=opencl_parameter_path,
libmace_dynamic_library_path=LIBMACE_DYNAMIC_PATH,
link_dynamic=link_dynamic)
def benchmark_model(flags): def benchmark_model(flags):
configs = format_model_config(flags) configs = format_model_config(flags)
clear_build_dirs(configs[YAMLKeyword.library_name]) clear_build_dirs(configs[YAMLKeyword.library_name])
target_socs = configs[YAMLKeyword.target_socs] target_socs = configs[YAMLKeyword.target_socs]
if not target_socs or ALL_SOC_TAG in target_socs: device_list = DeviceManager.list_devices(flags.device_yml)
target_socs = sh_commands.adb_get_all_socs() if target_socs and ALL_SOC_TAG not in target_socs:
device_list = [dev for dev in device_list
if dev[YAMLKeyword.target_socs].lower() in target_socs]
for target_abi in configs[YAMLKeyword.target_abis]: for target_abi in configs[YAMLKeyword.target_abis]:
# build benchmark_model binary # build benchmark_model binary
build_benchmark_model(configs, target_abi, for dev in device_list:
not flags.disable_openmp, if target_abi in dev[YAMLKeyword.target_abis]:
flags.mace_lib_type) toolchain = infer_toolchain(target_abi)
build_benchmark_model(configs,
if target_abi == ABIType.host: target_abi,
bm_specific_target(flags, configs, target_abi, None, None) toolchain,
else: not flags.disable_openmp,
for target_soc in target_socs: flags.mace_lib_type)
serial_nums = \ device = DeviceWrapper(dev)
sh_commands.get_target_socs_serialnos([target_soc]) with device.lock():
mace_check(serial_nums, device.bm_specific_target(flags, configs, target_abi)
ModuleName.BENCHMARK, else:
'There is no device with soc: ' + target_soc) six.print_('There is no abi %s with soc %s' %
for serial_num in serial_nums: (target_abi, dev[YAMLKeyword.target_socs]),
with sh_commands.device_lock(serial_num): file=sys.stderr)
bm_specific_target(flags, configs, target_abi,
target_soc, serial_num)
################################ ################################
...@@ -1698,7 +1133,12 @@ def parse_args(): ...@@ -1698,7 +1133,12 @@ def parse_args():
type=int, type=int,
default=DefaultValues.gpu_priority_hint, default=DefaultValues.gpu_priority_hint,
help="0:DEFAULT/1:LOW/2:NORMAL/3:HIGH") help="0:DEFAULT/1:LOW/2:NORMAL/3:HIGH")
run_bm_parent_parser.add_argument(
"--device_yml",
type=str,
default='',
help='embedded linux device config yml file'
)
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers() subparsers = parser.add_subparsers()
convert = subparsers.add_parser( convert = subparsers.add_parser(
......
# Copyright 2018 Xiaomi, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import socket
import subprocess
import time
import six
import sh
import yaml
import common
from common import *
import sh_commands
class DeviceWrapper:
allow_scheme = ('ssh', 'adb')
def __init__(self, device_dict):
    """
    Create a wrapper from a device description and select its data dirs.

    :param device_dict: dict describing the device. Every key must be an
        attribute name of YAMLKeyword; each entry becomes an instance
        attribute (this method reads system, username and address).
    :raises KeyError: when device_dict holds a key unknown to YAMLKeyword.
    :raises sh.ErrorReturnCode: when the ssh probe of an arm_linux device
        fails.
    """
    supplied_keys = set(device_dict.keys())
    unknown_keys = supplied_keys.difference(YAMLKeyword.__dict__.keys())
    if unknown_keys:
        six.print_('Wrong key detected: ')
        six.print_(unknown_keys)
        raise KeyError(str(unknown_keys))

    # Expose every entry of the description as an attribute.
    self.__dict__.update(device_dict)

    system = self.system
    if system == SystemType.android:
        self.data_dir = PHONE_DATA_DIR
    elif system == SystemType.arm_linux:
        login = '{}@{}'.format(self.username, self.address)
        # Probe the connection before anything else uses the device.
        try:
            sh.ssh('-q', login, 'exit')
        except sh.ErrorReturnCode as e:
            six.print_('device connect failed, '
                       'please check your authentication')
            raise e
        self.data_dir = DEVICE_DATA_DIR
    else:
        # Any other system gets no data_dir / interior_dir attributes.
        return
    self.interior_dir = self.data_dir + '/interior'
##################
# internal use #
##################
def exec_command(self, command, *args, **kwargs):
    """Run a shell command on the device and return the command result.

    Android devices are reached with `adb -s <address> shell`, arm_linux
    devices with `ssh <username>@<address>`. Extra positional and keyword
    arguments are forwarded unchanged to the underlying `sh` command.

    :param command: command string to execute on the device.
    :return: whatever `sh.adb` / `sh.ssh` returns, so callers can inspect
        the output or exit code; None for any other system type, in which
        case nothing is executed.
    """
    if self.system == SystemType.android:
        return sh.adb('-s', self.address, 'shell',
                      command, *args, **kwargs)
    if self.system == SystemType.arm_linux:
        return sh.ssh('{}@{}'.format(self.username, self.address),
                      command, *args, **kwargs)
    return None
#####################
# public interface #
#####################
def is_lock(self):
    """Return sh_commands.is_device_locked() for this device's address."""
    device_address = self.address
    return sh_commands.is_device_locked(device_address)
def lock(self):
    """Return sh_commands.device_lock() for this device's address.

    Callers in this file use the result as a context manager
    (`with device.lock():`).
    """
    device_address = self.address
    return sh_commands.device_lock(device_address)
def clear_data_dir(self):
    """Clear the device data directory.

    Android devices go through sh_commands.clear_phone_data_dir with
    PHONE_DATA_DIR; arm_linux devices run `rm -rf` on self.data_dir.
    Nothing happens for any other system type.
    """
    if self.system == SystemType.android:
        sh_commands.clear_phone_data_dir(self.address, PHONE_DATA_DIR)
        return
    if self.system == SystemType.arm_linux:
        remove_cmd = 'rm -rf {}'.format(self.data_dir)
        self.exec_command(remove_cmd)
def pull_from_data_dir(self, filename, dst_path):
    """Pull `filename` from the device data directory into `dst_path`.

    The remote directory is PHONE_DATA_DIR for android devices and
    DEVICE_DATA_DIR for arm_linux devices; other system types are ignored.
    """
    if self.system == SystemType.android:
        remote_dir = PHONE_DATA_DIR
    elif self.system == SystemType.arm_linux:
        remote_dir = DEVICE_DATA_DIR
    else:
        return
    self.pull(remote_dir, filename, dst_path)
def create_internal_storage_dir(self):
    """Create "<data_dir>/interior/" on the device and return that path.

    Android devices use sh_commands.create_internal_storage_dir; arm_linux
    devices run `mkdir -p` through exec_command. The path is returned even
    when the system type matches neither case.
    """
    storage_dir = '{}/interior/'.format(self.data_dir)
    if self.system == SystemType.android:
        sh_commands.create_internal_storage_dir(self.address, storage_dir)
    elif self.system == SystemType.arm_linux:
        mkdir_cmd = 'mkdir -p {}'.format(storage_dir)
        self.exec_command(mkdir_cmd)
    return storage_dir
def rm(self, file):
    """Run `rm -rf` on `file` on the device.

    Android devices call `adb shell rm -rf` directly; arm_linux devices go
    through exec_command. Both pass `_fg=True` to the `sh` command.
    """
    if self.system == SystemType.android:
        adb_args = ('-s', self.address, 'shell', 'rm', '-rf', file)
        sh.adb(*adb_args, _fg=True)
    elif self.system == SystemType.arm_linux:
        remove_cmd = 'rm -rf {}'.format(file)
        self.exec_command(remove_cmd, _fg=True)
def push(self, src_path, dst_path):
    """Copy a local file to the device.

    Android devices use sh_commands.adb_push; arm_linux devices use scp to
    `<username>@<address>:<dst_path>`.

    :param src_path: local path; checked with mace_check to exist.
    :param dst_path: destination path on the device.
    :raises sh.ErrorReturnCode: when scp exits with a non-zero status.
    """
    mace_check(os.path.exists(src_path), "Device",
               '{} not found'.format(src_path))
    six.print_("Push %s to %s" % (src_path, dst_path))
    if self.system == SystemType.android:
        sh_commands.adb_push(src_path, dst_path, self.address)
    elif self.system == SystemType.arm_linux:
        remote_target = '{}@{}:{}'.format(self.username,
                                          self.address,
                                          dst_path)
        try:
            sh.scp(src_path, remote_target)
        except sh.ErrorReturnCode as e:
            # Catch every non-zero exit status, not only 1, so connection
            # failures are reported as well; this matches the exception
            # class used for the ssh probe in __init__.
            six.print_('Push Failed !', e, file=sys.stderr)
            raise
def pull(self, src_path, file_name, dst_path='.'):
    """Copy `file_name` from `src_path` on the device into `dst_path`.

    The local directory is created when missing, and an existing local
    copy of the file is removed first. Android devices use
    sh_commands.adb_pull; arm_linux devices use `scp -r`.

    :param src_path: directory on the device that holds the file.
    :param file_name: name of the file inside `src_path`.
    :param dst_path: local destination directory.
    :raises sh.ErrorReturnCode: when scp exits with a non-zero status.
    """
    if not os.path.exists(dst_path):
        sh.mkdir("-p", dst_path)
    src_file = "%s/%s" % (src_path, file_name)
    dst_file = "%s/%s" % (dst_path, file_name)
    if os.path.exists(dst_file):
        sh.rm('-f', dst_file)
    six.print_("Pull %s to %s" % (src_path, dst_path))
    if self.system == SystemType.android:
        sh_commands.adb_pull(
            src_file, dst_file, self.address)
    elif self.system == SystemType.arm_linux:
        remote_source = '%s@%s:%s' % (self.username,
                                      self.address,
                                      src_file)
        try:
            sh.scp('-r', remote_source, dst_file)
        except sh.ErrorReturnCode as e:
            # Catch every non-zero exit status, not only 1, and print the
            # error details the same way push() does.
            six.print_("Pull Failed !", e, file=sys.stderr)
            raise
def tuning_run(self,
abi,
target_dir,
target_name,
vlog_level,
embed_model_data,
model_output_dir,
input_nodes,
output_nodes,
input_shapes,
output_shapes,
mace_model_dir,
model_tag,
device_type,
running_round,
restart_round,
limit_opencl_kernel_time,
tuning,
out_of_range_check,
model_graph_format,
opencl_binary_file,
opencl_parameter_file,
libmace_dynamic_library_path,
omp_num_threads=-1,
cpu_affinity_policy=1,
gpu_perf_hint=3,
gpu_priority_hint=3,
input_file_name='model_input',
output_file_name='model_out',
runtime_failure_ratio=0.0,
address_sanitizer=False,
link_dynamic=False
):
# Run the binary <target_dir>/<target_name> for one model, either locally
# (SystemType.host, via subprocess) or on the device (android / arm_linux:
# push all inputs, write the command line to a script, execute it with
# exec_command). The captured output is stored in self.stdout and returned.
# Any other system type prints a message and raises Exception.
#
# NOTE(review): indentation is not preserved in this view, so the nesting
# described in these comments is inferred from statement order - confirm
# against the real file.
six.print_("* Run '%s' with round=%s, restart_round=%s, tuning=%s, "
"out_of_range_check=%s, omp_num_threads=%s, "
"cpu_affinity_policy=%s, gpu_perf_hint=%s, "
"gpu_priority_hint=%s" %
(model_tag, running_round, restart_round, str(tuning),
str(out_of_range_check), omp_num_threads,
cpu_affinity_policy, gpu_perf_hint, gpu_priority_hint))
# The graph file path is only set when the graph is shipped as a file.
mace_model_path = ""
if model_graph_format == ModelFormat.file:
mace_model_path = "%s/%s.pb" % (mace_model_dir, model_tag)
if self.system == SystemType.host:
# Host run: the binary is started directly, with the directory of the
# dynamic library on LD_LIBRARY_PATH.
libmace_dynamic_lib_path = \
os.path.dirname(libmace_dynamic_library_path)
p = subprocess.Popen(
[
"env",
"LD_LIBRARY_PATH=%s" % libmace_dynamic_lib_path,
"MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
"MACE_RUNTIME_FAILURE_RATIO=%f" % runtime_failure_ratio,
"%s/%s" % (target_dir, target_name),
"--model_name=%s" % model_tag,
"--input_node=%s" % ",".join(input_nodes),
"--output_node=%s" % ",".join(output_nodes),
"--input_shape=%s" % ":".join(input_shapes),
"--output_shape=%s" % ":".join(output_shapes),
"--input_file=%s/%s" % (model_output_dir,
input_file_name),
"--output_file=%s/%s" % (model_output_dir,
output_file_name),
"--model_data_file=%s/%s.data" % (mace_model_dir,
model_tag),
"--device=%s" % device_type,
"--round=%s" % running_round,
"--restart_round=%s" % restart_round,
"--omp_num_threads=%s" % omp_num_threads,
"--cpu_affinity_policy=%s" % cpu_affinity_policy,
"--gpu_perf_hint=%s" % gpu_perf_hint,
"--gpu_priority_hint=%s" % gpu_priority_hint,
"--model_file=%s" % mace_model_path,
],
stderr=subprocess.PIPE,
stdout=subprocess.PIPE)
out, err = p.communicate()
# stderr is placed before stdout in the stored output.
# NOTE(review): no text mode is requested, so on Python 3 these values are
# presumably bytes while the device branch stores str - confirm.
self.stdout = err + out
six.print_(self.stdout)
six.print_("Running finished!\n")
elif self.system in [SystemType.android, SystemType.arm_linux]:
# Device run: recreate the data dir, then push every needed file.
self.rm(self.data_dir)
self.exec_command('mkdir -p {}'.format(self.data_dir))
internal_storage_dir = self.create_internal_storage_dir()
# One input file per input node.
for input_name in input_nodes:
formatted_name = common.formatted_file_name(input_file_name,
input_name)
self.push("%s/%s" % (model_output_dir, formatted_name),
self.data_dir)
# The sanitizer runtime library is only pushed to android devices.
if self.system == SystemType.android and address_sanitizer:
self.push(sh_commands.find_asan_rt_library(abi),
self.data_dir)
# Model weights are pushed as a .data file unless embedded in the code.
if not embed_model_data:
model_data_path = "%s/%s.data" % (mace_model_dir, model_tag)
mace_check(os.path.exists(model_data_path), "Device",
'model data file not found,'
' please convert model first')
self.push(model_data_path, self.data_dir)
# OpenCL files are pushed only for GPU runs and only if they exist locally.
if device_type == common.DeviceType.GPU:
if os.path.exists(opencl_binary_file):
self.push(opencl_binary_file, self.data_dir)
if os.path.exists(opencl_parameter_file):
self.push(opencl_parameter_file, self.data_dir)
# NOTE(review): this push appears to run for every device type, not only
# inside the GPU branch above - confirm.
self.push("third_party/nnlib/libhexagon_controller.so",
self.data_dir)
mace_model_phone_path = ""
if model_graph_format == ModelFormat.file:
mace_model_phone_path = "%s/%s.pb" % (self.data_dir,
model_tag)
self.push(mace_model_path, mace_model_phone_path)
if link_dynamic:
self.push(libmace_dynamic_library_path, self.data_dir)
if self.system == SystemType.android:
sh_commands.push_depended_so_libs(
libmace_dynamic_library_path, abi, self.data_dir,
self.address)
self.push("%s/%s" % (target_dir, target_name), self.data_dir)
# Output of the remote command is collected into stdout_buff.
stdout_buff = []
process_output = sh_commands.make_output_processor(stdout_buff)
# Environment assignments come first, then the binary and its options.
cmd = [
"LD_LIBRARY_PATH=%s" % self.data_dir,
"MACE_TUNING=%s" % int(tuning),
"MACE_OUT_OF_RANGE_CHECK=%s" % int(out_of_range_check),
"MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
"MACE_RUN_PARAMETER_PATH=%s/mace_run.config" % self.data_dir,
"MACE_INTERNAL_STORAGE_PATH=%s" % internal_storage_dir,
"MACE_LIMIT_OPENCL_KERNEL_TIME=%s" % limit_opencl_kernel_time,
"MACE_RUNTIME_FAILURE_RATIO=%f" % runtime_failure_ratio,
]
if self.system == SystemType.android and address_sanitizer:
cmd.extend([
"LD_PRELOAD=%s/%s" %
(self.data_dir,
sh_commands.asan_rt_library_names(abi))
])
cmd.extend([
"%s/%s" % (self.data_dir, target_name),
"--model_name=%s" % model_tag,
"--input_node=%s" % ",".join(input_nodes),
"--output_node=%s" % ",".join(output_nodes),
"--input_shape=%s" % ":".join(input_shapes),
"--output_shape=%s" % ":".join(output_shapes),
"--input_file=%s/%s" % (self.data_dir, input_file_name),
"--output_file=%s/%s" % (self.data_dir, output_file_name),
"--model_data_file=%s/%s.data" % (self.data_dir, model_tag),
"--device=%s" % device_type,
"--round=%s" % running_round,
"--restart_round=%s" % restart_round,
"--omp_num_threads=%s" % omp_num_threads,
"--cpu_affinity_policy=%s" % cpu_affinity_policy,
"--gpu_perf_hint=%s" % gpu_perf_hint,
"--gpu_priority_hint=%s" % gpu_priority_hint,
"--model_file=%s" % mace_model_phone_path,
"--opencl_binary_file=%s/%s" %
(self.data_dir, os.path.basename(opencl_binary_file)),
"--opencl_parameter_file=%s/%s" %
(self.data_dir, os.path.basename(opencl_parameter_file)),
])
cmd = ' '.join(cmd)
# The command line is written to a local file under /tmp, pushed to the
# device and executed there with `sh`.
cmd_file_name = "%s-%s-%s" % ('cmd_file',
model_tag,
str(time.time()))
cmd_file = "%s/%s" % (self.data_dir, cmd_file_name)
tmp_cmd_file = "%s/%s" % ('/tmp', cmd_file_name)
with open(tmp_cmd_file, 'w') as file:
file.write(cmd)
# NOTE(review): if this push raises, the os.remove below is skipped and
# the local temp file is left behind - consider try/finally.
self.push(tmp_cmd_file, cmd_file)
os.remove(tmp_cmd_file)
self.exec_command('sh {}'.format(cmd_file),
_tty_in=True,
_out=process_output,
_err_to_out=True)
self.stdout = "".join(stdout_buff)
# The collected output is checked with sh_commands.stdout_success.
if not sh_commands.stdout_success(self.stdout):
common.MaceLogger.error("Mace Run", "Mace run failed.")
six.print_("Running finished!\n")
else:
six.print_('Unsupported system %s' % self.system, file=sys.stderr)
raise Exception('Wrong device')
return self.stdout
def tuning(self, library_name, model_name, model_config,
           model_graph_format, model_data_format,
           target_abi, mace_lib_type):
    """Tune one model's OpenCL kernels on this device and fetch the results.

    Random input is generated for the model's first subgraph, the mace_run
    binary is executed once through ``tuning_run`` with ``tuning=True`` on
    the GPU device type, and afterwards the compiled OpenCL binary and the
    tuned parameter file are pulled into
    ``<model_output_dir>/<BUILD_TMP_OPENCL_BIN_DIR>`` on the host.
    """
    six.print_('* Tuning, it may take some time')

    binary_dir = get_build_binary_dir(library_name, target_abi)
    use_dynamic_lib = mace_lib_type == MACELibType.dynamic
    if use_dynamic_lib:
        runner_name = MACE_RUN_DYNAMIC_NAME
    else:
        runner_name = MACE_RUN_STATIC_NAME
    weights_in_code = model_data_format == ModelFormat.code

    # build for specified soc
    build_dirs = get_build_model_dirs(
        library_name, model_name, target_abi, self,
        model_config[YAMLKeyword.model_file_path])
    _, model_output_dir, mace_model_dir = build_dirs

    self.clear_data_dir()

    # Only the first subgraph describes the inputs/outputs used here.
    first_graph = model_config[YAMLKeyword.subgraphs][0]

    # generate input data
    sh_commands.gen_random_input(
        model_output_dir,
        first_graph[YAMLKeyword.input_tensors],
        first_graph[YAMLKeyword.input_shapes],
        first_graph[YAMLKeyword.validation_inputs_data],
        input_ranges=first_graph[YAMLKeyword.input_ranges],
        input_data_types=first_graph[YAMLKeyword.input_data_types])

    run_options = dict(
        abi=target_abi,
        target_dir=binary_dir,
        target_name=runner_name,
        vlog_level=0,
        embed_model_data=weights_in_code,
        model_output_dir=model_output_dir,
        input_nodes=first_graph[YAMLKeyword.input_tensors],
        output_nodes=first_graph[YAMLKeyword.output_tensors],
        input_shapes=first_graph[YAMLKeyword.input_shapes],
        output_shapes=first_graph[YAMLKeyword.output_shapes],
        mace_model_dir=mace_model_dir,
        model_tag=model_name,
        device_type=DeviceType.GPU,
        running_round=0,
        restart_round=1,
        limit_opencl_kernel_time=model_config[
            YAMLKeyword.limit_opencl_kernel_time],
        tuning=True,
        out_of_range_check=False,
        model_graph_format=model_graph_format,
        opencl_binary_file='',
        opencl_parameter_file='',
        libmace_dynamic_library_path=LIBMACE_DYNAMIC_PATH,
        link_dynamic=use_dynamic_lib,
    )
    self.tuning_run(**run_options)

    opencl_host_dir = '{}/{}'.format(model_output_dir,
                                     BUILD_TMP_OPENCL_BIN_DIR)
    # pull opencl library
    self.pull(self.interior_dir, CL_COMPILED_BINARY_FILE_NAME,
              opencl_host_dir)
    # pull opencl parameter
    self.pull_from_data_dir(CL_TUNED_PARAMETER_FILE_NAME, opencl_host_dir)

    six.print_('Tuning done! \n')
# Run every model listed in `configs` on this device for a single ABI.
# Per model: optionally tune OpenCL kernels, generate random input, execute
# the run binary once per runtime, then optionally validate the outputs and
# append a row to the CSV report. When any model was tuned, the per-model
# OpenCL binaries/parameters are merged at the end.
def run_specify_abi(self, flags, configs, target_abi):
# Nothing to do when this device does not support the requested ABI.
if target_abi not in self.target_abis:
six.print_('There is no device with soc: %s abi: %s' %
(self.target_socs, target_abi))
return
library_name = configs[YAMLKeyword.library_name]
mace_lib_type = flags.mace_lib_type
embed_model_data = \
configs[YAMLKeyword.model_data_format] == ModelFormat.code
build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi)
# get target name for run
if flags.example:
if mace_lib_type == MACELibType.static:
target_name = EXAMPLE_STATIC_NAME
else:
target_name = EXAMPLE_DYNAMIC_NAME
else:
if mace_lib_type == MACELibType.static:
target_name = MACE_RUN_STATIC_NAME
else:
target_name = MACE_RUN_DYNAMIC_NAME
link_dynamic = mace_lib_type == MACELibType.dynamic
# Output dirs of the models that were tuned; consumed by the merge step below.
model_output_dirs = []
for model_name in configs[YAMLKeyword.models]:
check_model_converted(library_name, model_name,
configs[YAMLKeyword.model_graph_format],
configs[YAMLKeyword.model_data_format],
target_abi)
if target_abi != ABIType.host:
self.clear_data_dir()
MaceLogger.header(
StringFormatter.block(
'Run model {} on {}'.format(model_name, self.device_name)))
model_config = configs[YAMLKeyword.models][model_name]
model_runtime = model_config[YAMLKeyword.runtime]
subgraphs = model_config[YAMLKeyword.subgraphs]
# NOTE(review): both branches below make the identical
# get_build_model_dirs call, so this condition currently has no effect.
if not configs[YAMLKeyword.target_socs] \
or target_abi == ABIType.host:
model_output_base_dir, model_output_dir, mace_model_dir = \
get_build_model_dirs(
library_name, model_name, target_abi, self,
model_config[YAMLKeyword.model_file_path])
else:
model_output_base_dir, model_output_dir, mace_model_dir = \
get_build_model_dirs(
library_name, model_name, target_abi, self,
model_config[YAMLKeyword.model_file_path])
# clear temp model output dir
if os.path.exists(model_output_dir):
sh.rm('-rf', model_output_dir)
os.makedirs(model_output_dir)
is_tuned = False
model_opencl_output_bin_path = ''
model_opencl_parameter_path = ''
# Tune only for non-ASan, non-example, non-host runs of gpu / cpu+gpu
# models, when target SoCs are configured and tuning is not disabled.
if not flags.address_sanitizer \
and not flags.example \
and target_abi != ABIType.host \
and configs[YAMLKeyword.target_socs] \
and self.target_socs \
and model_runtime in [RuntimeType.gpu,
RuntimeType.cpu_gpu] \
and not flags.disable_tuning:
self.tuning(library_name, model_name, model_config,
configs[YAMLKeyword.model_graph_format],
configs[YAMLKeyword.model_data_format],
target_abi, mace_lib_type)
model_output_dirs.append(model_output_dir)
# After tuning, run with the per-model OpenCL files that tuning() pulled.
model_opencl_output_bin_path = \
'{}/{}/{}'.format(model_output_dir,
BUILD_TMP_OPENCL_BIN_DIR,
CL_COMPILED_BINARY_FILE_NAME)
model_opencl_parameter_path = \
'{}/{}/{}'.format(model_output_dir,
BUILD_TMP_OPENCL_BIN_DIR,
CL_TUNED_PARAMETER_FILE_NAME)
self.clear_data_dir()
is_tuned = True
# No tuning in this run: use the library-level OpenCL output paths instead.
elif target_abi != ABIType.host and self.target_socs:
model_opencl_output_bin_path = get_opencl_binary_output_path(
library_name, target_abi, self
)
model_opencl_parameter_path = get_opencl_parameter_output_path(
library_name, target_abi, self
)
sh_commands.gen_random_input(
model_output_dir,
subgraphs[0][YAMLKeyword.input_tensors],
subgraphs[0][YAMLKeyword.input_shapes],
subgraphs[0][YAMLKeyword.validation_inputs_data],
input_ranges=subgraphs[0][YAMLKeyword.input_ranges],
input_data_types=subgraphs[0][YAMLKeyword.input_data_types]
)
# Host always runs on CPU; a cpu+gpu model is run once per runtime.
runtime_list = []
if target_abi == ABIType.host:
runtime_list.append(RuntimeType.cpu)
elif model_runtime == RuntimeType.cpu_gpu:
runtime_list.extend([RuntimeType.cpu, RuntimeType.gpu])
else:
runtime_list.append(model_runtime)
for runtime in runtime_list:
device_type = parse_device_type(runtime)
# run for specified soc
# NOTE(review): run_output is not read again in this function.
run_output = self.tuning_run(
abi=target_abi,
target_dir=build_tmp_binary_dir,
target_name=target_name,
vlog_level=flags.vlog_level,
embed_model_data=embed_model_data,
model_output_dir=model_output_dir,
input_nodes=subgraphs[0][YAMLKeyword.input_tensors],
output_nodes=subgraphs[0][YAMLKeyword.output_tensors],
input_shapes=subgraphs[0][YAMLKeyword.input_shapes],
output_shapes=subgraphs[0][YAMLKeyword.output_shapes],
mace_model_dir=mace_model_dir,
model_tag=model_name,
device_type=device_type,
running_round=flags.round,
restart_round=flags.restart_round,
limit_opencl_kernel_time=model_config[
YAMLKeyword.limit_opencl_kernel_time],
tuning=False,
out_of_range_check=flags.gpu_out_of_range_check,
model_graph_format=configs[YAMLKeyword.model_graph_format],
omp_num_threads=flags.omp_num_threads,
cpu_affinity_policy=flags.cpu_affinity_policy,
gpu_perf_hint=flags.gpu_perf_hint,
gpu_priority_hint=flags.gpu_priority_hint,
runtime_failure_ratio=flags.runtime_failure_ratio,
address_sanitizer=flags.address_sanitizer,
opencl_binary_file=model_opencl_output_bin_path,
opencl_parameter_file=model_opencl_parameter_path,
libmace_dynamic_library_path=LIBMACE_DYNAMIC_PATH,
link_dynamic=link_dynamic
)
if flags.validate:
model_file_path, weight_file_path = get_model_files(
model_config[YAMLKeyword.model_file_path],
model_config[YAMLKeyword.model_sha256_checksum],
BUILD_DOWNLOADS_DIR,
model_config[YAMLKeyword.weight_file_path],
model_config[YAMLKeyword.weight_sha256_checksum]
)
# Quantized models look up the '<device_type>_QUANTIZE' threshold entry.
validate_type = device_type
if model_config[YAMLKeyword.quantize] == 1:
validate_type = device_type + '_QUANTIZE'
sh_commands.validate_model(
abi=target_abi,
device=self,
model_file_path=model_file_path,
weight_file_path=weight_file_path,
platform=model_config[YAMLKeyword.platform],
device_type=device_type,
input_nodes=subgraphs[0][YAMLKeyword.input_tensors],
output_nodes=subgraphs[0][YAMLKeyword.output_tensors],
input_shapes=subgraphs[0][YAMLKeyword.input_shapes],
output_shapes=subgraphs[0][YAMLKeyword.output_shapes],
model_output_dir=model_output_dir,
input_data_types=subgraphs[0][
YAMLKeyword.input_data_types],
caffe_env=flags.caffe_env,
validation_threshold=subgraphs[0][
YAMLKeyword.validation_threshold][validate_type]
)
if flags.report and flags.round > 0:
# Reported as tuned only when tuning happened and this run used the GPU.
tuned = is_tuned and device_type == DeviceType.GPU
self.report_run_statistics(
target_abi=target_abi,
model_name=model_name,
device_type=device_type,
output_dir=flags.report_dir,
tuned=tuned
)
# Only entered when at least one model was tuned above.
if model_output_dirs:
opencl_output_bin_path = get_opencl_binary_output_path(
library_name, target_abi, self
)
opencl_parameter_bin_path = get_opencl_parameter_output_path(
library_name, target_abi, self
)
# clear opencl output dir
if os.path.exists(opencl_output_bin_path):
sh.rm('-rf', opencl_output_bin_path)
if os.path.exists(opencl_parameter_bin_path):
sh.rm('-rf', opencl_parameter_bin_path)
# merge all model's opencl binaries together
sh_commands.merge_opencl_binaries(
model_output_dirs, CL_COMPILED_BINARY_FILE_NAME,
opencl_output_bin_path
)
# merge all model's opencl parameter together
sh_commands.merge_opencl_parameters(
model_output_dirs, CL_TUNED_PARAMETER_FILE_NAME,
opencl_parameter_bin_path
)
def report_run_statistics(self,
                          target_abi,
                          model_name,
                          device_type,
                          output_dir,
                          tuned):
    """Append one row of timing statistics to ``<output_dir>/report.csv``.

    The timings come from the first line of ``self.stdout`` that has exactly
    four whitespace-separated fields and whose first field starts with
    ``time``; fields 2-4 are written as init, warmup and run_avg. When no
    such line exists all three values are written as ``0``. The CSV header
    is written first if the report file does not exist yet.
    """
    timings = [0, 0, 0]
    for raw_line in self.stdout.split('\n'):
        fields = raw_line.strip().split()
        if len(fields) != 4 or not fields[0].startswith('time'):
            continue
        timings = [str(float(value)) for value in fields[1:]]
        break

    report_path = output_dir + '/report.csv'
    if not os.path.exists(report_path):
        with open(report_path, 'w') as report:
            report.write('model_name,device_name,soc,abi,runtime,'
                         'init(ms),warmup(ms),run_avg(ms),tuned\n')

    init_ms, warmup_ms, run_avg_ms = timings
    row = '{},{},{},{},{},{},{},{},{}\n'.format(
        model_name, self.device_name, self.target_socs, target_abi,
        device_type, init_ms, warmup_ms, run_avg_ms, tuned)
    with open(report_path, 'a') as report:
        report.write(row)
def benchmark_model(self,
                    abi,
                    benchmark_binary_dir,
                    benchmark_binary_name,
                    vlog_level,
                    embed_model_data,
                    model_output_dir,
                    mace_model_dir,
                    input_nodes,
                    output_nodes,
                    input_shapes,
                    output_shapes,
                    model_tag,
                    device_type,
                    model_graph_format,
                    opencl_binary_file,
                    opencl_parameter_file,
                    libmace_dynamic_library_path,
                    omp_num_threads=-1,
                    cpu_affinity_policy=1,
                    gpu_perf_hint=3,
                    gpu_priority_hint=3,
                    input_file_name='model_input',
                    link_dynamic=False):
    """Run the benchmark binary for one model on the host or on this device.

    For ``abi == ABIType.host`` the binary is started locally through
    ``subprocess`` and waited for. For android / arm_linux devices the
    inputs, model files, optional OpenCL files, optional dynamic library and
    the binary are pushed to ``self.data_dir``; the command line is written
    to a command file that is pushed and executed with ``sh`` over adb or
    ssh, and the command file is removed afterwards.

    The host branch uses the same flag spellings as the device branch
    (``--output_shape`` and ``--cpu_affinity_policy``).
    """
    six.print_('* Benchmark for %s' % model_tag)
    mace_model_path = ''
    if model_graph_format == ModelFormat.file:
        mace_model_path = '%s/%s.pb' % (mace_model_dir, model_tag)
    if abi == ABIType.host:
        libmace_dynamic_lib_dir_path = \
            os.path.dirname(libmace_dynamic_library_path)
        p = subprocess.Popen(
            [
                'env',
                'LD_LIBRARY_PATH=%s' % libmace_dynamic_lib_dir_path,
                'MACE_CPP_MIN_VLOG_LEVEL=%s' % vlog_level,
                '%s/%s' % (benchmark_binary_dir, benchmark_binary_name),
                '--model_name=%s' % model_tag,
                '--input_node=%s' % ','.join(input_nodes),
                '--output_node=%s' % ','.join(output_nodes),
                '--input_shape=%s' % ':'.join(input_shapes),
                # was '--output_shapes', which the device branch never uses
                '--output_shape=%s' % ':'.join(output_shapes),
                '--input_file=%s/%s' % (model_output_dir, input_file_name),
                '--model_data_file=%s/%s.data' % (mace_model_dir,
                                                  model_tag),
                '--device=%s' % device_type,
                '--omp_num_threads=%s' % omp_num_threads,
                # was misspelled '--cpu_addinity_policy'
                '--cpu_affinity_policy=%s' % cpu_affinity_policy,
                '--gpu_perf_hint=%s' % gpu_perf_hint,
                '--gpu_priority_hint=%s' % gpu_priority_hint,
                '--model_file=%s' % mace_model_path
            ])
        p.wait()
    elif self.system in [SystemType.android, SystemType.arm_linux]:
        self.exec_command('mkdir -p %s' % self.data_dir)
        internal_storage_dir = self.create_internal_storage_dir()
        # one input file per input node
        for input_name in input_nodes:
            formatted_name = formatted_file_name(input_file_name,
                                                 input_name)
            self.push('%s/%s' % (model_output_dir, formatted_name),
                      self.data_dir)
        if not embed_model_data:
            self.push('%s/%s.data' % (mace_model_dir, model_tag),
                      self.data_dir)
        if device_type == common.DeviceType.GPU:
            if os.path.exists(opencl_binary_file):
                self.push(opencl_binary_file, self.data_dir)
            if os.path.exists(opencl_parameter_file):
                self.push(opencl_parameter_file, self.data_dir)
        mace_model_device_path = ''
        if model_graph_format == ModelFormat.file:
            mace_model_device_path = '%s/%s.pb' % \
                                     (self.data_dir, model_tag)
            self.push(mace_model_path, mace_model_device_path)
        if link_dynamic:
            self.push(libmace_dynamic_library_path, self.data_dir)
            if self.system == SystemType.android:
                sh_commands.push_depended_so_libs(
                    libmace_dynamic_library_path, abi, self.data_dir,
                    self.address)
        # replace any binary left over from a previous run
        self.rm('%s/%s' % (self.data_dir, benchmark_binary_name))
        self.push('%s/%s' % (benchmark_binary_dir, benchmark_binary_name),
                  self.data_dir)

        cmd = [
            'LD_LIBRARY_PATH=%s' % self.data_dir,
            'MACE_CPP_MIN_VLOG_LEVEL=%s' % vlog_level,
            'MACE_RUN_PARAMETER_PATH=%s/mace_run.config' % self.data_dir,
            'MACE_INTERNAL_STORAGE_PATH=%s' % internal_storage_dir,
            'MACE_OPENCL_PROFILING=1',
            '%s/%s' % (self.data_dir, benchmark_binary_name),
            '--model_name=%s' % model_tag,
            '--input_node=%s' % ','.join(input_nodes),
            '--output_node=%s' % ','.join(output_nodes),
            '--input_shape=%s' % ':'.join(input_shapes),
            '--output_shape=%s' % ':'.join(output_shapes),
            '--input_file=%s/%s' % (self.data_dir, input_file_name),
            '--model_data_file=%s/%s.data' % (self.data_dir, model_tag),
            '--device=%s' % device_type,
            '--omp_num_threads=%s' % omp_num_threads,
            '--cpu_affinity_policy=%s' % cpu_affinity_policy,
            '--gpu_perf_hint=%s' % gpu_perf_hint,
            '--gpu_priority_hint=%s' % gpu_priority_hint,
            '--model_file=%s' % mace_model_device_path,
            '--opencl_binary_file=%s/%s' %
            (self.data_dir, os.path.basename(opencl_binary_file)),
            '--opencl_parameter_file=%s/%s' %
            (self.data_dir, os.path.basename(opencl_parameter_file))
        ]
        cmd = ' '.join(cmd)
        # The command line is shipped as a file and run with `sh` on the
        # device; the timestamp keeps the file name unique per invocation.
        cmd_file_name = '%s-%s-%s' % \
                        ('cmd_file', model_tag, str(time.time()))
        cmd_file_path = '%s/%s' % (self.data_dir, cmd_file_name)
        tmp_cmd_file = '%s/%s' % ('/tmp', cmd_file_name)
        with open(tmp_cmd_file, 'w') as f:
            f.write(cmd)
        self.push(tmp_cmd_file, cmd_file_path)
        os.remove(tmp_cmd_file)

        if self.system == SystemType.android:
            sh.adb('-s', self.address, 'shell', 'sh', cmd_file_path,
                   _fg=True)
        elif self.system == SystemType.arm_linux:
            sh.ssh('%s@%s' % (self.username, self.address),
                   'sh', cmd_file_path, _fg=True)
        self.rm(cmd_file_path)
    six.print_('Benchmark done! \n')
def bm_specific_target(self, flags, configs, target_abi):
    """Benchmark every model in ``configs`` on this device for one ABI.

    For each model the temporary output directory is recreated, the device
    data dir is cleared (non-host only), random input is generated for the
    first subgraph and ``benchmark_model`` is called once per runtime.

    Host targets always benchmark on CPU. A ``cpu_gpu`` model is benchmarked
    on CPU and then on GPU, the same expansion ``run_specify_abi`` uses; the
    composite ``cpu_gpu`` value itself is not passed to ``parse_device_type``.
    """
    library_name = configs[YAMLKeyword.library_name]
    embed_model_data = \
        configs[YAMLKeyword.model_data_format] == ModelFormat.code
    opencl_output_bin_path = ''
    opencl_parameter_path = ''
    link_dynamic = flags.mace_lib_type == MACELibType.dynamic

    if link_dynamic:
        bm_model_binary_name = BM_MODEL_DYNAMIC_NAME
    else:
        bm_model_binary_name = BM_MODEL_STATIC_NAME
    build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi)
    # OpenCL files are only looked up for device targets with configured SoCs
    if configs[YAMLKeyword.target_socs] and target_abi != ABIType.host:
        opencl_output_bin_path = get_opencl_binary_output_path(
            library_name, target_abi, self
        )
        opencl_parameter_path = get_opencl_parameter_output_path(
            library_name, target_abi, self
        )

    for model_name in configs[YAMLKeyword.models]:
        check_model_converted(library_name,
                              model_name,
                              configs[YAMLKeyword.model_graph_format],
                              configs[YAMLKeyword.model_data_format],
                              target_abi)
        MaceLogger.header(
            StringFormatter.block(
                'Benchmark model %s on %s' % (model_name,
                                              self.device_name)))
        model_config = configs[YAMLKeyword.models][model_name]
        model_runtime = model_config[YAMLKeyword.runtime]
        subgraphs = model_config[YAMLKeyword.subgraphs]

        _, model_output_dir, mace_model_dir = \
            get_build_model_dirs(library_name, model_name,
                                 target_abi, self,
                                 model_config[YAMLKeyword.model_file_path])
        # start from an empty temporary output dir
        if os.path.exists(model_output_dir):
            sh.rm('-rf', model_output_dir)
        os.makedirs(model_output_dir)

        if target_abi != ABIType.host:
            self.clear_data_dir()
        sh_commands.gen_random_input(
            model_output_dir,
            subgraphs[0][YAMLKeyword.input_tensors],
            subgraphs[0][YAMLKeyword.input_shapes],
            subgraphs[0][YAMLKeyword.validation_inputs_data],
            input_ranges=subgraphs[0][YAMLKeyword.input_ranges],
            input_data_types=subgraphs[0][YAMLKeyword.input_data_types]
        )
        runtime_list = []
        if target_abi == ABIType.host:
            runtime_list.append(RuntimeType.cpu)
        elif model_runtime == RuntimeType.cpu_gpu:
            # expand the composite runtime into its two concrete runtimes
            runtime_list.extend([RuntimeType.cpu, RuntimeType.gpu])
        else:
            runtime_list.append(model_runtime)
        for runtime in runtime_list:
            device_type = parse_device_type(runtime)
            self.benchmark_model(
                abi=target_abi,
                benchmark_binary_dir=build_tmp_binary_dir,
                benchmark_binary_name=bm_model_binary_name,
                vlog_level=0,
                embed_model_data=embed_model_data,
                model_output_dir=model_output_dir,
                input_nodes=subgraphs[0][YAMLKeyword.input_tensors],
                output_nodes=subgraphs[0][YAMLKeyword.output_tensors],
                input_shapes=subgraphs[0][YAMLKeyword.input_shapes],
                output_shapes=subgraphs[0][YAMLKeyword.output_shapes],
                mace_model_dir=mace_model_dir,
                model_tag=model_name,
                device_type=device_type,
                model_graph_format=configs[YAMLKeyword.model_graph_format],
                omp_num_threads=flags.omp_num_threads,
                cpu_affinity_policy=flags.cpu_affinity_policy,
                gpu_perf_hint=flags.gpu_perf_hint,
                gpu_priority_hint=flags.gpu_priority_hint,
                opencl_binary_file=opencl_output_bin_path,
                opencl_parameter_file=opencl_parameter_path,
                libmace_dynamic_library_path=LIBMACE_DYNAMIC_PATH,
                link_dynamic=link_dynamic
            )
def run(self,
        abi,
        host_bin_path,
        bin_name,
        args='',
        opencl_profiling=True,
        vlog_level=0,
        out_of_range_check=True,
        address_sanitizer=False,
        simpleperf=False):
    """Push a host binary to this device, execute it and return its output.

    While holding ``self.lock()``, ``self.data_dir`` is removed and
    recreated, the binary is pushed, and it is executed with the MACE
    environment variables set. With ``address_sanitizer`` the ASan runtime
    library is pushed and preloaded. With ``simpleperf`` on an android
    device the binary is wrapped in ``simpleperf stat``.

    Both launch variants share one environment prefix, so the profiling
    switch is always exported as ``MACE_OPENCL_PROFILING``.

    Returns the collected stdout (stderr is merged into it) as one string.
    """
    host_bin_full_path = '%s/%s' % (host_bin_path, bin_name)
    device_bin_full_path = '%s/%s' % (self.data_dir, bin_name)
    print(
        '================================================================'
    )
    print('Trying to lock device %s' % self.address)
    with self.lock():
        print('Run on device: %s, %s, %s' %
              (self.address, self.target_socs, self.device_name))
        self.rm(self.data_dir)
        self.exec_command('mkdir -p %s' % self.data_dir)
        self.push(host_bin_full_path, device_bin_full_path)
        ld_preload = ''
        if address_sanitizer:
            self.push(sh_commands.find_asan_rt_library(abi),
                      self.data_dir)
            ld_preload = 'LD_PRELOAD=%s/%s' % \
                         (self.data_dir,
                          sh_commands.asan_rt_library_names(abi))
        opencl_profiling = 1 if opencl_profiling else 0
        out_of_range_check = 1 if out_of_range_check else 0
        print('Run %s' % device_bin_full_path)
        stdout_buf = []
        process_output = sh_commands.make_output_processor(stdout_buf)

        # Environment prefix common to both launch variants.
        exec_cmd = [
            ld_preload,
            'MACE_OUT_OF_RANGE_CHECK=%d' % out_of_range_check,
            'MACE_OPENCL_PROFILING=%d' % opencl_profiling,
            'MACE_CPP_MIN_VLOG_LEVEL=%d' % vlog_level,
        ]
        if simpleperf and self.system == SystemType.android:
            self.push(sh_commands.find_simpleperf_library(abi),
                      self.data_dir)
            simpleperf_cmd = '%s/simpleperf' % self.data_dir
            exec_cmd.extend([
                simpleperf_cmd,
                'stat',
                '--group',
                'raw-l1-dcache,raw-l1-dcache-refill',
                '--group',
                'raw-l2-dcache,raw-l2-dcache-refill',
                '--group',
                'raw-l1-dtlb,raw-l1-dtlb-refill',
                '--group',
                'raw-l2-dtlb,raw-l2-dtlb-refill',
            ])
        exec_cmd.extend([device_bin_full_path, args])
        exec_cmd = ' '.join(exec_cmd)
        self.exec_command(exec_cmd, _tty_in=True,
                          _out=process_output, _err_to_out=True)
        return ''.join(stdout_buf)
class DeviceManager:
    """Collects the devices models can be run on: adb, ssh and the host."""

    @classmethod
    def list_adb_device(cls):
        """Return one description dict per usable adb-attached device.

        Parses the output of ``adb devices`` and queries the properties of
        each device. Blank or malformed lines and entries whose state is
        not ``device`` (for example ``offline`` or ``unauthorized``) are
        skipped, because properties cannot be read from them.
        """
        adb_lines = sh.adb('devices').stdout.decode('utf-8'). \
            strip().split('\n')[1:]
        devices = []
        for line in adb_lines:
            fields = line.strip().split('\t')
            if len(fields) < 2 or fields[1].strip() != 'device':
                continue
            serialno = fields[0]
            prop = sh_commands.adb_getprop_by_serialno(serialno)
            android = {
                YAMLKeyword.device_name:
                    prop['ro.product.model'].replace(' ', ''),
                YAMLKeyword.target_abis:
                    prop['ro.product.cpu.abilist'].split(','),
                YAMLKeyword.target_socs: prop['ro.board.platform'],
                YAMLKeyword.system: SystemType.android,
                YAMLKeyword.address: serialno,
                YAMLKeyword.username: '',
            }
            devices.append(android)
        return devices

    @classmethod
    def list_ssh_device(cls, yml):
        """Return the devices declared under ``devices`` in the file ``yml``.

        Each entry gets a ``device_name`` derived from its ``models`` value
        with spaces removed, and its system set to ``SystemType.arm_linux``.
        """
        with open(yml) as f:
            # safe_load: the file is plain configuration data, and calling
            # yaml.load() without a Loader can construct arbitrary objects
            # (newer PyYAML releases refuse that call outright).
            devices = yaml.safe_load(f.read())
        devices = devices['devices']
        device_list = []
        for dev in six.itervalues(devices):
            dev[YAMLKeyword.device_name] = \
                dev[YAMLKeyword.models].replace(' ', '')
            dev[YAMLKeyword.system] = SystemType.arm_linux
            device_list.append(dev)
        return device_list

    @classmethod
    def list_devices(cls, yml):
        """Return adb devices, configured ssh devices and the host itself.

        When ``yml`` is empty, ``devices.yml`` in the current directory is
        used if it exists. When ``yml`` is given but missing, an error is
        reported through ``MaceLogger.error``.
        """
        devices_list = []
        devices_list.extend(cls.list_adb_device())
        if not yml:
            if os.path.exists('devices.yml'):
                devices_list.extend(cls.list_ssh_device('devices.yml'))
        else:
            if os.path.exists(yml):
                devices_list.extend(cls.list_ssh_device(yml))
            else:
                MaceLogger.error(ModuleName.RUN,
                                 'no ARM linux device config file found')
        host = {
            YAMLKeyword.device_name: SystemType.host,
            YAMLKeyword.target_abis: [ABIType.host],
            YAMLKeyword.target_socs: '',
            YAMLKeyword.system: SystemType.host,
            YAMLKeyword.address: None,
        }
        devices_list.append(host)
        return devices_list
if __name__ == '__main__':
pass
import argparse import argparse
import os import os
import sys import sys
import six
import tensorflow as tf import tensorflow as tf
# TODO(liyin): use dataset api and estimator with distributed strategy # TODO(liyin): use dataset api and estimator with distributed strategy
...@@ -70,7 +73,7 @@ def images_to_tensors(input_files, image_shape, mean_values=None): ...@@ -70,7 +73,7 @@ def images_to_tensors(input_files, image_shape, mean_values=None):
def main(unused_args): def main(unused_args):
if not os.path.exists(FLAGS.input): if not os.path.exists(FLAGS.input):
print ("input does not exist: %s" % FLAGS.input) print("input does not exist: %s" % FLAGS.input)
sys.exit(-1) sys.exit(-1)
input_files = [] input_files = []
......
import argparse import argparse
import os import os
import sys import sys
import six
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
...@@ -53,7 +56,7 @@ def tensors_to_images(input_files, image_shape): ...@@ -53,7 +56,7 @@ def tensors_to_images(input_files, image_shape):
def main(unused_args): def main(unused_args):
if not os.path.exists(FLAGS.input): if not os.path.exists(FLAGS.input):
print ("input does not exist: %s" % FLAGS.input) print("input does not exist: %s" % FLAGS.input)
sys.exit(-1) sys.exit(-1)
input_files = [] input_files = []
......
...@@ -20,16 +20,14 @@ import os ...@@ -20,16 +20,14 @@ import os
import re import re
import sh import sh
import struct import struct
import subprocess
import sys import sys
import time import time
import urllib
import platform import platform
from enum import Enum
import six import six
import common import common
from common import abi_to_internal
sys.path.insert(0, "mace/python/tools") sys.path.insert(0, "mace/python/tools")
try: try:
...@@ -89,11 +87,6 @@ class BuildType(object): ...@@ -89,11 +87,6 @@ class BuildType(object):
code = 'code' code = 'code'
class ModelFormat(object):
file = 'file'
code = 'code'
def stdout_success(stdout): def stdout_success(stdout):
stdout_lines = stdout.split("\n") stdout_lines = stdout.split("\n")
for line in stdout_lines: for line in stdout_lines:
...@@ -181,97 +174,14 @@ def adb_get_all_socs(): ...@@ -181,97 +174,14 @@ def adb_get_all_socs():
def adb_push(src_path, dst_path, serialno): def adb_push(src_path, dst_path, serialno):
six.print_("Push %s to %s" % (src_path, dst_path))
sh.adb("-s", serialno, "push", src_path, dst_path) sh.adb("-s", serialno, "push", src_path, dst_path)
def adb_pull(src_path, dst_path, serialno): def adb_pull(src_path, dst_path, serialno):
six.print_("Pull %s to %s" % (src_path, dst_path))
try: try:
sh.adb("-s", serialno, "pull", src_path, dst_path) sh.adb("-s", serialno, "pull", src_path, dst_path)
except Exception as e: except Exception as e:
six.print_("Error msg: %s" % e.stderr) six.print_("Error msg: %s" % e, file=sys.stderr)
def adb_run(abi,
serialno,
host_bin_path,
bin_name,
args="",
opencl_profiling=True,
vlog_level=0,
device_bin_path="/data/local/tmp/mace",
out_of_range_check=True,
address_sanitizer=False,
simpleperf=False):
host_bin_full_path = "%s/%s" % (host_bin_path, bin_name)
device_bin_full_path = "%s/%s" % (device_bin_path, bin_name)
props = adb_getprop_by_serialno(serialno)
six.print_(
"====================================================================="
)
six.print_("Trying to lock device %s" % serialno)
with device_lock(serialno):
six.print_("Run on device: %s, %s, %s" %
(serialno, props["ro.board.platform"],
props["ro.product.model"]))
sh.adb("-s", serialno, "shell", "rm -rf %s" % device_bin_path)
sh.adb("-s", serialno, "shell", "mkdir -p %s" % device_bin_path)
adb_push(host_bin_full_path, device_bin_full_path, serialno)
ld_preload = ""
if address_sanitizer:
adb_push(find_asan_rt_library(abi), device_bin_path, serialno)
ld_preload = "LD_PRELOAD=%s/%s" % (device_bin_path,
asan_rt_library_names(abi)),
opencl_profiling = 1 if opencl_profiling else 0
out_of_range_check = 1 if out_of_range_check else 0
six.print_("Run %s" % device_bin_full_path)
stdout_buff = []
process_output = make_output_processor(stdout_buff)
if simpleperf:
adb_push(find_simpleperf_library(abi), device_bin_path, serialno)
simpleperf_cmd = "%s/simpleperf" % device_bin_path
sh.adb(
"-s",
serialno,
"shell",
ld_preload,
"MACE_OUT_OF_RANGE_CHECK=%d" % out_of_range_check,
"MACE_OPENCL_PROFILING=%d" % opencl_profiling,
"MACE_CPP_MIN_VLOG_LEVEL=%d" % vlog_level,
simpleperf_cmd,
"stat",
"--group",
"raw-l1-dcache,raw-l1-dcache-refill",
"--group",
"raw-l2-dcache,raw-l2-dcache-refill",
"--group",
"raw-l1-dtlb,raw-l1-dtlb-refill",
"--group",
"raw-l2-dtlb,raw-l2-dtlb-refill",
device_bin_full_path,
args,
_tty_in=True,
_out=process_output,
_err_to_out=True)
else:
sh.adb(
"-s",
serialno,
"shell",
ld_preload,
"MACE_OUT_OF_RANGE_CHECK=%d" % out_of_range_check,
"MACE_OPENCL_PROFILING=%d" % opencl_profiling,
"MACE_CPP_MIN_VLOG_LEVEL=%d" % vlog_level,
device_bin_full_path,
args,
_tty_in=True,
_out=process_output,
_err_to_out=True)
return "".join(stdout_buff)
################################ ################################
...@@ -293,7 +203,7 @@ def find_asan_rt_library(abi, asan_rt_path=''): ...@@ -293,7 +203,7 @@ def find_asan_rt_library(abi, asan_rt_path=''):
if len(candidates) == 0: if len(candidates) == 0:
common.MaceLogger.error( common.MaceLogger.error(
"Toolchain", "Toolchain",
"Can't find AddressSanitizer runtime library in % s" % "Can't find AddressSanitizer runtime library in %s" %
find_path) find_path)
elif len(candidates) > 1: elif len(candidates) > 1:
common.MaceLogger.info( common.MaceLogger.info(
...@@ -338,6 +248,7 @@ def find_simpleperf_library(abi, simpleperf_path=''): ...@@ -338,6 +248,7 @@ def find_simpleperf_library(abi, simpleperf_path=''):
################################ ################################
def bazel_build(target, def bazel_build(target,
abi="armeabi-v7a", abi="armeabi-v7a",
toolchain='android',
hexagon_mode=False, hexagon_mode=False,
enable_openmp=True, enable_openmp=True,
enable_neon=True, enable_neon=True,
...@@ -361,8 +272,8 @@ def bazel_build(target, ...@@ -361,8 +272,8 @@ def bazel_build(target,
"build", "build",
target, target,
"--config", "--config",
"android", toolchain,
"--cpu=%s" % abi, "--cpu=%s" % abi_to_internal(abi),
"--define", "--define",
"neon=%s" % str(enable_neon).lower(), "neon=%s" % str(enable_neon).lower(),
"--define", "--define",
...@@ -434,15 +345,6 @@ def gen_mace_engine_factory_source(model_tags, ...@@ -434,15 +345,6 @@ def gen_mace_engine_factory_source(model_tags,
six.print_("Generate mace engine creator source done!\n") six.print_("Generate mace engine creator source done!\n")
def pull_file_from_device(serial_num, file_path, file_name, output_dir):
if not os.path.exists(output_dir):
sh.mkdir("-p", output_dir)
output_path = "%s/%s" % (output_dir, file_path)
if os.path.exists(output_path):
sh.rm('-rf', output_path)
adb_pull(file_path + '/' + file_name, output_dir, serial_num)
def merge_opencl_binaries(binaries_dirs, def merge_opencl_binaries(binaries_dirs,
cl_compiled_program_file_name, cl_compiled_program_file_name,
output_file_path): output_file_path):
...@@ -691,233 +593,21 @@ def push_depended_so_libs(libmace_dynamic_library_path, ...@@ -691,233 +593,21 @@ def push_depended_so_libs(libmace_dynamic_library_path,
abi, phone_data_dir, serialno): abi, phone_data_dir, serialno):
dep_so_libs = sh.bash(os.environ["ANDROID_NDK_HOME"] + "/ndk-depends", dep_so_libs = sh.bash(os.environ["ANDROID_NDK_HOME"] + "/ndk-depends",
libmace_dynamic_library_path) libmace_dynamic_library_path)
src_file = ""
for dep in split_stdout(dep_so_libs): for dep in split_stdout(dep_so_libs):
if dep == "libgnustl_shared.so": if dep == "libgnustl_shared.so":
adb_push( src_file = "%s/sources/cxx-stl/gnu-libstdc++/4.9/libs/" \
"%s/sources/cxx-stl/gnu-libstdc++/4.9/libs/%s/libgnustl_shared.so" # noqa "%s/libgnustl_shared.so"\
% (os.environ["ANDROID_NDK_HOME"], abi), % (os.environ["ANDROID_NDK_HOME"], abi)
phone_data_dir,
serialno)
elif dep == "libc++_shared.so": elif dep == "libc++_shared.so":
adb_push( src_file = "%s/sources/cxx-stl/llvm-libc++/libs/" \
"%s/sources/cxx-stl/llvm-libc++/libs/%s/libc++_shared.so" # noqa "%s/libc++_shared.so" % (os.environ["ANDROID_NDK_HOME"], abi)
% (os.environ["ANDROID_NDK_HOME"], abi), print("push %s to %s" % (src_file, phone_data_dir))
phone_data_dir, adb_push(src_file, phone_data_dir, serialno)
serialno)
def tuning_run(abi,
serialno,
target_dir,
target_name,
vlog_level,
embed_model_data,
model_output_dir,
input_nodes,
output_nodes,
input_shapes,
output_shapes,
mace_model_dir,
model_tag,
device_type,
running_round,
restart_round,
limit_opencl_kernel_time,
tuning,
out_of_range_check,
phone_data_dir,
model_graph_format,
opencl_binary_file,
opencl_parameter_file,
libmace_dynamic_library_path,
omp_num_threads=-1,
cpu_affinity_policy=1,
gpu_perf_hint=3,
gpu_priority_hint=3,
input_file_name="model_input",
output_file_name="model_out",
input_dir="",
output_dir="",
runtime_failure_ratio=0.0,
address_sanitizer=False,
link_dynamic=False,
quantize_stat=False):
six.print_("* Run '%s' with round=%s, restart_round=%s, tuning=%s, "
"out_of_range_check=%s, omp_num_threads=%s, "
"cpu_affinity_policy=%s, gpu_perf_hint=%s, "
"gpu_priority_hint=%s" %
(model_tag, running_round, restart_round, str(tuning),
str(out_of_range_check), omp_num_threads, cpu_affinity_policy,
gpu_perf_hint, gpu_priority_hint))
sys.stdout.flush()
mace_model_path = ""
if model_graph_format == ModelFormat.file:
mace_model_path = "%s/%s.pb" % (mace_model_dir, model_tag)
if abi == "host":
libmace_dynamic_lib_path = \
os.path.dirname(libmace_dynamic_library_path)
cmd = [
"env",
"LD_LIBRARY_PATH=%s" % libmace_dynamic_lib_path,
"MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
"MACE_RUNTIME_FAILURE_RATIO=%f" % runtime_failure_ratio,
]
if quantize_stat:
cmd.append("MACE_LOG_TENSOR_RANGE=1")
cmd.extend([
"%s/%s" % (target_dir, target_name),
"--model_name=%s" % model_tag,
"--input_node=%s" % ",".join(input_nodes),
"--output_node=%s" % ",".join(output_nodes),
"--input_shape=%s" % ":".join(input_shapes),
"--output_shape=%s" % ":".join(output_shapes),
"--input_file=%s/%s" % (model_output_dir, input_file_name),
"--output_file=%s/%s" % (model_output_dir, output_file_name),
"--input_dir=%s" % input_dir,
"--output_dir=%s" % output_dir,
"--model_data_file=%s/%s.data" % (mace_model_dir, model_tag),
"--device=%s" % device_type,
"--round=%s" % running_round,
"--restart_round=%s" % restart_round,
"--omp_num_threads=%s" % omp_num_threads,
"--cpu_affinity_policy=%s" % cpu_affinity_policy,
"--gpu_perf_hint=%s" % gpu_perf_hint,
"--gpu_priority_hint=%s" % gpu_priority_hint,
"--model_file=%s" % mace_model_path,
])
p = subprocess.Popen(
cmd,
stderr=subprocess.PIPE,
stdout=subprocess.PIPE)
out, err = p.communicate()
stdout = err + out
six.print_(stdout)
six.print_("Running finished!\n")
else:
sh.adb("-s", serialno, "shell", "mkdir", "-p", phone_data_dir)
internal_storage_dir = create_internal_storage_dir(
serialno, phone_data_dir)
for input_name in input_nodes:
formatted_name = common.formatted_file_name(input_file_name,
input_name)
adb_push("%s/%s" % (model_output_dir, formatted_name),
phone_data_dir, serialno)
if address_sanitizer:
adb_push(find_asan_rt_library(abi), phone_data_dir, serialno)
if not embed_model_data:
adb_push("%s/%s.data" % (mace_model_dir, model_tag),
phone_data_dir, serialno)
if device_type == common.DeviceType.GPU:
if os.path.exists(opencl_binary_file):
adb_push(opencl_binary_file, phone_data_dir, serialno)
if os.path.exists(opencl_parameter_file):
adb_push(opencl_parameter_file, phone_data_dir, serialno)
adb_push("third_party/nnlib/libhexagon_controller.so",
phone_data_dir, serialno)
mace_model_phone_path = ""
if model_graph_format == ModelFormat.file:
mace_model_phone_path = "%s/%s.pb" % (phone_data_dir, model_tag)
adb_push(mace_model_path,
mace_model_phone_path,
serialno)
if link_dynamic:
adb_push(libmace_dynamic_library_path, phone_data_dir,
serialno)
push_depended_so_libs(libmace_dynamic_library_path, abi,
phone_data_dir, serialno)
adb_push("%s/%s" % (target_dir, target_name), phone_data_dir,
serialno)
stdout_buff = []
process_output = make_output_processor(stdout_buff)
adb_cmd = [
"LD_LIBRARY_PATH=%s" % phone_data_dir,
"MACE_TUNING=%s" % int(tuning),
"MACE_OUT_OF_RANGE_CHECK=%s" % int(out_of_range_check),
"MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
"MACE_RUN_PARAMETER_PATH=%s/mace_run.config" % phone_data_dir,
"MACE_INTERNAL_STORAGE_PATH=%s" % internal_storage_dir,
"MACE_LIMIT_OPENCL_KERNEL_TIME=%s" % limit_opencl_kernel_time,
"MACE_RUNTIME_FAILURE_RATIO=%f" % runtime_failure_ratio,
]
if quantize_stat:
adb_cmd.append("MACE_LOG_TENSOR_RANGE=1")
if address_sanitizer:
adb_cmd.extend([
"LD_PRELOAD=%s/%s" % (phone_data_dir,
asan_rt_library_names(abi))
])
adb_cmd.extend([
"%s/%s" % (phone_data_dir, target_name),
"--model_name=%s" % model_tag,
"--input_node=%s" % ",".join(input_nodes),
"--output_node=%s" % ",".join(output_nodes),
"--input_shape=%s" % ":".join(input_shapes),
"--output_shape=%s" % ":".join(output_shapes),
"--input_file=%s/%s" % (phone_data_dir, input_file_name),
"--output_file=%s/%s" % (phone_data_dir, output_file_name),
"--input_dir=%s" % input_dir,
"--output_dir=%s" % output_dir,
"--model_data_file=%s/%s.data" % (phone_data_dir, model_tag),
"--device=%s" % device_type,
"--round=%s" % running_round,
"--restart_round=%s" % restart_round,
"--omp_num_threads=%s" % omp_num_threads,
"--cpu_affinity_policy=%s" % cpu_affinity_policy,
"--gpu_perf_hint=%s" % gpu_perf_hint,
"--gpu_priority_hint=%s" % gpu_priority_hint,
"--model_file=%s" % mace_model_phone_path,
"--opencl_binary_file=%s/%s" %
(phone_data_dir, os.path.basename(opencl_binary_file)),
"--opencl_parameter_file=%s/%s" %
(phone_data_dir, os.path.basename(opencl_parameter_file)),
])
adb_cmd = ' '.join(adb_cmd)
cmd_file_name = "%s-%s-%s" % ('cmd_file', model_tag, str(time.time()))
adb_cmd_file = "%s/%s" % (phone_data_dir, cmd_file_name)
tmp_cmd_file = "%s/%s" % ('/tmp', cmd_file_name)
with open(tmp_cmd_file, 'w') as cmd_file:
cmd_file.write(adb_cmd)
adb_push(tmp_cmd_file, adb_cmd_file, serialno)
os.remove(tmp_cmd_file)
sh.adb(
"-s",
serialno,
"shell",
"sh",
adb_cmd_file,
_tty_in=True,
_out=process_output,
_err_to_out=True)
stdout = "".join(stdout_buff)
if not stdout_success(stdout):
common.MaceLogger.error("Mace Run", "Mace run failed.")
sh.adb(
"-s",
serialno,
"shell",
"rm",
adb_cmd_file,
_fg=True)
six.print_("Running finished!\n")
sys.stdout.flush()
return stdout
def validate_model(abi, def validate_model(abi,
serialno, device,
model_file_path, model_file_path,
weight_file_path, weight_file_path,
platform, platform,
...@@ -927,7 +617,6 @@ def validate_model(abi, ...@@ -927,7 +617,6 @@ def validate_model(abi,
input_shapes, input_shapes,
output_shapes, output_shapes,
model_output_dir, model_output_dir,
phone_data_dir,
input_data_types, input_data_types,
caffe_env, caffe_env,
input_file_name="model_input", input_file_name="model_input",
...@@ -941,8 +630,7 @@ def validate_model(abi, ...@@ -941,8 +630,7 @@ def validate_model(abi,
if os.path.exists("%s/%s" % (model_output_dir, if os.path.exists("%s/%s" % (model_output_dir,
formatted_name)): formatted_name)):
sh.rm("-rf", "%s/%s" % (model_output_dir, formatted_name)) sh.rm("-rf", "%s/%s" % (model_output_dir, formatted_name))
adb_pull("%s/%s" % (phone_data_dir, formatted_name), device.pull_from_data_dir(formatted_name, model_output_dir)
model_output_dir, serialno)
if platform == "tensorflow": if platform == "tensorflow":
validate(platform, model_file_path, "", validate(platform, model_file_path, "",
...@@ -956,11 +644,10 @@ def validate_model(abi, ...@@ -956,11 +644,10 @@ def validate_model(abi,
container_name = "mace_caffe_validator" container_name = "mace_caffe_validator"
if caffe_env == common.CaffeEnvType.LOCAL: if caffe_env == common.CaffeEnvType.LOCAL:
import imp
try: try:
imp.find_module('caffe') import caffe
except ImportError: except ImportError:
logger.error('There is no caffe python module.') logging.error('There is no caffe python module.')
validate(platform, model_file_path, weight_file_path, validate(platform, model_file_path, weight_file_path,
"%s/%s" % (model_output_dir, input_file_name), "%s/%s" % (model_output_dir, input_file_name),
"%s/%s" % (model_output_dir, output_file_name), "%s/%s" % (model_output_dir, output_file_name),
...@@ -1075,149 +762,6 @@ def packaging_lib(libmace_output_dir, project_name): ...@@ -1075,149 +762,6 @@ def packaging_lib(libmace_output_dir, project_name):
################################ ################################
# benchmark # benchmark
################################ ################################
def benchmark_model(abi,
                    serialno,
                    benchmark_binary_dir,
                    benchmark_binary_name,
                    vlog_level,
                    embed_model_data,
                    model_output_dir,
                    mace_model_dir,
                    input_nodes,
                    output_nodes,
                    input_shapes,
                    output_shapes,
                    model_tag,
                    device_type,
                    phone_data_dir,
                    model_graph_format,
                    opencl_binary_file,
                    opencl_parameter_file,
                    libmace_dynamic_library_path,
                    omp_num_threads=-1,
                    cpu_affinity_policy=1,
                    gpu_perf_hint=3,
                    gpu_priority_hint=3,
                    input_file_name="model_input",
                    link_dynamic=False):
    """Run the benchmark binary for one model, on the host or over adb.

    When ``abi == "host"`` the binary is launched locally through ``env``
    with ``LD_LIBRARY_PATH`` pointing at the directory that contains
    ``libmace_dynamic_library_path``; the call waits for the process to
    exit and does not inspect its exit status.

    Otherwise the inputs, the optional model data / graph / OpenCL files,
    the optional dynamic library and the benchmark binary are pushed into
    ``phone_data_dir`` on the device selected by ``serialno``.  The full
    command line is written to a small script that is pushed and executed
    with ``adb shell sh``; the script is removed from the device afterwards.

    Args:
        abi: Target ABI; the literal ``"host"`` selects the local run.
        serialno: adb serial number of the target device.
        benchmark_binary_dir: Local directory holding the benchmark binary.
        benchmark_binary_name: File name of the benchmark binary.
        vlog_level: Value exported as ``MACE_CPP_MIN_VLOG_LEVEL``.
        embed_model_data: When false, ``<model_tag>.data`` is pushed too.
        model_output_dir: Local directory holding the input files.
        mace_model_dir: Local directory holding ``<model_tag>.pb/.data``.
        input_nodes: Input node names, joined with ``,`` on the command line.
        output_nodes: Output node names, joined with ``,``.
        input_shapes: Input shape strings, joined with ``:``.
        output_shapes: Output shape strings, joined with ``:``.
        model_tag: Model name; also used to derive model file names.
        device_type: Passed as ``--device``; OpenCL files are only pushed
            when it equals ``common.DeviceType.GPU``.
        phone_data_dir: Working directory on the device.
        model_graph_format: The ``.pb`` graph file is used only when this
            equals ``ModelFormat.file``.
        opencl_binary_file: Local OpenCL binary file, pushed if it exists.
        opencl_parameter_file: Local OpenCL parameter file, pushed if it
            exists.
        libmace_dynamic_library_path: Local path of the dynamic library.
        omp_num_threads: Passed as ``--omp_num_threads``.
        cpu_affinity_policy: Passed as ``--cpu_affinity_policy``.
        gpu_perf_hint: Passed as ``--gpu_perf_hint``.
        gpu_priority_hint: Passed as ``--gpu_priority_hint``.
        input_file_name: Base name of the input files.
        link_dynamic: When true, the dynamic library and the libraries it
            depends on are pushed to the device as well.

    Returns:
        None.
    """
    six.print_("* Benchmark for %s" % model_tag)

    def benchmark_flags(input_dir, data_dir, model_file):
        # Flags shared by the host and the device invocation; only the
        # three locations differ between the two.
        return [
            "--model_name=%s" % model_tag,
            "--input_node=%s" % ",".join(input_nodes),
            "--output_node=%s" % ",".join(output_nodes),
            "--input_shape=%s" % ":".join(input_shapes),
            "--output_shape=%s" % ":".join(output_shapes),
            "--input_file=%s/%s" % (input_dir, input_file_name),
            "--model_data_file=%s/%s.data" % (data_dir, model_tag),
            "--device=%s" % device_type,
            "--omp_num_threads=%s" % omp_num_threads,
            "--cpu_affinity_policy=%s" % cpu_affinity_policy,
            "--gpu_perf_hint=%s" % gpu_perf_hint,
            "--gpu_priority_hint=%s" % gpu_priority_hint,
            "--model_file=%s" % model_file,
        ]

    mace_model_path = ""
    if model_graph_format == ModelFormat.file:
        mace_model_path = "%s/%s.pb" % (mace_model_dir, model_tag)

    if abi == "host":
        libmace_dynamic_lib_dir_path = \
            os.path.dirname(libmace_dynamic_library_path)
        host_cmd = [
            "env",
            "LD_LIBRARY_PATH=%s" % libmace_dynamic_lib_dir_path,
            "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
            "%s/%s" % (benchmark_binary_dir, benchmark_binary_name),
        ]
        host_cmd.extend(benchmark_flags(model_output_dir,
                                        mace_model_dir,
                                        mace_model_path))
        p = subprocess.Popen(host_cmd)
        p.wait()
    else:
        sh.adb("-s", serialno, "shell", "mkdir", "-p", phone_data_dir)
        internal_storage_dir = create_internal_storage_dir(
            serialno, phone_data_dir)

        for input_name in input_nodes:
            formatted_name = common.formatted_file_name(input_file_name,
                                                        input_name)
            adb_push("%s/%s" % (model_output_dir, formatted_name),
                     phone_data_dir, serialno)
        if not embed_model_data:
            adb_push("%s/%s.data" % (mace_model_dir, model_tag),
                     phone_data_dir, serialno)
        if device_type == common.DeviceType.GPU:
            if os.path.exists(opencl_binary_file):
                adb_push(opencl_binary_file, phone_data_dir, serialno)
            if os.path.exists(opencl_parameter_file):
                adb_push(opencl_parameter_file, phone_data_dir, serialno)
        mace_model_phone_path = ""
        if model_graph_format == ModelFormat.file:
            mace_model_phone_path = "%s/%s.pb" % (phone_data_dir, model_tag)
            adb_push(mace_model_path,
                     mace_model_phone_path,
                     serialno)
        if link_dynamic:
            adb_push(libmace_dynamic_library_path, phone_data_dir,
                     serialno)
            push_depended_so_lib(libmace_dynamic_library_path, abi,
                                 phone_data_dir, serialno)
        adb_push("%s/%s" % (benchmark_binary_dir, benchmark_binary_name),
                 phone_data_dir,
                 serialno)

        adb_cmd = [
            "LD_LIBRARY_PATH=%s" % phone_data_dir,
            "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
            "MACE_RUN_PARAMETER_PATH=%s/mace_run.config" %
            phone_data_dir,
            "MACE_INTERNAL_STORAGE_PATH=%s" % internal_storage_dir,
            "MACE_OPENCL_PROFILING=1",
            "%s/%s" % (phone_data_dir, benchmark_binary_name),
        ]
        adb_cmd.extend(benchmark_flags(phone_data_dir,
                                       phone_data_dir,
                                       mace_model_phone_path))
        adb_cmd.extend([
            "--opencl_binary_file=%s/%s" %
            (phone_data_dir, os.path.basename(opencl_binary_file)),
            "--opencl_parameter_file=%s/%s" %
            (phone_data_dir, os.path.basename(opencl_parameter_file)),
        ])
        adb_cmd = ' '.join(adb_cmd)

        # The command line is shipped as a script file rather than passed
        # directly as `adb shell` arguments.
        cmd_file_name = "%s-%s-%s" % ('cmd_file', model_tag, str(time.time()))
        adb_cmd_file = "%s/%s" % (phone_data_dir, cmd_file_name)
        tmp_cmd_file = "%s/%s" % ('/tmp', cmd_file_name)
        with open(tmp_cmd_file, 'w') as cmd_file:
            cmd_file.write(adb_cmd)
        try:
            adb_push(tmp_cmd_file, adb_cmd_file, serialno)
        finally:
            # Do not leave the local script behind when the push fails.
            os.remove(tmp_cmd_file)

        try:
            sh.adb(
                "-s",
                serialno,
                "shell",
                "sh",
                adb_cmd_file,
                _fg=True)
        finally:
            # Remove the script from the device even if the run failed.
            sh.adb(
                "-s",
                serialno,
                "shell",
                "rm",
                adb_cmd_file,
                _fg=True)

    six.print_("Benchmark done!\n")
def build_run_throughput_test(abi, def build_run_throughput_test(abi,
serialno, serialno,
vlog_level, vlog_level,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册