提交 51b14100 编写于 作者: L liuqi

feature: support arm linux device

1. Abstract android and arm linux devices into one format
2. Support cross compilation for ARM linux
3. Related issue #36
上级 66cf184f
......@@ -47,8 +47,13 @@ ops_test:
stage: ops_test
script:
- if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi
- python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS
- python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS --enable_neon=false
- >
if ping -c 1 v9.git.n.xiaomi.com 1>/dev/null 2>&1; then
GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git
DEVICE_CONF_FILE=generic-mobile-devices/devices.yml
fi
- python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64,armhf --target_socs=$TARGET_SOCS
- python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64,armhf --target_socs=$TARGET_SOCS --enable_neon=false
api_test:
stage: api_test
......@@ -68,14 +73,19 @@ extra_tests:
stage: extra_tests
script:
- if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi
- python tools/bazel_adb_run.py --target="//mace/utils:tuner_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS
- >
if ping -c 1 v9.git.n.xiaomi.com 1>/dev/null 2>&1; then
GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git
DEVICE_CONF_FILE=generic-mobile-devices/devices.yml
fi
- python tools/bazel_adb_run.py --target="//mace/utils:tuner_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64 --target_socs=$TARGET_SOCS
platform_compatible_tests:
stage: platform_compatible_tests
script:
- bazel build mace/core:core --define openmp=true
- bazel build --config arm_linux --define openmp=true --define opencl=true --define neon=true //mace/libmace:libmace.so
- bazel build --config aarch64_linux --define openmp=true --define opencl=true --define neon=true //mace/libmace:libmace.so
- bazel build --config arm_linux_gnueabihf --define openmp=true --define opencl=true --define neon=true //mace/libmace:libmace.so
- bazel build --config aarch64_linux_gnu --define openmp=true --define opencl=true --define neon=true //mace/libmace:libmace.so
build_libraries:
stage: build_libraries
......@@ -87,6 +97,11 @@ ndk_versions_compatible_tests:
script:
- DEFAULT_NDK_PATH=$ANDROID_NDK_HOME
- prefix_path=${DEFAULT_NDK_PATH%android-ndk-*}
- >
if ping -c 1 v9.git.n.xiaomi.com 1>/dev/null 2>&1; then
GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git
DEVICE_CONF_FILE=generic-mobile-devices/devices.yml
fi
- >
for ndk in android-ndk-r12b android-ndk-r15c android-ndk-r16 android-ndk-r17b;
do
......@@ -96,8 +111,8 @@ ndk_versions_compatible_tests:
export PATH=$ANDROID_NDK_HOME:$PATH;
echo "ndk path: $ANDROID_NDK_HOME";
if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi
python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS --args="--gtest_filter=ActivationOpTest*";
python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS --enable_neon=false --args="--gtest_filter=ActivationOpTest*";
python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64 --target_socs=$TARGET_SOCS --args="--gtest_filter=ActivationOpTest*";
python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a,arm64 --target_socs=$TARGET_SOCS --enable_neon=false --args="--gtest_filter=ActivationOpTest*";
fi
done
- export ANDROID_NDK_HOME=$DEFAULT_NDK_PATH
......@@ -111,16 +126,27 @@ python_tools_tests:
- GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@github.com:XiaoMi/mace-models.git
- CONF_FILE=mace-models/mobilenet-v2/mobilenet-v2.yml
- >
python tools/converter.py convert --config=${CONF_FILE} --target_abis=armeabi-v7a --model_graph_format=file --model_data_format=file || exit 1;
python tools/converter.py run --config=${CONF_FILE} --round=1 --target_abis=armeabi-v7a --validate --model_graph_format=file --model_data_format=file || exit 1;
python tools/converter.py run --config=${CONF_FILE} --example --target_abis=armeabi-v7a --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1;
if ping -c 1 v9.git.n.xiaomi.com 1>/dev/null 2>&1; then
GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git
DEVICE_CONF_FILE=generic-mobile-devices/devices.yml
fi
- >
python tools/converter.py convert --config=${CONF_FILE} --target_abis=armeabi-v7a,arm64 --model_graph_format=file --model_data_format=file || exit 1;
python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --round=1 --target_abis=armeabi-v7a,arm64 --validate --model_graph_format=file --model_data_format=file || exit 1;
python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --example --target_abis=armeabi-v7a,arm64 --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1;
model_tests:
stage: model_tests
script:
- pwd
- rm -rf mace-models
- rm -rf generic-mobile-devices
- GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@github.com:XiaoMi/mace-models.git
- >
if ping -c 1 v9.git.n.xiaomi.com 1>/dev/null 2>&1; then
GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git
DEVICE_CONF_FILE=generic-mobile-devices/devices.yml
fi
- >
for CONF_FILE in mace-models/mobilenet-v1/mobilenet-v1.yml mace-models/mobilenet-v1/mobilenet-v1-quantize-retrain.yml;
do
......@@ -131,8 +157,8 @@ model_tests:
- CONF_FILE=mace-models/mobilenet-v2/mobilenet-v2-host.yml
- >
python tools/converter.py convert --config=${CONF_FILE} --model_graph_format=file --model_data_format=file || exit 1;
python tools/converter.py run --config=${CONF_FILE} --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1;
python tools/converter.py run --config=${CONF_FILE} --example --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1;
python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1;
python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --example --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1;
- rm -rf mace-models
build_android_demo:
......
......@@ -35,7 +35,7 @@ Required dependencies
- Required by model validation
* - six
- pip install -I six==1.11.0
- Required for Python 2 and 3 compatibility (TODO)
- Required for Python 2 and 3 compatibility
Optional dependencies
---------------------
......
......@@ -109,13 +109,75 @@ in one deployment file.
sha256sum /path/to/your/file
Advanced usage
--------------
There are two common advanced use cases:
There are three common advanced use cases:
- run your model on the embedded device
- converting model to C++ code.
- tuning GPU kernels for a specific SoC.
Run your model on the embedded device
------------------
MACE uses ssh to connect to the embedded device; we recommend that you push ``$HOME/.ssh/id_rsa.pub``
to the device's ``$HOME/.ssh/authorized_keys``
.. code:: bash
cat ~/.ssh/id_rsa.pub | ssh -q {user}@{ip} "cat >> ~/.ssh/authorized_keys"
This part will show you how to write your own device yaml config file.
**Device yaml config file**
Running your model on the embedded device is nearly the same as running on Android, except that you need to provide a device yaml config file.
MACE reads this yaml config via the ``--device_yml`` argument; the default value is ``devices.yml``.
When the yaml config file is not found, MACE assumes there is no available ARM Linux device, prints a message,
and continues on other devices such as plugged-in Android phones.
* **Example**
Here is an device yaml config demo.
.. literalinclude:: devices/demo_device_nanopi.yml
:language: yaml
* **Configuration**
.. list-table::
:header-rows: 1
* - Options
- Usage
* - target_abis
- Device supported abis, you can get it via ``dpkg --print-architecture`` and
``dpkg --print-foreign-architectures`` command, if more than one abi is supported,
separate them by commas.
* - target_socs
- Device SoC. You can get it from the device manual; we haven't found a way to get it in the shell.
* - models
- Device model's full name. You can get it via the ``lshw`` command (a third-party package; install it via your package manager) —
see its product value.
* - address
- Since we use ssh to connect to the device, the IP address is required.
* - username
- login username, required.
* - password
- Login password; optional when you can log in to the device without a password
.. note::
Some command tools:
.. code:: bash
# specify device yaml config file via --device_yml argument or put the file under working directory
python tools/converter.py run --config=/path/to/mace-models/mobilenet-v2/mobilenet-v2.yml --device_yml=/path/to/devices.yml
Convert model(s) to C++ code
--------------------------------
......@@ -403,6 +465,7 @@ Reduce Library Size
- It is recommended to use ``version script`` and ``strip`` feature when linking mace static library. The effect is remarkable.
* Remove the unused ops.
Remove the registration of the ops unused for your models in the ``mace/ops/ops_register.cc``,
which will reduce the library size significantly. the final binary just link the registered ops' code.
......
......@@ -68,7 +68,8 @@ Here we use the mobilenet-v2 model as an example.
.. note::
If you want to run on device/phone, please plug in at least one device/phone.
If you want to run on phone, please plug in at least one phone.
Or if you want to run on an embedded device, please refer to :doc:`advanced_usage`.
.. code:: sh
......@@ -245,7 +246,10 @@ to run and validate your model.
# Test model run time
python tools/converter.py run --config=/path/to/your/model_deployment_file.yml --round=100
# Validate the correctness by comparing the results against the
# If you want to run model on specified arm linux device, you should put device config file in the working directory or run with flag `--device_yml`
python tools/converter.py run --config=/path/to/mace-models/mobilenet-v2/mobilenet-v2.yml --device_yml=/path/to/devices.yml --example
# Validate the correctness by comparing the results against the
# original model and framework, measured with cosine distance for similarity.
python tools/converter.py run --config=/path/to/your/model_deployment_file.yml --validate
......
......@@ -24,6 +24,24 @@ config_setting(
visibility = ["//visibility:public"],
)
config_setting(
name = "arm_linux_aarch64",
values = {
"crosstool_top": "//tools/aarch64_compiler:toolchain",
"cpu": "aarch64",
},
visibility = ["//visibility:public"],
)
config_setting(
name = "arm_linux_armhf",
values = {
"crosstool_top": "//tools/arm_compiler:toolchain",
"cpu": "armeabi-v7a",
},
visibility = ["//visibility:public"],
)
config_setting(
name = "neon_enabled",
define_values = {
......
# Examples
load(
"//mace:mace.bzl",
"if_openmp_enabled",
"if_android",
"if_hexagon_enabled",
"if_opencl_enabled",
"if_openmp_enabled",
)
cc_binary(
......@@ -18,8 +18,9 @@ cc_binary(
]),
linkopts = [
"-lm",
"-ldl",
] + if_openmp_enabled([
"-fopenmp"
"-fopenmp",
]) + if_android([
"-ldl",
"-pie",
......@@ -47,6 +48,7 @@ cc_binary(
]),
linkopts = [
"-lm",
"-ldl",
] + if_android([
"-ldl",
"-pie",
......@@ -55,8 +57,7 @@ cc_binary(
linkstatic = 0,
deps = [
"//external:gflags_nothreads",
"//mace/codegen:generated_mace_engine_factory",
"//mace/codegen:generated_libmace",
"//mace/codegen:generated_mace_engine_factory",
],
)
......@@ -24,6 +24,18 @@ def if_android_arm64(a):
"//conditions:default": [],
})
def if_arm_linux_aarch64(a):
return select({
"//mace:arm_linux_aarch64": a,
"//conditions:default": [],
})
def if_arm_linux_armhf(a):
return select({
"//mace:arm_linux_armhf": a,
"//conditions:default": []
})
def if_neon_enabled(a):
return select({
"//mace:neon_enabled": a,
......@@ -81,4 +93,3 @@ def encrypt_opencl_kernel_genrule():
outs = ["opencl/encrypt_opencl_kernel.cc"],
cmd = "cat $(SRCS) > $@;"
)
......@@ -233,7 +233,7 @@ void TestNxNS12(const index_t height, const index_t width) {
auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w,
Padding type) {
// generate random input
static unsigned int seed = time(NULL);
// static unsigned int seed = time(NULL);
index_t batch = 1;
index_t channel = 32;
index_t multiplier = 1;
......
......@@ -13,6 +13,7 @@
// limitations under the License.
#include <algorithm>
#include <cmath>
#include "mace/core/operator.h"
......
......@@ -15,6 +15,7 @@
#include "mace/ops/resize_bicubic.h"
#include <algorithm>
#include <cmath>
#include <memory>
#include <vector>
......
......@@ -13,6 +13,7 @@
// limitations under the License.
#include <algorithm>
#include <cmath>
#include <vector>
#include "mace/core/operator.h"
......
......@@ -13,6 +13,7 @@
// limitations under the License.
#include <algorithm>
#include <cmath>
#include <limits>
#include <memory>
#include <vector>
......@@ -106,7 +107,7 @@ class SoftmaxOp<DeviceType::CPU, float> : public Operation {
float sum = 0;
for (index_t c = 0; c < class_count; ++c) {
float exp_value = ::exp(input_ptr[c] - max_val);
float exp_value = std::exp(input_ptr[c] - max_val);
sum += exp_value;
output_ptr[c] = exp_value;
}
......
......@@ -13,6 +13,7 @@
// limitations under the License.
#include <algorithm>
#include <cmath>
#include <vector>
#include "mace/core/operator.h"
......
......@@ -16,8 +16,9 @@
#include <arm_neon.h>
#endif
#include <vector>
#include <algorithm>
#include <cmath>
#include <vector>
#include "mace/core/operator.h"
#include "mace/ops/transpose.h"
......
......@@ -112,6 +112,8 @@ TFSupportedOps = [
TFOpType = Enum('TFOpType', [(op, op) for op in TFSupportedOps], type=str)
TFSupportedOps = [six.b(op) for op in TFSupportedOps]
class TensorflowConverter(base_converter.ConverterInterface):
"""A class for convert tensorflow frozen model to mace model.
......
# Copyright 2018 Xiaomi, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import operator
import six
from six.moves import reduce
from mace.proto import mace_pb2
from mace.python.tools.converter_tool import base_converter as cvt
from mace.python.tools.converter_tool.base_converter import DeviceType
from mace.python.tools.converter_tool.base_converter import ConverterUtil
from mace.python.tools.converter_tool.base_converter import MaceKeyword
from mace.python.tools.convert_util import calculate_image_shape
from mace.python.tools.convert_util import OpenCLBufferType
def MemoryTypeToStr(mem_type):
    """Return the display name of a mace_pb2 memory-type enum value.

    Unrecognized values map to ``'UNKNOWN'``.
    """
    names = {
        mace_pb2.CPU_BUFFER: 'CPU_BUFFER',
        mace_pb2.GPU_BUFFER: 'GPU_BUFFER',
        mace_pb2.GPU_IMAGE: 'GPU_IMAGE',
    }
    return names.get(mem_type, 'UNKNOWN')
class MemoryBlock(object):
    """Immutable description of one planned memory allocation.

    ``mem_type`` is a mace_pb2 memory-type enum value; ``block`` holds the
    allocation dimensions (``[size]`` for buffers, ``[x, y]`` for 2-D images,
    as used by the optimizers below).
    """

    def __init__(self, mem_type, block):
        # Store both fields together; they are exposed read-only via the
        # properties below.
        self._spec = (mem_type, block)

    @property
    def mem_type(self):
        """Memory-type enum value this block was created with."""
        return self._spec[0]

    @property
    def block(self):
        """Dimension list this block was created with."""
        return self._spec[1]
class MemoryOptimizer(object):
    """Greedy, liveness-based memory planner for a mace NetDef.

    Walks the ops in graph order, assigns every output tensor a memory
    block id, and reuses blocks that have become idle (all consumers
    visited) instead of allocating new ones.  This base class plans
    1-D CPU buffers; GPUMemoryOptimizer overrides the block-shape and
    sizing hooks for GPU buffers/images.
    """

    def __init__(self, net_def):
        """Build consumer reference counts for *net_def*'s tensors.

        Args:
            net_def: mace_pb2 NetDef proto; mutated later by optimize()
                (op.mem_id entries and net_def.mem_arena blocks are added).
        """
        self.net_def = net_def
        # mem ids currently free for reuse
        self.idle_mem = set()
        self.op_mem = {}  # op_name->mem_id
        self.mem_block = {}  # mem_id->[size] or mem_id->[x, y]
        self.total_mem_count = 0
        # tensor_name -> number of not-yet-visited consumer ops
        self.input_ref_counter = {}
        # mem_id -> number of live tensors mapped onto that block
        self.mem_ref_counter = {}

        # Remember the OpenCL memory type (if any) recorded on the net;
        # only used by the GPU subclass to pick image vs. buffer blocks.
        ocl_mem_type_arg = ConverterUtil.get_arg(
            net_def, MaceKeyword.mace_opencl_mem_type)
        self.cl_mem_type = ocl_mem_type_arg.i if ocl_mem_type_arg is not None \
            else None

        # tensor_name -> list of ops that consume it
        consumers = {}
        for op in net_def.op:
            if not self.op_need_optimize_memory(op):
                continue
            for ipt in op.input:
                if ipt not in consumers:
                    consumers[ipt] = []
                consumers[ipt].append(op)
        # only ref op's output tensor
        for op in net_def.op:
            if not self.op_need_optimize_memory(op):
                continue
            for output in op.output:
                tensor_name = output
                if tensor_name in consumers:
                    self.input_ref_counter[tensor_name] = \
                        len(consumers[tensor_name])
                else:
                    # output with no consumer inside the optimized set
                    self.input_ref_counter[tensor_name] = 0

    def op_need_optimize_memory(self, op):
        """Hook: whether *op*'s outputs take part in planning (CPU: all do)."""
        return True

    def get_op_mem_block(self, op_type, output_shape, output_type):
        """Return the MemoryBlock an output of *output_shape* needs.

        CPU blocks are flat byte buffers: element count times the element
        size (1 byte for uint8, 4 bytes otherwise).
        """
        data_type_size = 4
        if output_type == mace_pb2.DT_UINT8:
            data_type_size = 1
        return MemoryBlock(mace_pb2.CPU_BUFFER,
                           [reduce(operator.mul, output_shape, 1) *
                            data_type_size])

    def mem_size(self, memory_block):
        """Size metric of a block (CPU: its single dimension)."""
        return memory_block.block[0]

    def sub_mem_block(self, mem_block1, mem_block2):
        """Difference of the two blocks' size metrics."""
        return self.mem_size(mem_block1) - self.mem_size(mem_block2)

    def resize_mem_block(self, old_mem_block, op_mem_block):
        """Smallest block (same type as *old_mem_block*) that fits both."""
        return MemoryBlock(
            old_mem_block.mem_type,
            [max(old_mem_block.block[0], op_mem_block.block[0])])

    def add_net_mem_blocks(self):
        """Append every planned block to the net's memory arena (CPU device)."""
        for mem in self.mem_block:
            arena = self.net_def.mem_arena
            block = arena.mem_block.add()
            block.mem_id = mem
            block.device_type = DeviceType.CPU.value
            block.mem_type = self.mem_block[mem].mem_type
            block.x = self.mem_block[mem].block[0]
            block.y = 1

    def get_total_origin_mem_size(self):
        """Total element count of all optimized ops' first outputs (pre-reuse)."""
        origin_mem_size = 0
        for op in self.net_def.op:
            if not self.op_need_optimize_memory(op):
                continue
            origin_mem_size += reduce(operator.mul,
                                      op.output_shape[0].dims,
                                      1)
        return origin_mem_size

    def get_total_optimized_mem_size(self):
        """Sum of planned block sizes; also prints each block for debugging."""
        optimized_mem_size = 0
        for mem in self.mem_block:
            print(mem, MemoryTypeToStr(self.mem_block[mem].mem_type),
                  self.mem_block[mem].block)
            optimized_mem_size += self.mem_size(self.mem_block[mem])
        return optimized_mem_size

    @staticmethod
    def is_memory_reuse_op(op):
        """Ops whose output aliases their input memory (no new block needed)."""
        return op.type == 'Reshape' or op.type == 'Identity' \
            or op.type == 'Squeeze' or op.type == 'ExpandDims'

    def optimize(self):
        """Plan memory for the whole net and write the result into net_def.

        Each output tensor either aliases its input's block (reuse ops),
        grabs the cheapest-to-grow idle block of a matching memory type,
        or gets a fresh block.  Blocks return to the idle pool once all of
        a tensor's consumers have been visited.  Bails out (leaving the
        arena empty) when shape information is missing or inconsistent.
        """
        for op in self.net_def.op:
            if not self.op_need_optimize_memory(op):
                continue
            if not op.output_shape:
                six.print_("WARNING: There is no output shape information to "
                           "do memory optimization. %s (%s)" %
                           (op.name, op.type), file=sys.stderr)
                return
            if len(op.output_shape) != len(op.output):
                six.print_('WARNING: the number of output shape is '
                           'not equal to the number of output.',
                           file=sys.stderr)
                return
            for i in range(len(op.output)):
                if self.is_memory_reuse_op(op):
                    # make these ops reuse memory of input tensor
                    mem_id = self.op_mem.get(op.input[0], -1)
                else:
                    # Element type: 'T' arg by default, overridden by an
                    # explicit per-output type when present.
                    output_type = mace_pb2.DT_FLOAT
                    for arg in op.arg:
                        if arg.name == 'T':
                            output_type = arg.i
                    if len(op.output_type) > i:
                        output_type = op.output_type[i]
                    op_mem_block = self.get_op_mem_block(
                        op.type,
                        op.output_shape[i].dims,
                        output_type)
                    mem_id = -1
                    if len(self.idle_mem) > 0:
                        # Scan idle blocks of the same memory type for the
                        # one that needs the least growth (ties broken by
                        # least wasted space).
                        best_mem_add_size = six.MAXSIZE
                        best_mem_waste_size = six.MAXSIZE
                        for mid in self.idle_mem:
                            old_mem_block = self.mem_block[mid]
                            if old_mem_block.mem_type != op_mem_block.mem_type:
                                continue
                            new_mem_block = self.resize_mem_block(
                                old_mem_block, op_mem_block)
                            add_mem_size = self.sub_mem_block(new_mem_block,
                                                              old_mem_block)
                            waste_mem_size = self.sub_mem_block(new_mem_block,
                                                                op_mem_block)
                            # minimize add_mem_size; if best_mem_add_size is 0,
                            # then minimize waste_mem_size
                            if (best_mem_add_size > 0 and
                                    add_mem_size < best_mem_add_size) \
                                    or (best_mem_add_size == 0 and
                                        waste_mem_size < best_mem_waste_size):
                                best_mem_id = mid
                                best_mem_add_size = add_mem_size
                                best_mem_waste_size = waste_mem_size
                                best_mem_block = new_mem_block

                        # if add mem size < op mem size, then reuse it
                        if best_mem_add_size <= self.mem_size(op_mem_block):
                            self.mem_block[best_mem_id] = best_mem_block
                            mem_id = best_mem_id
                            self.idle_mem.remove(mem_id)

                    if mem_id == -1:
                        # No reusable block: allocate a fresh one.
                        mem_id = self.total_mem_count
                        self.total_mem_count += 1
                        self.mem_block[mem_id] = op_mem_block

                if mem_id != -1:
                    op.mem_id.extend([mem_id])
                    self.op_mem[op.output[i]] = mem_id
                    if mem_id not in self.mem_ref_counter:
                        self.mem_ref_counter[mem_id] = 1
                    else:
                        self.mem_ref_counter[mem_id] += 1

            # de-ref input tensor mem
            for idx in six.moves.range(len(op.input)):
                ipt = op.input[idx]
                if ipt in self.input_ref_counter:
                    self.input_ref_counter[ipt] -= 1
                    if self.input_ref_counter[ipt] == 0 \
                            and ipt in self.op_mem:
                        mem_id = self.op_mem[ipt]
                        self.mem_ref_counter[mem_id] -= 1
                        if self.mem_ref_counter[mem_id] == 0:
                            # last live tensor on this block: free it
                            self.idle_mem.add(self.op_mem[ipt])
                    elif self.input_ref_counter[ipt] < 0:
                        raise Exception('ref count is less than 0')

        self.add_net_mem_blocks()

        print("total op: %d" % len(self.net_def.op))
        print("origin mem: %d, optimized mem: %d" % (
            self.get_total_origin_mem_size(),
            self.get_total_optimized_mem_size()))
class GPUMemoryOptimizer(MemoryOptimizer):
    """Memory planner for GPU nets: 2-D image or buffer blocks.

    Overrides the block-shape hooks of MemoryOptimizer; the choice
    between GPU images and GPU buffers follows the net's recorded
    OpenCL memory type (self.cl_mem_type).
    """

    def op_need_optimize_memory(self, op):
        """Skip buffer-transform plumbing ops that manage memory themselves."""
        if op.type == MaceKeyword.mace_buffer_transform:
            for arg in op.arg:
                # mode == 0 buffer transforms are excluded from planning
                if arg.name == 'mode' and arg.i == 0:
                    return False
        return op.type != MaceKeyword.mace_buffer_inverse_transform

    def get_op_image_mem_block(self, op_type, output_shape):
        """Return the 2-D block for *op_type*'s output when using GPU images.

        Raises:
            Exception: when *output_shape*'s rank is not supported for
                the given op type.
        """
        if op_type == 'WinogradTransform' or op_type == 'MatMul':
            buffer_shape = list(output_shape) + [1]
            mem_block = MemoryBlock(
                mace_pb2.GPU_IMAGE,
                calculate_image_shape(OpenCLBufferType.IN_OUT_HEIGHT,
                                      buffer_shape))
        elif op_type in ['Shape',
                         'InferConv2dShape',
                         'StridedSlice',
                         'Stack',
                         'ScalarMath']:
            # Small shape/scalar outputs live in plain CPU buffers.
            if len(output_shape) == 1:
                mem_block = MemoryBlock(mace_pb2.CPU_BUFFER,
                                        [output_shape[0], 1])
            elif len(output_shape) == 0:
                mem_block = MemoryBlock(mace_pb2.CPU_BUFFER,
                                        [1, 1])
            else:
                raise Exception('%s output shape dim size is not 0 or 1.' %
                                op_type)
        else:
            if len(output_shape) == 2:  # only support fc/softmax
                buffer_shape = [output_shape[0], output_shape[1]]
            elif len(output_shape) == 4:
                buffer_shape = output_shape
            else:
                raise Exception('%s output shape dim size is not 2 or 4.' %
                                op_type)
            mem_block = MemoryBlock(
                mace_pb2.GPU_IMAGE,
                calculate_image_shape(OpenCLBufferType.IN_OUT_CHANNEL,
                                      buffer_shape))
        return mem_block

    def get_op_buffer_mem_block(self, output_shape):
        """Flat GPU buffer block: total element count by 1."""
        return MemoryBlock(mace_pb2.GPU_BUFFER,
                           [reduce(operator.mul, output_shape, 1), 1])

    def get_op_mem_block(self, op_type, output_shape, output_type):
        """Dispatch to image or buffer planning; *output_type* is unused here."""
        if self.cl_mem_type == mace_pb2.GPU_IMAGE:
            return self.get_op_image_mem_block(op_type, output_shape)
        else:
            return self.get_op_buffer_mem_block(output_shape)

    def mem_size(self, memory_block):
        """Size metric: image area times 4 (RGBA channels), else width."""
        if memory_block.mem_type == mace_pb2.GPU_IMAGE:
            return memory_block.block[0] * memory_block.block[1] * 4
        else:
            return memory_block.block[0]

    def resize_mem_block(self, old_mem_block, op_mem_block):
        """Smallest block covering both blocks in each of the two dimensions."""
        resize_mem_block = MemoryBlock(
            old_mem_block.mem_type,
            [
                max(old_mem_block.block[0], op_mem_block.block[0]),
                max(old_mem_block.block[1], op_mem_block.block[1])
            ])
        return resize_mem_block

    def add_net_mem_blocks(self):
        """Write planned blocks into the arena and update the net's
        recorded OpenCL max image size when images are in use."""
        max_image_size_x = 0
        max_image_size_y = 0
        for mem in self.mem_block:
            arena = self.net_def.mem_arena
            block = arena.mem_block.add()
            block.mem_id = mem
            block.device_type = DeviceType.GPU.value
            block.mem_type = self.mem_block[mem].mem_type
            block.x = self.mem_block[mem].block[0]
            block.y = self.mem_block[mem].block[1]
            if self.mem_block[mem].mem_type == mace_pb2.GPU_IMAGE:
                max_image_size_x = max(max_image_size_x, block.x)
                max_image_size_y = max(max_image_size_y, block.y)

        if self.cl_mem_type == mace_pb2.GPU_IMAGE:
            # Update OpenCL max image size
            net_ocl_max_img_size_arg = None
            for arg in self.net_def.arg:
                if arg.name == cvt.MaceKeyword.mace_opencl_max_image_size:
                    net_ocl_max_img_size_arg = arg
                    # Never shrink an existing recorded maximum.
                    max_image_size_x = max(arg.ints[0], max_image_size_x)
                    max_image_size_y = max(arg.ints[1], max_image_size_y)
                    break
            if net_ocl_max_img_size_arg is None:
                net_ocl_max_img_size_arg = self.net_def.arg.add()
                net_ocl_max_img_size_arg.name = \
                    cvt.MaceKeyword.mace_opencl_max_image_size
            net_ocl_max_img_size_arg.ints[:] = [max_image_size_x,
                                                max_image_size_y]
def optimize_gpu_memory(net_def):
    """Run GPU memory planning on *net_def* in place."""
    GPUMemoryOptimizer(net_def).optimize()
def optimize_cpu_memory(net_def):
    """Run CPU memory planning on *net_def* in place."""
    MemoryOptimizer(net_def).optimize()
......@@ -14,6 +14,7 @@
import datetime
import os
import six
import uuid
import numpy as np
import hashlib
......@@ -34,8 +35,8 @@ class ModelFormat(object):
def generate_obfuscated_name(namespace, name):
md5 = hashlib.md5()
md5.update(namespace)
md5.update(name)
md5.update(six.b(namespace))
md5.update(six.b(name))
md5_digest = md5.hexdigest()
name = md5_digest[:8]
......
......@@ -15,8 +15,9 @@
#ifndef MACE_UTILS_QUANTIZE_H_
#define MACE_UTILS_QUANTIZE_H_
#include <limits>
#include <algorithm>
#include <cmath>
#include <limits>
namespace mace {
......
......@@ -21,30 +21,29 @@ build:android --config=cross_compile
build:android --crosstool_top=//external:android/crosstool
build:android --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
# Usage example: bazel build --config arm_linux
build:arm_linux --config=cross_compile
build:arm_linux --crosstool_top=//tools/arm_compiler:toolchain
build:arm_linux --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
build:arm_linux --cpu=armeabi-v7a
build:arm_linux --copt -mfloat-abi=hard
build:arm_linux --copt -mfpu=neon
build:arm_linux --copt -Wno-ignored-attributes
build:arm_linux --copt -Wno-unused-function
build:arm_linux --copt -Wno-sequence-point
build:arm_linux --copt -Wno-implicit-fallthrough
build:arm_linux --copt -Wno-psabi
# Usage example: bazel build --config arm_linux_gnueabihf
build:arm_linux_gnueabihf --config=cross_compile
build:arm_linux_gnueabihf --crosstool_top=//tools/arm_compiler:toolchain
build:arm_linux_gnueabihf --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
build:arm_linux_gnueabihf --cpu=armeabi-v7a
build:arm_linux_gnueabihf --copt -mfloat-abi=hard
build:arm_linux_gnueabihf --copt -mfpu=neon
build:arm_linux_gnueabihf --copt -Wno-ignored-attributes
build:arm_linux_gnueabihf --copt -Wno-unused-function
build:arm_linux_gnueabihf --copt -Wno-sequence-point
build:arm_linux_gnueabihf --copt -Wno-implicit-fallthrough
# Usage example: bazel build --config aarch64_linux
build:aarch64_linux --config=cross_compile
build:aarch64_linux --crosstool_top=//tools/aarch64_compiler:toolchain
build:aarch64_linux --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
build:aarch64_linux --cpu=aarch64
build:aarch64_linux --copt -Wno-ignored-attributes
build:aarch64_linux --copt -Wno-unused-function
build:aarch64_linux --copt -Wno-sequence-point
build:aarch64_linux --copt -Wno-implicit-fallthrough
# Usage example: bazel build --config aarch64_linux_gnu
build:aarch64_linux_gnu --config=cross_compile
build:aarch64_linux_gnu --crosstool_top=//tools/aarch64_compiler:toolchain
build:aarch64_linux_gnu --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
build:aarch64_linux_gnu --cpu=aarch64
build:aarch64_linux_gnu --copt -Wno-ignored-attributes
build:aarch64_linux_gnu --copt -Wno-unused-function
build:aarch64_linux_gnu --copt -Wno-sequence-point
build:aarch64_linux_gnu --copt -Wno-implicit-fallthrough
# Usage example: bazel build --config optimization
# Usage example: bazel build --config optimization
build:optimization -c opt
build:optimization --copt=-O3
build:optimization --linkopt=-Wl,--strip-all
......
......@@ -26,9 +26,9 @@ import sys
import sh_commands
from common import *
def stdout_processor(stdout, device_properties, abi):
pass
from device import DeviceWrapper, DeviceManager
def unittest_stdout_processor(stdout, device_properties, abi):
......@@ -39,7 +39,7 @@ def unittest_stdout_processor(stdout, device_properties, abi):
raise Exception("Command failed")
def ops_benchmark_stdout_processor(stdout, device_properties, abi):
def ops_benchmark_stdout_processor(stdout, dev, abi):
stdout_lines = stdout.split("\n")
metrics = {}
for line in stdout_lines:
......@@ -52,8 +52,8 @@ def ops_benchmark_stdout_processor(stdout, device_properties, abi):
metrics["%s.input_mb_per_sec" % parts[0]] = parts[3]
metrics["%s.gmacc_per_sec" % parts[0]] = parts[4]
platform = device_properties["ro.board.platform"].replace(" ", "-")
model = device_properties["ro.product.model"].replace(" ", "-")
platform = dev[YAMLKeyword.target_socs]
model = dev[YAMLKeyword.models]
tags = {
"ro.board.platform": platform,
"ro.product.model": model,
......@@ -87,7 +87,7 @@ def parse_args():
type=str,
default="all",
help="SoCs (ro.board.platform from getprop) to build, "
"comma seperated list or all/random")
"comma seperated list or all/random")
parser.add_argument(
"--target", type=str, default="//...", help="Bazel target to build")
parser.add_argument(
......@@ -115,14 +115,22 @@ def parse_args():
type=str2bool,
default=False,
help="Whether to use simpleperf stat")
parser.add_argument(
'--device_yml',
type=str,
default='',
help='embedded linux device config yml file'
)
return parser.parse_known_args()
def main(unused_args):
target_socs = None
target_devices = DeviceManager.list_devices(FLAGS.device_yml)
if FLAGS.target_socs != "all" and FLAGS.target_socs != "random":
target_socs = set(FLAGS.target_socs.split(','))
target_devices = sh_commands.get_target_socs_serialnos(target_socs)
target_devices = [dev for dev in target_devices
if dev[YAMLKeyword.target_socs] in target_socs]
if FLAGS.target_socs == "random":
unlocked_devices = \
[d for d in target_devices if not sh_commands.is_device_locked(d)]
......@@ -136,31 +144,29 @@ def main(unused_args):
target_abis = FLAGS.target_abis.split(',')
for target_abi in target_abis:
toolchain = infer_toolchain(target_abi)
sh_commands.bazel_build(target, abi=target_abi,
toolchain=toolchain,
enable_neon=FLAGS.enable_neon,
address_sanitizer=FLAGS.address_sanitizer)
if FLAGS.run_target:
for serialno in target_devices:
if target_abi not in set(
sh_commands.adb_supported_abis(serialno)):
for dev in target_devices:
if target_abi not in dev[YAMLKeyword.target_abis]:
print("Skip device %s which does not support ABI %s" %
(serialno, target_abi))
(dev, target_abi))
continue
stdouts = sh_commands.adb_run(
device_wrapper = DeviceWrapper(dev)
stdouts = device_wrapper.run(
target_abi,
serialno,
host_bin_path,
bin_name,
args=FLAGS.args,
opencl_profiling=True,
vlog_level=0,
device_bin_path="/data/local/tmp/mace",
out_of_range_check=True,
address_sanitizer=FLAGS.address_sanitizer,
simpleperf=FLAGS.simpleperf)
device_properties = sh_commands.adb_getprop_by_serialno(
serialno)
globals()[FLAGS.stdout_processor](stdouts, device_properties,
globals()[FLAGS.stdout_processor](stdouts, dev,
target_abi)
......
......@@ -22,6 +22,14 @@ mkdir -p $LIB_DIR/arm64-v8a/cpu_gpu
rm -rf $LIB_DIR/linux-x86-64
mkdir -p $LIB_DIR/linux-x86-64
rm -rf $LIB_DIR/arm_linux_gnueabihf
mkdir -p $LIB_DIR/arm_linux_gnueabihf/cpu_gpu
rm -rf $LIB_DIR/aarch64_linux_gnu
mkdir -p $LIB_DIR/aarch64_linux_gnu/cpu_gpu
# build shared libraries
echo "build shared lib for armeabi-v7a + cpu_gpu_dsp"
bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --define hexagon=true --define quantize=true --cpu=armeabi-v7a
......@@ -36,6 +44,14 @@ echo "build shared lib for arm64-v8a + cpu_gpu"
bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true --define quantize=true --cpu=arm64-v8a
cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/arm64-v8a/cpu_gpu/
echo "build shared lib for arm_linux_gnueabihf + cpu_gpu"
bazel build --config arm_linux_gnueabihf --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true
cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/arm_linux_gnueabihf/cpu_gpu/
echo "build shared lib for aarch64_linux_gnu + cpu_gpu"
bazel build --config aarch64_linux_gnu --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=true --define opencl=true
cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/aarch64_linux_gnu/cpu_gpu/
if [[ "$OSTYPE" != "darwin"* ]];then
echo "build shared lib for linux-x86-64"
bazel build mace/libmace:libmace_dynamic --config optimization --define quantize=true --define openmp=true
......@@ -56,6 +72,14 @@ echo "build static lib for arm64-v8a + cpu_gpu"
bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=true --define opencl=true --define quantize=true --cpu=arm64-v8a
cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/arm64-v8a/cpu_gpu/
echo "build static lib for arm_linux_gnueabihf + cpu_gpu"
bazel build --config arm_linux_gnueabihf --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=true --define opencl=true
cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/arm_linux_gnueabihf/cpu_gpu/
echo "build static lib for aarch64_linux_gnu + cpu_gpu"
bazel build --config aarch64_linux_gnu --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=true --define opencl=true
cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/aarch64_linux_gnu/cpu_gpu/
if [[ "$OSTYPE" != "darwin"* ]];then
echo "build static lib for linux-x86-64"
bazel build mace/libmace:libmace_static --config optimization --define quantize=true --define openmp=true
......
......@@ -13,7 +13,9 @@
# limitations under the License.
import enum
import hashlib
import re
import os
import six
......@@ -135,3 +137,340 @@ def formatted_file_name(input_file_name, input_name):
for c in input_name:
res += c if c.isalnum() else '_'
return res
def md5sum(s):
    """Return the hexadecimal MD5 digest of string *s* (UTF-8 encoded)."""
    return hashlib.md5(s.encode('utf-8')).hexdigest()
def get_build_binary_dir(library_name, target_abi):
    """Return the temporary build-output directory for one library/ABI pair."""
    return "/".join([BUILD_OUTPUT_DIR, library_name,
                     BUILD_TMP_DIR_NAME, target_abi])
def get_model_lib_output_path(library_name, abi):
    """Return the path of the generated model static library for *abi*."""
    return os.path.join(BUILD_OUTPUT_DIR, library_name,
                        MODEL_OUTPUT_DIR_NAME, abi,
                        library_name + ".a")
def check_model_converted(library_name, model_name,
                          model_graph_format, model_data_format,
                          abi):
    """Abort (via mace_check) unless the converted model artifacts exist.

    The graph artifact is either a ``<model_name>.pb`` file or the model
    static library, depending on *model_graph_format*; the weights are
    checked only when *model_data_format* is file-based.
    """
    output_dir = '%s/%s/%s' % (BUILD_OUTPUT_DIR, library_name,
                               MODEL_OUTPUT_DIR_NAME)
    if model_graph_format == ModelFormat.file:
        graph_path = "%s/%s.pb" % (output_dir, model_name)
        mace_check(os.path.exists(graph_path),
                   ModuleName.RUN,
                   "You should convert model first.")
    else:
        lib_path = get_model_lib_output_path(library_name, abi)
        mace_check(os.path.exists(lib_path),
                   ModuleName.RUN,
                   "You should convert model first.")
    if model_data_format == ModelFormat.file:
        data_path = "%s/%s.data" % (output_dir, model_name)
        mace_check(os.path.exists(data_path),
                   ModuleName.RUN,
                   "You should convert model first.")
def parse_device_type(runtime):
    """Map a RuntimeType name onto the matching DeviceType.

    Unrecognized runtimes yield an empty string, as before.
    """
    runtime_to_device = {
        RuntimeType.dsp: DeviceType.HEXAGON,
        RuntimeType.gpu: DeviceType.GPU,
        RuntimeType.cpu: DeviceType.CPU,
    }
    return runtime_to_device.get(runtime, "")
def sha256_checksum(fname):
    """Return the hex SHA-256 digest of file *fname*, read in 4 KiB chunks."""
    digest = hashlib.sha256()
    with open(fname, "rb") as fp:
        while True:
            chunk = fp.read(4096)
            if not chunk:
                break
            digest.update(chunk)
    return digest.hexdigest()
def get_model_files(model_file_path,
                    model_sha256_checksum,
                    model_output_dir,
                    weight_file_path="",
                    weight_sha256_checksum=""):
    """Resolve the model graph and (optional) weight files, downloading
    remote ones into *model_output_dir*.

    Remote files (http/https URLs) are cached under a name derived from
    the MD5 of the URL, so re-runs skip the download when the cached copy
    already matches *model_sha256_checksum*.  A checksum mismatch after
    download is reported through MaceLogger.error.

    Returns:
        (model_file, weight_file) local paths; weight_file is "" when no
        weight path was given.
    """
    model_file = model_file_path
    weight_file = weight_file_path

    if model_file_path.startswith("http://") or \
            model_file_path.startswith("https://"):
        model_file = model_output_dir + "/" + md5sum(model_file_path) + ".pb"
        # Re-download only when the cache is missing or stale.
        if not os.path.exists(model_file) or \
                sha256_checksum(model_file) != model_sha256_checksum:
            MaceLogger.info("Downloading model, please wait ...")
            six.moves.urllib.request.urlretrieve(model_file_path, model_file)
            MaceLogger.info("Model downloaded successfully.")
        if sha256_checksum(model_file) != model_sha256_checksum:
            MaceLogger.error(ModuleName.MODEL_CONVERTER,
                             "model file sha256checksum not match")

    if weight_file_path.startswith("http://") or \
            weight_file_path.startswith("https://"):
        weight_file = \
            model_output_dir + "/" + md5sum(weight_file_path) + ".caffemodel"
        if not os.path.exists(weight_file) or \
                sha256_checksum(weight_file) != weight_sha256_checksum:
            MaceLogger.info("Downloading model weight, please wait ...")
            six.moves.urllib.request.urlretrieve(weight_file_path, weight_file)
            MaceLogger.info("Model weight downloaded successfully.")

    # Unlike the graph, the weight checksum is validated for local files
    # too -- any non-empty weight path is checked.
    if weight_file:
        if sha256_checksum(weight_file) != weight_sha256_checksum:
            MaceLogger.error(ModuleName.MODEL_CONVERTER,
                             "weight file sha256checksum not match")
    return model_file, weight_file
def get_opencl_binary_output_path(library_name, target_abi, device):
    """Return the output path for the compiled OpenCL kernel binary of
    *library_name* on the given device (keyed by device model and SoC)."""
    return '%s/%s/%s/%s/%s_%s.%s.%s.bin' % (
        BUILD_OUTPUT_DIR,
        library_name,
        OUTPUT_OPENCL_BINARY_DIR_NAME,
        target_abi,
        library_name,
        OUTPUT_OPENCL_BINARY_FILE_NAME,
        device.models,
        device.target_socs)
def get_opencl_parameter_output_path(library_name, target_abi, device):
    """Return the output path for the tuned OpenCL parameter file of
    *library_name* on the given device (keyed by device model and SoC)."""
    return '%s/%s/%s/%s/%s_%s.%s.%s.bin' % (
        BUILD_OUTPUT_DIR,
        library_name,
        OUTPUT_OPENCL_BINARY_DIR_NAME,
        target_abi,
        library_name,
        OUTPUT_OPENCL_PARAMETER_FILE_NAME,
        device.models,
        device.target_socs)
def get_build_model_dirs(library_name,
                         model_name,
                         target_abi,
                         device,
                         model_file_path):
    """Compute the build directories for one model.

    Returns:
        (model_output_base_dir, model_output_dir, mace_model_dir) where
        the base dir is keyed by the MD5 of *model_file_path*, and the
        per-run output dir additionally depends on the ABI and, when a
        concrete device is known, on its model/SoC.
    """
    digest = md5sum(model_file_path)
    base_dir = '/'.join([BUILD_OUTPUT_DIR, library_name,
                         BUILD_TMP_DIR_NAME, model_name, digest])
    if target_abi == ABIType.host:
        output_dir = '%s/%s' % (base_dir, target_abi)
    elif not device.target_socs or not device.address:
        # No concrete device to key on: use the generic output dir.
        output_dir = '%s/%s/%s' % (base_dir,
                                   BUILD_TMP_GENERAL_OUTPUT_DIR_NAME,
                                   target_abi)
    else:
        output_dir = '%s/%s_%s/%s' % (base_dir,
                                      device.models,
                                      device.target_socs,
                                      target_abi)
    mace_model_dir = '%s/%s/%s' % (BUILD_OUTPUT_DIR, library_name,
                                   MODEL_OUTPUT_DIR_NAME)
    return base_dir, output_dir, mace_model_dir
def abi_to_internal(abi):
    """Translate a user-facing ABI name to the internal bazel ``--cpu`` value.

    Android ABIs pass through unchanged; the generic ARM-Linux aliases
    are mapped onto the CPU names the cross toolchains expect
    (``arm64`` -> ``aarch64``, ``armhf`` -> ``armeabi-v7a``).

    Fix: the original fell off the end and returned None for any other
    ABI (e.g. ABIType.host), which downstream would render as a bogus
    ``--cpu=None`` flag.  Unknown ABIs now map to themselves.
    """
    if abi in (ABIType.armeabi_v7a, ABIType.arm64_v8a):
        return abi
    if abi == ABIType.arm64:
        return ABIType.aarch64
    if abi == ABIType.armhf:
        return ABIType.armeabi_v7a
    # Fallback for host and any future ABI: pass through unchanged
    # instead of silently returning None.
    return abi
def infer_toolchain(abi):
    """Pick the bazel toolchain config matching *abi* ('' when none applies)."""
    if abi in (ABIType.armeabi_v7a, ABIType.arm64_v8a):
        return ToolchainType.android
    linux_toolchains = {
        ABIType.armhf: ToolchainType.arm_linux_gnueabihf,
        ABIType.arm64: ToolchainType.aarch64_linux_gnu,
    }
    return linux_toolchains.get(abi, '')
################################
# YAML key word
################################
class YAMLKeyword(object):
    """String constants for the keys recognized in the model/device YAML
    configuration files."""
    # Library-level build settings.
    library_name = 'library_name'
    target_abis = 'target_abis'
    target_socs = 'target_socs'
    model_graph_format = 'model_graph_format'
    model_data_format = 'model_data_format'
    models = 'models'
    platform = 'platform'
    # Device description keys (devices.yml).
    device_name = 'device_name'
    system = 'system'
    address = 'address'
    username = 'username'
    password = 'password'
    # Per-model source files and their checksums.
    model_file_path = 'model_file_path'
    model_sha256_checksum = 'model_sha256_checksum'
    weight_file_path = 'weight_file_path'
    weight_sha256_checksum = 'weight_sha256_checksum'
    # Subgraph (input/output tensor) description.
    subgraphs = 'subgraphs'
    input_tensors = 'input_tensors'
    input_shapes = 'input_shapes'
    input_ranges = 'input_ranges'
    output_tensors = 'output_tensors'
    output_shapes = 'output_shapes'
    check_tensors = 'check_tensors'
    check_shapes = 'check_shapes'
    # Runtime / conversion options.
    runtime = 'runtime'
    data_type = 'data_type'
    input_data_types = 'input_data_types'
    input_data_formats = 'input_data_formats'
    output_data_formats = 'output_data_formats'
    limit_opencl_kernel_time = 'limit_opencl_kernel_time'
    nnlib_graph_mode = 'nnlib_graph_mode'
    obfuscate = 'obfuscate'
    winograd = 'winograd'
    quantize = 'quantize'
    quantize_range_file = 'quantize_range_file'
    change_concat_ranges = 'change_concat_ranges'
    # Validation settings.
    validation_inputs_data = 'validation_inputs_data'
    validation_threshold = 'validation_threshold'
    graph_optimize_options = 'graph_optimize_options'  # internal use for now
    cl_mem_type = 'cl_mem_type'
################################
# SystemType
################################
class SystemType:
    """Operating-system category of a target device."""
    host = 'host'            # the build machine itself
    android = 'android'      # Android device (adb)
    arm_linux = 'arm_linux'  # generic ARM-Linux device
################################
# common device str
################################
# Working directory pushed to on Android devices via adb.
PHONE_DATA_DIR = '/data/local/tmp/mace_run'
# Working directory on non-Android devices (presumably ARM-Linux boards,
# matching the DEVICE_* naming -- confirm against device.py usage).
DEVICE_DATA_DIR = '/tmp/data/mace_run'
# On-device internal storage dir (see MACE_INTERNAL_STORAGE_PATH usage).
DEVICE_INTERIOR_DIR = PHONE_DATA_DIR + "/interior"

# Layout of the local `builds/` output tree.
BUILD_OUTPUT_DIR = 'builds'
BUILD_TMP_DIR_NAME = '_tmp'
BUILD_DOWNLOADS_DIR = BUILD_OUTPUT_DIR + '/downloads'
BUILD_TMP_GENERAL_OUTPUT_DIR_NAME = 'general'
MODEL_OUTPUT_DIR_NAME = 'model'

# Bazel targets / binary names of the example, mace_run, and benchmark tools.
EXAMPLE_STATIC_NAME = "example_static"
EXAMPLE_DYNAMIC_NAME = "example_dynamic"
EXAMPLE_STATIC_TARGET = "//mace/examples/cli:" + EXAMPLE_STATIC_NAME
EXAMPLE_DYNAMIC_TARGET = "//mace/examples/cli:" + EXAMPLE_DYNAMIC_NAME
MACE_RUN_STATIC_NAME = "mace_run_static"
MACE_RUN_DYNAMIC_NAME = "mace_run_dynamic"
MACE_RUN_STATIC_TARGET = "//mace/tools/validation:" + MACE_RUN_STATIC_NAME
MACE_RUN_DYNAMIC_TARGET = "//mace/tools/validation:" + MACE_RUN_DYNAMIC_NAME

# OpenCL compiled-kernel / tuning artifact names and directories.
CL_COMPILED_BINARY_FILE_NAME = "mace_cl_compiled_program.bin"
BUILD_TMP_OPENCL_BIN_DIR = 'opencl_bin'
LIBMACE_DYNAMIC_PATH = "bazel-bin/mace/libmace/libmace.so"
CL_TUNED_PARAMETER_FILE_NAME = "mace_run.config"
MODEL_HEADER_DIR_PATH = 'include/mace/public'
OUTPUT_LIBRARY_DIR_NAME = 'lib'
OUTPUT_OPENCL_BINARY_DIR_NAME = 'opencl'
OUTPUT_OPENCL_BINARY_FILE_NAME = 'compiled_opencl_kernel'
OUTPUT_OPENCL_PARAMETER_FILE_NAME = 'tuned_opencl_parameter'

# Code-generation directories and library bazel targets / output paths.
CODEGEN_BASE_DIR = 'mace/codegen'
MODEL_CODEGEN_DIR = CODEGEN_BASE_DIR + '/models'
ENGINE_CODEGEN_DIR = CODEGEN_BASE_DIR + '/engine'
LIB_CODEGEN_DIR = CODEGEN_BASE_DIR + '/lib'
LIBMACE_SO_TARGET = "//mace/libmace:libmace.so"
LIBMACE_STATIC_TARGET = "//mace/libmace:libmace_static"
LIBMACE_STATIC_PATH = "bazel-genfiles/mace/libmace/libmace.a"
MODEL_LIB_TARGET = "//mace/codegen:generated_models"
MODEL_LIB_PATH = "bazel-genfiles/mace/codegen/libgenerated_models.a"
QUANTIZE_STAT_TARGET = "//mace/tools/quantization:quantize_stat"
BM_MODEL_STATIC_NAME = "benchmark_model_static"
BM_MODEL_DYNAMIC_NAME = "benchmark_model_dynamic"
BM_MODEL_STATIC_TARGET = "//mace/benchmark:" + BM_MODEL_STATIC_NAME
BM_MODEL_DYNAMIC_TARGET = "//mace/benchmark:" + BM_MODEL_DYNAMIC_NAME

# Wildcard SoC tag used in place of a concrete target_socs value.
ALL_SOC_TAG = 'all'
################################
# Model File Format
################################
class ModelFormat(object):
    """How a converted model's graph/data are packaged."""
    file = 'file'  # standalone .pb / .data files
    code = 'code'  # built into the model static library (see check_model_converted)
################################
# ABI Type
################################
class ABIType(object):
    """ABI names accepted from the command line / YAML config."""
    armeabi_v7a = 'armeabi-v7a'  # Android 32-bit ARM
    arm64_v8a = 'arm64-v8a'      # Android 64-bit ARM
    arm64 = 'arm64'              # ARM-Linux 64-bit (internal name: aarch64)
    aarch64 = 'aarch64'          # internal --cpu name for arm64 (abi_to_internal)
    armhf = 'armhf'              # ARM-Linux 32-bit hard-float
    host = 'host'                # the build machine
################################
# Module name
################################
class ModuleName(object):
    """Module tags used when reporting errors (e.g. via mace_check)."""
    YAML_CONFIG = 'YAML CONFIG'
    MODEL_CONVERTER = 'Model Converter'
    RUN = 'RUN'
    BENCHMARK = 'Benchmark'
#################################
# mace lib type
#################################
class MACELibType(object):
    """Linkage type of the MACE library used by a build."""
    static = 0
    dynamic = 1
#################################
# Run time type
#################################
class RuntimeType(object):
    """Compute runtime names accepted in the YAML 'runtime' key."""
    cpu = 'cpu'
    gpu = 'gpu'
    dsp = 'dsp'          # maps to DeviceType.HEXAGON (see parse_device_type)
    cpu_gpu = 'cpu+gpu'
#################################
# Tool chain Type
#################################
class ToolchainType:
    """Bazel --config toolchain names used for cross compilation
    (see infer_toolchain)."""
    android = 'android'
    arm_linux_gnueabihf = 'arm_linux_gnueabihf'
    aarch64_linux_gnu = 'aarch64_linux_gnu'
This diff is collapsed.
This diff is collapsed.
import argparse
import os
import sys
import six
import tensorflow as tf
# TODO(liyin): use dataset api and estimator with distributed strategy
......@@ -70,7 +73,7 @@ def images_to_tensors(input_files, image_shape, mean_values=None):
def main(unused_args):
if not os.path.exists(FLAGS.input):
print ("input does not exist: %s" % FLAGS.input)
print("input does not exist: %s" % FLAGS.input)
sys.exit(-1)
input_files = []
......
import argparse
import os
import sys
import six
import numpy as np
import tensorflow as tf
......@@ -53,7 +56,7 @@ def tensors_to_images(input_files, image_shape):
def main(unused_args):
if not os.path.exists(FLAGS.input):
print ("input does not exist: %s" % FLAGS.input)
print("input does not exist: %s" % FLAGS.input)
sys.exit(-1)
input_files = []
......
......@@ -23,13 +23,16 @@ import struct
import subprocess
import sys
import time
import urllib
import platform
from enum import Enum
import six
import common
from common import ModelFormat
from common import ABIType
from common import SystemType
from common import YAMLKeyword
from common import abi_to_internal
sys.path.insert(0, "mace/python/tools")
try:
......@@ -89,11 +92,6 @@ class BuildType(object):
code = 'code'
class ModelFormat(object):
file = 'file'
code = 'code'
def stdout_success(stdout):
stdout_lines = stdout.split("\n")
for line in stdout_lines:
......@@ -190,7 +188,7 @@ def adb_pull(src_path, dst_path, serialno):
try:
sh.adb("-s", serialno, "pull", src_path, dst_path)
except Exception as e:
six.print_("Error msg: %s" % e.stderr)
six.print_("Error msg: %s" % e, file=sys.stderr)
def adb_run(abi,
......@@ -293,7 +291,7 @@ def find_asan_rt_library(abi, asan_rt_path=''):
if len(candidates) == 0:
common.MaceLogger.error(
"Toolchain",
"Can't find AddressSanitizer runtime library in % s" %
"Can't find AddressSanitizer runtime library in %s" %
find_path)
elif len(candidates) > 1:
common.MaceLogger.info(
......@@ -338,6 +336,7 @@ def find_simpleperf_library(abi, simpleperf_path=''):
################################
def bazel_build(target,
abi="armeabi-v7a",
toolchain='android',
hexagon_mode=False,
enable_openmp=True,
enable_neon=True,
......@@ -361,8 +360,8 @@ def bazel_build(target,
"build",
target,
"--config",
"android",
"--cpu=%s" % abi,
toolchain,
"--cpu=%s" % abi_to_internal(abi),
"--define",
"neon=%s" % str(enable_neon).lower(),
"--define",
......@@ -694,230 +693,20 @@ def push_depended_so_libs(libmace_dynamic_library_path,
for dep in split_stdout(dep_so_libs):
if dep == "libgnustl_shared.so":
adb_push(
"%s/sources/cxx-stl/gnu-libstdc++/4.9/libs/%s/libgnustl_shared.so" # noqa
% (os.environ["ANDROID_NDK_HOME"], abi),
phone_data_dir,
serialno)
"%s/sources/cxx-stl/gnu-libstdc++/4.9/libs/%s/libgnustl_shared.so" # noqa
% (os.environ["ANDROID_NDK_HOME"], abi),
phone_data_dir,
serialno)
elif dep == "libc++_shared.so":
adb_push(
"%s/sources/cxx-stl/llvm-libc++/libs/%s/libc++_shared.so" # noqa
% (os.environ["ANDROID_NDK_HOME"], abi),
phone_data_dir,
serialno)
def tuning_run(abi,
               serialno,
               target_dir,
               target_name,
               vlog_level,
               embed_model_data,
               model_output_dir,
               input_nodes,
               output_nodes,
               input_shapes,
               output_shapes,
               mace_model_dir,
               model_tag,
               device_type,
               running_round,
               restart_round,
               limit_opencl_kernel_time,
               tuning,
               out_of_range_check,
               phone_data_dir,
               model_graph_format,
               opencl_binary_file,
               opencl_parameter_file,
               libmace_dynamic_library_path,
               omp_num_threads=-1,
               cpu_affinity_policy=1,
               gpu_perf_hint=3,
               gpu_priority_hint=3,
               input_file_name="model_input",
               output_file_name="model_out",
               input_dir="",
               output_dir="",
               runtime_failure_ratio=0.0,
               address_sanitizer=False,
               link_dynamic=False,
               quantize_stat=False):
    """Run (and optionally tune) a converted model and return its stdout.

    For ``abi == "host"`` the runner binary is executed locally through
    subprocess.  Otherwise all required artifacts (input files, model
    data, OpenCL binaries, shared libraries, and the runner binary) are
    pushed to the device over adb and executed there via a generated
    shell script.  Calls common.MaceLogger.error (which aborts) when the
    on-device run does not report success.
    """
    six.print_("* Run '%s' with round=%s, restart_round=%s, tuning=%s, "
               "out_of_range_check=%s, omp_num_threads=%s, "
               "cpu_affinity_policy=%s, gpu_perf_hint=%s, "
               "gpu_priority_hint=%s" %
               (model_tag, running_round, restart_round, str(tuning),
                str(out_of_range_check), omp_num_threads, cpu_affinity_policy,
                gpu_perf_hint, gpu_priority_hint))
    sys.stdout.flush()

    mace_model_path = ""
    if model_graph_format == ModelFormat.file:
        mace_model_path = "%s/%s.pb" % (mace_model_dir, model_tag)

    if abi == "host":
        # Host run: invoke the binary directly, pointing LD_LIBRARY_PATH
        # at the directory containing the dynamic libmace.
        libmace_dynamic_lib_path = \
            os.path.dirname(libmace_dynamic_library_path)
        cmd = [
            "env",
            "LD_LIBRARY_PATH=%s" % libmace_dynamic_lib_path,
            "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
            "MACE_RUNTIME_FAILURE_RATIO=%f" % runtime_failure_ratio,
        ]
        if quantize_stat:
            cmd.append("MACE_LOG_TENSOR_RANGE=1")
        cmd.extend([
            "%s/%s" % (target_dir, target_name),
            "--model_name=%s" % model_tag,
            "--input_node=%s" % ",".join(input_nodes),
            "--output_node=%s" % ",".join(output_nodes),
            "--input_shape=%s" % ":".join(input_shapes),
            "--output_shape=%s" % ":".join(output_shapes),
            "--input_file=%s/%s" % (model_output_dir, input_file_name),
            "--output_file=%s/%s" % (model_output_dir, output_file_name),
            "--input_dir=%s" % input_dir,
            "--output_dir=%s" % output_dir,
            "--model_data_file=%s/%s.data" % (mace_model_dir, model_tag),
            "--device=%s" % device_type,
            "--round=%s" % running_round,
            "--restart_round=%s" % restart_round,
            "--omp_num_threads=%s" % omp_num_threads,
            "--cpu_affinity_policy=%s" % cpu_affinity_policy,
            "--gpu_perf_hint=%s" % gpu_perf_hint,
            "--gpu_priority_hint=%s" % gpu_priority_hint,
            "--model_file=%s" % mace_model_path,
        ])
        p = subprocess.Popen(
            cmd,
            stderr=subprocess.PIPE,
            stdout=subprocess.PIPE)
        out, err = p.communicate()
        # Combine the streams (stderr first) so errors show up in the log.
        stdout = err + out
        six.print_(stdout)
        six.print_("Running finished!\n")
    else:
        # Device run: stage all artifacts under phone_data_dir via adb.
        sh.adb("-s", serialno, "shell", "mkdir", "-p", phone_data_dir)
        internal_storage_dir = create_internal_storage_dir(
            serialno, phone_data_dir)

        # Push one formatted input file per input node.
        for input_name in input_nodes:
            formatted_name = common.formatted_file_name(input_file_name,
                                                        input_name)
            adb_push("%s/%s" % (model_output_dir, formatted_name),
                     phone_data_dir, serialno)
        if address_sanitizer:
            adb_push(find_asan_rt_library(abi), phone_data_dir, serialno)

        if not embed_model_data:
            adb_push("%s/%s.data" % (mace_model_dir, model_tag),
                     phone_data_dir, serialno)

        if device_type == common.DeviceType.GPU:
            if os.path.exists(opencl_binary_file):
                adb_push(opencl_binary_file, phone_data_dir, serialno)
            if os.path.exists(opencl_parameter_file):
                adb_push(opencl_parameter_file, phone_data_dir, serialno)

        # NOTE(review): the Hexagon DSP controller library is pushed
        # unconditionally here; confirm whether a
        # `device_type == HEXAGON` guard was intended.
        adb_push("third_party/nnlib/libhexagon_controller.so",
                 phone_data_dir, serialno)

        mace_model_phone_path = ""
        if model_graph_format == ModelFormat.file:
            mace_model_phone_path = "%s/%s.pb" % (phone_data_dir, model_tag)
            adb_push(mace_model_path,
                     mace_model_phone_path,
                     serialno)

        if link_dynamic:
            adb_push(libmace_dynamic_library_path, phone_data_dir,
                     serialno)
            push_depended_so_libs(libmace_dynamic_library_path, abi,
                                  phone_data_dir, serialno)

        adb_push("%s/%s" % (target_dir, target_name), phone_data_dir,
                 serialno)

        stdout_buff = []
        process_output = make_output_processor(stdout_buff)

        # Environment variables for the on-device process.
        adb_cmd = [
            "LD_LIBRARY_PATH=%s" % phone_data_dir,
            "MACE_TUNING=%s" % int(tuning),
            "MACE_OUT_OF_RANGE_CHECK=%s" % int(out_of_range_check),
            "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
            "MACE_RUN_PARAMETER_PATH=%s/mace_run.config" % phone_data_dir,
            "MACE_INTERNAL_STORAGE_PATH=%s" % internal_storage_dir,
            "MACE_LIMIT_OPENCL_KERNEL_TIME=%s" % limit_opencl_kernel_time,
            "MACE_RUNTIME_FAILURE_RATIO=%f" % runtime_failure_ratio,
        ]
        if quantize_stat:
            adb_cmd.append("MACE_LOG_TENSOR_RANGE=1")
        if address_sanitizer:
            adb_cmd.extend([
                "LD_PRELOAD=%s/%s" % (phone_data_dir,
                                      asan_rt_library_names(abi))
            ])
        adb_cmd.extend([
            "%s/%s" % (phone_data_dir, target_name),
            "--model_name=%s" % model_tag,
            "--input_node=%s" % ",".join(input_nodes),
            "--output_node=%s" % ",".join(output_nodes),
            "--input_shape=%s" % ":".join(input_shapes),
            "--output_shape=%s" % ":".join(output_shapes),
            "--input_file=%s/%s" % (phone_data_dir, input_file_name),
            "--output_file=%s/%s" % (phone_data_dir, output_file_name),
            "--input_dir=%s" % input_dir,
            "--output_dir=%s" % output_dir,
            "--model_data_file=%s/%s.data" % (phone_data_dir, model_tag),
            "--device=%s" % device_type,
            "--round=%s" % running_round,
            "--restart_round=%s" % restart_round,
            "--omp_num_threads=%s" % omp_num_threads,
            "--cpu_affinity_policy=%s" % cpu_affinity_policy,
            "--gpu_perf_hint=%s" % gpu_perf_hint,
            "--gpu_priority_hint=%s" % gpu_priority_hint,
            "--model_file=%s" % mace_model_phone_path,
            "--opencl_binary_file=%s/%s" %
            (phone_data_dir, os.path.basename(opencl_binary_file)),
            "--opencl_parameter_file=%s/%s" %
            (phone_data_dir, os.path.basename(opencl_parameter_file)),
        ])
        adb_cmd = ' '.join(adb_cmd)
        # Execute through a pushed script file: sidesteps shell-quoting
        # issues and command-length limits of a direct `adb shell <cmd>`.
        cmd_file_name = "%s-%s-%s" % ('cmd_file', model_tag, str(time.time()))
        adb_cmd_file = "%s/%s" % (phone_data_dir, cmd_file_name)
        tmp_cmd_file = "%s/%s" % ('/tmp', cmd_file_name)
        with open(tmp_cmd_file, 'w') as cmd_file:
            cmd_file.write(adb_cmd)
        adb_push(tmp_cmd_file, adb_cmd_file, serialno)
        os.remove(tmp_cmd_file)

        sh.adb(
            "-s",
            serialno,
            "shell",
            "sh",
            adb_cmd_file,
            _tty_in=True,
            _out=process_output,
            _err_to_out=True)
        stdout = "".join(stdout_buff)
        if not stdout_success(stdout):
            common.MaceLogger.error("Mace Run", "Mace run failed.")

        # Clean up the pushed command script.
        sh.adb(
            "-s",
            serialno,
            "shell",
            "rm",
            adb_cmd_file,
            _fg=True)

        six.print_("Running finished!\n")

    sys.stdout.flush()
    return stdout
"%s/sources/cxx-stl/llvm-libc++/libs/%s/libc++_shared.so" # noqa
% (os.environ["ANDROID_NDK_HOME"], abi),
phone_data_dir,
serialno)
def validate_model(abi,
serialno,
device,
model_file_path,
weight_file_path,
platform,
......@@ -927,7 +716,6 @@ def validate_model(abi,
input_shapes,
output_shapes,
model_output_dir,
phone_data_dir,
input_data_types,
caffe_env,
input_file_name="model_input",
......@@ -941,8 +729,7 @@ def validate_model(abi,
if os.path.exists("%s/%s" % (model_output_dir,
formatted_name)):
sh.rm("-rf", "%s/%s" % (model_output_dir, formatted_name))
adb_pull("%s/%s" % (phone_data_dir, formatted_name),
model_output_dir, serialno)
device.pull_from_data_dir(formatted_name, model_output_dir)
if platform == "tensorflow":
validate(platform, model_file_path, "",
......@@ -956,11 +743,10 @@ def validate_model(abi,
container_name = "mace_caffe_validator"
if caffe_env == common.CaffeEnvType.LOCAL:
import imp
try:
imp.find_module('caffe')
import caffe
except ImportError:
logger.error('There is no caffe python module.')
logging.error('There is no caffe python module.')
validate(platform, model_file_path, weight_file_path,
"%s/%s" % (model_output_dir, input_file_name),
"%s/%s" % (model_output_dir, output_file_name),
......@@ -1157,8 +943,8 @@ def benchmark_model(abi,
if link_dynamic:
adb_push(libmace_dynamic_library_path, phone_data_dir,
serialno)
push_depended_so_lib(libmace_dynamic_library_path, abi,
phone_data_dir, serialno)
push_depended_so_libs(libmace_dynamic_library_path, abi,
phone_data_dir, serialno)
adb_push("%s/%s" % (benchmark_binary_dir, benchmark_binary_name),
phone_data_dir,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
To comment, please register.