diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index f454edf8aa0f3c850eaf477d55d499abea69cc25..bde098d7ac59547c02a9e44f6763b5d268db3de6 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -178,16 +178,18 @@ quantization_tests: - pwd - rm -rf mace-models - GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@github.com:XiaoMi/mace-models.git - - CONF_FILE=mace-models/mobilenet-v1/mobilenet-v1-quantize-retrain.yml - > if ping -c 1 v9.git.n.xiaomi.com 1>/dev/null 2>&1; then GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git DEVICE_CONF_FILE=generic-mobile-devices/devices.yml fi - > - python tools/converter.py convert --config=${CONF_FILE} --model_graph_format=file --model_data_format=file --cl_mem_type=buffer || exit 1; - python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --round=1 --target_abis=armeabi-v7a,arm64 --validate --model_graph_format=file --model_data_format=file || exit 1; - python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --example --target_abis=armeabi-v7a,arm64 --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1; + for CONF_FILE in mace-models/mobilenet-v1/mobilenet-v1-quantize-retrain.yml mace-models/mobilenet-v1/mobilenet-v1-quantize-retrain-for-check-only.yml mace-models/mobilenet-v1/mobilenet-v1-quantize-retrain-dsp.yml; + do + python tools/converter.py convert --config=${CONF_FILE} --model_graph_format=file --model_data_format=file || exit 1; + python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1; + python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --example --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1; + done - rm -rf mace-models build_android_demo: diff --git a/README.md b/README.md index ed119e31a08ee7236c594441ce45ba36903ce2f4..f5cbb989a1a292144d19b404f70444d652a7bef3 100644 --- a/README.md +++ b/README.md @@ -76,7 +76,7 @@ please refer to [the contribution guide](https://mace.readthedocs.io/en/latest/d MACE depends on several open source projects located in the [third_party](third_party) directory. Particularly, we learned a lot from the following projects during the development: -* [Qualcomm Hexagon NN Offload Framework](https://source.codeaurora.org/quic/hexagon_nn/nnlib): the Hexagon DSP runtime +* [Qualcomm Hexagon NN Offload Framework](https://developer.qualcomm.com/software/hexagon-dsp-sdk): the Hexagon DSP runtime depends on this library. * [TensorFlow](https://github.com/tensorflow/tensorflow), [Caffe](https://github.com/BVLC/caffe), diff --git a/docs/development/how_to_debug.rst b/docs/development/how_to_debug.rst new file mode 100644 index 0000000000000000000000000000000000000000..1026f3ff293ac4b46c9f67baa3471b3dd849102e --- /dev/null +++ b/docs/development/how_to_debug.rst @@ -0,0 +1,85 @@ +How to debug +============== + +Log debug info +-------------------------- +Mace defines two sorts of logs: one is for users (LOG), the other is for developers (VLOG). 
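+For example, to make one run verbose (a sketch: it reuses the ``example_bin`` path from the GDB section below and assumes the binary reads these environment variables):
+
+  .. code:: sh
+
+     # enable all user logs (INFO and above) and developer logs up to VLOG(2)
+     adb shell "MACE_CPP_MIN_LOG_LEVEL=0 MACE_CPP_MIN_VLOG_LEVEL=2 /data/local/tmp/mace_run/example_bin"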
+
+LOG includes four levels, i.e., ``INFO``, ``WARNING``, ``ERROR``, ``FATAL``;
+the environment variable ``MACE_CPP_MIN_LOG_LEVEL`` can be set to specify the log level for users, e.g.,
+``set MACE_CPP_MIN_LOG_LEVEL=0`` will enable the ``INFO`` log level, while ``set MACE_CPP_MIN_LOG_LEVEL=4`` will enable only the ``FATAL`` log level.
+
+
+VLOG levels are specified by numbers, e.g., 0, 1, 2. The environment variable ``MACE_CPP_MIN_VLOG_LEVEL`` can be set to specify the vlog level.
+Logs whose level does not exceed the specified level will be printed, so simply specifying a very large level number will make all logs printed.
+
+When using the Mace run tool, the vlog level can easily be set with a command-line option, e.g.,
+
+  .. code:: sh
+
+     python tools/converter.py run --config /path/to/model.yml --vlog_level=2
+
+
+If models are run on Android, you might need to use ``adb logcat`` to view the logs.
+
+
+Debug memory usage
+--------------------------
+The simplest way to inspect process memory usage is the ``top`` command; with the ``-H`` option, it can also show per-thread info.
+On Android, if you need more detailed memory info, e.g., the memory used by each category, ``adb shell dumpsys meminfo`` will help.
+By watching memory usage, you can check whether memory usage meets expectations and whether any leak happens.
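+For example (a sketch: ``mace_run`` is an assumed process name here, and ``top -H`` requires a recent, toybox-based Android):
+
+  .. code:: sh
+
+     # watch per-thread CPU and memory usage of the process
+     adb shell top -H | grep mace_run
+     # dump categorized memory usage (native heap, graphics, etc.)
+     adb shell dumpsys meminfo mace_run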
+
+
+Debug using GDB
+--------------------------
+GDB can be used as a last resort; it is powerful in that it can trace the stacks of your process. If you run models on Android,
+things may be a little more complicated.
+
+  .. code:: sh
+
+     # push gdbserver to your phone
+     adb push $ANDROID_NDK_HOME/prebuilt/android-arm64/gdbserver/gdbserver /data/local/tmp/
+
+
+     # set system env, pull system libs and bins to host
+     export SYSTEM_LIB=/path/to/android/system_lib
+     export SYSTEM_BIN=/path/to/android/system_bin
+     mkdir -p $SYSTEM_LIB
+     adb pull /system/lib/. $SYSTEM_LIB
+     mkdir -p $SYSTEM_BIN
+     adb pull /system/bin/. $SYSTEM_BIN
+
+
+     # Suppose the NDK toolchain used to compile Mace targets android-21
+     export PLATFORMS_21_LIB=$ANDROID_NDK_HOME/platforms/android-21/arch-arm/usr/lib/
+
+
+     # start gdbserver, making gdb listen on port 6000
+     # adb shell /data/local/tmp/gdbserver :6000 /path/to/binary/on/phone/example_bin
+     adb shell LD_LIBRARY_PATH=/dir/to/dynamic/library/on/phone/ /data/local/tmp/gdbserver :6000 /data/local/tmp/mace_run/example_bin
+     # or attach a running process
+     adb shell /data/local/tmp/gdbserver :6000 --attach 8700
+     # forward tcp port
+     adb forward tcp:6000 tcp:6000
+
+
+     # use gdb on host to execute the binary
+     $ANDROID_NDK_HOME/prebuilt/linux-x86_64/bin/gdb [/path/to/binary/on/host/example_bin]
+
+
+     # connect to the remote port after gdb starts
+     target remote :6000
+
+
+     # set lib path
+     set solib-search-path $SYSTEM_LIB:$SYSTEM_BIN:$PLATFORMS_21_LIB
+
+     # then you can use it as host gdb, e.g.,
+     bt
+
diff --git a/docs/faq.md b/docs/faq.md
index d0f8b3953412f1378acdff18f067eaaeadc237b3..90b61dadf5ceb5d84b13d53426527356f13e4de5 100644
--- a/docs/faq.md
+++ b/docs/faq.md
@@ -59,9 +59,9 @@ Why is MACE not working on DSP?
 ------------------------------------------------------------------------------
 Running models on Hexagon DSP needs a few prerequisites for DSP developers:
 
-* You need make sure SOCs of your phone is manufactured by Qualcomm and has HVX supported.
+* You need to make sure the SoC of your phone is manufactured by Qualcomm and supports HVX.
 * You need a phone that disables secure boot (once enabled, it cannot be reversed, so you can probably only get that type of phone from a manufacturer)
-* You need sign your phone by using testsig provided by Qualcomm. (Download Qualcomm Hexagon SDK first, plugin your phone to PC, run scripts/testsig.py)
-* You need install Hexagon nnlib backend by following nnlib README (https://github.com/XiaoMi/nnlib).
+* You need to sign your phone using the testsig provided by Qualcomm. (Download the Qualcomm Hexagon SDK first, plug your phone into the PC, and run scripts/testsig.py)
+* You need to push `third_party/nnlib/v6x/libhexagon_nn_skel.so` to `/system/vendor/lib/rfsa/adsp/`.
 
 Then, there you go. You can run Mace on Hexagon DSP.
diff --git a/docs/index.rst b/docs/index.rst
index 7545f2aa8c3227a88fc1b1e4fdc1ea194186c474..c73aa1d349e5be57f55b6294a8a03fe6c0169496 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -39,6 +39,7 @@ The main documentation is organized into the following sections:
    development/contributing
    development/adding_a_new_op
    development/how_to_run_tests
+   development/how_to_debug
    development/memory_layout
 
 .. toctree::
diff --git a/docs/installation/env_requirement.rst b/docs/installation/env_requirement.rst
index 59c63c871afa1b3c9555749494e0220ffb753cf2..be15c67c0917d59caea47836225ba67143098bf9 100644
--- a/docs/installation/env_requirement.rst
+++ b/docs/installation/env_requirement.rst
@@ -22,21 +22,29 @@ Required dependencies
     - Linux:``apt-get install cmake`` Mac:``brew install cmake``
     - >= 3.11.3
   * - Jinja2
-    - pip install -I jinja2==2.10
+    - pip install jinja2==2.10
     - 2.10
   * - PyYaml
-    - pip install -I pyyaml==3.12
+    - pip install pyyaml==3.12
     - 3.12.0
   * - sh
-    - pip install -I sh==1.12.14
+    - pip install sh==1.12.14
     - 1.12.14
   * - Numpy
-    - pip install -I numpy==1.14.0
+    - pip install numpy==1.14.0
    - Required by model validation
   * - six
-    - pip install -I six==1.11.0
+    - pip install six==1.11.0
     - Required for Python 2 and 3 compatibility
 
+For Bazel, install it following the installation guide. For Python dependencies, run:
+
+  .. code:: sh
+
+     pip install -U --user -r setup/requirements.txt
+
+
 Optional dependencies
 ---------------------
@@ -49,25 +57,35 @@ Optional dependencies
   * - Android NDK
     - `NDK installation guide `__
     - Required by Android build, r15b, r15c, r16b, r17b
+  * - CMake
+    - apt-get install cmake
+    - >= 3.11.3
   * - ADB
     - Linux:``apt-get install android-tools-adb`` Mac:``brew cask install android-platform-tools``
     - Required by Android run, >= 1.0.32
   * - TensorFlow
-    - pip install -I tensorflow==1.8.0
+    - pip install tensorflow==1.8.0
     - Required by TensorFlow model
   * - Docker
     - `docker installation guide `__
     - Required by docker mode for Caffe model
   * - Scipy
-    - pip install -I scipy==1.0.0
+    - pip install scipy==1.0.0
     - Required by model validation
   * - FileLock
-    - pip install -I filelock==3.0.0
+    - pip install filelock==3.0.0
     - Required by run on Android
   * - ONNX
-    - pip install onnx
+    - pip install onnx==1.3.0
     - Required by ONNX model
 
+For Python dependencies, run:
+
+  .. code:: sh
+
+     pip install -U --user -r setup/optionals.txt
+
+
 .. note::
     - For Android build, `ANDROID_NDK_HOME` must be configured by using ``export ANDROID_NDK_HOME=/path/to/ndk``
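+As a concrete end-to-end example, combining the commands from the tables above (a sketch assuming Ubuntu and a downloaded NDK):
+
+  .. code:: sh
+
+     # system tools named in the dependency tables
+     sudo apt-get install -y cmake android-tools-adb
+     # python dependencies
+     pip install -U --user -r setup/requirements.txt
+     pip install -U --user -r setup/optionals.txt
+     # required for Android builds
+     export ANDROID_NDK_HOME=/path/to/ndk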
diff --git a/docs/user_guide/basic_usage.rst b/docs/user_guide/basic_usage.rst
index 6d59a68eced45173ecc8c5e448f20661d34e6ecf..c9bde6fccabe0ed91f971452b22b46d4f0862366 100644
--- a/docs/user_guide/basic_usage.rst
+++ b/docs/user_guide/basic_usage.rst
@@ -99,7 +99,6 @@ MACE now supports models from TensorFlow and Caffe (more frameworks will be supp
       Prepare your pre-trained TensorFlow model.pb file.
-
 - Caffe
       Caffe 1.0+ models are supported in MACE converter tool.
@@ -253,7 +252,13 @@ However, there are some differences in different devices.
 * **DSP**
 
-    MACE only support Qualcomm DSP.
+    MACE only supports Qualcomm DSP, and you need to push the Hexagon NN library to the device.
+
+    .. code:: sh
+
+        # For Android device
+        adb root; adb remount
+        adb push third_party/nnlib/v6x/libhexagon_nn_skel.so /system/vendor/lib/rfsa/adsp/
 
 In the converting and building steps, you've got the static/shared library, model files and header files.
diff --git a/docs/user_guide/quantization_usage.rst b/docs/user_guide/quantization_usage.rst
index 11f8bee2205537ed515c3329532a2974d3ff3621..2b2f150991c4450ffb92caaa6a9953d1a9b8b90c 100644
--- a/docs/user_guide/quantization_usage.rst
+++ b/docs/user_guide/quantization_usage.rst
@@ -22,9 +22,6 @@ models, e.g., MobileNet. The only thing you need to make it run using MACE is to
 2. `quantize`: set `quantize` to be 1.
 
-    .. note::
-        You need set `runtime` to be `cpu` because we only support this quantization method to run on CPU for now (soon DSP will be supported).
-
 Post training quantization
 ---------------------------
diff --git a/mace/benchmark/statistics.cc b/mace/benchmark/statistics.cc
index 7329c247854679f3dbc12620e75f0b7c02503a54..7909a598b10ff29f6fbea7896deaabd90eca6334 100644
--- a/mace/benchmark/statistics.cc
+++ b/mace/benchmark/statistics.cc
@@ -13,6 +13,7 @@
// limitations under the License.
#include +#include #include #include diff --git a/mace/core/runtime/cpu/cpu_runtime.cc b/mace/core/runtime/cpu/cpu_runtime.cc index f9baac6e6cf528e3bf58f779ead99ab8bca18db0..89c5720604eef730eb1a5a5b48310e27d99de136 100644 --- a/mace/core/runtime/cpu/cpu_runtime.cc +++ b/mace/core/runtime/cpu/cpu_runtime.cc @@ -223,7 +223,7 @@ MaceStatus CPURuntime::SetOpenMPThreadsAndAffinityPolicy( } ++cores_to_use; } - num_threads_hint = cores_to_use; + num_threads_hint = std::min(num_threads_hint, cores_to_use); } else { cores_to_use = num_threads_hint; } diff --git a/mace/core/runtime/hexagon/hexagon_control_wrapper.cc b/mace/core/runtime/hexagon/hexagon_control_wrapper.cc index b3a0ff6fbd3bed960c91e42635f13cc711934f8d..6b952b88e22f16bb891dd410ff85febc8de05c49 100644 --- a/mace/core/runtime/hexagon/hexagon_control_wrapper.cc +++ b/mace/core/runtime/hexagon/hexagon_control_wrapper.cc @@ -107,11 +107,7 @@ bool HexagonControlWrapper::Config() { bool HexagonControlWrapper::Init() { LOG(INFO) << "Hexagon init"; -#ifdef MACE_USE_NNLIB_OLD - nn_id_ = hexagon_nn_init(); -#else MACE_CHECK(hexagon_nn_init(&nn_id_) == 0, "hexagon_nn_init failed"); -#endif ResetPerfInfo(); return true; } @@ -128,138 +124,116 @@ bool HexagonControlWrapper::SetupGraph(const NetDef &net_def, int64_t t0 = NowMicros(); // const node -#if defined(MACE_USE_NNLIB_CAF) || defined(MACE_USE_NNLIB_OLD) - std::thread const_thread([&]() -#endif - { - std::vector const_node_list; - for (const ConstTensor &const_tensor : net_def.tensors()) { - std::vector tensor_shape(const_tensor.dims().begin(), - const_tensor.dims().end()); - while (tensor_shape.size() < 4) { - tensor_shape.insert(tensor_shape.begin(), 1); - } - - hexagon_nn_const_node const_node; - const_node.node_id = node_id(const_tensor.node_id()); - const_node.tensor.batches = tensor_shape[0]; - const_node.tensor.height = tensor_shape[1]; - const_node.tensor.width = tensor_shape[2]; - const_node.tensor.depth = tensor_shape[3]; - - if (const_tensor.data_type() == DataType::DT_INT32 && - const_tensor.data_size() == 0) { - const_node.tensor.data = NULL; - const_node.tensor.dataLen = 0; - } else { - const_node.tensor.data = - const_cast(model_data + const_tensor.offset()); - const_node.tensor.dataLen = const_tensor.data_size() * - GetEnumTypeSize(const_tensor.data_type()); - } - const_node_list.push_back(const_node); - // 255 is magic number: why fastrpc limits sequence length to that? 
- if (const_node_list.size() >= 250) { - MACE_CHECK( - hexagon_nn_append_const_node_list(nn_id_, const_node_list.data(), - const_node_list.size()) == 0, - "append const node error"); - const_node_list.clear(); - } + std::vector const_node_list; + for (const ConstTensor &const_tensor : net_def.tensors()) { + std::vector tensor_shape(const_tensor.dims().begin(), + const_tensor.dims().end()); + while (tensor_shape.size() < 4) { + tensor_shape.insert(tensor_shape.begin(), 1); } - if (!const_node_list.empty()) { + hexagon_nn_const_node const_node; + const_node.node_id = node_id(const_tensor.node_id()); + const_node.tensor.batches = tensor_shape[0]; + const_node.tensor.height = tensor_shape[1]; + const_node.tensor.width = tensor_shape[2]; + const_node.tensor.depth = tensor_shape[3]; + + if (const_tensor.data_type() == DataType::DT_INT32 && + const_tensor.data_size() == 0) { + const_node.tensor.data = NULL; + const_node.tensor.dataLen = 0; + } else { + const_node.tensor.data = + const_cast(model_data + const_tensor.offset()); + const_node.tensor.dataLen = const_tensor.data_size() * + GetEnumTypeSize(const_tensor.data_type()); + } + const_node_list.push_back(const_node); + // 255 is magic number: why fastrpc limits sequence length to that? + if (const_node_list.size() >= 250) { MACE_CHECK( hexagon_nn_append_const_node_list(nn_id_, const_node_list.data(), const_node_list.size()) == 0, "append const node error"); + const_node_list.clear(); } - const_node_list.clear(); } -#if defined(MACE_USE_NNLIB_CAF) || defined(MACE_USE_NNLIB_OLD) - ); // NOLINT -#endif + + if (!const_node_list.empty()) { + MACE_CHECK( + hexagon_nn_append_const_node_list(nn_id_, const_node_list.data(), + const_node_list.size()) == 0, + "append const node error"); + } + const_node_list.clear(); // op node -#if defined(MACE_USE_NNLIB_CAF) || defined(MACE_USE_NNLIB_OLD) - std::thread op_thread([&]() -#endif - { - OpMap op_map; - op_map.Init(); - std::vector op_node_list; - std::vector> cached_inputs; - std::vector> cached_outputs; - std::vector inputs; - std::vector outputs; - - for (const OperatorDef &op : net_def.op()) { - int op_id = op_map.GetOpId(op.type()); - inputs.resize(op.node_input().size()); - for (int i = 0; i < op.node_input().size(); ++i) { - inputs[i].src_id = node_id(op.node_input()[i].node_id()); - inputs[i].output_idx = op.node_input()[i].output_port(); - } - outputs.resize(op.output_shape().size()); - for (int i = 0; i < op.output_shape().size(); ++i) { -#ifdef MACE_USE_NNLIB_OLD - outputs[i].max_size = op.out_max_byte_size()[i]; -#else - outputs[i].rank = op.output_shape()[i].dims().size(); - for (size_t j = 0; j < outputs[i].rank; ++j) { - outputs[i].max_sizes[j] = op.output_shape()[i].dims()[j]; - } - if (outputs[i].rank == 0) { - outputs[i].rank = 1; - outputs[i].max_sizes[0] = 1; - } - outputs[i].max_sizes[outputs[i].rank] = 0; - outputs[i].elementsize = GetEnumTypeSize( - static_cast(op.output_type()[i])); - outputs[i].zero_offset = 0; - outputs[i].stepsize = 0; -#endif + OpMap op_map; + op_map.Init(); + std::vector op_node_list; + std::vector> cached_inputs; + std::vector> cached_outputs; + std::vector inputs; + std::vector outputs; + + for (const OperatorDef &op : net_def.op()) { + int op_id = op_map.GetOpId(op.type()); + inputs.resize(op.node_input().size()); + for (int i = 0; i < op.node_input().size(); ++i) { + inputs[i].src_id = node_id(op.node_input()[i].node_id()); + inputs[i].output_idx = op.node_input()[i].output_port(); + } + outputs.resize(op.output_shape().size()); + for (int i = 0; i < 
op.output_shape().size(); ++i) { + outputs[i].rank = op.output_shape()[i].dims().size(); + for (size_t j = 0; j < outputs[i].rank; ++j) { + outputs[i].max_sizes[j] = op.output_shape()[i].dims()[j]; } - cached_inputs.push_back(inputs); - cached_outputs.push_back(outputs); - - hexagon_nn_padding_type padding_type = - static_cast(op.padding()); - - hexagon_nn_op_node op_node; - op_node.node_id = node_id(op.node_id()); - op_node.operation = op_id; - op_node.padding = padding_type; - op_node.inputs = cached_inputs.back().data(); - op_node.inputsLen = inputs.size(); - op_node.outputs = cached_outputs.back().data(); - op_node.outputsLen = outputs.size(); - - op_node_list.push_back(op_node); - if (op_node_list.size() >= 125) { - MACE_CHECK(hexagon_nn_append_node_list(nn_id_, op_node_list.data(), - op_node_list.size()) == 0, - "append node error"); - op_node_list.clear(); - cached_inputs.clear(); - cached_outputs.clear(); + if (outputs[i].rank == 0) { + outputs[i].rank = 1; + outputs[i].max_sizes[0] = 1; } + outputs[i].max_sizes[outputs[i].rank] = 0; + outputs[i].elementsize = GetEnumTypeSize( + static_cast(op.output_type()[i])); + outputs[i].zero_offset = 0; + outputs[i].stepsize = 0; } - - if (!op_node_list.empty()) { + cached_inputs.push_back(inputs); + cached_outputs.push_back(outputs); + + hexagon_nn_padding_type padding_type = + static_cast(op.padding()); + + hexagon_nn_op_node op_node; + op_node.node_id = node_id(op.node_id()); + op_node.operation = op_id; + op_node.padding = padding_type; + op_node.inputs = cached_inputs.back().data(); + op_node.inputsLen = inputs.size(); + op_node.outputs = cached_outputs.back().data(); + op_node.outputsLen = outputs.size(); + + op_node_list.push_back(op_node); + if (op_node_list.size() >= 125) { MACE_CHECK(hexagon_nn_append_node_list(nn_id_, op_node_list.data(), op_node_list.size()) == 0, "append node error"); + op_node_list.clear(); + cached_inputs.clear(); + cached_outputs.clear(); } - op_node_list.clear(); - cached_inputs.clear(); - cached_outputs.clear(); } -#if defined(MACE_USE_NNLIB_CAF) || defined(MACE_USE_NNLIB_OLD) - ); // NOLINT - const_thread.join(); - op_thread.join(); -#endif + + if (!op_node_list.empty()) { + MACE_CHECK(hexagon_nn_append_node_list(nn_id_, op_node_list.data(), + op_node_list.size()) == 0, + "append node error"); + } + op_node_list.clear(); + cached_inputs.clear(); + cached_outputs.clear(); // input info num_inputs_ = 0; @@ -460,7 +434,7 @@ bool HexagonControlWrapper::ExecuteGraph(const Tensor &input_tensor, bool HexagonControlWrapper::ExecuteGraphNew( const std::vector &input_tensors, std::vector *output_tensors) { - LOG(INFO) << "Execute graph new: " << nn_id_; + VLOG(2) << "Execute graph new: " << nn_id_; uint32_t num_inputs = static_cast(input_tensors.size()); uint32_t num_outputs = static_cast(output_tensors->size()); MACE_ASSERT(num_inputs_ == num_inputs, "Wrong inputs num"); diff --git a/mace/core/runtime/hexagon/hexagon_device.h b/mace/core/runtime/hexagon/hexagon_device.h new file mode 100644 index 0000000000000000000000000000000000000000..4c16a54c2e4198e9a95bcae8a6ac043648918b88 --- /dev/null +++ b/mace/core/runtime/hexagon/hexagon_device.h @@ -0,0 +1,32 @@ +// Copyright 2018 Xiaomi, Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MACE_CORE_RUNTIME_HEXAGON_HEXAGON_DEVICE_H_ +#define MACE_CORE_RUNTIME_HEXAGON_HEXAGON_DEVICE_H_ + +#include "mace/core/device.h" + +namespace mace { + +class HexagonDevice : public CPUDevice { + public: + HexagonDevice() : CPUDevice(0, AFFINITY_NONE, false) {} + + DeviceType device_type() const override { + return DeviceType::HEXAGON; + }; +}; + +} // namespace mace +#endif // MACE_CORE_RUNTIME_HEXAGON_HEXAGON_DEVICE_H_ diff --git a/mace/examples/android/mobilenet.yml b/mace/examples/android/mobilenet.yml index 302f29f52379637e133d70eea8a7711e4db01b34..09596ea87a56ea806cd730329dd30e77ee673aaa 100644 --- a/mace/examples/android/mobilenet.yml +++ b/mace/examples/android/mobilenet.yml @@ -18,7 +18,6 @@ models: - 1,1001 runtime: cpu+gpu limit_opencl_kernel_time: 0 - nnlib_graph_mode: 0 obfuscate: 0 winograd: 0 mobilenet_v2: @@ -36,7 +35,6 @@ models: - 1,1001 runtime: cpu+gpu limit_opencl_kernel_time: 0 - nnlib_graph_mode: 0 obfuscate: 0 winograd: 0 mobilenet_v1_quant: @@ -56,7 +54,6 @@ models: - 1,1001 runtime: cpu limit_opencl_kernel_time: 0 - nnlib_graph_mode: 0 obfuscate: 0 winograd: 0 quantize: 1 @@ -77,7 +74,6 @@ models: - 1,1001 runtime: cpu limit_opencl_kernel_time: 0 - nnlib_graph_mode: 0 obfuscate: 0 winograd: 0 quantize: 1 diff --git a/mace/libmace/mace.cc b/mace/libmace/mace.cc index bcaff34da3372019fd1fc4f15b566a3c62402d93..42959ca877e42002fb567138fb528ca11c086e6f 100644 --- a/mace/libmace/mace.cc +++ b/mace/libmace/mace.cc @@ -34,6 +34,7 @@ #ifdef MACE_ENABLE_HEXAGON #include "mace/core/runtime/hexagon/hexagon_control_wrapper.h" +#include "mace/core/runtime/hexagon/hexagon_device.h" #endif // MACE_ENABLE_HEXAGON namespace mace { @@ -387,7 +388,7 @@ MaceEngine::Impl::Impl(const MaceEngineConfig &config) #endif { LOG(INFO) << "Creating MaceEngine, MACE version: " << MaceVersion(); - if (device_type_ == DeviceType::CPU || device_type_ == DeviceType::HEXAGON) { + if (device_type_ == DeviceType::CPU) { device_.reset(new CPUDevice(config.impl_->num_threads(), config.impl_->cpu_affinity_policy(), config.impl_->use_gemmlowp())); @@ -405,6 +406,12 @@ MaceEngine::Impl::Impl(const MaceEngineConfig &config) config.impl_->use_gemmlowp())); } #endif +#ifdef MACE_ENABLE_HEXAGON + if (device_type_ == DeviceType::HEXAGON) { + device_.reset(new HexagonDevice()); + } +#endif + MACE_CHECK_NOTNULL(device_); } MaceStatus MaceEngine::Impl::Init( @@ -443,6 +450,7 @@ MaceStatus MaceEngine::Impl::Init( << "' does not belong to model's outputs " << MakeString(MapKeys(output_info_map_)); } + ws_->CreateTensor(output_name, device_->allocator(), DT_FLOAT); } #ifdef MACE_ENABLE_HEXAGON if (device_type_ == HEXAGON) { diff --git a/mace/ops/split.cc b/mace/ops/split.cc index 0f9dcc04bdb9c5b229f08c2b59f3e9551020f7a6..d7f33965493cb1e6d0d6124334fe546cc196da86 100644 --- a/mace/ops/split.cc +++ b/mace/ops/split.cc @@ -75,8 +75,8 @@ class SplitOp : public Operation { #pragma omp parallel for for (int outer_idx = 0; outer_idx < outer_size; ++outer_idx) { - int input_idx = outer_idx * input_channels * inner_size; - int output_idx = outer_idx * output_channels * inner_size; + index_t input_idx 
= outer_idx * input_channels * inner_size;
+      index_t output_idx = outer_idx * output_channels * inner_size;
       for (size_t i = 0; i < outputs_count; ++i) {
         if (DataTypeCanUseMemcpy(DataTypeToEnum<T>::v())) {
           memcpy(output_ptrs[i]+output_idx, input_ptr+input_idx,
diff --git a/mace/python/tools/BUILD b/mace/python/tools/BUILD
index f89bacd5ec0afbeeeda1ae8b3590c50196354f86..a5a35397f0d1eacdeadccfefc02f75645dc9fbf3 100644
--- a/mace/python/tools/BUILD
+++ b/mace/python/tools/BUILD
@@ -16,7 +16,6 @@ py_library(
         "converter_tool/onnx_converter.py",
         "converter_tool/shape_inference.py",
         "converter_tool/tensorflow_converter.py",
-        "converter_tool/tf_dsp_converter.py",
         "converter_tool/transformer.py",
         "graph_util.py",
     ],
diff --git a/mace/python/tools/converter.py b/mace/python/tools/converter.py
index fe337e997f09ce9baff0f2bb7c357f7487b0831d..be9e65b9ba33979412b5e487eda8cee5fdf11a10 100644
--- a/mace/python/tools/converter.py
+++ b/mace/python/tools/converter.py
@@ -45,14 +45,14 @@ data_format_map = {
 
 def parse_data_type(data_type, device_type):
-    if device_type == cvt.DeviceType.CPU.value or\
+    if device_type == cvt.DeviceType.CPU.value or \
             device_type == cvt.DeviceType.GPU.value:
         if data_type == 'fp32_fp32':
             return mace_pb2.DT_FLOAT
         else:
             return mace_pb2.DT_HALF
     elif device_type == cvt.DeviceType.HEXAGON.value:
-        return mace_pb2.DT_UINT8
+        return mace_pb2.DT_FLOAT
     else:
         print("Invalid device type: " + device_type)
@@ -167,45 +167,39 @@ def main(unused_args):
             check_node.name = check_node_names[i]
             check_node.shape = parse_int_array_from_str(check_node_shapes[i])
             option.add_check_node(check_node)
+    else:
+        option.check_nodes = option.output_nodes
 
     option.build()
 
     print("Transform model to one that can better run on device")
-    if FLAGS.runtime == 'dsp' and not option.quantize:
-        mace_check(FLAGS.platform == 'tensorflow',
-                   'DSP only supports tensorflow')
-        from mace.python.tools.converter_tool import tf_dsp_converter
-        converter = tf_dsp_converter.TensorflowDspConverter(
+    if FLAGS.platform == 'tensorflow':
+        from mace.python.tools.converter_tool import tensorflow_converter
+        converter = tensorflow_converter.TensorflowConverter(
             option, FLAGS.model_file)
-        output_graph_def = converter.run()
+    elif FLAGS.platform == 'caffe':
+        from mace.python.tools.converter_tool import caffe_converter
+        converter = caffe_converter.CaffeConverter(option,
+                                                   FLAGS.model_file,
+                                                   FLAGS.weight_file)
+    elif FLAGS.platform == 'onnx':
+        from mace.python.tools.converter_tool import onnx_converter
+        converter = onnx_converter.OnnxConverter(option, FLAGS.model_file)
     else:
-        if FLAGS.platform == 'tensorflow':
-            from mace.python.tools.converter_tool import tensorflow_converter
-            converter = tensorflow_converter.TensorflowConverter(
-                option, FLAGS.model_file)
-        elif FLAGS.platform == 'caffe':
-            from mace.python.tools.converter_tool import caffe_converter
-            converter = caffe_converter.CaffeConverter(option,
-                                                       FLAGS.model_file,
-                                                       FLAGS.weight_file)
-        elif FLAGS.platform == 'onnx':
-            from mace.python.tools.converter_tool import onnx_converter
-            converter = onnx_converter.OnnxConverter(option, FLAGS.model_file)
-        else:
-            six.print_("Mace do not support platorm %s yet." % FLAGS.platform,
-                       file=sys.stderr)
-            exit(1)
+        six.print_("MACE does not support platform %s yet."
% FLAGS.platform, + file=sys.stderr) + exit(1) + + output_graph_def = converter.run() + mace_transformer = transformer.Transformer( + option, output_graph_def) + output_graph_def, quantize_activation_info = mace_transformer.run() + if FLAGS.runtime == 'dsp': + from mace.python.tools.converter_tool import hexagon_converter + converter = hexagon_converter.HexagonConverter( + option, output_graph_def, quantize_activation_info) output_graph_def = converter.run() - mace_transformer = transformer.Transformer( - option, output_graph_def) - output_graph_def, quantize_activation_info = mace_transformer.run() - - if FLAGS.runtime == 'dsp': - from mace.python.tools.converter_tool import hexagon_converter - converter = hexagon_converter.HexagonConverter( - option, output_graph_def, quantize_activation_info) - output_graph_def = converter.run() model_saver.save_model( option, output_graph_def, model_checksum, weight_checksum, diff --git a/mace/python/tools/converter_tool/base_converter.py b/mace/python/tools/converter_tool/base_converter.py index a2158081c1575465712f928f6281525521c0ff62..9bfd690991b59b609b36ee216b357e36dffd8b21 100644 --- a/mace/python/tools/converter_tool/base_converter.py +++ b/mace/python/tools/converter_tool/base_converter.py @@ -257,6 +257,7 @@ class TransformerRule(Enum): FOLD_EMBEDDING_LOOKUP = 35 TRANSPOSE_CAFFE_RESHAPE_AND_FLATTEN = 36 FOLD_FC_RESHAPE = 37 + TRANSFORM_CHANNEL_SHUFFLE = 38 class ConverterInterface(object): @@ -373,7 +374,7 @@ class ConverterOption(object): @input_nodes.setter def input_nodes(self, input_nodes): - for node in input_nodes: + for node in input_nodes.values(): self._input_nodes[node.name] = node def add_input_node(self, input_node): @@ -381,7 +382,7 @@ class ConverterOption(object): @output_nodes.setter def output_nodes(self, output_nodes): - for node in output_nodes: + for node in output_nodes.values(): self.output_nodes[node.name] = node def add_output_node(self, output_node): @@ -389,7 +390,7 @@ class ConverterOption(object): @check_nodes.setter def check_nodes(self, check_nodes): - for node in check_nodes: + for node in check_nodes.values(): self.check_nodes[node.name] = node def add_check_node(self, check_node): @@ -463,6 +464,7 @@ class ConverterOption(object): TransformerRule.TRANSFORM_GLOBAL_CONV_TO_FC, TransformerRule.RESHAPE_FC_WEIGHT, TransformerRule.FOLD_FC_RESHAPE, + TransformerRule.TRANSFORM_CHANNEL_SHUFFLE, # Model data format related transformation TransformerRule.TRANSPOSE_FILTERS, TransformerRule.TRANSPOSE_DATA_FORMAT, diff --git a/mace/python/tools/converter_tool/hexagon_converter.py b/mace/python/tools/converter_tool/hexagon_converter.py index 39d6c5e3156dfbd34e136b01f3780ea7b55c9101..d20e7ef262b4e8a57652ce5cc5539f0cda2d7b8b 100644 --- a/mace/python/tools/converter_tool/hexagon_converter.py +++ b/mace/python/tools/converter_tool/hexagon_converter.py @@ -104,7 +104,6 @@ class HexagonConverter(base_converter.ConverterInterface): output_name = self._option.output_nodes.values()[0].name else: output_name = self._option.check_nodes.values()[0].name - output_name = MaceKeyword.mace_output_node_name + '_' + output_name output_name = normalize_name(output_name) self._model = graph_util.sort_mace_graph(self._model, output_name) @@ -311,9 +310,8 @@ class HexagonConverter(base_converter.ConverterInterface): return tensor.name def add_input_output_node(self): - input_node = self._option.input_nodes.values()[0] for op in self._model.op: - if op.name == input_node.name: + if op.name.startswith(MaceKeyword.mace_input_node_name): del 
op.input[0] break @@ -324,8 +322,7 @@ class HexagonConverter(base_converter.ConverterInterface): output_name = self._option.check_nodes.values()[0].name output_name = normalize_name(output_name) for op in self._model.op: - if op.name.startswith(MaceKeyword.mace_output_node_name) \ - and op.name.find(output_name) != -1: + if op.name == output_name: output_node = op break mace_check(output_node is not None, @@ -348,8 +345,6 @@ class HexagonConverter(base_converter.ConverterInterface): node_id_counter += 1 node_id_map[op.name] = op.node_id for ipt in op.input: - if ipt.startswith(MaceKeyword.mace_input_node_name): - ipt = ipt[len(MaceKeyword.mace_input_node_name + '_'):] op_name, port = get_op_and_port_from_tensor(ipt) node_id = node_id_map[op_name] node_input = op.node_input.add() diff --git a/mace/python/tools/converter_tool/tensorflow_converter.py b/mace/python/tools/converter_tool/tensorflow_converter.py index 9390d5a11af62f2b9cbbe55f2c98ecd4b23ed1a8..825c3c000431895a2ad7443f4aeaeb2211b06e18 100644 --- a/mace/python/tools/converter_tool/tensorflow_converter.py +++ b/mace/python/tools/converter_tool/tensorflow_converter.py @@ -271,11 +271,15 @@ class TensorflowConverter(base_converter.ConverterInterface): print("Run transform_graph: %s" % TFTransformGraphOptions[ option.device]) - transformed_graph_def = TransformGraph(tf_graph_def, - option.input_nodes.keys(), - option.output_nodes.keys(), - TFTransformGraphOptions[ - option.device]) + try: + transformed_graph_def = TransformGraph(tf_graph_def, + option.input_nodes.keys(), + option.output_nodes.keys(), + TFTransformGraphOptions[ + option.device]) + except Exception as ex: + print("Failed to transform graph using tf tool: %s" % ex) + transformed_graph_def = tf_graph_def with tf.Session() as session: with session.graph.as_default() as graph: diff --git a/mace/python/tools/converter_tool/tf_dsp_converter.py b/mace/python/tools/converter_tool/tf_dsp_converter.py deleted file mode 100644 index 501c9933a6d22ff128d87e399fd2128ed91f70a1..0000000000000000000000000000000000000000 --- a/mace/python/tools/converter_tool/tf_dsp_converter.py +++ /dev/null @@ -1,698 +0,0 @@ -# Copyright 2018 Xiaomi, Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -from mace.proto import mace_pb2 -from mace.python.tools.converter_tool import base_converter -from mace.python.tools import graph_util -from mace.python.tools.convert_util import mace_check - -import six -import tensorflow as tf -from tensorflow.core.framework import tensor_shape_pb2 -from operator import mul -import numpy as np - - -class DspOps(object): - def __init__(self): - self.dsp_ops = { - 'INPUT': 'INPUT"', - 'OUTPUT': 'OUTPUT', - 'NoOp': 'Nop', - 'FLATTEN': 'Flatten', - 'Identity': 'Nop', - 'Placeholder': 'INPUT', - 'Const': 'Const', - 'QuantizedConv2D': 'QuantizedConv2d_8x8to32', - 'QuantizedMatMul': 'QuantizedMatMul_8x8to32', - 'QuantizeDownAndShrinkRange': 'QuantizeDownAndShrinkRange_32to8', - 'QuantizedRelu': 'QuantizedRelu_8', - 'QuantizedReluX': 'QuantizedReluX_8', - 'QuantizedMaxPool': 'QuantizedMaxPool_8', - 'QuantizedAvgPool': 'QuantizedAvgPool_8', - 'QuantizedConcat': 'QuantizedConcat_8', - 'QuantizedBiasAdd': 'QuantizedBiasAdd_8p8to32', - 'QuantizedResizeBilinear': 'QuantizedResizeBilinear_8', - 'QuantizedSpaceToBatchND': 'QuantizedSpaceToBatchND_8', - 'QuantizedBatchToSpaceND': 'QuantizedBatchToSpaceND_8', - 'QuantizedSoftmax': 'QuantizedSoftmax_8', - 'QuantizedTanh': 'QuantizedTanh_8', - 'Min': 'Min_f', - 'Max': 'Max_f', - 'QuantizeV2': 'Quantize', - 'Dequantize': 'Dequantize', - 'Softmax': 'Softmax_f', - 'Reshape': 'Reshape', - 'QuantizedReshape': 'QuantizedReshape', - 'Sigmoid': 'Sigmoid_f', - 'Slice': 'Slice_f', - 'Add': 'Add_f', - 'Mul': 'Mul_f', - 'Requantize': 'Requantize_32to8', - 'RequantizationRange': 'RequantizationRange_32', - 'Sub': 'Sub_f', - 'Pack': 'Pack_int32', - 'StridedSlice': 'StridedSlice_f', - 'ExpandDims': 'ExpandDims_f', - 'QuantizedMul': 'QuantizedMul_8x8to32', - 'QuantizedAdd': 'QuantizedAdd_8p8to32', - 'Pad': 'Pad_f', - 'SpaceToBatchND': 'SpaceToBatchND_f', - 'BatchToSpaceND': 'BatchToSpaceND_f', - 'ResizeBilinear': 'ResizeBilinear_f', - 'ConcatV2': 'ConcatV2_f', - 'Conv2DBackpropInput': 'Deconv_f', - 'Tanh': 'Tanh_f', - 'Split': 'Split_f', - 'Transpose': 'Transpose_f', - 'Concat': 'Concat_f', - 'AddN': 'AddN_f', - } - - def has_op(self, tf_op): - return tf_op in self.dsp_ops - - def map_nn_op(self, tf_op): - if tf_op not in self.dsp_ops: - raise Exception('Could not map nn op for: ', tf_op) - return self.dsp_ops[tf_op] - - -TF_DTYPE_2_MACE_DTYPE_MAP = { - tf.float32: mace_pb2.DT_FLOAT, - tf.half: mace_pb2.DT_HALF, - tf.int32: mace_pb2.DT_INT32, - tf.qint32: mace_pb2.DT_INT32, - tf.quint8: mace_pb2.DT_UINT8, - tf.uint8: mace_pb2.DT_UINT8, -} - - -def tf_dtype_2_mace_dtype(tf_dtype): - mace_dtype = TF_DTYPE_2_MACE_DTYPE_MAP.get(tf_dtype, None) - if not mace_dtype: - raise Exception("Not supported tensorflow dtype: " + tf_dtype) - return mace_dtype - - -padding_mode = { - 'NA': 0, - 'SAME': 1, - 'VALID': 2, - 'MIRROR_REFLECT': 3, - 'MIRROR_SYMMETRIC': 4, - 'SAME_CAFFE': 5 -} - - -def get_tensor_name_from_op(op_name, port): - return op_name + ':' + str(port) - - -def get_node_from_map(op_map, op_or_tensor_name): - op_name = op_or_tensor_name.split(':')[0] - return op_map[op_name] - - -def get_op_and_port_from_tensor(tensor_name): - op, port = tensor_name.split(':') - port = int(port) - return op, port - - -def max_elem_size(tensor): - if len(tensor.shape.as_list()) == 0: - return tensor.dtype.size - else: - return reduce(mul, tensor.shape.as_list()) * tensor.dtype.size - - -def find_dtype(tensor_dtype): - if tensor_dtype == tf.float32: - return mace_pb2.DT_FLOAT - elif tensor_dtype == tf.uint8 or tensor_dtype == tf.quint8: - return 
mace_pb2.DT_UINT8 - elif tensor_dtype == tf.int32 or tensor_dtype == tf.qint32: - return mace_pb2.DT_INT32 - else: - raise Exception('Unsupported data type: ', tensor_dtype) - - -def has_padding_and_strides(op): - return 'padding' in op.node_def.attr and 'strides' in op.node_def.attr - - -def is_node_flatten_reshape(op): - return op.type == 'Reshape' and len(op.outputs[0].shape) == 1 - - -def get_input_tensor(op, index): - input_tensor = op.inputs[index] - if input_tensor.op.type == 'Reshape': - input_tensor = get_input_tensor(input_tensor.op, 0) - return input_tensor - - -def add_shape_const_node(net_def, op, values, name): - tensor = net_def.tensors.add() - node_name = op.name + '/' + name - tensor.name = node_name + ':0' - tensor.data_type = mace_pb2.DT_INT32 - tensor.dims.extend(values) - return tensor.name - - -def convert_op_outputs(mace_op_def, tf_op): - mace_op_def.out_max_byte_size.extend( - [max_elem_size(output) for output in tf_op.outputs]) - mace_op_def.output_type.extend( - [tf_dtype_2_mace_dtype(output.dtype) for output in tf_op.outputs]) - output_shapes = [] - for output in tf_op.outputs: - output_shape = mace_pb2.OutputShape() - shape_list = output.shape.as_list() - if not shape_list: - shape_list = [1] - elif len(shape_list) == 2: - shape_list = [1, 1, shape_list[0], shape_list[1]] - output_shape.dims.extend(shape_list) - output_shapes.append(output_shape) - mace_op_def.output_shape.extend(output_shapes) - - -def convert_ops(unresolved_ops, resolved_ops, net_def, dsp_ops): - first_op = unresolved_ops[0] - print('Op: ', first_op.name, first_op.type, first_op.outputs[0].shape) - - if first_op.name in resolved_ops: - pass - - elif first_op.type == 'Const': - print('Add const node: ', first_op.name) - tf_tensor = first_op.outputs[0].eval() - tensor = net_def.tensors.add() - tensor.name = first_op.outputs[0].name - tensor.data_type = find_dtype(first_op.outputs[0].dtype) - shape = list(tf_tensor.shape) - if len(shape) > 0: - tensor.dims.extend(shape) - if first_op.outputs[0].dtype == tf.float32: - tensor.float_data.extend(tf_tensor.astype(float).flat) - elif first_op.outputs[0].dtype == tf.int32 or \ - first_op.outputs[0].dtype == tf.int8 or \ - first_op.outputs[0].dtype == tf.int16 or \ - first_op.outputs[0].dtype == tf.quint8 or \ - first_op.outputs[0].dtype == tf.quint16: - tensor.int32_data.extend(tf_tensor.astype(int).flat) - elif first_op.type == 'Shape': - resolved_ops.add(first_op.name) - else: - op_def = net_def.op.add() - op_def.name = first_op.name - op_def.type = dsp_ops.map_nn_op(first_op.type) - op_def.padding = padding_mode['NA'] - - if len(first_op.outputs) > 0 and first_op.type == 'Dequantize' \ - and len(first_op.outputs[0].consumers()) > 0 \ - and (first_op.outputs[0].consumers()[0].type == 'SpaceToBatchND' or - first_op.outputs[0].consumers()[0].type == 'BatchToSpaceND'): # noqa - input_tensor = first_op.inputs[0] - min_tensor = first_op.inputs[1] - max_tensor = first_op.inputs[2] - s2b_op = first_op.outputs[0].consumers()[0] - reshape_op = s2b_op.outputs[0].consumers()[0] - min_op = reshape_op.outputs[0].consumers()[0] - max_op = reshape_op.outputs[0].consumers()[1] - quantize_op = min_op.outputs[0].consumers()[0] - resolved_ops.add(s2b_op.name) - resolved_ops.add(reshape_op.name) - resolved_ops.add(min_op.name) - resolved_ops.add(max_op.name) - resolved_ops.add(quantize_op.name) - - op_def.name = quantize_op.name - op_def.type = dsp_ops.map_nn_op('Quantized' + s2b_op.type) - op_def.input.append(input_tensor.name) - op_def.input.extend([t.name for t in 
s2b_op.inputs[1:]]) - op_def.input.extend([min_tensor.name, max_tensor.name]) - convert_op_outputs(op_def, quantize_op) - elif (len(first_op.outputs) > 0 and - first_op.type == 'QuantizedReshape' and - len(first_op.outputs[0].consumers()) > 0 and - first_op.outputs[0].consumers()[0].type == 'Dequantize' and - len(first_op.outputs[0].consumers()[0].outputs[0].consumers()) > 0 and # noqa - first_op.outputs[0].consumers()[0].outputs[0].consumers()[0].type == 'Softmax'): # noqa - input_tensor = first_op.inputs[0] - min_tensor = first_op.inputs[2] - max_tensor = first_op.inputs[3] - dequantize_op = first_op.outputs[0].consumers()[0] - softmax_op = dequantize_op.outputs[0].consumers()[0] - reshape_op = softmax_op.outputs[0].consumers()[0] - min_op = reshape_op.outputs[0].consumers()[0] - max_op = reshape_op.outputs[0].consumers()[1] - quantize_op = min_op.outputs[0].consumers()[0] - quantize_reshape_op = quantize_op.outputs[0].consumers()[0] - - resolved_ops.add(dequantize_op.name) - resolved_ops.add(softmax_op.name) - resolved_ops.add(reshape_op.name) - resolved_ops.add(min_op.name) - resolved_ops.add(max_op.name) - resolved_ops.add(quantize_op.name) - resolved_ops.add(quantize_reshape_op.name) - - op_def.name = quantize_reshape_op.name - op_def.type = dsp_ops.map_nn_op('QuantizedSoftmax') - op_def.input.extend( - [input_tensor.name, min_tensor.name, max_tensor.name]) - convert_op_outputs(op_def, quantize_reshape_op) - # remove Squeeze - elif (len(first_op.outputs) > 0 and - first_op.type == 'Requantize' and - len(first_op.outputs[0].consumers()) > 0 and - first_op.outputs[0].consumers()[0].type == 'Dequantize' and - len(first_op.outputs[0].consumers()[0].outputs[0].consumers()) > 0 and # noqa - first_op.outputs[0].consumers()[0].outputs[0].consumers()[0].type == 'Squeeze'): # noqa - dequantize_op = first_op.outputs[0].consumers()[0] - squeeze_op = dequantize_op.outputs[0].consumers()[0] - reshape_op = squeeze_op.outputs[0].consumers()[0] - if reshape_op.type == 'Shape': - reshape_op = squeeze_op.outputs[0].consumers()[1] - min_op = reshape_op.outputs[0].consumers()[0] - max_op = reshape_op.outputs[0].consumers()[1] - quantize_op = min_op.outputs[0].consumers()[0] - - resolved_ops.add(dequantize_op.name) - resolved_ops.add(squeeze_op.name) - resolved_ops.add(reshape_op.name) - resolved_ops.add(min_op.name) - resolved_ops.add(max_op.name) - resolved_ops.add(quantize_op.name) - - op_def.name = quantize_op.name - op_def.input.extend([t.name for t in first_op.inputs]) - convert_op_outputs(op_def, quantize_op) - - # Squeeze -> Softmax - next_op = quantize_op.outputs[0].consumers()[0] \ - if len(quantize_op.outputs) > 0 else None - dequantize_op = next_op.outputs[0].consumers()[0] \ - if next_op and len(next_op.outputs) > 0 and \ - next_op.type == 'QuantizedReshape' and \ - len(next_op.outputs[0].consumers()) > 0 else None - softmax_op = dequantize_op.outputs[0].consumers()[0] \ - if dequantize_op and len(dequantize_op.outputs) > 0 and \ - dequantize_op.type == 'Dequantize' and \ - len(dequantize_op.outputs[0].consumers()) > 0 else None - if softmax_op and softmax_op.type == 'Softmax': - reshape_op = softmax_op.outputs[0].consumers()[0] - min_op = reshape_op.outputs[0].consumers()[0] - max_op = reshape_op.outputs[0].consumers()[1] - quantize_op = min_op.outputs[0].consumers()[0] - quantize_reshape_op = quantize_op.outputs[0].consumers()[0] - - resolved_ops.add(next_op.name) - resolved_ops.add(dequantize_op.name) - resolved_ops.add(softmax_op.name) - resolved_ops.add(reshape_op.name) - 
resolved_ops.add(min_op.name) - resolved_ops.add(max_op.name) - resolved_ops.add(quantize_op.name) - resolved_ops.add(quantize_reshape_op.name) - - softmax_op_def = net_def.op.add() - softmax_op_def.padding = padding_mode['NA'] - softmax_op_def.name = quantize_reshape_op.name - softmax_op_def.type = dsp_ops.map_nn_op('QuantizedSoftmax') - softmax_op_def.input.extend([ - get_tensor_name_from_op(op_def.name, 0), - get_tensor_name_from_op(op_def.name, 1), - get_tensor_name_from_op(op_def.name, 2)]) - convert_op_outputs(softmax_op_def, quantize_reshape_op) - - elif len(first_op.outputs) > 0 and first_op.type == 'Dequantize' and \ - len(first_op.outputs[0].consumers()) > 0 and \ - first_op.outputs[0].consumers()[0].type == 'Tanh': - input_tensor = first_op.inputs[0] - min_tensor = first_op.inputs[1] - max_tensor = first_op.inputs[2] - tanh_op = first_op.outputs[0].consumers()[0] - - # if not last op - resolved_ops.add(tanh_op.name) - if tanh_op.outputs[0].consumers(): - reshape_op = tanh_op.outputs[0].consumers()[0] - min_op = reshape_op.outputs[0].consumers()[0] - max_op = reshape_op.outputs[0].consumers()[1] - quantize_op = min_op.outputs[0].consumers()[0] - resolved_ops.add(reshape_op.name) - resolved_ops.add(min_op.name) - resolved_ops.add(max_op.name) - resolved_ops.add(quantize_op.name) - - op_def.name = quantize_op.name - op_def.type = dsp_ops.map_nn_op('Quantized' + tanh_op.type) - op_def.input.extend( - [input_tensor.name, min_tensor.name, max_tensor.name]) - convert_op_outputs(op_def, quantize_op) - # tanh is last op - else: - op_def.name = tanh_op.name + '/QuantizedTanh' - op_def.type = dsp_ops.map_nn_op('Quantized' + tanh_op.type) - op_def.input.extend( - [input_tensor.name, min_tensor.name, max_tensor.name]) - op_def.out_max_byte_size.extend([ - max_elem_size(input_tensor), - max_elem_size(min_tensor), - max_elem_size(max_tensor) - ]) - op_def.output_type.extend( - [mace_pb2.DT_UINT8, mace_pb2.DT_FLOAT, mace_pb2.DT_FLOAT]) - output_shapes = [] - for output in first_op.inputs: - output_shape = mace_pb2.OutputShape() - output_shape.dims.extend(output.shape.as_list()) - output_shapes.append(output_shape) - op_def.output_shape.extend(output_shapes) - - new_tanh_op_def = net_def.op.add() - new_tanh_op_def.name = tanh_op.name - new_tanh_op_def.type = dsp_ops.map_nn_op('Dequantize') - new_tanh_op_def.input.extend([ - get_tensor_name_from_op(op_def.name, 0), - get_tensor_name_from_op(op_def.name, 1), - get_tensor_name_from_op(op_def.name, 2) - ]) - convert_op_outputs(new_tanh_op_def, tanh_op) - elif has_padding_and_strides(first_op): - op_def.padding = padding_mode[first_op.get_attr('padding')] - op_def.input.extend([t.name for t in first_op.inputs]) - if 'ksize' in first_op.node_def.attr: - ksize = first_op.get_attr('ksize') - ksize_tensor = add_shape_const_node(net_def, first_op, ksize, - 'ksize') - op_def.input.extend([ksize_tensor]) - strides = first_op.get_attr('strides') - strides_tensor = add_shape_const_node(net_def, first_op, strides, - 'strides') - op_def.input.extend([strides_tensor]) - convert_op_outputs(op_def, first_op) - elif is_node_flatten_reshape(first_op): - op_def.type = 'Flatten' - op_def.input.extend([first_op.inputs[0].name]) - convert_op_outputs(op_def, first_op) - elif dsp_ops.has_op(first_op.type): - op_def.input.extend([t.name for t in first_op.inputs]) - convert_op_outputs(op_def, first_op) - else: - raise Exception('Unsupported op: ', first_op) - - resolved_ops.add(first_op.name) - - del unresolved_ops[0] - - -def add_output_node(net_def, output_node): - op_def 
= net_def.op.add() - op_def.name = '__output__' - op_def.type = 'OUTPUT' - op_def.input.extend([get_tensor_name_from_op(output_node, 0)]) - - -def reverse_batch_to_space_and_biasadd(net_def): - tensor_map = {} - for tensor in net_def.tensors: - tensor_map[tensor.name] = tensor - op_map = {} - for op in net_def.op: - op_map[op.name] = op - consumers = {} - for op in net_def.op: - for ipt in op.input: - if ipt not in consumers: - consumers[ipt] = [] - consumers[ipt].append(op) - - new_ops = [] - skip_ops = set() - visited_ops = set() - - for op in net_def.op: - if op.name in visited_ops: - pass - # pattern: QConv -> RR -> R -> QB2S -> QBiasAdd -> RR -> R - success = False - if op.type == 'Requantize_32to8': - biasadd_requantize_op = op - biasadd_op = get_node_from_map(op_map, - biasadd_requantize_op.input[0]) - if biasadd_op.type == 'QuantizedBiasAdd_8p8to32': - b2s_op = get_node_from_map(op_map, biasadd_op.input[0]) - if b2s_op.type == 'QuantizedBatchToSpaceND_8': - conv_requantize_op = get_node_from_map( - op_map, b2s_op.input[0]) - conv_op = get_node_from_map(op_map, - conv_requantize_op.input[0]) - if conv_op.type == 'QuantizedConv2d_8x8to32': - new_biasadd_op = mace_pb2.OperatorDef() - new_biasadd_op.CopyFrom(biasadd_op) - new_biasadd_op.input[0] = get_tensor_name_from_op( - conv_requantize_op.name, 0) - new_biasadd_op.input[2] = get_tensor_name_from_op( - conv_requantize_op.name, 1) - new_biasadd_op.input[3] = get_tensor_name_from_op( - conv_requantize_op.name, 2) - new_biasadd_op.out_max_byte_size[ - 0] = conv_requantize_op.out_max_byte_size[0] * 4 - - new_biasadd_requantize_op = mace_pb2.OperatorDef() - new_biasadd_requantize_op.CopyFrom( - biasadd_requantize_op) - new_biasadd_requantize_op.out_max_byte_size[ - 0] = new_biasadd_op.out_max_byte_size[0] / 4 - - new_b2s_op = mace_pb2.OperatorDef() - new_b2s_op.CopyFrom(b2s_op) - new_b2s_op.input[0] = get_tensor_name_from_op( - biasadd_requantize_op.name, 0) - new_b2s_op.input[3] = get_tensor_name_from_op( - biasadd_requantize_op.name, 1) - new_b2s_op.input[4] = get_tensor_name_from_op( - biasadd_requantize_op.name, 2) - - new_ops.extend([ - new_biasadd_op, new_biasadd_requantize_op, - new_b2s_op - ]) - skip_ops = skip_ops.union([ - biasadd_op.name, biasadd_requantize_op.name, - b2s_op.name - ]) - visited_ops.add(op.name) - - follow_ops = consumers[get_tensor_name_from_op( - biasadd_requantize_op.name, 0)] - for follow_op in follow_ops: - new_follow_op = mace_pb2.OperatorDef() - new_follow_op.CopyFrom(follow_op) - for i in six.moves.range(len(follow_op.input)): - for k in six.moves.range(3): - if new_follow_op.input[i] == get_tensor_name_from_op( # noqa - biasadd_requantize_op.name, k): - new_follow_op.input[i] = get_tensor_name_from_op( # noqa - b2s_op.name, k) - new_ops.append(new_follow_op) - skip_ops.add(follow_op.name) - visited_ops.add(follow_op.name) - - visited_ops.add(op.name) - - new_net_def = mace_pb2.NetDef() - new_net_def.tensors.extend(tensor_map.values()) - new_net_def.op.extend([op for op in net_def.op if op.name not in skip_ops]) - new_net_def.op.extend(new_ops) - - return new_net_def - - -def add_node_id(net_def): - node_id_counter = 0 - node_id_map = {} - for tensor in net_def.tensors: - tensor.node_id = node_id_counter - node_id_counter += 1 - tensor_op, port = get_op_and_port_from_tensor(tensor.name) - node_id_map[tensor_op] = tensor.node_id - - for op in net_def.op: - op.node_id = node_id_counter - node_id_counter += 1 - node_id_map[op.name] = op.node_id - for ipt in op.input: - op_name, port = 
get_op_and_port_from_tensor(ipt) - node_id = node_id_map[op_name] - node_input = op.node_input.add() - node_input.node_id = node_id - node_input.output_port = int(port) - - return net_def - - -def add_input_output_info(net_def, input_node, output_node, graph, dtype): - input_tensor = graph.get_tensor_by_name( - get_tensor_name_from_op(input_node, 0)) - output_tensor = graph.get_tensor_by_name( - get_tensor_name_from_op(output_node, 0)) - - input_info = net_def.input_info.add() - input_info.name = input_node - input_info.dims.extend(input_tensor.shape.as_list()) - input_info.data_type = dtype - if dtype == mace_pb2.DT_UINT8: - for i in six.moves.range(2): - input_info = net_def.input_info.add() - input_info.dims.extend([1, 1, 1, 1]) - input_info.data_type = mace_pb2.DT_FLOAT - - output_info = net_def.output_info.add() - output_info.name = output_node - output_info.dims.extend(output_tensor.shape.as_list()) - output_info.data_type = dtype - if dtype == mace_pb2.DT_UINT8: - for i in six.moves.range(2): - output_info = net_def.output_info.add() - output_info.dims.extend([1, 1, 1, 1]) - output_info.data_type = mace_pb2.DT_FLOAT - - return net_def - - -def fuse_quantize(net_def): - tensor_map = {} - for tensor in net_def.tensors: - tensor_map[tensor.name] = tensor - op_map = {} - for op in net_def.op: - op_map[op.name] = op - consumers = {} - for op in net_def.op: - for ipt in op.input: - if ipt not in consumers: - consumers[ipt] = [] - consumers[ipt].append(op) - - skip_ops = set() - new_ops = [] - skip_tensors = set() - - # INPUT->Flatten->Minf, Maxf->Quantize - for op in net_def.op: - if op.type == 'INPUT': - input_op = op - flatten_op = None - quantize_op = None - for o in consumers[get_tensor_name_from_op(input_op.name, 0)]: - if o.type == 'Flatten': - flatten_op = o - elif o.type == 'Quantize': - quantize_op = o - if quantize_op is not None: - minf_op, maxf_op = consumers[get_tensor_name_from_op( - flatten_op.name, 0)] - skip_ops = skip_ops.union( - [flatten_op.name, minf_op.name, maxf_op.name]) - skip_tensors = skip_tensors.union( - [minf_op.input[0], maxf_op.input[0], - quantize_op.input[1], quantize_op.input[2]]) - quantize_op.type = 'AutoQuantize' - del quantize_op.input[1:] - - new_net_def = mace_pb2.NetDef() - new_net_def.tensors.extend([ - tensor for tensor in net_def.tensors if tensor.name not in skip_tensors - ]) - new_net_def.op.extend([op for op in net_def.op if op.name not in skip_ops]) - new_net_def.op.extend(new_ops) - return new_net_def - - -class TensorflowDspConverter(base_converter.ConverterInterface): - def __init__(self, option, src_model_file): - self._option = option - self._mace_net_def = mace_pb2.NetDef() - - # import tensorflow graph - tf_graph_def = tf.GraphDef() - with tf.gfile.Open(src_model_file, 'rb') as f: - tf_graph_def.ParseFromString(f.read()) - - self._placeholders = {} - self.add_shape_info(tf_graph_def) - - with tf.Session() as session: - with session.graph.as_default() as graph: - tf.import_graph_def(tf_graph_def, name='') - self._tf_graph = graph - - def run(self): - ops = self._tf_graph.get_operations() - dsp_ops = DspOps() - resolved_ops = set() - - mace_check(len(self._option.input_nodes) == 1 - and len(self._option.output_nodes) == 1, - 'dsp only support single input and output') - input_node = self._option.input_nodes.values()[0].name - output_node = self._option.output_nodes.values()[0].name - - # convert const node - unresolved_ops = [op for op in ops if op.type == 'Const'] - with tf.Session() as session: - while len(unresolved_ops) > 0: - 
convert_ops(unresolved_ops, resolved_ops, self._mace_net_def, - dsp_ops) - - # convert op node - unresolved_ops = [op for op in ops if op.type != 'Const'] - while len(unresolved_ops) > 0: - convert_ops(unresolved_ops, resolved_ops, self._mace_net_def, - dsp_ops) - - add_output_node(self._mace_net_def, output_node) - net_def = reverse_batch_to_space_and_biasadd(self._mace_net_def) - net_def = fuse_quantize(net_def) - - sorted_net_def = graph_util.sort_mace_graph(net_def, '__output__') - net_def_with_node_id = add_node_id(sorted_net_def) - - dtype = mace_pb2.DT_FLOAT - final_net_def = add_input_output_info( - net_def_with_node_id, input_node, output_node, - self._tf_graph, dtype) - - return final_net_def - - def add_shape_info(self, tf_graph_def): - for node in tf_graph_def.node: - for input_node in self._option.input_nodes.values(): - if node.name == input_node.name or \ - node.name + ':0' == input_node.name: - del node.attr['shape'].shape.dim[:] - node.attr['shape'].shape.dim.extend([ - tensor_shape_pb2.TensorShapeProto.Dim(size=i) for i in - input_node.shape - ]) - self._placeholders[node.name + ':0'] = \ - np.zeros(shape=input_node.shape, dtype=float) diff --git a/mace/python/tools/converter_tool/transformer.py b/mace/python/tools/converter_tool/transformer.py index 657d344579c2f305a74d0120bba4e0159368aa4c..49cba5b82a9fc125dd25046e11d3059bafe34f75 100644 --- a/mace/python/tools/converter_tool/transformer.py +++ b/mace/python/tools/converter_tool/transformer.py @@ -100,6 +100,8 @@ class Transformer(base_converter.ConverterInterface): self.check_quantize_info, TransformerRule.TRANSPOSE_CAFFE_RESHAPE_AND_FLATTEN: self.transform_caffe_reshape_and_flatten, + TransformerRule.TRANSFORM_CHANNEL_SHUFFLE: + self.transform_channel_shuffle, } self._option = option @@ -122,8 +124,7 @@ class Transformer(base_converter.ConverterInterface): changed = transformer() if not changed: break - - self.add_check_nodes() + self.delete_after_check_nodes() return self._model, self._quantize_activation_info def filter_format(self): @@ -232,7 +233,7 @@ class Transformer(base_converter.ConverterInterface): # that the op is identity op and its input is a tensor. 
mace_check(len(op.output) == 1 and len(op.input) == 1, "cannot remove op that w/o replace op specified" - " and input/output length > 1" + str(op)) + " and input/output length > 1\n" + str(op)) for consumer_op in self._consumers.get(op.output[0], []): self.replace(consumer_op.input, op.output[0], op.input[0]) @@ -278,7 +279,8 @@ class Transformer(base_converter.ConverterInterface): input_info.dims.extend(input_node.shape) input_info.data_type = mace_pb2.DT_FLOAT - for output_node in self._option.output_nodes.values(): + output_nodes = self._option.check_nodes.values() + for output_node in output_nodes: output_info = net.output_info.add() output_info.name = output_node.name output_info.data_format = output_node.data_format.value @@ -1367,7 +1369,8 @@ class Transformer(base_converter.ConverterInterface): + '_' + input_node.name input_name_map[input_node.name] = new_input_name - for output_node in self._option.output_nodes.values(): + output_nodes = self._option.check_nodes.values() + for output_node in output_nodes: new_output_name = MaceKeyword.mace_output_node_name \ + '_' + output_node.name output_name_map[output_node.name] = new_output_name @@ -1378,7 +1381,12 @@ class Transformer(base_converter.ConverterInterface): op.input[i] = input_name_map[op.input[i]] for i in range(len(op.output)): if op.output[i] in output_name_map: - op.output[i] = output_name_map[op.output[i]] + op.name = MaceKeyword.mace_output_node_name \ + + '_' + op.name + new_output_name = output_name_map[op.output[i]] + self._quantize_activation_info[new_output_name] = \ + self._quantize_activation_info[op.output[i]] + op.output[i] = new_output_name data_type_arg = ConverterUtil.get_arg( op, MaceKeyword.mace_op_data_type_str) @@ -1399,7 +1407,8 @@ class Transformer(base_converter.ConverterInterface): for input_node in self._option.input_nodes.values(): op_def = self._model.op.add() - op_def.name = self.normalize_op_name(input_node.name) + op_def.name = \ + self.normalize_op_name(input_name_map[input_node.name]) op_def.type = MaceOp.Quantize.name op_def.input.extend([input_node.name]) op_def.output.extend([input_name_map[input_node.name]]) @@ -1409,10 +1418,9 @@ class Transformer(base_converter.ConverterInterface): ConverterUtil.add_data_type_arg(op_def, mace_pb2.DT_UINT8) ConverterUtil.add_data_format_arg(op_def, DataFormat.NHWC) - for output_node in self._option.output_nodes.values(): + for output_node in output_nodes: op_def = self._model.op.add() - op_def.name = self.normalize_op_name( - output_name_map[output_node.name]) + op_def.name = self.normalize_op_name(output_node.name) op_def.type = MaceOp.Dequantize.name op_def.input.extend([output_name_map[output_node.name]]) op_def.output.extend([output_node.name]) @@ -1721,34 +1729,17 @@ class Transformer(base_converter.ConverterInterface): arg.i = mace_pb2.GPU_IMAGE if self._option.cl_mem_type == "image"\ else mace_pb2.GPU_BUFFER - def add_check_nodes(self): - if self._option.check_nodes: + def delete_after_check_nodes(self): + if self._option.check_nodes != self._option.output_nodes: mace_check(len(self._option.check_nodes) == 1, "Only support one check node now.") check_node = None for i in six.moves.range(len(self._model.op)): - if self._model.op[i].name in self._option.check_nodes: + if self._model.op[i].output[0] in self._option.check_nodes: check_node = self._model.op[i] del self._model.op[i+1:] break mace_check(check_node is not None, "check node not found.") - output_name = \ - MaceKeyword.mace_output_node_name + '_' + check_node.name - op_def = 
self._model.op.add() - op_def.name = self.normalize_op_name(output_name) - op_def.type = MaceOp.Dequantize.name - op_def.input.extend([check_node.output[0]]) - op_def.output.extend([output_name]) - output_shape = op_def.output_shape.add() - output_shape.dims.extend(check_node.output_shape[0].dims) - ConverterUtil.add_data_type_arg(op_def, mace_pb2.DT_UINT8) - op_def.output_type.extend([mace_pb2.DT_FLOAT]) - - del self._model.output_info[:] - output_info = self._model.output_info.add() - output_info.name = check_node.name - output_info.dims.extend(check_node.output_shape[0].dims) - output_info.data_type = mace_pb2.DT_FLOAT def transform_caffe_reshape_and_flatten(self): net = self._model @@ -1800,3 +1791,45 @@ class Transformer(base_converter.ConverterInterface): self.safe_remove_node(consumer, None) return True return False + + def transform_channel_shuffle(self): + net = self._model + for op in net.op: + if op.type == MaceOp.Transpose.name and \ + len(op.output_shape[0].dims) == 5: + perm = ConverterUtil.get_arg(op, + MaceKeyword.mace_dims_str).ints + if [0, 1, 2, 4, 3] == list(perm): + # Remove the following Reshape op + reshape_op = self._consumers.get(op.output[0], None) + if (reshape_op and + len(reshape_op) == 1 and + reshape_op[0].type == MaceOp.Reshape.name and + len(reshape_op[0].output_shape[0].dims) == 4): + print("Transform channel shuffle") + output_shape = reshape_op[0].output_shape[0].dims + self.safe_remove_node(reshape_op[0], op, + remove_input_tensor=True) + else: + return False + + # Change Transpose op to ChannelShuffle + op.type = MaceOp.ChannelShuffle.name + del op.arg[:] + group_arg = op.arg.add() + group_arg.name = MaceKeyword.mace_group_str + group_arg.i = op.output_shape[0].dims[4] + op.output_shape[0].dims[:] = output_shape + + # Remove previous Reshape op + producer_op = self._producer.get(op.input[0], None) + if producer_op: + if producer_op.type == MaceOp.Reshape.name: + self.safe_remove_node(producer_op, None) + elif producer_op.type == MaceOp.Stack.name: + print("Change channel shuffle stack to concat") + # Change previous Stack op to Concat if any + producer_op.type = MaceOp.Concat.name + producer_op.output_shape[0].dims[:] = output_shape + + return True diff --git a/setup/optionals.txt b/setup/optionals.txt new file mode 100644 index 0000000000000000000000000000000000000000..9418795047a500b1b5c67fb6bd0c0f48b9ab68c7 --- /dev/null +++ b/setup/optionals.txt @@ -0,0 +1,4 @@ +tensorflow>=1.8.0 +scipy>=1.0.0 +filelock>=3.0.0 +onnx>=1.3.0 \ No newline at end of file diff --git a/setup/requirements.txt b/setup/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..a3250b400b7edf0a8d3131ff784e970e5c0afb19 --- /dev/null +++ b/setup/requirements.txt @@ -0,0 +1,6 @@ +python>=2.7.0 +jinja2>=2.10 +pyyaml>=3.12 +sh>=1.12.14 +numpy>=1.14.0 +six>=1.11.0 \ No newline at end of file diff --git a/third_party/nnlib/hexagon_nn.h b/third_party/nnlib/hexagon_nn.h index f2aaaa88161a1de3ab2813bc3c5b5f14da1918f4..5a059c5cfd1fd3bbe90ffe99b8bfa41debffa212 100644 --- a/third_party/nnlib/hexagon_nn.h +++ b/third_party/nnlib/hexagon_nn.h @@ -36,197 +36,6 @@ #ifndef THIRD_PARTY_NNLIB_HEXAGON_NN_H_ #define THIRD_PARTY_NNLIB_HEXAGON_NN_H_ -#ifdef MACE_USE_NNLIB_OLD - -#ifndef __QAIC_HEADER -#define __QAIC_HEADER(ff) ff -#endif // __QAIC_HEADER - -#ifndef __QAIC_HEADER_EXPORT -#define __QAIC_HEADER_EXPORT -#endif // __QAIC_HEADER_EXPORT - -#ifndef __QAIC_HEADER_ATTRIBUTE -#define __QAIC_HEADER_ATTRIBUTE -#endif // __QAIC_HEADER_ATTRIBUTE - -#ifndef __QAIC_IMPL 
-#define __QAIC_IMPL(ff) ff -#endif // __QAIC_IMPL - -#ifndef __QAIC_IMPL_EXPORT -#define __QAIC_IMPL_EXPORT -#endif // __QAIC_IMPL_EXPORT - -#ifndef __QAIC_IMPL_ATTRIBUTE -#define __QAIC_IMPL_ATTRIBUTE -#endif // __QAIC_IMPL_ATTRIBUTE -#ifdef __cplusplus -extern "C" { -#endif -#if !defined(__QAIC_STRING1_OBJECT_DEFINED__) && !defined(__STRING1_OBJECT__) -#define __QAIC_STRING1_OBJECT_DEFINED__ -#define __STRING1_OBJECT__ -typedef struct _cstring1_s { - char *data; - int dataLen; -} _cstring1_t; - -#endif /* __QAIC_STRING1_OBJECT_DEFINED__ */ -typedef struct hexagon_nn_input hexagon_nn_input; -struct hexagon_nn_input { - unsigned int src_id; - unsigned int output_idx; -}; -typedef struct hexagon_nn_output hexagon_nn_output; -struct hexagon_nn_output { - unsigned int max_size; - unsigned int unused; -}; -typedef struct hexagon_nn_perfinfo hexagon_nn_perfinfo; -struct hexagon_nn_perfinfo { - unsigned int node_id; - unsigned int node_type; - unsigned int executions; - unsigned int unused; - unsigned int counter_lo; - unsigned int counter_hi; -}; -typedef int hexagon_nn_nn_id; -enum hexagon_nn_padding_type { - NN_PAD_NA, - NN_PAD_SAME, - NN_PAD_VALID, - NN_PAD_MIRROR_REFLECT, - NN_PAD_MIRROR_SYMMETRIC, - NN_PAD_SAME_CAFFE, - _32BIT_PLACEHOLDER_hexagon_nn_padding_type = 0x7fffffff -}; -typedef enum hexagon_nn_padding_type hexagon_nn_padding_type; -typedef struct hexagon_nn_tensordef hexagon_nn_tensordef; -struct hexagon_nn_tensordef { - unsigned int batches; - unsigned int height; - unsigned int width; - unsigned int depth; - unsigned char *data; - int dataLen; - unsigned int data_valid_len; - unsigned int unused; -}; -typedef struct hexagon_nn_op_node hexagon_nn_op_node; -struct hexagon_nn_op_node { - unsigned int node_id; - unsigned int operation; - hexagon_nn_padding_type padding; - hexagon_nn_input *inputs; - int inputsLen; - hexagon_nn_output *outputs; - int outputsLen; -}; -typedef struct hexagon_nn_const_node hexagon_nn_const_node; -struct hexagon_nn_const_node { - unsigned int node_id; - hexagon_nn_tensordef tensor; -}; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_config)(void) - __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_init)(void) - __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_set_debug_level)( - hexagon_nn_nn_id id, int level) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_set_graph_mode)( - hexagon_nn_nn_id id, int mode) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_snpprint)(hexagon_nn_nn_id id, - unsigned char *buf, - int bufLen) - __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_getlog)(hexagon_nn_nn_id id, - unsigned char *buf, - int bufLen) - __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_node)( - hexagon_nn_nn_id id, - unsigned int node_id, - unsigned int operation, - hexagon_nn_padding_type padding, - const hexagon_nn_input *inputs, - int inputsLen, - const hexagon_nn_output *outputs, - int outputsLen) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_node_list)( - hexagon_nn_nn_id id, - const hexagon_nn_op_node *ops, - int opsLen) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_const_node)( - hexagon_nn_nn_id id, - unsigned int node_id, - unsigned int batches, - unsigned int height, - unsigned int width, - unsigned int depth, - const unsigned char *data, - int dataLen) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int 
__QAIC_HEADER(hexagon_nn_append_const_node_list)( - hexagon_nn_nn_id id, - const hexagon_nn_const_node *consts, - int constsLen) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_prepare)(hexagon_nn_nn_id id) - __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_execute)( - hexagon_nn_nn_id id, - unsigned int batches_in, - unsigned int height_in, - unsigned int width_in, - unsigned int depth_in, - const unsigned char *data_in, - int data_inLen, - unsigned int *batches_out, - unsigned int *height_out, - unsigned int *width_out, - unsigned int *depth_out, - unsigned char *data_out, - int data_outLen, - unsigned int *data_len_out) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_teardown)(hexagon_nn_nn_id id) - __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_set_powersave_level)( - unsigned int level) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_get_perfinfo)( - hexagon_nn_nn_id id, - hexagon_nn_perfinfo *info_out, - int info_outLen, - unsigned int *n_items) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_reset_perfinfo)( - hexagon_nn_nn_id id, unsigned int event) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_last_execution_cycles)( - hexagon_nn_nn_id id, - unsigned int *cycles_lo, - unsigned int *cycles_hi) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_version)(int *ver) - __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_op_name_to_id)( - const char *name, unsigned int *node_id) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_op_id_to_name)( - unsigned int node_id, char *name, int nameLen) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_disable_dcvs)(void) - __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_GetHexagonBinaryVersion)( - int *ver) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_PrintLog)( - const unsigned char *buf, int bufLen) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_execute_new)( - hexagon_nn_nn_id id, - const hexagon_nn_tensordef *inputs, - int inputsLen, - hexagon_nn_tensordef *outputs, - int outputsLen) __QAIC_HEADER_ATTRIBUTE; -#ifdef __cplusplus -} -#endif - -#elif defined(MACE_USE_NNLIB_2_1) // nnlib version - #ifndef __QAIC_HEADER #define __QAIC_HEADER(ff) ff #endif //__QAIC_HEADER @@ -370,200 +179,4 @@ __QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_execute_new)(hexagon_nn_nn_id } #endif -#else // nnlib version : MACE_USE_NNLIB_CAF - -#ifndef __QAIC_HEADER -#define __QAIC_HEADER(ff) ff -#endif //__QAIC_HEADER - -#ifndef __QAIC_HEADER_EXPORT -#define __QAIC_HEADER_EXPORT -#endif // __QAIC_HEADER_EXPORT - -#ifndef __QAIC_HEADER_ATTRIBUTE -#define __QAIC_HEADER_ATTRIBUTE -#endif // __QAIC_HEADER_ATTRIBUTE - -#ifndef __QAIC_IMPL -#define __QAIC_IMPL(ff) ff -#endif //__QAIC_IMPL - -#ifndef __QAIC_IMPL_EXPORT -#define __QAIC_IMPL_EXPORT -#endif // __QAIC_IMPL_EXPORT - -#ifndef __QAIC_IMPL_ATTRIBUTE -#define __QAIC_IMPL_ATTRIBUTE -#endif // __QAIC_IMPL_ATTRIBUTE -#ifdef __cplusplus -extern "C" { -#endif -#if !defined(__QAIC_STRING1_OBJECT_DEFINED__) && !defined(__STRING1_OBJECT__) -#define __QAIC_STRING1_OBJECT_DEFINED__ -#define __STRING1_OBJECT__ -typedef struct _cstring1_s { - char *data; - int dataLen; -} _cstring1_t; - -#endif /* __QAIC_STRING1_OBJECT_DEFINED__ */ -typedef struct 
hexagon_nn_input hexagon_nn_input; -struct hexagon_nn_input { - unsigned int src_id; - unsigned int output_idx; -}; -typedef struct hexagon_nn_output hexagon_nn_output; -struct hexagon_nn_output { - unsigned int rank; - unsigned int max_sizes[8]; - unsigned int elementsize; - int zero_offset; - float stepsize; -}; -typedef struct hexagon_nn_perfinfo hexagon_nn_perfinfo; -struct hexagon_nn_perfinfo { - unsigned int node_id; - unsigned int node_type; - unsigned int executions; - unsigned int unused; - unsigned int counter_lo; - unsigned int counter_hi; -}; -typedef int hexagon_nn_nn_id; -enum hexagon_nn_padding_type { - NN_PAD_NA, - NN_PAD_SAME, - NN_PAD_VALID, - NN_PAD_MIRROR_REFLECT, - NN_PAD_MIRROR_SYMMETRIC, - NN_PAD_SAME_CAFFE, - _32BIT_PLACEHOLDER_hexagon_nn_padding_type = 0x7fffffff -}; -typedef enum hexagon_nn_padding_type hexagon_nn_padding_type; -typedef struct hexagon_nn_tensordef hexagon_nn_tensordef; -struct hexagon_nn_tensordef { - unsigned int batches; - unsigned int height; - unsigned int width; - unsigned int depth; - unsigned char *data; - int dataLen; - unsigned int data_valid_len; - unsigned int unused; -}; -typedef struct hexagon_nn_op_node hexagon_nn_op_node; -struct hexagon_nn_op_node { - unsigned int node_id; - unsigned int operation; - hexagon_nn_padding_type padding; - hexagon_nn_input *inputs; - int inputsLen; - hexagon_nn_output *outputs; - int outputsLen; -}; -typedef struct hexagon_nn_const_node hexagon_nn_const_node; -struct hexagon_nn_const_node { - unsigned int node_id; - hexagon_nn_tensordef tensor; -}; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_config)(void) - __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_init)(hexagon_nn_nn_id *g) - __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_set_debug_level)( - hexagon_nn_nn_id id, int level) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_set_graph_mode)( - hexagon_nn_nn_id id, int mode) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_snpprint)(hexagon_nn_nn_id id, - unsigned char *buf, - int bufLen) - __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_getlog)(hexagon_nn_nn_id id, - unsigned char *buf, - int bufLen) - __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_node)( - hexagon_nn_nn_id id, - unsigned int node_id, - unsigned int operation, - hexagon_nn_padding_type padding, - const hexagon_nn_input *inputs, - int inputsLen, - const hexagon_nn_output *outputs, - int outputsLen) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_node_list)( - hexagon_nn_nn_id id, - const hexagon_nn_op_node *ops, - int opsLen) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_const_node)( - hexagon_nn_nn_id id, - unsigned int node_id, - unsigned int batches, - unsigned int height, - unsigned int width, - unsigned int depth, - const unsigned char *data, - int dataLen) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_const_node_list)( - hexagon_nn_nn_id id, - const hexagon_nn_const_node *consts, - int constsLen) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_prepare)(hexagon_nn_nn_id id) - __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_execute)( - hexagon_nn_nn_id id, - unsigned int batches_in, - unsigned int height_in, - unsigned int width_in, - unsigned int depth_in, - const unsigned char *data_in, - 
int data_inLen, - unsigned int *batches_out, - unsigned int *height_out, - unsigned int *width_out, - unsigned int *depth_out, - unsigned char *data_out, - int data_outLen, - unsigned int *data_len_out) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_teardown)(hexagon_nn_nn_id id) - __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_set_powersave_level)( - unsigned int level) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_get_perfinfo)( - hexagon_nn_nn_id id, - hexagon_nn_perfinfo *info_out, - int info_outLen, - unsigned int *n_items) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_reset_perfinfo)( - hexagon_nn_nn_id id, unsigned int event) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_last_execution_cycles)( - hexagon_nn_nn_id id, - unsigned int *cycles_lo, - unsigned int *cycles_hi) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_version)(int *ver) - __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_op_name_to_id)( - const char *name, unsigned int *node_id) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_op_id_to_name)( - unsigned int node_id, char *name, int nameLen) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_disable_dcvs)(void) - __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_GetHexagonBinaryVersion)( - int *ver) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_PrintLog)( - const unsigned char *buf, int bufLen) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_execute_new)( - hexagon_nn_nn_id id, - const hexagon_nn_tensordef *inputs, - int inputsLen, - hexagon_nn_tensordef *outputs, - int outputsLen) __QAIC_HEADER_ATTRIBUTE; -__QAIC_HEADER_EXPORT unsigned int __QAIC_HEADER(hexagon_nn_get_dsp_offset)(void) - __QAIC_HEADER_ATTRIBUTE; -#ifdef __cplusplus -} -#endif - -#endif // nnlib version - #endif // THIRD_PARTY_NNLIB_HEXAGON_NN_H_ diff --git a/third_party/nnlib/libhexagon_controller.so b/third_party/nnlib/libhexagon_controller.so index 05c8f3d8d71401ff117ce9b75f21cebc5e44dc66..ee742a36cd466845e97d50871ae749bf1ec97769 100755 Binary files a/third_party/nnlib/libhexagon_controller.so and b/third_party/nnlib/libhexagon_controller.so differ diff --git a/third_party/nnlib/libhexagon_controller_2_1.so b/third_party/nnlib/libhexagon_controller_2_1.so deleted file mode 100755 index 3d592e01856d276bbfbcbfd1d768307719fd00ed..0000000000000000000000000000000000000000 Binary files a/third_party/nnlib/libhexagon_controller_2_1.so and /dev/null differ diff --git a/third_party/nnlib/libhexagon_controller_old.so b/third_party/nnlib/libhexagon_controller_old.so deleted file mode 100755 index 1c5a4111f9958eddfb0426c3010bc93ad8e13119..0000000000000000000000000000000000000000 Binary files a/third_party/nnlib/libhexagon_controller_old.so and /dev/null differ diff --git a/third_party/nnlib/ops.h b/third_party/nnlib/ops.h index 9e91981354a6d5e0d1ea2060b873445ee5711c2b..356b6c3b91f5ebe4e5058ed6cdd19c3fb594e320 100644 --- a/third_party/nnlib/ops.h +++ b/third_party/nnlib/ops.h @@ -79,147 +79,6 @@ */ // NOLINT(build/header_guard) -#ifdef MACE_USE_NNLIB_OLD - -DEF_OP(INPUT) -DEF_OP(OUTPUT) -DEF_OP(Nop) -DEF_OP(Const) -DEF_OP(Check) -DEF_OP(Close_f) -DEF_OP(Close_quint8) -DEF_OP(Close_q_quint8) -DEF_OP(Close_int32) -DEF_OP(Close_qint32) -DEF_OP(PPrint_8) -DEF_OP(PPrint_32) 
-DEF_OP(PPrint_f) -DEF_OP(PreFree) -DEF_OP(Flatten) - -#ifndef DEF_OP_WREF -#define DEF_OP_WREF(NAME) DEF_OP(NAME) DEF_OP(NAME##_ref) -#define __SELF_DEF_OP_WREF -#endif - -DEF_OP_WREF(QuantizedConv2d_8x8to32) -DEF_OP_WREF(QuantizedMatMul_8x8to32) -DEF_OP_WREF(QuantizeDownAndShrinkRange_32to8) -DEF_OP_WREF(QuantizedRelu_8) -DEF_OP_WREF(QuantizedReluX_8) -DEF_OP_WREF(QuantizedMaxPool_8) -DEF_OP_WREF(QuantizedAvgPool_8) -DEF_OP_WREF(QuantizedConcat_8) -DEF_OP_WREF(QuantizedBiasAdd_8p8to32) -DEF_OP_WREF(Min_f) -DEF_OP_WREF(Max_f) -DEF_OP_WREF(Quantize) -DEF_OP_WREF(Dequantize) -DEF_OP_WREF(Supernode_8x8p8to8) - -DEF_OP(QuantizedFlatten) -DEF_OP(Softmax_f) -DEF_OP(Conv2d_f) -DEF_OP(MatMul_f) -DEF_OP(Relu_f) -DEF_OP(ReluX_f) -DEF_OP(AvgPool_f) -DEF_OP(MaxPool_f) -DEF_OP(Concat_f) -DEF_OP(BiasAdd_f) -DEF_OP(LRN_f) - -DEF_OP(Variable) -DEF_OP(Assign) -DEF_OP(Reshape) -DEF_OP(QuantizedReshape) -DEF_OP(Tanh_f) -DEF_OP(Sigmoid_f) -DEF_OP(Slice_8) -DEF_OP(Slice_f) -DEF_OP(QuantizedSlice_8) -DEF_OP(Add_f) -DEF_OP(Mul_f) -DEF_OP(Minimum_f) -DEF_OP(Maximum_f) - -DEF_OP_WREF(Requantize_32to8) -DEF_OP_WREF(RequantizationRange_32) - -DEF_OP(Neg_f) -DEF_OP(Sub_f) -DEF_OP(AddN_f) -DEF_OP(Range_int32) -DEF_OP(Rank_int32) -DEF_OP(Transpose_int32) -DEF_OP(Transpose_f) -DEF_OP(InstanceNorm_f) -DEF_OP_WREF(QuantizedInstanceNorm_8) -DEF_OP(Sub_int32) -DEF_OP(Add_int32) -DEF_OP(Split_f) -DEF_OP(Dequantize_qint32_f) -DEF_OP(PRelu_f) -DEF_OP_WREF(QuantizedPRelu_8) -DEF_OP(Sum_f) -DEF_OP(Prod_f) -DEF_OP(Mul_int32) -DEF_OP(LogicalAnd_int32) -DEF_OP(LogicalOr_int32) -DEF_OP(LogicalXor_int32) -DEF_OP(Shape_int32) -DEF_OP(Pack_int32) -DEF_OP(MirrorPad_f) -DEF_OP(ResizeNearestNeighbor_f) -DEF_OP(StridedSlice_int32) -DEF_OP(StridedSlice_f) -DEF_OP(ExpandDims_int32) -DEF_OP(ExpandDims_f) - -DEF_OP(LogSoftmax_f) -DEF_OP(Split_int32) -DEF_OP(QuantizedSplit_8) - -DEF_OP(Deconv_f) -DEF_OP_WREF(QuantizedDeconv_8x8to32) - -DEF_OP_WREF(QuantizedMul_8x8to32) -DEF_OP_WREF(QuantizedAdd_8p8to32) -DEF_OP_WREF(QuantizedSigmoid_8) -DEF_OP_WREF(QuantizedTanh_8) -DEF_OP_WREF(QuantizedSoftmax_8) -DEF_OP_WREF(QuantizedLRN_8) -DEF_OP_WREF(QuantizedSub_8p8to32) -DEF_OP_WREF(QuantizedMaximum_8) -DEF_OP_WREF(QuantizedMinimum_8) - -DEF_OP(Pad_f) -DEF_OP(SpaceToBatchND_f) -DEF_OP(BatchToSpaceND_f) -DEF_OP(QuantizedSpaceToBatchND_8) -DEF_OP(QuantizedBatchToSpaceND_8) -DEF_OP(QuantizedPad_8) -DEF_OP(ResizeBilinear_f) -DEF_OP(QuantizedResizeBilinear_8) -DEF_OP(ConcatV2_f) -DEF_OP(ConcatV2_int32) -DEF_OP(Prod_int32) -DEF_OP(Slice_int32) - -DEF_OP(QuantizedAdd_8p8to8) - -DEF_OP_WREF(AutoQuantize) -DEF_OP_WREF(QuantizedDepthwiseConv2d_8x8to32) -DEF_OP(DepthwiseConv2d_f) -DEF_OP(QuantizedBiasAdd_8p8to8) - -#ifdef __SELF_DEF_OP_WREF -#undef __SELF_DEF_OP_WREF -#undef DEF_OP_WREF -#endif - -#elif defined(MACE_USE_NNLIB_2_1) // nnlib version - DEF_OP(INPUT) DEF_OP(OUTPUT) DEF_OP(Nop) @@ -441,214 +300,3 @@ DEF_OP(QuantizedChannelShuffle_8) #undef __SELF_DEF_OP_WREF #undef DEF_OP_WREF #endif - -#else // nnlib version : MACE_USE_NNLIB_CAF - -DEF_OP(INPUT) -DEF_OP(OUTPUT) -DEF_OP(Nop) -DEF_OP(Const) -DEF_OP(Check) -DEF_OP(Close_f) -DEF_OP(Close_quint8) -DEF_OP(Close_q_quint8) -DEF_OP(Close_int32) -DEF_OP(Close_qint32) -DEF_OP(PPrint_8) -DEF_OP(PPrint_32) -DEF_OP(PPrint_f) -DEF_OP(PreFree) -DEF_OP(Flatten) - -#ifndef DEF_OP_WREF -#define DEF_OP_WREF(NAME) DEF_OP(NAME) DEF_OP(NAME##_ref) -#define __SELF_DEF_OP_WREF -#endif - -DEF_OP_WREF(QuantizedConv2d_8x8to32) -DEF_OP_WREF(QuantizedMatMul_8x8to32) -DEF_OP_WREF(QuantizeDownAndShrinkRange_32to8) 
-DEF_OP_WREF(QuantizedRelu_8) -DEF_OP_WREF(QuantizedReluX_8) -DEF_OP_WREF(QuantizedMaxPool_8) -DEF_OP_WREF(QuantizedAvgPool_8) -DEF_OP_WREF(QuantizedL2Pool_8) -DEF_OP_WREF(QuantizedConcat_8) -DEF_OP_WREF(QuantizedBiasAdd_8p8to32) -DEF_OP_WREF(Min_f) -DEF_OP_WREF(Max_f) -DEF_OP_WREF(Quantize) -DEF_OP_WREF(Dequantize) -DEF_OP_WREF(Supernode_8x8p8to8) - -DEF_OP(QuantizedFlatten) -DEF_OP(Softmax_f) -DEF_OP(Conv2d_f) -DEF_OP(MatMul_f) -DEF_OP(Relu_f) -DEF_OP(ReluX_f) -DEF_OP(AvgPool_f) -DEF_OP(L2Pool_f) -DEF_OP(MaxPool_f) -DEF_OP(Concat_f) -DEF_OP(BiasAdd_f) -DEF_OP(LRN_f) - -DEF_OP(Variable) -DEF_OP(Assign) -DEF_OP(Reshape) -DEF_OP(QuantizedReshape) -DEF_OP(Tanh_f) -DEF_OP(Sigmoid_f) -DEF_OP(Slice_8) -DEF_OP(Slice_f) -DEF_OP(QuantizedSlice_8) -DEF_OP(Add_f) -DEF_OP(Mul_f) -DEF_OP(Minimum_f) -DEF_OP(Maximum_f) - -DEF_OP_WREF(Requantize_32to8) -DEF_OP_WREF(RequantizationRange_32) - -DEF_OP(Neg_f) -DEF_OP(Sub_f) -DEF_OP(AddN_f) -DEF_OP(Range_int32) -DEF_OP(Rank_int32) -DEF_OP(Transpose_int32) -DEF_OP(Transpose_f) -DEF_OP(InstanceNorm_f) -DEF_OP_WREF(QuantizedInstanceNorm_8) -DEF_OP(Sub_int32) -DEF_OP(Add_int32) -DEF_OP(Split_f) -DEF_OP(Dequantize_qint32_f) -DEF_OP(PRelu_f) -DEF_OP_WREF(QuantizedPRelu_8) -DEF_OP(Sum_f) -DEF_OP(Prod_f) -DEF_OP(Mul_int32) -DEF_OP(LogicalAnd_int32) -DEF_OP(LogicalOr_int32) -DEF_OP(LogicalXor_int32) -DEF_OP(Shape_int32) -DEF_OP(Pack_int32) -DEF_OP(MirrorPad_f) -DEF_OP(ResizeNearestNeighbor_f) -DEF_OP(StridedSlice_int32) -DEF_OP(StridedSlice_f) -DEF_OP(ExpandDims_int32) -DEF_OP(ExpandDims_f) - -DEF_OP(LogSoftmax_f) -DEF_OP(Split_int32) -DEF_OP(QuantizedSplit_8) - -DEF_OP(Deconv_f) -DEF_OP_WREF(QuantizedDeconv_8x8to32) - -DEF_OP_WREF(QuantizedMul_8x8to32) -DEF_OP_WREF(QuantizedAdd_8p8to32) -DEF_OP_WREF(QuantizedSigmoid_8) -DEF_OP_WREF(QuantizedTanh_8) -DEF_OP_WREF(QuantizedSoftmax_8) -DEF_OP_WREF(QuantizedLRN_8) -DEF_OP_WREF(Quantizedpad2d_frame_8p) -DEF_OP_WREF(QuantizedSub_8p8to32) -DEF_OP_WREF(QuantizedMaximum_8) -DEF_OP_WREF(QuantizedMinimum_8) - -DEF_OP(Pad_f) -DEF_OP(SpaceToBatchND_f) -DEF_OP(BatchToSpaceND_f) -DEF_OP(QuantizedPad_8) -DEF_OP(ResizeBilinear_f) -DEF_OP(ConcatV2_f) -DEF_OP(ConcatV2_int32) -DEF_OP(Prod_int32) -DEF_OP(Slice_int32) - -DEF_OP(QuantizedAdd_8p8to8) -DEF_OP(QuantizedResizeBilinear_8) -DEF_OP(Supernode_8x8p8to8_d32) -DEF_OP(Convert_to_d32) -DEF_OP(Convert_from_d32) -DEF_OP_WREF(QuantizedMaxPool_8_d32) -DEF_OP_WREF(QuantizedConcat_8_d32) -DEF_OP_WREF(QuantizedAvgPool_8_d32) - -DEF_OP(Sink) - -DEF_OP_WREF(QuantizedPRelu_8_d32) -DEF_OP_WREF(AutoQuantize) -DEF_OP_WREF(QuantizedDepthwiseConv2d_8x8to32) -DEF_OP_WREF(DepthwiseConv2d_f) -DEF_OP(DepthwiseSupernode_8x8p8to8) -DEF_OP(DepthwiseSupernode_8x8p8to8_d32) - -DEF_OP_WREF(QuantizedMul_8x8to8_d32) - -DEF_OP(FullyConnected_u8) -#if 0 - DEF_OP_WREF(QuantizedFC_8x8p8to8) -#endif - -DEF_OP_WREF(QuantizedAdd_8p8to8_d32) - -DEF_OP_WREF(QuantizedClamp_8) -DEF_OP(Clamp_f) -DEF_OP(QuantizeForTest_d32) -DEF_OP(Close_d32) -DEF_OP_WREF(QuantizedSub_8p8to8_d32) - -DEF_OP(InputSupernode_8x8p8to8_outd32) -DEF_OP(QuantizedLRN_8_d32) -DEF_OP_WREF(QuantizedBiasAdd_32p32to32) -DEF_OP_WREF(Quantize_int32) - -DEF_OP(Supernode_8x8p32to8) -DEF_OP(DepthwiseSupernode_8x8p32to8) -DEF_OP(Supernode_8x8p32to8_d32) -DEF_OP(DepthwiseSupernode_8x8p32to8_d32) -DEF_OP(InputSupernode_8x8p32to8_outd32) - -DEF_OP(PPrint_8_d32) -DEF_OP(PPrintWithPadding_8_d32) -DEF_OP_WREF(AutoQuantize_d32) - -DEF_OP_WREF(QuantizedTanh_8_d32) -DEF_OP_WREF(QuantizedSigmoid_8_d32) -DEF_OP_WREF(QuantizedSoftmax_8_d32) - - 
-DEF_OP_WREF(QuantizedL2Pool_8_d32) - -DEF_OP(Gather_f) -DEF_OP(Gather_int32) -DEF_OP(Gather_8) -DEF_OP(Table_f) -DEF_OP(Table_int32) -DEF_OP(Table_8) - -DEF_OP(FillPadding_8_d32) -DEF_OP(QuantizedResizeBilinear_8_d32) - -DEF_OP(QuantizeINPUT_f_to_8) -DEF_OP_WREF(DeconvBias_8x8to32) - -DEF_OP(SpaceToBatchND_8) -DEF_OP(BatchToSpaceND_8) - - -DEF_OP(SpaceToDepth_f) -DEF_OP(DepthToSpace_f) -DEF_OP(SpaceToDepth_8) -DEF_OP(DepthToSpace_8) - -#ifdef __SELF_DEF_OP_WREF -#undef __SELF_DEF_OP_WREF -#undef DEF_OP_WREF -#endif - -#endif // nnlib version diff --git a/third_party/nnlib/v60/libhexagon_nn_skel.so b/third_party/nnlib/v60/libhexagon_nn_skel.so new file mode 100755 index 0000000000000000000000000000000000000000..96645ccae17161cdb5aa4efbe5fa814cb4012c8b Binary files /dev/null and b/third_party/nnlib/v60/libhexagon_nn_skel.so differ diff --git a/third_party/nnlib/v66/libhexagon_nn_skel.so b/third_party/nnlib/v66/libhexagon_nn_skel.so new file mode 100755 index 0000000000000000000000000000000000000000..4f86b5a87519d674372b8cf23150475a29969214 Binary files /dev/null and b/third_party/nnlib/v66/libhexagon_nn_skel.so differ diff --git a/tools/bazel.rc b/tools/bazel.rc index 93442944babbe618cbab78171b4b197a14e9805b..fc0b9b579fcbbd156052e0037f60b1537e4ca237 100644 --- a/tools/bazel.rc +++ b/tools/bazel.rc @@ -9,7 +9,6 @@ build --copt=-fPIC build --copt=-D_GLIBCXX_USE_C99_MATH_TR1 build --copt=-DMACE_OBFUSCATE_LITERALS build --copt=-DGEMMLOWP_USE_OPENMP -build --copt=-DMACE_USE_NNLIB_CAF # Usage example: bazel build --config symbol_hidden build:symbol_hidden --copt=-fvisibility=hidden diff --git a/tools/converter.py b/tools/converter.py index 486014ab315a9854739701ee3ed0d939133cfe03..fa67ea919e3421e0f3554c1dc53743c3d5c5d7b0 100644 --- a/tools/converter.py +++ b/tools/converter.py @@ -445,7 +445,8 @@ def format_model_config(flags): threshold_dict = { DeviceType.CPU: ValidationThreshold.cpu_threshold, DeviceType.GPU: ValidationThreshold.gpu_threshold, - DeviceType.HEXAGON: ValidationThreshold.hexagon_threshold, + DeviceType.HEXAGON + "_QUANTIZE": + ValidationThreshold.hexagon_threshold, DeviceType.CPU + "_QUANTIZE": ValidationThreshold.cpu_quantize_threshold, } diff --git a/tools/device.py b/tools/device.py index 90683eb87afed561a96e35c1402ec58536d46478..298b997fe72de4de18d8357f76b2bb7c961ede98 100644 --- a/tools/device.py +++ b/tools/device.py @@ -173,6 +173,8 @@ class DeviceWrapper: gpu_priority_hint=3, input_file_name='model_input', output_file_name='model_out', + input_dir="", + output_dir="", runtime_failure_ratio=0.0, address_sanitizer=False, link_dynamic=False, @@ -208,6 +210,8 @@ class DeviceWrapper: input_file_name), "--output_file=%s/%s" % (model_output_dir, output_file_name), + "--input_dir=%s" % input_dir, + "--output_dir=%s" % output_dir, "--model_data_file=%s/%s.data" % (mace_model_dir, model_tag), "--device=%s" % device_type, @@ -296,6 +300,8 @@ class DeviceWrapper: "--output_shape=%s" % ":".join(output_shapes), "--input_file=%s/%s" % (self.data_dir, input_file_name), "--output_file=%s/%s" % (self.data_dir, output_file_name), + "--input_dir=%s" % input_dir, + "--output_dir=%s" % output_dir, "--model_data_file=%s/%s.data" % (self.data_dir, model_tag), "--device=%s" % device_type, "--round=%s" % running_round, @@ -515,6 +521,12 @@ class DeviceWrapper: for runtime in runtime_list: device_type = parse_device_type(runtime) # run for specified soc + if not subgraphs[0][YAMLKeyword.check_tensors]: + output_nodes = subgraphs[0][YAMLKeyword.output_tensors] + output_shapes = 
subgraphs[0][YAMLKeyword.output_shapes] + else: + output_nodes = subgraphs[0][YAMLKeyword.check_tensors] + output_shapes = subgraphs[0][YAMLKeyword.check_shapes] run_output = self.tuning_run( abi=target_abi, target_dir=build_tmp_binary_dir, @@ -523,9 +535,9 @@ class DeviceWrapper: embed_model_data=embed_model_data, model_output_dir=model_output_dir, input_nodes=subgraphs[0][YAMLKeyword.input_tensors], - output_nodes=subgraphs[0][YAMLKeyword.output_tensors], + output_nodes=output_nodes, input_shapes=subgraphs[0][YAMLKeyword.input_shapes], - output_shapes=subgraphs[0][YAMLKeyword.output_shapes], + output_shapes=output_shapes, mace_model_dir=mace_model_dir, model_tag=model_name, device_type=device_type, @@ -547,6 +559,8 @@ class DeviceWrapper: libmace_dynamic_library_path=LIBMACE_DYNAMIC_PATH, link_dynamic=link_dynamic, quantize_stat=flags.quantize_stat, + input_dir=flags.input_dir, + output_dir=flags.output_dir, ) if flags.validate: model_file_path, weight_file_path = get_model_files( @@ -568,9 +582,9 @@ class DeviceWrapper: platform=model_config[YAMLKeyword.platform], device_type=device_type, input_nodes=subgraphs[0][YAMLKeyword.input_tensors], - output_nodes=subgraphs[0][YAMLKeyword.output_tensors], + output_nodes=output_nodes, input_shapes=subgraphs[0][YAMLKeyword.input_shapes], - output_shapes=subgraphs[0][YAMLKeyword.output_shapes], + output_shapes=output_shapes, model_output_dir=model_output_dir, input_data_types=subgraphs[0][ YAMLKeyword.input_data_types], @@ -961,7 +975,8 @@ class DeviceManager: YAMLKeyword.address: adb[0], YAMLKeyword.username: '', } - devices.append(android) + if android not in devices: + devices.append(android) return devices @classmethod
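
Note on the tools/device.py change above: when the model YAML supplies check_tensors, both tuning_run and validation now receive those (together with check_shapes) in place of the graph's output_tensors/output_shapes. Below is a minimal sketch of that selection logic, not part of the patch: the helper name is made up, and plain-string keys stand in for the YAMLKeyword constants.

    # Sketch only: mirrors the subgraphs[0] selection added in device.py.
    # Plain-string keys stand in for YAMLKeyword.*; the helper is hypothetical.
    def pick_validation_outputs(subgraph):
        # Prefer explicit check_tensors/check_shapes when the YAML sets them;
        # otherwise fall back to the graph's normal outputs.
        if subgraph.get('check_tensors'):
            return subgraph['check_tensors'], subgraph['check_shapes']
        return subgraph['output_tensors'], subgraph['output_shapes']

    # A YAML without check_tensors keeps validating the real outputs:
    nodes, shapes = pick_validation_outputs({
        'check_tensors': [], 'check_shapes': [],
        'output_tensors': ['softmax'], 'output_shapes': ['1,1001'],
    })
    assert nodes == ['softmax'] and shapes == ['1,1001']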
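
Relatedly, transformer.py's new delete_after_check_nodes (replacing add_check_nodes) no longer appends a Dequantize op for the check node; when check_nodes differ from output_nodes, it simply truncates every op after the single op that produces the check tensor. A hypothetical miniature of that truncation, using plain dicts instead of NetDef ops:

    # Sketch only: keep ops up to and including the producer of the check
    # tensor, mirroring `del self._model.op[i+1:]` in the patch.
    def truncate_after_check(ops, check_nodes):
        for i, op in enumerate(ops):
            if op['output'][0] in check_nodes:
                return ops[:i + 1]
        raise ValueError('check node not found')

    ops = [{'output': ['a']}, {'output': ['b']}, {'output': ['c']}]
    kept = truncate_after_check(ops, {'b'})
    assert [op['output'][0] for op in kept] == ['a', 'b']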
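
Note on the new TRANSFORM_CHANNEL_SHUFFLE rule in transformer.py above: it matches the Reshape -> 5-D Transpose (perm [0, 1, 2, 4, 3]) -> Reshape sequence that converters emit for ShuffleNet-style channel shuffle, and fuses it into a single ChannelShuffle op whose group count is taken from dims[4] of the Transpose output. An illustrative NumPy reference of the pattern being fused (reference semantics only, not MACE's kernel):

    import numpy as np

    # Reference only: the reshape/transpose/reshape idiom the rule detects.
    # `group` plays the role of the mace_group_str argument, i.e. dims[4]
    # of the 5-D Transpose output.
    def channel_shuffle_reference(x, group):
        n, h, w, c = x.shape
        assert c % group == 0
        x = x.reshape(n, h, w, group, c // group)  # producer Reshape (the patch also handles a Stack here)
        x = x.transpose(0, 1, 2, 4, 3)             # Transpose, perm [0, 1, 2, 4, 3]
        return x.reshape(n, h, w, c)               # consumer Reshape

    x = np.arange(2 * 4 * 4 * 8, dtype=np.float32).reshape(2, 4, 4, 8)
    y = channel_shuffle_reference(x, group=4)
    # Channels are interleaved across the 4 groups: 0,2,4,6 then 1,3,5,7.
    assert y[0, 0, 0].tolist() == [0, 2, 4, 6, 1, 3, 5, 7]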