提交 1c150474 编写于 作者: B Bin Li

Open hexagon_nn

上级 c10407b2
......@@ -178,16 +178,18 @@ quantization_tests:
- pwd
- rm -rf mace-models
- GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@github.com:XiaoMi/mace-models.git
- CONF_FILE=mace-models/mobilenet-v1/mobilenet-v1-quantize-retrain.yml
- >
if ping -c 1 v9.git.n.xiaomi.com 1>/dev/null 2>&1; then
GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/generic-mobile-devices.git
DEVICE_CONF_FILE=generic-mobile-devices/devices.yml
fi
- >
python tools/converter.py convert --config=${CONF_FILE} --model_graph_format=file --model_data_format=file --cl_mem_type=buffer || exit 1;
python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --round=1 --target_abis=armeabi-v7a,arm64 --validate --model_graph_format=file --model_data_format=file || exit 1;
python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --example --target_abis=armeabi-v7a,arm64 --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1;
for CONF_FILE in mace-models/mobilenet-v1/mobilenet-v1-quantize-retrain.yml mace-models/mobilenet-v1/mobilenet-v1-quantize-retrain-for-check-only.yml mace-models/mobilenet-v1/mobilenet-v1-quantize-retrain-dsp.yml;
do
python tools/converter.py convert --config=${CONF_FILE} --model_graph_format=file --model_data_format=file || exit 1;
python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1;
python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --example --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1;
done
- rm -rf mace-models
build_android_demo:
......
......@@ -76,7 +76,7 @@ please refer to [the contribution guide](https://mace.readthedocs.io/en/latest/d
MACE depends on several open source projects located in the
[third_party](third_party) directory. Particularly, we learned a lot from
the following projects during the development:
* [Qualcomm Hexagon NN Offload Framework](https://source.codeaurora.org/quic/hexagon_nn/nnlib): the Hexagon DSP runtime
* [Qualcomm Hexagon NN Offload Framework](https://developer.qualcomm.com/software/hexagon-dsp-sdk): the Hexagon DSP runtime
depends on this library.
* [TensorFlow](https://github.com/tensorflow/tensorflow),
[Caffe](https://github.com/BVLC/caffe),
......
......@@ -59,9 +59,9 @@ Why is MACE not working on DSP?
------------------------------------------------------------------------------
Running models on Hexagon DSP need a few prerequisites for DSP developers:
* You need make sure SOCs of your phone is manufactured by Qualcomm and has HVX supported.
* You need to make sure the SoC of your phone is manufactured by Qualcomm and supports HVX.
* You need a phone with secure boot disabled (once enabled, secure boot cannot be reversed, so you can probably only get this type of phone from manufacturers).
* You need sign your phone by using testsig provided by Qualcomm. (Download Qualcomm Hexagon SDK first, plugin your phone to PC, run scripts/testsig.py)
* You need install Hexagon nnlib backend by following nnlib README (https://github.com/XiaoMi/nnlib).
* You need to sign your phone using the testsig provided by Qualcomm. (Download the Qualcomm Hexagon SDK first, plug your phone into your PC, then run scripts/testsig.py.)
* You need to push `third_party/nnlib/v6x/libhexagon_nn_skel.so` to `/system/vendor/lib/rfsa/adsp/`.
Then, there you go. You can run Mace on Hexagon DSP.
......@@ -99,7 +99,6 @@ MACE now supports models from TensorFlow and Caffe (more frameworks will be supp
Prepare your pre-trained TensorFlow model.pb file.
- Caffe
Caffe 1.0+ models are supported in MACE converter tool.
......@@ -253,7 +252,13 @@ However, there are some differences in different devices.
* **DSP**
MACE only support Qualcomm DSP.
MACE only supports Qualcomm DSP. You also need to push the Hexagon NN library to the device.
.. code:: sh
# For Android device
adb root; adb remount
adb push third_party/nnlib/v6x/libhexagon_nn_skel.so /system/vendor/lib/rfsa/adsp/
In the converting and building steps, you've got the static/shared library, model files and
header files.
......
......@@ -22,9 +22,6 @@ models, e.g., MobileNet. The only thing you need to make it run using MACE is to
2. `quantize`: set `quantize` to be 1.
.. note::
You need set `runtime` to be `cpu` because we only support this quantization method to run on CPU for now (soon DSP will be supported).
Post training quantization
---------------------------
......
......@@ -107,11 +107,7 @@ bool HexagonControlWrapper::Config() {
bool HexagonControlWrapper::Init() {
LOG(INFO) << "Hexagon init";
#ifdef MACE_USE_NNLIB_OLD
nn_id_ = hexagon_nn_init();
#else
MACE_CHECK(hexagon_nn_init(&nn_id_) == 0, "hexagon_nn_init failed");
#endif
ResetPerfInfo();
return true;
}
......@@ -128,138 +124,116 @@ bool HexagonControlWrapper::SetupGraph(const NetDef &net_def,
int64_t t0 = NowMicros();
// const node
#if defined(MACE_USE_NNLIB_CAF) || defined(MACE_USE_NNLIB_OLD)
std::thread const_thread([&]()
#endif
{
std::vector<hexagon_nn_const_node> const_node_list;
for (const ConstTensor &const_tensor : net_def.tensors()) {
std::vector<int> tensor_shape(const_tensor.dims().begin(),
const_tensor.dims().end());
while (tensor_shape.size() < 4) {
tensor_shape.insert(tensor_shape.begin(), 1);
}
hexagon_nn_const_node const_node;
const_node.node_id = node_id(const_tensor.node_id());
const_node.tensor.batches = tensor_shape[0];
const_node.tensor.height = tensor_shape[1];
const_node.tensor.width = tensor_shape[2];
const_node.tensor.depth = tensor_shape[3];
if (const_tensor.data_type() == DataType::DT_INT32 &&
const_tensor.data_size() == 0) {
const_node.tensor.data = NULL;
const_node.tensor.dataLen = 0;
} else {
const_node.tensor.data =
const_cast<unsigned char *>(model_data + const_tensor.offset());
const_node.tensor.dataLen = const_tensor.data_size() *
GetEnumTypeSize(const_tensor.data_type());
}
const_node_list.push_back(const_node);
// 255 is magic number: why fastrpc limits sequence length to that?
if (const_node_list.size() >= 250) {
MACE_CHECK(
hexagon_nn_append_const_node_list(nn_id_, const_node_list.data(),
const_node_list.size()) == 0,
"append const node error");
const_node_list.clear();
}
std::vector<hexagon_nn_const_node> const_node_list;
for (const ConstTensor &const_tensor : net_def.tensors()) {
std::vector<int> tensor_shape(const_tensor.dims().begin(),
const_tensor.dims().end());
while (tensor_shape.size() < 4) {
tensor_shape.insert(tensor_shape.begin(), 1);
}
if (!const_node_list.empty()) {
hexagon_nn_const_node const_node;
const_node.node_id = node_id(const_tensor.node_id());
const_node.tensor.batches = tensor_shape[0];
const_node.tensor.height = tensor_shape[1];
const_node.tensor.width = tensor_shape[2];
const_node.tensor.depth = tensor_shape[3];
if (const_tensor.data_type() == DataType::DT_INT32 &&
const_tensor.data_size() == 0) {
const_node.tensor.data = NULL;
const_node.tensor.dataLen = 0;
} else {
const_node.tensor.data =
const_cast<unsigned char *>(model_data + const_tensor.offset());
const_node.tensor.dataLen = const_tensor.data_size() *
GetEnumTypeSize(const_tensor.data_type());
}
const_node_list.push_back(const_node);
// 255 is magic number: why fastrpc limits sequence length to that?
if (const_node_list.size() >= 250) {
MACE_CHECK(
hexagon_nn_append_const_node_list(nn_id_, const_node_list.data(),
const_node_list.size()) == 0,
"append const node error");
const_node_list.clear();
}
const_node_list.clear();
}
#if defined(MACE_USE_NNLIB_CAF) || defined(MACE_USE_NNLIB_OLD)
); // NOLINT
#endif
if (!const_node_list.empty()) {
MACE_CHECK(
hexagon_nn_append_const_node_list(nn_id_, const_node_list.data(),
const_node_list.size()) == 0,
"append const node error");
}
const_node_list.clear();
// op node
#if defined(MACE_USE_NNLIB_CAF) || defined(MACE_USE_NNLIB_OLD)
std::thread op_thread([&]()
#endif
{
OpMap op_map;
op_map.Init();
std::vector<hexagon_nn_op_node> op_node_list;
std::vector<std::vector<hexagon_nn_input>> cached_inputs;
std::vector<std::vector<hexagon_nn_output>> cached_outputs;
std::vector<hexagon_nn_input> inputs;
std::vector<hexagon_nn_output> outputs;
for (const OperatorDef &op : net_def.op()) {
int op_id = op_map.GetOpId(op.type());
inputs.resize(op.node_input().size());
for (int i = 0; i < op.node_input().size(); ++i) {
inputs[i].src_id = node_id(op.node_input()[i].node_id());
inputs[i].output_idx = op.node_input()[i].output_port();
}
outputs.resize(op.output_shape().size());
for (int i = 0; i < op.output_shape().size(); ++i) {
#ifdef MACE_USE_NNLIB_OLD
outputs[i].max_size = op.out_max_byte_size()[i];
#else
outputs[i].rank = op.output_shape()[i].dims().size();
for (size_t j = 0; j < outputs[i].rank; ++j) {
outputs[i].max_sizes[j] = op.output_shape()[i].dims()[j];
}
if (outputs[i].rank == 0) {
outputs[i].rank = 1;
outputs[i].max_sizes[0] = 1;
}
outputs[i].max_sizes[outputs[i].rank] = 0;
outputs[i].elementsize = GetEnumTypeSize(
static_cast<DataType>(op.output_type()[i]));
outputs[i].zero_offset = 0;
outputs[i].stepsize = 0;
#endif
OpMap op_map;
op_map.Init();
std::vector<hexagon_nn_op_node> op_node_list;
std::vector<std::vector<hexagon_nn_input>> cached_inputs;
std::vector<std::vector<hexagon_nn_output>> cached_outputs;
std::vector<hexagon_nn_input> inputs;
std::vector<hexagon_nn_output> outputs;
for (const OperatorDef &op : net_def.op()) {
int op_id = op_map.GetOpId(op.type());
inputs.resize(op.node_input().size());
for (int i = 0; i < op.node_input().size(); ++i) {
inputs[i].src_id = node_id(op.node_input()[i].node_id());
inputs[i].output_idx = op.node_input()[i].output_port();
}
outputs.resize(op.output_shape().size());
for (int i = 0; i < op.output_shape().size(); ++i) {
outputs[i].rank = op.output_shape()[i].dims().size();
for (size_t j = 0; j < outputs[i].rank; ++j) {
outputs[i].max_sizes[j] = op.output_shape()[i].dims()[j];
}
cached_inputs.push_back(inputs);
cached_outputs.push_back(outputs);
hexagon_nn_padding_type padding_type =
static_cast<hexagon_nn_padding_type>(op.padding());
hexagon_nn_op_node op_node;
op_node.node_id = node_id(op.node_id());
op_node.operation = op_id;
op_node.padding = padding_type;
op_node.inputs = cached_inputs.back().data();
op_node.inputsLen = inputs.size();
op_node.outputs = cached_outputs.back().data();
op_node.outputsLen = outputs.size();
op_node_list.push_back(op_node);
if (op_node_list.size() >= 125) {
MACE_CHECK(hexagon_nn_append_node_list(nn_id_, op_node_list.data(),
op_node_list.size()) == 0,
"append node error");
op_node_list.clear();
cached_inputs.clear();
cached_outputs.clear();
if (outputs[i].rank == 0) {
outputs[i].rank = 1;
outputs[i].max_sizes[0] = 1;
}
outputs[i].max_sizes[outputs[i].rank] = 0;
outputs[i].elementsize = GetEnumTypeSize(
static_cast<DataType>(op.output_type()[i]));
outputs[i].zero_offset = 0;
outputs[i].stepsize = 0;
}
if (!op_node_list.empty()) {
cached_inputs.push_back(inputs);
cached_outputs.push_back(outputs);
hexagon_nn_padding_type padding_type =
static_cast<hexagon_nn_padding_type>(op.padding());
hexagon_nn_op_node op_node;
op_node.node_id = node_id(op.node_id());
op_node.operation = op_id;
op_node.padding = padding_type;
op_node.inputs = cached_inputs.back().data();
op_node.inputsLen = inputs.size();
op_node.outputs = cached_outputs.back().data();
op_node.outputsLen = outputs.size();
op_node_list.push_back(op_node);
if (op_node_list.size() >= 125) {
MACE_CHECK(hexagon_nn_append_node_list(nn_id_, op_node_list.data(),
op_node_list.size()) == 0,
"append node error");
op_node_list.clear();
cached_inputs.clear();
cached_outputs.clear();
}
op_node_list.clear();
cached_inputs.clear();
cached_outputs.clear();
}
#if defined(MACE_USE_NNLIB_CAF) || defined(MACE_USE_NNLIB_OLD)
); // NOLINT
const_thread.join();
op_thread.join();
#endif
if (!op_node_list.empty()) {
MACE_CHECK(hexagon_nn_append_node_list(nn_id_, op_node_list.data(),
op_node_list.size()) == 0,
"append node error");
}
op_node_list.clear();
cached_inputs.clear();
cached_outputs.clear();
// input info
num_inputs_ = 0;
......@@ -460,7 +434,7 @@ bool HexagonControlWrapper::ExecuteGraph(const Tensor &input_tensor,
bool HexagonControlWrapper::ExecuteGraphNew(
const std::vector<Tensor *> &input_tensors,
std::vector<Tensor *> *output_tensors) {
LOG(INFO) << "Execute graph new: " << nn_id_;
VLOG(2) << "Execute graph new: " << nn_id_;
uint32_t num_inputs = static_cast<uint32_t>(input_tensors.size());
uint32_t num_outputs = static_cast<uint32_t>(output_tensors->size());
MACE_ASSERT(num_inputs_ == num_inputs, "Wrong inputs num");
......
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_CORE_RUNTIME_HEXAGON_HEXAGON_DEVICE_H_
#define MACE_CORE_RUNTIME_HEXAGON_HEXAGON_DEVICE_H_
#include "mace/core/device.h"
namespace mace {
// Device object for the Hexagon DSP runtime.
//
// Inherits everything from CPUDevice and only overrides the reported device
// type, so code that holds a Device can tell a Hexagon engine apart from a
// plain CPU one.
class HexagonDevice : public CPUDevice {
 public:
  // Builds the CPUDevice base with fixed arguments: 0, AFFINITY_NONE and
  // false. NOTE(review): presumably these are thread count, CPU affinity
  // policy and the gemmlowp flag, in that order -- confirm against the
  // CPUDevice constructor.
  HexagonDevice() : CPUDevice(0, AFFINITY_NONE, false) {}

  // Reports HEXAGON as this device's type.
  DeviceType device_type() const override {
    return DeviceType::HEXAGON;
  }
};
} // namespace mace
#endif // MACE_CORE_RUNTIME_HEXAGON_HEXAGON_DEVICE_H_
......@@ -18,7 +18,6 @@ models:
- 1,1001
runtime: cpu+gpu
limit_opencl_kernel_time: 0
nnlib_graph_mode: 0
obfuscate: 0
winograd: 0
mobilenet_v2:
......@@ -36,7 +35,6 @@ models:
- 1,1001
runtime: cpu+gpu
limit_opencl_kernel_time: 0
nnlib_graph_mode: 0
obfuscate: 0
winograd: 0
mobilenet_v1_quant:
......@@ -56,7 +54,6 @@ models:
- 1,1001
runtime: cpu
limit_opencl_kernel_time: 0
nnlib_graph_mode: 0
obfuscate: 0
winograd: 0
quantize: 1
......@@ -77,7 +74,6 @@ models:
- 1,1001
runtime: cpu
limit_opencl_kernel_time: 0
nnlib_graph_mode: 0
obfuscate: 0
winograd: 0
quantize: 1
......@@ -34,6 +34,7 @@
#ifdef MACE_ENABLE_HEXAGON
#include "mace/core/runtime/hexagon/hexagon_control_wrapper.h"
#include "mace/core/runtime/hexagon/hexagon_device.h"
#endif // MACE_ENABLE_HEXAGON
namespace mace {
......@@ -387,7 +388,7 @@ MaceEngine::Impl::Impl(const MaceEngineConfig &config)
#endif
{
LOG(INFO) << "Creating MaceEngine, MACE version: " << MaceVersion();
if (device_type_ == DeviceType::CPU || device_type_ == DeviceType::HEXAGON) {
if (device_type_ == DeviceType::CPU) {
device_.reset(new CPUDevice(config.impl_->num_threads(),
config.impl_->cpu_affinity_policy(),
config.impl_->use_gemmlowp()));
......@@ -405,6 +406,12 @@ MaceEngine::Impl::Impl(const MaceEngineConfig &config)
config.impl_->use_gemmlowp()));
}
#endif
#ifdef MACE_ENABLE_HEXAGON
if (device_type_ == DeviceType::HEXAGON) {
device_.reset(new HexagonDevice());
}
#endif
MACE_CHECK_NOTNULL(device_);
}
MaceStatus MaceEngine::Impl::Init(
......@@ -443,6 +450,7 @@ MaceStatus MaceEngine::Impl::Init(
<< "' does not belong to model's outputs "
<< MakeString(MapKeys(output_info_map_));
}
ws_->CreateTensor(output_name, device_->allocator(), DT_FLOAT);
}
#ifdef MACE_ENABLE_HEXAGON
if (device_type_ == HEXAGON) {
......
......@@ -16,7 +16,6 @@ py_library(
"converter_tool/onnx_converter.py",
"converter_tool/shape_inference.py",
"converter_tool/tensorflow_converter.py",
"converter_tool/tf_dsp_converter.py",
"converter_tool/transformer.py",
"graph_util.py",
],
......
......@@ -45,14 +45,14 @@ data_format_map = {
def parse_data_type(data_type, device_type):
if device_type == cvt.DeviceType.CPU.value or\
if device_type == cvt.DeviceType.CPU.value or \
device_type == cvt.DeviceType.GPU.value:
if data_type == 'fp32_fp32':
return mace_pb2.DT_FLOAT
else:
return mace_pb2.DT_HALF
elif device_type == cvt.DeviceType.HEXAGON.value:
return mace_pb2.DT_UINT8
return mace_pb2.DT_FLOAT
else:
print("Invalid device type: " + device_type)
......@@ -167,45 +167,39 @@ def main(unused_args):
check_node.name = check_node_names[i]
check_node.shape = parse_int_array_from_str(check_node_shapes[i])
option.add_check_node(check_node)
else:
option.check_nodes = option.output_nodes
option.build()
print("Transform model to one that can better run on device")
if FLAGS.runtime == 'dsp' and not option.quantize:
mace_check(FLAGS.platform == 'tensorflow',
'DSP only supports tensorflow')
from mace.python.tools.converter_tool import tf_dsp_converter
converter = tf_dsp_converter.TensorflowDspConverter(
if FLAGS.platform == 'tensorflow':
from mace.python.tools.converter_tool import tensorflow_converter
converter = tensorflow_converter.TensorflowConverter(
option, FLAGS.model_file)
output_graph_def = converter.run()
elif FLAGS.platform == 'caffe':
from mace.python.tools.converter_tool import caffe_converter
converter = caffe_converter.CaffeConverter(option,
FLAGS.model_file,
FLAGS.weight_file)
elif FLAGS.platform == 'onnx':
from mace.python.tools.converter_tool import onnx_converter
converter = onnx_converter.OnnxConverter(option, FLAGS.model_file)
else:
if FLAGS.platform == 'tensorflow':
from mace.python.tools.converter_tool import tensorflow_converter
converter = tensorflow_converter.TensorflowConverter(
option, FLAGS.model_file)
elif FLAGS.platform == 'caffe':
from mace.python.tools.converter_tool import caffe_converter
converter = caffe_converter.CaffeConverter(option,
FLAGS.model_file,
FLAGS.weight_file)
elif FLAGS.platform == 'onnx':
from mace.python.tools.converter_tool import onnx_converter
converter = onnx_converter.OnnxConverter(option, FLAGS.model_file)
else:
six.print_("Mace do not support platorm %s yet." % FLAGS.platform,
file=sys.stderr)
exit(1)
six.print_("Mace do not support platorm %s yet." % FLAGS.platform,
file=sys.stderr)
exit(1)
output_graph_def = converter.run()
mace_transformer = transformer.Transformer(
option, output_graph_def)
output_graph_def, quantize_activation_info = mace_transformer.run()
if FLAGS.runtime == 'dsp':
from mace.python.tools.converter_tool import hexagon_converter
converter = hexagon_converter.HexagonConverter(
option, output_graph_def, quantize_activation_info)
output_graph_def = converter.run()
mace_transformer = transformer.Transformer(
option, output_graph_def)
output_graph_def, quantize_activation_info = mace_transformer.run()
if FLAGS.runtime == 'dsp':
from mace.python.tools.converter_tool import hexagon_converter
converter = hexagon_converter.HexagonConverter(
option, output_graph_def, quantize_activation_info)
output_graph_def = converter.run()
model_saver.save_model(
option, output_graph_def, model_checksum, weight_checksum,
......
......@@ -372,7 +372,7 @@ class ConverterOption(object):
@input_nodes.setter
def input_nodes(self, input_nodes):
for node in input_nodes:
for node in input_nodes.values():
self._input_nodes[node.name] = node
def add_input_node(self, input_node):
......@@ -380,7 +380,7 @@ class ConverterOption(object):
@output_nodes.setter
def output_nodes(self, output_nodes):
for node in output_nodes:
for node in output_nodes.values():
self.output_nodes[node.name] = node
def add_output_node(self, output_node):
......@@ -388,7 +388,7 @@ class ConverterOption(object):
@check_nodes.setter
def check_nodes(self, check_nodes):
for node in check_nodes:
for node in check_nodes.values():
self.check_nodes[node.name] = node
def add_check_node(self, check_node):
......
......@@ -104,7 +104,6 @@ class HexagonConverter(base_converter.ConverterInterface):
output_name = self._option.output_nodes.values()[0].name
else:
output_name = self._option.check_nodes.values()[0].name
output_name = MaceKeyword.mace_output_node_name + '_' + output_name
output_name = normalize_name(output_name)
self._model = graph_util.sort_mace_graph(self._model, output_name)
......@@ -311,9 +310,8 @@ class HexagonConverter(base_converter.ConverterInterface):
return tensor.name
def add_input_output_node(self):
input_node = self._option.input_nodes.values()[0]
for op in self._model.op:
if op.name == input_node.name:
if op.name.startswith(MaceKeyword.mace_input_node_name):
del op.input[0]
break
......@@ -324,8 +322,7 @@ class HexagonConverter(base_converter.ConverterInterface):
output_name = self._option.check_nodes.values()[0].name
output_name = normalize_name(output_name)
for op in self._model.op:
if op.name.startswith(MaceKeyword.mace_output_node_name) \
and op.name.find(output_name) != -1:
if op.name == output_name:
output_node = op
break
mace_check(output_node is not None,
......@@ -348,8 +345,6 @@ class HexagonConverter(base_converter.ConverterInterface):
node_id_counter += 1
node_id_map[op.name] = op.node_id
for ipt in op.input:
if ipt.startswith(MaceKeyword.mace_input_node_name):
ipt = ipt[len(MaceKeyword.mace_input_node_name + '_'):]
op_name, port = get_op_and_port_from_tensor(ipt)
node_id = node_id_map[op_name]
node_input = op.node_input.add()
......
......@@ -119,8 +119,7 @@ class Transformer(base_converter.ConverterInterface):
changed = transformer()
if not changed:
break
self.add_check_nodes()
self.delete_after_check_nodes()
return self._model, self._quantize_activation_info
def filter_format(self):
......@@ -275,7 +274,8 @@ class Transformer(base_converter.ConverterInterface):
input_info.dims.extend(input_node.shape)
input_info.data_type = mace_pb2.DT_FLOAT
for output_node in self._option.output_nodes.values():
output_nodes = self._option.check_nodes.values()
for output_node in output_nodes:
output_info = net.output_info.add()
output_info.name = output_node.name
output_info.data_format = output_node.data_format.value
......@@ -1336,7 +1336,8 @@ class Transformer(base_converter.ConverterInterface):
+ '_' + input_node.name
input_name_map[input_node.name] = new_input_name
for output_node in self._option.output_nodes.values():
output_nodes = self._option.check_nodes.values()
for output_node in output_nodes:
new_output_name = MaceKeyword.mace_output_node_name \
+ '_' + output_node.name
output_name_map[output_node.name] = new_output_name
......@@ -1347,7 +1348,12 @@ class Transformer(base_converter.ConverterInterface):
op.input[i] = input_name_map[op.input[i]]
for i in range(len(op.output)):
if op.output[i] in output_name_map:
op.output[i] = output_name_map[op.output[i]]
op.name = MaceKeyword.mace_output_node_name \
+ '_' + op.name
new_output_name = output_name_map[op.output[i]]
self._quantize_activation_info[new_output_name] = \
self._quantize_activation_info[op.output[i]]
op.output[i] = new_output_name
data_type_arg = ConverterUtil.get_arg(
op, MaceKeyword.mace_op_data_type_str)
......@@ -1368,7 +1374,8 @@ class Transformer(base_converter.ConverterInterface):
for input_node in self._option.input_nodes.values():
op_def = self._model.op.add()
op_def.name = self.normalize_op_name(input_node.name)
op_def.name = \
self.normalize_op_name(input_name_map[input_node.name])
op_def.type = MaceOp.Quantize.name
op_def.input.extend([input_node.name])
op_def.output.extend([input_name_map[input_node.name]])
......@@ -1378,10 +1385,9 @@ class Transformer(base_converter.ConverterInterface):
ConverterUtil.add_data_type_arg(op_def, mace_pb2.DT_UINT8)
ConverterUtil.add_data_format_arg(op_def, DataFormat.NHWC)
for output_node in self._option.output_nodes.values():
for output_node in output_nodes:
op_def = self._model.op.add()
op_def.name = self.normalize_op_name(
output_name_map[output_node.name])
op_def.name = self.normalize_op_name(output_node.name)
op_def.type = MaceOp.Dequantize.name
op_def.input.extend([output_name_map[output_node.name]])
op_def.output.extend([output_node.name])
......@@ -1690,34 +1696,17 @@ class Transformer(base_converter.ConverterInterface):
arg.i = mace_pb2.GPU_IMAGE if self._option.cl_mem_type == "image"\
else mace_pb2.GPU_BUFFER
def add_check_nodes(self):
if self._option.check_nodes:
def delete_after_check_nodes(self):
if self._option.check_nodes != self._option.output_nodes:
mace_check(len(self._option.check_nodes) == 1,
"Only support one check node now.")
check_node = None
for i in six.moves.range(len(self._model.op)):
if self._model.op[i].name in self._option.check_nodes:
if self._model.op[i].output[0] in self._option.check_nodes:
check_node = self._model.op[i]
del self._model.op[i+1:]
break
mace_check(check_node is not None, "check node not found.")
output_name = \
MaceKeyword.mace_output_node_name + '_' + check_node.name
op_def = self._model.op.add()
op_def.name = self.normalize_op_name(output_name)
op_def.type = MaceOp.Dequantize.name
op_def.input.extend([check_node.output[0]])
op_def.output.extend([output_name])
output_shape = op_def.output_shape.add()
output_shape.dims.extend(check_node.output_shape[0].dims)
ConverterUtil.add_data_type_arg(op_def, mace_pb2.DT_UINT8)
op_def.output_type.extend([mace_pb2.DT_FLOAT])
del self._model.output_info[:]
output_info = self._model.output_info.add()
output_info.name = check_node.name
output_info.dims.extend(check_node.output_shape[0].dims)
output_info.data_type = mace_pb2.DT_FLOAT
def transform_caffe_reshape_and_flatten(self):
net = self._model
......
......@@ -36,197 +36,6 @@
#ifndef THIRD_PARTY_NNLIB_HEXAGON_NN_H_
#define THIRD_PARTY_NNLIB_HEXAGON_NN_H_
#ifdef MACE_USE_NNLIB_OLD
#ifndef __QAIC_HEADER
#define __QAIC_HEADER(ff) ff
#endif // __QAIC_HEADER
#ifndef __QAIC_HEADER_EXPORT
#define __QAIC_HEADER_EXPORT
#endif // __QAIC_HEADER_EXPORT
#ifndef __QAIC_HEADER_ATTRIBUTE
#define __QAIC_HEADER_ATTRIBUTE
#endif // __QAIC_HEADER_ATTRIBUTE
#ifndef __QAIC_IMPL
#define __QAIC_IMPL(ff) ff
#endif // __QAIC_IMPL
#ifndef __QAIC_IMPL_EXPORT
#define __QAIC_IMPL_EXPORT
#endif // __QAIC_IMPL_EXPORT
#ifndef __QAIC_IMPL_ATTRIBUTE
#define __QAIC_IMPL_ATTRIBUTE
#endif // __QAIC_IMPL_ATTRIBUTE
#ifdef __cplusplus
extern "C" {
#endif
#if !defined(__QAIC_STRING1_OBJECT_DEFINED__) && !defined(__STRING1_OBJECT__)
#define __QAIC_STRING1_OBJECT_DEFINED__
#define __STRING1_OBJECT__
typedef struct _cstring1_s {
char *data;
int dataLen;
} _cstring1_t;
#endif /* __QAIC_STRING1_OBJECT_DEFINED__ */
typedef struct hexagon_nn_input hexagon_nn_input;
struct hexagon_nn_input {
unsigned int src_id;
unsigned int output_idx;
};
typedef struct hexagon_nn_output hexagon_nn_output;
struct hexagon_nn_output {
unsigned int max_size;
unsigned int unused;
};
typedef struct hexagon_nn_perfinfo hexagon_nn_perfinfo;
struct hexagon_nn_perfinfo {
unsigned int node_id;
unsigned int node_type;
unsigned int executions;
unsigned int unused;
unsigned int counter_lo;
unsigned int counter_hi;
};
typedef int hexagon_nn_nn_id;
enum hexagon_nn_padding_type {
NN_PAD_NA,
NN_PAD_SAME,
NN_PAD_VALID,
NN_PAD_MIRROR_REFLECT,
NN_PAD_MIRROR_SYMMETRIC,
NN_PAD_SAME_CAFFE,
_32BIT_PLACEHOLDER_hexagon_nn_padding_type = 0x7fffffff
};
typedef enum hexagon_nn_padding_type hexagon_nn_padding_type;
typedef struct hexagon_nn_tensordef hexagon_nn_tensordef;
struct hexagon_nn_tensordef {
unsigned int batches;
unsigned int height;
unsigned int width;
unsigned int depth;
unsigned char *data;
int dataLen;
unsigned int data_valid_len;
unsigned int unused;
};
typedef struct hexagon_nn_op_node hexagon_nn_op_node;
struct hexagon_nn_op_node {
unsigned int node_id;
unsigned int operation;
hexagon_nn_padding_type padding;
hexagon_nn_input *inputs;
int inputsLen;
hexagon_nn_output *outputs;
int outputsLen;
};
typedef struct hexagon_nn_const_node hexagon_nn_const_node;
struct hexagon_nn_const_node {
unsigned int node_id;
hexagon_nn_tensordef tensor;
};
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_config)(void)
__QAIC_HEADER_ATTRIBUTE;
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_init)(void)
__QAIC_HEADER_ATTRIBUTE;
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_set_debug_level)(
hexagon_nn_nn_id id, int level) __QAIC_HEADER_ATTRIBUTE;
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_set_graph_mode)(
hexagon_nn_nn_id id, int mode) __QAIC_HEADER_ATTRIBUTE;
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_snpprint)(hexagon_nn_nn_id id,
unsigned char *buf,
int bufLen)
__QAIC_HEADER_ATTRIBUTE;
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_getlog)(hexagon_nn_nn_id id,
unsigned char *buf,
int bufLen)
__QAIC_HEADER_ATTRIBUTE;
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_node)(
hexagon_nn_nn_id id,
unsigned int node_id,
unsigned int operation,
hexagon_nn_padding_type padding,
const hexagon_nn_input *inputs,
int inputsLen,
const hexagon_nn_output *outputs,
int outputsLen) __QAIC_HEADER_ATTRIBUTE;
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_node_list)(
hexagon_nn_nn_id id,
const hexagon_nn_op_node *ops,
int opsLen) __QAIC_HEADER_ATTRIBUTE;
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_const_node)(
hexagon_nn_nn_id id,
unsigned int node_id,
unsigned int batches,
unsigned int height,
unsigned int width,
unsigned int depth,
const unsigned char *data,
int dataLen) __QAIC_HEADER_ATTRIBUTE;
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_const_node_list)(
hexagon_nn_nn_id id,
const hexagon_nn_const_node *consts,
int constsLen) __QAIC_HEADER_ATTRIBUTE;
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_prepare)(hexagon_nn_nn_id id)
__QAIC_HEADER_ATTRIBUTE;
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_execute)(
hexagon_nn_nn_id id,
unsigned int batches_in,
unsigned int height_in,
unsigned int width_in,
unsigned int depth_in,
const unsigned char *data_in,
int data_inLen,
unsigned int *batches_out,
unsigned int *height_out,
unsigned int *width_out,
unsigned int *depth_out,
unsigned char *data_out,
int data_outLen,
unsigned int *data_len_out) __QAIC_HEADER_ATTRIBUTE;
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_teardown)(hexagon_nn_nn_id id)
__QAIC_HEADER_ATTRIBUTE;
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_set_powersave_level)(
unsigned int level) __QAIC_HEADER_ATTRIBUTE;
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_get_perfinfo)(
hexagon_nn_nn_id id,
hexagon_nn_perfinfo *info_out,
int info_outLen,
unsigned int *n_items) __QAIC_HEADER_ATTRIBUTE;
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_reset_perfinfo)(
hexagon_nn_nn_id id, unsigned int event) __QAIC_HEADER_ATTRIBUTE;
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_last_execution_cycles)(
hexagon_nn_nn_id id,
unsigned int *cycles_lo,
unsigned int *cycles_hi) __QAIC_HEADER_ATTRIBUTE;
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_version)(int *ver)
__QAIC_HEADER_ATTRIBUTE;
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_op_name_to_id)(
const char *name, unsigned int *node_id) __QAIC_HEADER_ATTRIBUTE;
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_op_id_to_name)(
unsigned int node_id, char *name, int nameLen) __QAIC_HEADER_ATTRIBUTE;
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_disable_dcvs)(void)
__QAIC_HEADER_ATTRIBUTE;
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_GetHexagonBinaryVersion)(
int *ver) __QAIC_HEADER_ATTRIBUTE;
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_PrintLog)(
const unsigned char *buf, int bufLen) __QAIC_HEADER_ATTRIBUTE;
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_execute_new)(
hexagon_nn_nn_id id,
const hexagon_nn_tensordef *inputs,
int inputsLen,
hexagon_nn_tensordef *outputs,
int outputsLen) __QAIC_HEADER_ATTRIBUTE;
#ifdef __cplusplus
}
#endif
#elif defined(MACE_USE_NNLIB_2_1) // nnlib version
#ifndef __QAIC_HEADER
#define __QAIC_HEADER(ff) ff
#endif //__QAIC_HEADER
......@@ -370,200 +179,4 @@ __QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_execute_new)(hexagon_nn_nn_id
}
#endif
#else // nnlib version : MACE_USE_NNLIB_CAF
/* Fallback definitions for the QAIC decoration macros used by the
 * declarations in this section. Each macro is only defined when the includer
 * has not already provided one: the name-wrapping macros pass the function
 * name through unchanged, and the EXPORT / ATTRIBUTE macros expand to
 * nothing. */
#if !defined(__QAIC_HEADER)
#define __QAIC_HEADER(ff) ff
#endif  // !defined(__QAIC_HEADER)
#if !defined(__QAIC_IMPL)
#define __QAIC_IMPL(ff) ff
#endif  // !defined(__QAIC_IMPL)
#if !defined(__QAIC_HEADER_EXPORT)
#define __QAIC_HEADER_EXPORT
#endif  // !defined(__QAIC_HEADER_EXPORT)
#if !defined(__QAIC_IMPL_EXPORT)
#define __QAIC_IMPL_EXPORT
#endif  // !defined(__QAIC_IMPL_EXPORT)
#if !defined(__QAIC_HEADER_ATTRIBUTE)
#define __QAIC_HEADER_ATTRIBUTE
#endif  // !defined(__QAIC_HEADER_ATTRIBUTE)
#if !defined(__QAIC_IMPL_ATTRIBUTE)
#define __QAIC_IMPL_ATTRIBUTE
#endif  // !defined(__QAIC_IMPL_ATTRIBUTE)
#ifdef __cplusplus
extern "C" {
#endif
/* Pointer-plus-length string record. The two guard macros ensure the type is
 * declared only once even if several headers carry this same snippet; both
 * guards are set together when the type is emitted here. */
#if !defined(__STRING1_OBJECT__) && !defined(__QAIC_STRING1_OBJECT_DEFINED__)
#define __STRING1_OBJECT__
#define __QAIC_STRING1_OBJECT_DEFINED__
struct _cstring1_s {
  char *data;  /* character buffer */
  int dataLen; /* presumably the length associated with `data` -- confirm */
};
typedef struct _cstring1_s _cstring1_t;
#endif /* string1 object guards */
/* One input reference of a graph node, as passed to hexagon_nn_append_node.
 * Field meanings are inferred from the names -- confirm against nnlib docs. */
typedef struct hexagon_nn_input {
  unsigned int src_id;     /* presumably the id of the producing node */
  unsigned int output_idx; /* presumably which output of that node is used */
} hexagon_nn_input;
/* Description of one output of a graph node, as passed to
 * hexagon_nn_append_node. Field meanings are inferred from the names --
 * confirm against nnlib docs. */
typedef struct hexagon_nn_output {
  unsigned int rank;         /* presumably the number of used dimensions */
  unsigned int max_sizes[8]; /* fixed array of 8 per-dimension sizes */
  unsigned int elementsize;  /* presumably bytes per element */
  int zero_offset;           /* presumably a quantization zero point */
  float stepsize;            /* presumably a quantization step */
} hexagon_nn_output;
/* One performance record, filled in by hexagon_nn_get_perfinfo. Field
 * meanings are inferred from the names -- confirm against nnlib docs. */
typedef struct hexagon_nn_perfinfo {
  unsigned int node_id;
  unsigned int node_type;
  unsigned int executions;
  unsigned int unused;     /* named as unused; keeps the record layout */
  unsigned int counter_lo; /* presumably low half of a 64-bit counter */
  unsigned int counter_hi; /* presumably high half of a 64-bit counter */
} hexagon_nn_perfinfo;
/* Integer handle type used as the `id` parameter of the functions declared in
 * this header; hexagon_nn_init receives a pointer to one. */
typedef int hexagon_nn_nn_id;
/* Padding selector carried by graph nodes (see hexagon_nn_op_node and
 * hexagon_nn_append_node). The numeric values are spelled out because they
 * are what the enumerators evaluate to by declaration order. */
typedef enum hexagon_nn_padding_type {
  NN_PAD_NA = 0,
  NN_PAD_SAME = 1,
  NN_PAD_VALID = 2,
  NN_PAD_MIRROR_REFLECT = 3,
  NN_PAD_MIRROR_SYMMETRIC = 4,
  NN_PAD_SAME_CAFFE = 5,
  /* Not a real padding mode; by its name it exists to make the enum span
   * the 32-bit value range (TODO confirm). */
  _32BIT_PLACEHOLDER_hexagon_nn_padding_type = 0x7fffffff
} hexagon_nn_padding_type;
/* Tensor descriptor used by hexagon_nn_execute_new and embedded in
 * hexagon_nn_const_node: four dimension fields followed by a byte buffer and
 * its bookkeeping. Field meanings are inferred from the names -- confirm
 * against nnlib docs. */
typedef struct hexagon_nn_tensordef {
  unsigned int batches;
  unsigned int height;
  unsigned int width;
  unsigned int depth;
  unsigned char *data;         /* raw byte buffer */
  int dataLen;                 /* presumably the capacity of `data` */
  unsigned int data_valid_len; /* presumably the bytes of `data` in use */
  unsigned int unused;         /* named as unused; keeps the record layout */
} hexagon_nn_tensordef;
/* One operator node in list form, consumed by hexagon_nn_append_node_list.
 * It carries the same pieces that hexagon_nn_append_node takes as separate
 * arguments: node id, operation, padding, and the input/output arrays with
 * their element counts. */
typedef struct hexagon_nn_op_node {
  unsigned int node_id;
  unsigned int operation;
  hexagon_nn_padding_type padding;
  hexagon_nn_input *inputs;   /* array of `inputsLen` entries */
  int inputsLen;
  hexagon_nn_output *outputs; /* array of `outputsLen` entries */
  int outputsLen;
} hexagon_nn_op_node;
/* One constant node in list form, consumed by
 * hexagon_nn_append_const_node_list: a node id plus the tensor that holds
 * the constant's dimensions and data. */
typedef struct hexagon_nn_const_node {
  unsigned int node_id;
  hexagon_nn_tensordef tensor;
} hexagon_nn_const_node;
/* Setup and diagnostics entry points. Every function returns an int; the
 * meaning of the value is not visible in this header (presumably 0 on
 * success -- confirm against the nnlib docs). Parameter roles described below
 * are inferred from their names only. */

/* No parameters; by its name, performs library-level configuration. */
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_config)(void)
__QAIC_HEADER_ATTRIBUTE;
/* Writes a graph handle through `g`; that handle is the `id` argument of the
 * other functions. */
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_init)(hexagon_nn_nn_id *g)
__QAIC_HEADER_ATTRIBUTE;
/* Takes a graph handle and an integer `level`; valid levels are not declared
 * in this header. */
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_set_debug_level)(
hexagon_nn_nn_id id, int level) __QAIC_HEADER_ATTRIBUTE;
/* Takes a graph handle and an integer `mode`; valid modes are not declared in
 * this header. */
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_set_graph_mode)(
hexagon_nn_nn_id id, int mode) __QAIC_HEADER_ATTRIBUTE;
/* Takes a graph handle and a writable byte buffer with its length `bufLen`
 * (presumably filled with a printed form of the graph -- confirm). */
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_snpprint)(hexagon_nn_nn_id id,
unsigned char *buf,
int bufLen)
__QAIC_HEADER_ATTRIBUTE;
/* Takes a graph handle and a writable byte buffer with its length `bufLen`
 * (presumably filled with accumulated log text -- confirm). */
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_getlog)(hexagon_nn_nn_id id,
unsigned char *buf,
int bufLen)
__QAIC_HEADER_ATTRIBUTE;
/* Graph-construction entry points. Every function returns an int; the
 * meaning of the value is not visible in this header (presumably 0 on
 * success -- confirm against the nnlib docs). */

/* Adds a single operator node described by separate arguments: node id,
 * operation, padding, and read-only input/output arrays with their element
 * counts. */
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_node)(
hexagon_nn_nn_id id,
unsigned int node_id,
unsigned int operation,
hexagon_nn_padding_type padding,
const hexagon_nn_input *inputs,
int inputsLen,
const hexagon_nn_output *outputs,
int outputsLen) __QAIC_HEADER_ATTRIBUTE;
/* List form: takes a read-only array of `opsLen` hexagon_nn_op_node
 * records. */
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_node_list)(
hexagon_nn_nn_id id,
const hexagon_nn_op_node *ops,
int opsLen) __QAIC_HEADER_ATTRIBUTE;
/* Adds a single constant node described by separate arguments: node id, four
 * dimension values, and a read-only byte buffer with its length `dataLen`. */
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_const_node)(
hexagon_nn_nn_id id,
unsigned int node_id,
unsigned int batches,
unsigned int height,
unsigned int width,
unsigned int depth,
const unsigned char *data,
int dataLen) __QAIC_HEADER_ATTRIBUTE;
/* List form: takes a read-only array of `constsLen` hexagon_nn_const_node
 * records. */
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_append_const_node_list)(
hexagon_nn_nn_id id,
const hexagon_nn_const_node *consts,
int constsLen) __QAIC_HEADER_ATTRIBUTE;
/* Graph lifecycle and execution entry points. Every function returns an int;
 * the meaning of the value is not visible in this header (presumably 0 on
 * success -- confirm against the nnlib docs). */

/* Takes a graph handle; by its name, finalizes the graph before execution
 * (TODO confirm exact semantics). */
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_prepare)(hexagon_nn_nn_id id)
__QAIC_HEADER_ATTRIBUTE;
/* Single-tensor execute: the input is given as four dimension values plus a
 * read-only byte buffer and its length; the output dimensions are written
 * through the four `*_out` pointers, output bytes go into `data_out`
 * (a buffer with length `data_outLen`), and a length is written through
 * `data_len_out`. */
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_execute)(
hexagon_nn_nn_id id,
unsigned int batches_in,
unsigned int height_in,
unsigned int width_in,
unsigned int depth_in,
const unsigned char *data_in,
int data_inLen,
unsigned int *batches_out,
unsigned int *height_out,
unsigned int *width_out,
unsigned int *depth_out,
unsigned char *data_out,
int data_outLen,
unsigned int *data_len_out) __QAIC_HEADER_ATTRIBUTE;
/* Takes a graph handle; by its name, releases the graph (TODO confirm). */
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_teardown)(hexagon_nn_nn_id id)
__QAIC_HEADER_ATTRIBUTE;
/* Takes an unsigned `level` and no graph handle; valid levels are not
 * declared in this header. */
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_set_powersave_level)(
unsigned int level) __QAIC_HEADER_ATTRIBUTE;
/* Profiling, version, op-name and logging entry points. Unless noted, each
 * function returns an int whose meaning is not visible in this header
 * (presumably 0 on success -- confirm against the nnlib docs). Parameter
 * roles described below are inferred from their names only. */

/* Takes a graph handle, an output array of hexagon_nn_perfinfo with its
 * length, and an out-pointer `n_items` (presumably the number of records
 * written -- confirm). */
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_get_perfinfo)(
hexagon_nn_nn_id id,
hexagon_nn_perfinfo *info_out,
int info_outLen,
unsigned int *n_items) __QAIC_HEADER_ATTRIBUTE;
/* Takes a graph handle and an `event` selector; the valid event values are
 * not declared in this header. */
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_reset_perfinfo)(
hexagon_nn_nn_id id, unsigned int event) __QAIC_HEADER_ATTRIBUTE;
/* Writes two unsigned ints through `cycles_lo` / `cycles_hi` (presumably the
 * low and high halves of a 64-bit cycle count -- confirm). */
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_last_execution_cycles)(
hexagon_nn_nn_id id,
unsigned int *cycles_lo,
unsigned int *cycles_hi) __QAIC_HEADER_ATTRIBUTE;
/* Writes a version number through `ver`. */
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_version)(int *ver)
__QAIC_HEADER_ATTRIBUTE;
/* Takes an op name string and writes an unsigned id through `node_id`. */
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_op_name_to_id)(
const char *name, unsigned int *node_id) __QAIC_HEADER_ATTRIBUTE;
/* Reverse direction: takes an id and a caller-provided `name` buffer with its
 * length `nameLen`. */
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_op_id_to_name)(
unsigned int node_id, char *name, int nameLen) __QAIC_HEADER_ATTRIBUTE;
/* No parameters; by its name, turns off DCVS (TODO confirm exact effect). */
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_disable_dcvs)(void)
__QAIC_HEADER_ATTRIBUTE;
/* Writes a version number through `ver`; by its name this is the version of
 * the Hexagon-side binary rather than of the interface. */
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_GetHexagonBinaryVersion)(
int *ver) __QAIC_HEADER_ATTRIBUTE;
/* Takes a read-only byte buffer and its length `bufLen`. */
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_PrintLog)(
const unsigned char *buf, int bufLen) __QAIC_HEADER_ATTRIBUTE;
/* Tensor-array form of execute: `inputs` is read-only, `outputs` is writable,
 * each accompanied by its element count. */
__QAIC_HEADER_EXPORT int __QAIC_HEADER(hexagon_nn_execute_new)(
hexagon_nn_nn_id id,
const hexagon_nn_tensordef *inputs,
int inputsLen,
hexagon_nn_tensordef *outputs,
int outputsLen) __QAIC_HEADER_ATTRIBUTE;
/* Returns an unsigned int directly instead of an int status. What the offset
 * is relative to is not visible in this header. */
__QAIC_HEADER_EXPORT unsigned int __QAIC_HEADER(hexagon_nn_get_dsp_offset)(void)
__QAIC_HEADER_ATTRIBUTE;
#ifdef __cplusplus
}
#endif
#endif // nnlib version
#endif // THIRD_PARTY_NNLIB_HEXAGON_NN_H_
......@@ -79,147 +79,6 @@
*/
// NOLINT(build/header_guard)
#ifdef MACE_USE_NNLIB_OLD
// Operator list for the MACE_USE_NNLIB_OLD build variant, written as an
// X-macro: every entry is a DEF_OP(...) invocation. DEF_OP itself is not
// defined in this section, so it is presumably supplied by the including
// file before this list is expanded.
// NOTE(review): the position of an entry likely determines the numeric op id
// seen by consumers -- do not reorder or insert in the middle without
// confirming against the matching nnlib build.
DEF_OP(INPUT)
DEF_OP(OUTPUT)
DEF_OP(Nop)
DEF_OP(Const)
DEF_OP(Check)
DEF_OP(Close_f)
DEF_OP(Close_quint8)
DEF_OP(Close_q_quint8)
DEF_OP(Close_int32)
DEF_OP(Close_qint32)
DEF_OP(PPrint_8)
DEF_OP(PPrint_32)
DEF_OP(PPrint_f)
DEF_OP(PreFree)
DEF_OP(Flatten)
// DEF_OP_WREF(NAME) emits two entries: NAME and NAME_ref. It is defined here
// only when the includer has not provided its own; __SELF_DEF_OP_WREF records
// that this file owns the definition so it can be removed again at the end of
// the list.
#ifndef DEF_OP_WREF
#define DEF_OP_WREF(NAME) DEF_OP(NAME) DEF_OP(NAME##_ref)
#define __SELF_DEF_OP_WREF
#endif
DEF_OP_WREF(QuantizedConv2d_8x8to32)
DEF_OP_WREF(QuantizedMatMul_8x8to32)
DEF_OP_WREF(QuantizeDownAndShrinkRange_32to8)
DEF_OP_WREF(QuantizedRelu_8)
DEF_OP_WREF(QuantizedReluX_8)
DEF_OP_WREF(QuantizedMaxPool_8)
DEF_OP_WREF(QuantizedAvgPool_8)
DEF_OP_WREF(QuantizedConcat_8)
DEF_OP_WREF(QuantizedBiasAdd_8p8to32)
DEF_OP_WREF(Min_f)
DEF_OP_WREF(Max_f)
DEF_OP_WREF(Quantize)
DEF_OP_WREF(Dequantize)
DEF_OP_WREF(Supernode_8x8p8to8)
DEF_OP(QuantizedFlatten)
DEF_OP(Softmax_f)
DEF_OP(Conv2d_f)
DEF_OP(MatMul_f)
DEF_OP(Relu_f)
DEF_OP(ReluX_f)
DEF_OP(AvgPool_f)
DEF_OP(MaxPool_f)
DEF_OP(Concat_f)
DEF_OP(BiasAdd_f)
DEF_OP(LRN_f)
DEF_OP(Variable)
DEF_OP(Assign)
DEF_OP(Reshape)
DEF_OP(QuantizedReshape)
DEF_OP(Tanh_f)
DEF_OP(Sigmoid_f)
DEF_OP(Slice_8)
DEF_OP(Slice_f)
DEF_OP(QuantizedSlice_8)
DEF_OP(Add_f)
DEF_OP(Mul_f)
DEF_OP(Minimum_f)
DEF_OP(Maximum_f)
DEF_OP_WREF(Requantize_32to8)
DEF_OP_WREF(RequantizationRange_32)
DEF_OP(Neg_f)
DEF_OP(Sub_f)
DEF_OP(AddN_f)
DEF_OP(Range_int32)
DEF_OP(Rank_int32)
DEF_OP(Transpose_int32)
DEF_OP(Transpose_f)
DEF_OP(InstanceNorm_f)
DEF_OP_WREF(QuantizedInstanceNorm_8)
DEF_OP(Sub_int32)
DEF_OP(Add_int32)
DEF_OP(Split_f)
DEF_OP(Dequantize_qint32_f)
DEF_OP(PRelu_f)
DEF_OP_WREF(QuantizedPRelu_8)
DEF_OP(Sum_f)
DEF_OP(Prod_f)
DEF_OP(Mul_int32)
DEF_OP(LogicalAnd_int32)
DEF_OP(LogicalOr_int32)
DEF_OP(LogicalXor_int32)
DEF_OP(Shape_int32)
DEF_OP(Pack_int32)
DEF_OP(MirrorPad_f)
DEF_OP(ResizeNearestNeighbor_f)
DEF_OP(StridedSlice_int32)
DEF_OP(StridedSlice_f)
DEF_OP(ExpandDims_int32)
DEF_OP(ExpandDims_f)
DEF_OP(LogSoftmax_f)
DEF_OP(Split_int32)
DEF_OP(QuantizedSplit_8)
DEF_OP(Deconv_f)
DEF_OP_WREF(QuantizedDeconv_8x8to32)
DEF_OP_WREF(QuantizedMul_8x8to32)
DEF_OP_WREF(QuantizedAdd_8p8to32)
DEF_OP_WREF(QuantizedSigmoid_8)
DEF_OP_WREF(QuantizedTanh_8)
DEF_OP_WREF(QuantizedSoftmax_8)
DEF_OP_WREF(QuantizedLRN_8)
DEF_OP_WREF(QuantizedSub_8p8to32)
DEF_OP_WREF(QuantizedMaximum_8)
DEF_OP_WREF(QuantizedMinimum_8)
DEF_OP(Pad_f)
DEF_OP(SpaceToBatchND_f)
DEF_OP(BatchToSpaceND_f)
DEF_OP(QuantizedSpaceToBatchND_8)
DEF_OP(QuantizedBatchToSpaceND_8)
DEF_OP(QuantizedPad_8)
DEF_OP(ResizeBilinear_f)
DEF_OP(QuantizedResizeBilinear_8)
DEF_OP(ConcatV2_f)
DEF_OP(ConcatV2_int32)
DEF_OP(Prod_int32)
DEF_OP(Slice_int32)
DEF_OP(QuantizedAdd_8p8to8)
DEF_OP_WREF(AutoQuantize)
DEF_OP_WREF(QuantizedDepthwiseConv2d_8x8to32)
DEF_OP(DepthwiseConv2d_f)
DEF_OP(QuantizedBiasAdd_8p8to8)
// Remove DEF_OP_WREF again, but only if it was defined by this file (an
// includer-provided definition is left untouched).
#ifdef __SELF_DEF_OP_WREF
#undef __SELF_DEF_OP_WREF
#undef DEF_OP_WREF
#endif
#elif defined(MACE_USE_NNLIB_2_1) // nnlib version
DEF_OP(INPUT)
DEF_OP(OUTPUT)
DEF_OP(Nop)
......@@ -441,214 +300,3 @@ DEF_OP(QuantizedChannelShuffle_8)
#undef __SELF_DEF_OP_WREF
#undef DEF_OP_WREF
#endif
#else // nnlib version : MACE_USE_NNLIB_CAF
// Operator list for the MACE_USE_NNLIB_CAF build variant (the fallback branch
// of the nnlib version switch), written as an X-macro: every entry is a
// DEF_OP(...) invocation. DEF_OP itself is not defined in this section, so it
// is presumably supplied by the including file before this list is expanded.
// NOTE(review): the position of an entry likely determines the numeric op id
// seen by consumers -- do not reorder or insert in the middle without
// confirming against the matching nnlib build.
DEF_OP(INPUT)
DEF_OP(OUTPUT)
DEF_OP(Nop)
DEF_OP(Const)
DEF_OP(Check)
DEF_OP(Close_f)
DEF_OP(Close_quint8)
DEF_OP(Close_q_quint8)
DEF_OP(Close_int32)
DEF_OP(Close_qint32)
DEF_OP(PPrint_8)
DEF_OP(PPrint_32)
DEF_OP(PPrint_f)
DEF_OP(PreFree)
DEF_OP(Flatten)
// DEF_OP_WREF(NAME) emits two entries: NAME and NAME_ref. It is defined here
// only when the includer has not provided its own; __SELF_DEF_OP_WREF records
// that this file owns the definition so it can be removed again at the end of
// the list.
#ifndef DEF_OP_WREF
#define DEF_OP_WREF(NAME) DEF_OP(NAME) DEF_OP(NAME##_ref)
#define __SELF_DEF_OP_WREF
#endif
DEF_OP_WREF(QuantizedConv2d_8x8to32)
DEF_OP_WREF(QuantizedMatMul_8x8to32)
DEF_OP_WREF(QuantizeDownAndShrinkRange_32to8)
DEF_OP_WREF(QuantizedRelu_8)
DEF_OP_WREF(QuantizedReluX_8)
DEF_OP_WREF(QuantizedMaxPool_8)
DEF_OP_WREF(QuantizedAvgPool_8)
DEF_OP_WREF(QuantizedL2Pool_8)
DEF_OP_WREF(QuantizedConcat_8)
DEF_OP_WREF(QuantizedBiasAdd_8p8to32)
DEF_OP_WREF(Min_f)
DEF_OP_WREF(Max_f)
DEF_OP_WREF(Quantize)
DEF_OP_WREF(Dequantize)
DEF_OP_WREF(Supernode_8x8p8to8)
DEF_OP(QuantizedFlatten)
DEF_OP(Softmax_f)
DEF_OP(Conv2d_f)
DEF_OP(MatMul_f)
DEF_OP(Relu_f)
DEF_OP(ReluX_f)
DEF_OP(AvgPool_f)
DEF_OP(L2Pool_f)
DEF_OP(MaxPool_f)
DEF_OP(Concat_f)
DEF_OP(BiasAdd_f)
DEF_OP(LRN_f)
DEF_OP(Variable)
DEF_OP(Assign)
DEF_OP(Reshape)
DEF_OP(QuantizedReshape)
DEF_OP(Tanh_f)
DEF_OP(Sigmoid_f)
DEF_OP(Slice_8)
DEF_OP(Slice_f)
DEF_OP(QuantizedSlice_8)
DEF_OP(Add_f)
DEF_OP(Mul_f)
DEF_OP(Minimum_f)
DEF_OP(Maximum_f)
DEF_OP_WREF(Requantize_32to8)
DEF_OP_WREF(RequantizationRange_32)
DEF_OP(Neg_f)
DEF_OP(Sub_f)
DEF_OP(AddN_f)
DEF_OP(Range_int32)
DEF_OP(Rank_int32)
DEF_OP(Transpose_int32)
DEF_OP(Transpose_f)
DEF_OP(InstanceNorm_f)
DEF_OP_WREF(QuantizedInstanceNorm_8)
DEF_OP(Sub_int32)
DEF_OP(Add_int32)
DEF_OP(Split_f)
DEF_OP(Dequantize_qint32_f)
DEF_OP(PRelu_f)
DEF_OP_WREF(QuantizedPRelu_8)
DEF_OP(Sum_f)
DEF_OP(Prod_f)
DEF_OP(Mul_int32)
DEF_OP(LogicalAnd_int32)
DEF_OP(LogicalOr_int32)
DEF_OP(LogicalXor_int32)
DEF_OP(Shape_int32)
DEF_OP(Pack_int32)
DEF_OP(MirrorPad_f)
DEF_OP(ResizeNearestNeighbor_f)
DEF_OP(StridedSlice_int32)
DEF_OP(StridedSlice_f)
DEF_OP(ExpandDims_int32)
DEF_OP(ExpandDims_f)
DEF_OP(LogSoftmax_f)
DEF_OP(Split_int32)
DEF_OP(QuantizedSplit_8)
DEF_OP(Deconv_f)
DEF_OP_WREF(QuantizedDeconv_8x8to32)
DEF_OP_WREF(QuantizedMul_8x8to32)
DEF_OP_WREF(QuantizedAdd_8p8to32)
DEF_OP_WREF(QuantizedSigmoid_8)
DEF_OP_WREF(QuantizedTanh_8)
DEF_OP_WREF(QuantizedSoftmax_8)
DEF_OP_WREF(QuantizedLRN_8)
DEF_OP_WREF(Quantizedpad2d_frame_8p)
DEF_OP_WREF(QuantizedSub_8p8to32)
DEF_OP_WREF(QuantizedMaximum_8)
DEF_OP_WREF(QuantizedMinimum_8)
DEF_OP(Pad_f)
DEF_OP(SpaceToBatchND_f)
DEF_OP(BatchToSpaceND_f)
DEF_OP(QuantizedPad_8)
DEF_OP(ResizeBilinear_f)
DEF_OP(ConcatV2_f)
DEF_OP(ConcatV2_int32)
DEF_OP(Prod_int32)
DEF_OP(Slice_int32)
DEF_OP(QuantizedAdd_8p8to8)
DEF_OP(QuantizedResizeBilinear_8)
DEF_OP(Supernode_8x8p8to8_d32)
DEF_OP(Convert_to_d32)
DEF_OP(Convert_from_d32)
DEF_OP_WREF(QuantizedMaxPool_8_d32)
DEF_OP_WREF(QuantizedConcat_8_d32)
DEF_OP_WREF(QuantizedAvgPool_8_d32)
DEF_OP(Sink)
DEF_OP_WREF(QuantizedPRelu_8_d32)
DEF_OP_WREF(AutoQuantize)
DEF_OP_WREF(QuantizedDepthwiseConv2d_8x8to32)
DEF_OP_WREF(DepthwiseConv2d_f)
DEF_OP(DepthwiseSupernode_8x8p8to8)
DEF_OP(DepthwiseSupernode_8x8p8to8_d32)
DEF_OP_WREF(QuantizedMul_8x8to8_d32)
DEF_OP(FullyConnected_u8)
// Compiled out: this entry (and its _ref variant) is not part of the list.
#if 0
DEF_OP_WREF(QuantizedFC_8x8p8to8)
#endif
DEF_OP_WREF(QuantizedAdd_8p8to8_d32)
DEF_OP_WREF(QuantizedClamp_8)
DEF_OP(Clamp_f)
DEF_OP(QuantizeForTest_d32)
DEF_OP(Close_d32)
DEF_OP_WREF(QuantizedSub_8p8to8_d32)
DEF_OP(InputSupernode_8x8p8to8_outd32)
DEF_OP(QuantizedLRN_8_d32)
DEF_OP_WREF(QuantizedBiasAdd_32p32to32)
DEF_OP_WREF(Quantize_int32)
DEF_OP(Supernode_8x8p32to8)
DEF_OP(DepthwiseSupernode_8x8p32to8)
DEF_OP(Supernode_8x8p32to8_d32)
DEF_OP(DepthwiseSupernode_8x8p32to8_d32)
DEF_OP(InputSupernode_8x8p32to8_outd32)
DEF_OP(PPrint_8_d32)
DEF_OP(PPrintWithPadding_8_d32)
DEF_OP_WREF(AutoQuantize_d32)
DEF_OP_WREF(QuantizedTanh_8_d32)
DEF_OP_WREF(QuantizedSigmoid_8_d32)
DEF_OP_WREF(QuantizedSoftmax_8_d32)
DEF_OP_WREF(QuantizedL2Pool_8_d32)
DEF_OP(Gather_f)
DEF_OP(Gather_int32)
DEF_OP(Gather_8)
DEF_OP(Table_f)
DEF_OP(Table_int32)
DEF_OP(Table_8)
DEF_OP(FillPadding_8_d32)
DEF_OP(QuantizedResizeBilinear_8_d32)
DEF_OP(QuantizeINPUT_f_to_8)
DEF_OP_WREF(DeconvBias_8x8to32)
DEF_OP(SpaceToBatchND_8)
DEF_OP(BatchToSpaceND_8)
DEF_OP(SpaceToDepth_f)
DEF_OP(DepthToSpace_f)
DEF_OP(SpaceToDepth_8)
DEF_OP(DepthToSpace_8)
// Remove DEF_OP_WREF again, but only if it was defined by this file (an
// includer-provided definition is left untouched).
#ifdef __SELF_DEF_OP_WREF
#undef __SELF_DEF_OP_WREF
#undef DEF_OP_WREF
#endif
#endif // nnlib version
......@@ -9,7 +9,6 @@ build --copt=-fPIC
build --copt=-D_GLIBCXX_USE_C99_MATH_TR1
build --copt=-DMACE_OBFUSCATE_LITERALS
build --copt=-DGEMMLOWP_USE_OPENMP
build --copt=-DMACE_USE_NNLIB_CAF
# Usage example: bazel build --config symbol_hidden
build:symbol_hidden --copt=-fvisibility=hidden
......
......@@ -445,7 +445,8 @@ def format_model_config(flags):
threshold_dict = {
DeviceType.CPU: ValidationThreshold.cpu_threshold,
DeviceType.GPU: ValidationThreshold.gpu_threshold,
DeviceType.HEXAGON: ValidationThreshold.hexagon_threshold,
DeviceType.HEXAGON + "_QUANTIZE":
ValidationThreshold.hexagon_threshold,
DeviceType.CPU + "_QUANTIZE":
ValidationThreshold.cpu_quantize_threshold,
}
......
......@@ -515,6 +515,12 @@ class DeviceWrapper:
for runtime in runtime_list:
device_type = parse_device_type(runtime)
# run for specified soc
if not subgraphs[0][YAMLKeyword.check_tensors]:
output_nodes = subgraphs[0][YAMLKeyword.output_tensors]
output_shapes = subgraphs[0][YAMLKeyword.output_shapes]
else:
output_nodes = subgraphs[0][YAMLKeyword.check_tensors]
output_shapes = subgraphs[0][YAMLKeyword.check_shapes]
run_output = self.tuning_run(
abi=target_abi,
target_dir=build_tmp_binary_dir,
......@@ -523,9 +529,9 @@ class DeviceWrapper:
embed_model_data=embed_model_data,
model_output_dir=model_output_dir,
input_nodes=subgraphs[0][YAMLKeyword.input_tensors],
output_nodes=subgraphs[0][YAMLKeyword.output_tensors],
output_nodes=output_nodes,
input_shapes=subgraphs[0][YAMLKeyword.input_shapes],
output_shapes=subgraphs[0][YAMLKeyword.output_shapes],
output_shapes=output_shapes,
mace_model_dir=mace_model_dir,
model_tag=model_name,
device_type=device_type,
......@@ -568,9 +574,9 @@ class DeviceWrapper:
platform=model_config[YAMLKeyword.platform],
device_type=device_type,
input_nodes=subgraphs[0][YAMLKeyword.input_tensors],
output_nodes=subgraphs[0][YAMLKeyword.output_tensors],
output_nodes=output_nodes,
input_shapes=subgraphs[0][YAMLKeyword.input_shapes],
output_shapes=subgraphs[0][YAMLKeyword.output_shapes],
output_shapes=output_shapes,
model_output_dir=model_output_dir,
input_data_types=subgraphs[0][
YAMLKeyword.input_data_types],
......@@ -961,7 +967,8 @@ class DeviceManager:
YAMLKeyword.address: adb[0],
YAMLKeyword.username: '',
}
devices.append(android)
if android not in devices:
devices.append(android)
return devices
@classmethod
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册