Commit 33892145 authored by 李寅

Merge branch 'hexagon' into 'master'

Support running MACE models on Hexagon DSP
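
This change adds a Hexagon DSP path to the converter: a new `HexagonConverter` maps quantized MACE ops to Hexagon NN ops and wires min/max const tensors into each op, quantized const tensors carry new `minval`/`maxval` fields, bias tensors get a Hexagon-specific symmetric int32 quantization, and new `check_node`/`check_shape` options allow validating an intermediate node's output.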

See merge request !849
......@@ -324,7 +324,9 @@ void HexagonControlWrapper::GetPerfInfo() {
}
++node_type_counters[node_type].first;
node_type_counters[node_type].second += node_id_counters[node_id];
total_duration += node_id_counters[node_id];
if (node_type.compare("Const") != 0) {
total_duration += node_id_counters[node_id];
}
}
for (auto &node_type_counter : node_type_counters) {
......@@ -332,7 +334,7 @@ void HexagonControlWrapper::GetPerfInfo() {
<< ", time: " << node_type_counter.second.first
<< ", duration: " << node_type_counter.second.second;
}
LOG(INFO) << "total duration: " << total_duration;
LOG(INFO) << "total duration: " << std::fixed << total_duration;
}
void HexagonControlWrapper::ResetPerfInfo() {
......
......@@ -36,7 +36,9 @@ message ConstTensor {
optional int64 data_size = 7;
optional float scale = 8;
optional int32 zero_point = 9;
optional bool quantized = 10 [default = false];
optional float minval = 10;
optional float maxval = 11;
optional bool quantized = 12 [default = false];
optional uint32 node_id = 100;
}
......
......@@ -10,18 +10,19 @@ py_library(
name = "converter_lib",
srcs = [
"convert_util.py",
"graph_util.py",
"converter_tool/tf_dsp_converter.py",
"converter_tool/base_converter.py",
"converter_tool/caffe_converter.py",
"converter_tool/hexagon_converter.py",
"converter_tool/shape_inference.py",
"converter_tool/tensorflow_converter.py",
"converter_tool/caffe_converter.py",
"converter_tool/tf_dsp_converter.py",
"converter_tool/transformer.py",
"graph_util.py",
],
srcs_version = "PY2AND3",
deps = [
":quantization_lib",
":memory_optimizer",
":quantization_lib",
"//mace/proto:mace_py",
"//third_party/caffe:caffe_py",
],
......
......@@ -132,15 +132,30 @@ def main(unused_args):
option.add_input_node(input_node)
output_node_names = FLAGS.output_node.split(',')
output_node_shapes = FLAGS.output_shape.split(':')
if len(output_node_names) != len(output_node_shapes):
raise Exception('output node count and shape count do not match.')
for i in six.moves.range(len(output_node_names)):
output_node = cvt.NodeInfo()
output_node.name = output_node_names[i]
output_node.shape = parse_int_array_from_str(output_node_shapes[i])
option.add_output_node(output_node)
if FLAGS.check_node != '':
check_node_names = FLAGS.check_node.split(',')
check_node_shapes = FLAGS.check_shape.split(':')
if len(check_node_names) != len(check_node_shapes):
raise Exception('check node count and shape count do not match.')
for i in six.moves.range(len(check_node_names)):
check_node = cvt.NodeInfo()
check_node.name = check_node_names[i]
check_node.shape = parse_int_array_from_str(check_node_shapes[i])
option.add_check_node(check_node)
option.build()
print("Transform model to one that can better run on device")
if FLAGS.runtime == 'dsp':
if FLAGS.runtime == 'dsp' and not option.quantize:
mace_check(FLAGS.platform == 'tensorflow',
'DSP only supports tensorflow')
from mace.python.tools.converter_tool import tf_dsp_converter
......@@ -172,7 +187,7 @@ def main(unused_args):
FLAGS.data_type, cvt.DeviceType.GPU.value)
mace_gpu_transformer = transformer.Transformer(
option, output_graph_def)
output_graph_def = mace_gpu_transformer.run()
output_graph_def, _ = mace_gpu_transformer.run()
six.print_("start optimize gpu memory.")
memory_optimizer.optimize_gpu_memory(output_graph_def)
six.print_("GPU memory optimization done.")
......@@ -183,7 +198,7 @@ def main(unused_args):
option.disable_transpose_filters()
mace_cpu_transformer = transformer.Transformer(
option, cpu_graph_def)
cpu_graph_def = mace_cpu_transformer.run()
cpu_graph_def, _ = mace_cpu_transformer.run()
print("start optimize cpu memory.")
memory_optimizer.optimize_cpu_memory(cpu_graph_def)
print("CPU memory optimization done.")
......@@ -206,13 +221,21 @@ def main(unused_args):
FLAGS.data_type, option.device)
mace_transformer = transformer.Transformer(
option, output_graph_def)
output_graph_def = mace_transformer.run()
output_graph_def, quantize_activation_info = mace_transformer.run()
if FLAGS.runtime == 'dsp':
from mace.python.tools.converter_tool import hexagon_converter
converter = hexagon_converter.HexagonConverter(
option, output_graph_def, quantize_activation_info)
output_graph_def = converter.run()
print("start optimize memory.")
if FLAGS.runtime == 'gpu':
memory_optimizer.optimize_gpu_memory(output_graph_def)
elif FLAGS.runtime == 'cpu':
memory_optimizer.optimize_cpu_memory(output_graph_def)
elif FLAGS.runtime == 'dsp':
pass
else:
mace_check(False, "runtime only support [gpu|cpu|dsp]")
......@@ -272,6 +295,8 @@ def parse_args():
help="e.g., input_node")
parser.add_argument(
"--output_node", type=str, default="softmax", help="e.g., softmax")
parser.add_argument(
"--check_node", type=str, default="softmax", help="e.g., softmax")
parser.add_argument(
"--template_dir", type=str, default="", help="template path")
parser.add_argument(
......@@ -297,6 +322,10 @@ def parse_args():
"--input_shape", type=str, default="", help="input shape.")
parser.add_argument(
"--input_range", type=str, default="", help="input range.")
parser.add_argument(
"--output_shape", type=str, default="", help="output shape.")
parser.add_argument(
"--check_shape", type=str, default="", help="check shape.")
parser.add_argument(
"--platform", type=str, default="tensorflow", help="tensorflow/caffe")
parser.add_argument(
......
......@@ -40,6 +40,7 @@ class PaddingMode(Enum):
VALID = 0
SAME = 1
FULL = 2
NA = 3
class PoolingType(Enum):
......@@ -270,6 +271,7 @@ class ConverterOption(object):
def __init__(self):
self._input_nodes = {}
self._output_nodes = {}
self._check_nodes = {}
self._data_type = mace_pb2.DT_FLOAT
self._device = DeviceType.CPU.value
self._winograd = 0
......@@ -287,6 +289,10 @@ class ConverterOption(object):
def output_nodes(self):
return self._output_nodes
@property
def check_nodes(self):
return self._check_nodes
@property
def data_type(self):
return self._data_type
......@@ -335,6 +341,14 @@ class ConverterOption(object):
def add_output_node(self, output_node):
self._output_nodes[output_node.name] = output_node
@check_nodes.setter
def check_nodes(self, check_nodes):
for node in check_nodes:
self.check_nodes[node.name] = node
def add_check_node(self, check_node):
self._check_nodes[check_node.name] = check_node
@data_type.setter
def data_type(self, data_type):
self._data_type = data_type
......
# Copyright 2018 Xiaomi, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from mace.proto import mace_pb2
from mace.python.tools.converter_tool import base_converter
from mace.python.tools.converter_tool.base_converter import ConverterUtil
from mace.python.tools.converter_tool.base_converter import EltwiseType
from mace.python.tools.converter_tool.base_converter import MaceKeyword
from mace.python.tools.converter_tool.base_converter import MaceOp
from mace.python.tools.converter_tool.base_converter import PaddingMode
from mace.python.tools.converter_tool.base_converter import PoolingType
from mace.python.tools.convert_util import mace_check
from mace.python.tools import graph_util
import copy
from functools import reduce  # Python 3 compatibility: reduce is no longer a builtin
from operator import mul
class HexagonOps(object):
def __init__(self):
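# Map from MACE op types to Hexagon NN op names; the '_8' suffix marks quantized uint8 variants.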
self.hexagon_ops = {
'INPUT': 'INPUT',
'OUTPUT': 'OUTPUT',
'Quantize': 'Quantize',
'Dequantize': 'Dequantize',
'Concat': 'QuantizedConcat_8',
'Conv2D': 'Supernode_8x8p32to8',
'DepthwiseConv2d': 'DepthwiseSupernode_8x8p32to8',
'ResizeBilinear': 'QuantizedResizeBilinear_8',
'SpaceToBatchND': 'SpaceToBatchND_8',
'BatchToSpaceND': 'BatchToSpaceND_8',
'Softmax': 'QuantizedSoftmax_8',
'Eltwise': 'Eltwise',
'Pooling': 'Pooling',
'Identity': 'Nop',
'Squeeze': 'Nop',
}
def has_op(self, tf_op):
return tf_op in self.hexagon_ops
def map_nn_op(self, tf_op):
if tf_op not in self.hexagon_ops:
raise Exception('Could not map nn op for: ', tf_op)
return self.hexagon_ops[tf_op]
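# Hexagon NN padding enum values; NA (0) is used for ops that carry no padding argument.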
padding_mode = {
PaddingMode.NA: 0,
PaddingMode.SAME: 1,
PaddingMode.VALID: 2
}
def get_tensor_name_from_op(op_name, port):
return op_name + ':' + str(port)
def get_op_and_port_from_tensor(tensor_name):
if ':' in tensor_name:
op, port = tensor_name.split(':')
port = int(port)
else:
op = tensor_name
port = 0
return op, port
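# e.g. get_tensor_name_from_op('conv1', 0) == 'conv1:0'
# get_op_and_port_from_tensor('conv1:2') == ('conv1', 2)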
class HexagonConverter(base_converter.ConverterInterface):
def __init__(self, option, model, quantize_activation_info):
self._option = option
self._model = model
self._hexagon_ops = HexagonOps()
self._consts = {}
self._quantize_activation_info = quantize_activation_info
def run(self):
mace_check(len(self._option.input_nodes) == 1
and len(self._option.output_nodes) == 1,
'dsp only supports a single input and output')
for tensor in self._model.tensors:
self._consts[tensor.name] = tensor
# convert op node
self.convert_ops()
self.add_input_output_node()
self._model = graph_util.sort_mace_graph(self._model, '__output__')
self.add_node_id()
return self._model
def convert_ops(self):
print("Convert mace graph to hexagon.")
for op in self._model.op:
if not self._hexagon_ops.has_op(op.type):
raise Exception('Unsupported op: ', op)
print('Op: ', op.name, op.type)
for i in range(len(op.input)):
if ':' not in op.input[i]:
node_name = op.input[i]
op.input[i] += ':0'
if node_name in self._quantize_activation_info:
self._quantize_activation_info[op.input[i]] = \
self._quantize_activation_info[node_name]
if op.type == MaceOp.Conv2D.name \
or op.type == MaceOp.DepthwiseConv2d.name:
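# Rebuild the inputs in the order Hexagon Supernode expects: input, filter,
# input min/max, filter min/max, strides, bias, bias min/max, and finally output min/max.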
mace_check(len(op.input) == 3,
"Missing bias of Conv or Depthwise Conv.")
bias = op.input.pop()
self.add_min_max_const_node(op, op.input[0])
self.add_min_max_const_node(op, op.input[1])
strides_arg = ConverterUtil.get_arg(op, 'strides')
mace_check(strides_arg is not None,
"Missing strides of Conv or Depthwise Conv.")
strides = self.add_shape_const_node(
op, [1, strides_arg.ints[0], strides_arg.ints[1], 1],
MaceKeyword.mace_strides_str)
op.input.extend([strides, bias])
self.add_min_max_const_node(op, bias)
self.add_min_max_const_node(
op, op.output[0], True, True, False)
elif op.type == MaceOp.Eltwise.name:
self.add_min_max_const_node(op, op.input[0])
self.add_min_max_const_node(op, op.input[1])
self.add_min_max_const_node(
op, op.output[0], True, True, False)
elif op.type == MaceOp.BatchToSpaceND.name \
or op.type == MaceOp.SpaceToBatchND.name:
strides_arg = ConverterUtil.get_arg(
op, MaceKeyword.mace_space_batch_block_shape_str)
strides_tensor = self._model.tensors.add()
strides_tensor.name = op.name + '/strides:0'
strides_tensor.data_type = mace_pb2.DT_INT32
strides_tensor.dims.extend([1, 1, 1, len(strides_arg.ints)])
strides_tensor.int32_data.extend(strides_arg.ints)
if op.type == MaceOp.BatchToSpaceND.name:
pad_arg = ConverterUtil.get_arg(
op, MaceKeyword.mace_batch_to_space_crops_str)
else:
pad_arg = ConverterUtil.get_arg(
op, MaceKeyword.mace_paddings_str)
pad_tensor = self._model.tensors.add()
pad_tensor.name = op.name + '/pad:0'
pad_tensor.data_type = mace_pb2.DT_INT32
pad_tensor.dims.extend([1, 1, len(pad_arg.ints) // 2, 2])
pad_tensor.int32_data.extend(pad_arg.ints)
op.input.extend([strides_tensor.name, pad_tensor.name])
self.add_min_max_const_node(op, op.input[0])
elif op.type == MaceOp.Pooling.name:
self.add_min_max_const_node(op, op.input[0])
window_arg = ConverterUtil.get_arg(
op, MaceKeyword.mace_kernel_str)
window_tensor = self._model.tensors.add()
window_tensor.name = op.name + '/window:0'
window_tensor.data_type = mace_pb2.DT_INT32
window_tensor.dims.extend(
[1, window_arg.ints[0], window_arg.ints[1], 1])
strides_arg = ConverterUtil.get_arg(
op, MaceKeyword.mace_strides_str)
strides_tensor = self._model.tensors.add()
strides_tensor.name = op.name + '/strides:0'
strides_tensor.data_type = mace_pb2.DT_INT32
strides_tensor.dims.extend(
[1, strides_arg.ints[0], strides_arg.ints[1], 1])
op.input.extend([window_tensor.name, strides_tensor.name])
elif op.type == MaceOp.ResizeBilinear.name:
newdim_arg = ConverterUtil.get_arg(
op, MaceKeyword.mace_resize_size_str)
newdim_tensor = self._model.tensors.add()
newdim_tensor.name = op.name + '/newdim:0'
newdim_tensor.data_type = mace_pb2.DT_INT32
newdim_tensor.dims.extend([len(newdim_arg.ints)])
newdim_tensor.int32_data.extend(newdim_arg.ints)
op.input.extend([newdim_tensor.name])
self.add_min_max_const_node(op, op.input[0])
elif op.type == MaceOp.Concat.name:
inputs = copy.deepcopy(op.input)
for ipt in inputs:
self.add_min_max_const_node(op, ipt, True, False)
for ipt in inputs:
self.add_min_max_const_node(op, ipt, False, True)
dim_arg = ConverterUtil.get_arg(
op, MaceKeyword.mace_axis_str)
dim_tensor = self._model.tensors.add()
dim_tensor.name = op.name + '/dim:0'
dim_tensor.data_type = mace_pb2.DT_INT32
dim_tensor.dims.extend([1])
dim_tensor.int32_data.extend([dim_arg.i])
op.input.insert(0, dim_tensor.name)
elif op.type in [MaceOp.Softmax.name,
MaceOp.Dequantize.name]:
self.add_min_max_const_node(op, op.input[0])
if op.type != MaceOp.Dequantize.name:
min_output_shape = op.output_shape.add()
min_output_shape.dims.extend([1])
max_output_shape = op.output_shape.add()
max_output_shape.dims.extend([1])
op.output_type.extend(
[mace_pb2.DT_UINT8, mace_pb2.DT_FLOAT, mace_pb2.DT_FLOAT])
for i in range(len(op.output_shape)):
out_max_byte_size = reduce(mul, op.output_shape[i].dims)
if op.output_type[i] == mace_pb2.DT_FLOAT:
out_max_byte_size *= 4
op.out_max_byte_size.extend([out_max_byte_size])
op.padding = padding_mode[PaddingMode.NA]
arg = ConverterUtil.get_arg(op, MaceKeyword.mace_padding_str)
if arg is not None:
op.padding = padding_mode[PaddingMode(arg.i)]
if (op.type == MaceOp.Eltwise.name
and ConverterUtil.get_arg(
op, MaceKeyword.mace_element_type_str).i
== EltwiseType.SUM.value):
op.type = 'QuantizedAdd_8p8to8'
elif op.type == MaceOp.Pooling.name:
pooling_type_arg = ConverterUtil.get_arg(
op, MaceKeyword.mace_pooling_type_str)
if PoolingType(pooling_type_arg.i) == PoolingType.AVG:
op.type = 'QuantizedAvgPool_8'
else:
op.type = 'QuantizedMaxPool_8'
else:
op.type = self._hexagon_ops.map_nn_op(op.type)
def add_min_max(self, name, val):
if name not in self._consts:
tensor = self._model.tensors.add()
self._consts[name] = tensor
tensor.name = name
tensor.data_type = mace_pb2.DT_FLOAT
tensor.dims.extend([1])
tensor.float_data.extend([val])
def add_min_max_const_node(
self, this_op, tensor_name, add_min=True, add_max=True,
diff_port=True):
op, port = get_op_and_port_from_tensor(tensor_name)
mace_check(port == 0, 'port should be 0 when adding min/max tensors.')
if tensor_name in self._quantize_activation_info:
quantize_info = self._quantize_activation_info[tensor_name]
minval = quantize_info.minval
maxval = quantize_info.maxval
is_activation = True
elif tensor_name in self._consts:
tensor = self._consts[tensor_name]
minval = tensor.minval
maxval = tensor.maxval
is_activation = False
else:
raise Exception('Quantize info not found: ', tensor_name)
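# For activations (with diff_port) the min/max are read from ports 1 and 2 of the
# producing op; consts and outputs get dedicated '_min:0'/'_max:0' tensors.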
if add_min:
if is_activation and diff_port:
min_tensor_name = op + ':1'
else:
min_tensor_name = op + '_min:0'
self.add_min_max(min_tensor_name, minval)
this_op.input.extend([min_tensor_name])
if add_max:
if is_activation and diff_port:
max_tensor_name = op + ':2'
else:
max_tensor_name = op + '_max:0'
self.add_min_max(max_tensor_name, maxval)
this_op.input.extend([max_tensor_name])
def add_shape_const_node(self, op, values, name):
tensor = self._model.tensors.add()
node_name = op.name + '/' + name
tensor.name = node_name + ':0'
tensor.data_type = mace_pb2.DT_INT32
tensor.dims.extend(values)
return tensor.name
def add_input_output_node(self):
input_node = list(self._option.input_nodes.values())[0]
op_def = self._model.op.add()
op_def.name = '__input__'
op_def.type = 'INPUT'
shape = op_def.output_shape.add()
shape.dims.extend(input_node.shape)
op_def.output_type.extend([mace_pb2.DT_FLOAT])
out_max_byte_size = reduce(mul, shape.dims)
op_def.out_max_byte_size.extend([out_max_byte_size])
for op in self._model.op:
if op.name == input_node.name:
del op.input[0]
input_name = op_def.name + ':0'
op.input.extend([input_name])
self._consts[input_name] = \
self._quantize_activation_info[input_node.name]
self.add_min_max_const_node(op, input_name)
del self._consts[input_name]
break
output_node = None
if not self._option.check_nodes:
output_name = list(self._option.output_nodes.values())[0].name
else:
output_name = list(self._option.check_nodes.values())[0].name
for op in self._model.op:
if op.name.startswith(MaceKeyword.mace_output_node_name) \
and op.name.find(output_name) != -1:
output_node = op
break
mace_check(output_node is not None,
"mace_output_node_* not found.")
op_def = self._model.op.add()
op_def.name = '__output__'
op_def.type = 'OUTPUT'
op_def.input.extend([get_tensor_name_from_op(output_node.name, 0)])
def add_node_id(self):
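# Assign consecutive ids to tensors first, then to ops, and record each input's
# producer id and output port, which Hexagon needs to build its graph.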
node_id_counter = 0
node_id_map = {}
for tensor in self._model.tensors:
tensor.node_id = node_id_counter
node_id_counter += 1
tensor_op, port = get_op_and_port_from_tensor(tensor.name)
node_id_map[tensor_op] = tensor.node_id
for op in self._model.op:
op.node_id = node_id_counter
node_id_counter += 1
node_id_map[op.name] = op.node_id
for ipt in op.input:
if ipt.startswith(MaceKeyword.mace_input_node_name):
ipt = ipt[len(MaceKeyword.mace_input_node_name + '_'):]
op_name, port = get_op_and_port_from_tensor(ipt)
node_id = node_id_map[op_name]
node_input = op.node_input.add()
node_input.node_id = node_id
node_input.output_port = int(port)
......@@ -134,7 +134,9 @@ class Transformer(base_converter.ConverterInterface):
changed = transformer()
if not changed:
break
return self._model
self.add_check_nodes()
return self._model, self._quantize_activation_info
def filter_format(self):
filter_format_value = ConverterUtil.get_arg(self._model,
......@@ -284,12 +286,20 @@ class Transformer(base_converter.ConverterInterface):
input_info = net.input_info.add()
input_info.name = input_node.name
input_info.dims.extend(input_node.shape)
if self._option.quantize:
input_info.data_type = mace_pb2.DT_FLOAT
else:
input_info.data_type = self._option.data_type
for output_node in self._option.output_nodes.values():
output_info = net.output_info.add()
output_info.name = output_node.name
output_info.dims.extend(
self._producer[output_node.name].output_shape[0].dims)
if self._option.quantize:
output_info.data_type = mace_pb2.DT_FLOAT
else:
output_info.data_type = self._option.data_type
return False
......@@ -904,6 +914,8 @@ class Transformer(base_converter.ConverterInterface):
consumer_op = self._consumers[op.output[0]][0]
if consumer_op.type == MaceOp.BiasAdd.name:
print("Fold biasadd: %s(%s)" % (op.name, op.type))
op.name = consumer_op.name
op.output[0] = consumer_op.output[0]
op.input.append(consumer_op.input[1])
self.replace_quantize_info(op, consumer_op)
self.safe_remove_node(consumer_op, op)
......@@ -1306,6 +1318,11 @@ class Transformer(base_converter.ConverterInterface):
transposed_deconv_filter.add(op.input[1])
self.set_filter_format(FilterFormat.OHWI)
elif self._option.quantize and \
self._option.device == DeviceType.HEXAGON.value:
print("Transpose filters to HWIO/HWIM")
mace_check(filter_format == FilterFormat.HWIO,
"HEXAGON only support HWIO/HWIM filter format.")
else:
print("Transpose filters to OIHW/MIHW")
# transpose filter to OIHW/MIHW for tensorflow (HWIO/HWIM)
......@@ -1795,16 +1812,23 @@ class Transformer(base_converter.ConverterInterface):
check_deconv = len(ops[0].input) >= 4\
and ops[0].input[3] == tensor.name
if check_conv or check_deconv:
conv_op = ops[0]
scale_input = self._quantize_activation_info[
conv_op.input[0]].scale
if conv_op.input[1] not in self._quantized_tensor:
self.quantize_tensor(self._consts[conv_op.input[1]])
scale_filter = self._consts[conv_op.input[1]].scale
scale = scale_input * scale_filter
quantized_tensor = quantize_util.quantize_with_scale_and_zero(
tensor.float_data, scale, 0)
if self._option.device == DeviceType.CPU.value:
conv_op = ops[0]
scale_input = self._quantize_activation_info[
conv_op.input[0]].scale
if conv_op.input[1] not in self._quantized_tensor:
self.quantize_tensor(self._consts[conv_op.input[1]])
scale_filter = self._consts[conv_op.input[1]].scale
scale = scale_input * scale_filter
quantized_tensor = \
quantize_util.quantize_with_scale_and_zero(
tensor.float_data, scale, 0)
elif self._option.device == DeviceType.HEXAGON.value:
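# Hexagon uses its own symmetric int32 bias quantization instead of the CPU
# scheme of scale = input_scale * filter_scale.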
quantized_tensor = \
quantize_util.quantize_bias_for_hexagon(
tensor.float_data)
else:
mace_check(False, "wrong device.")
tensor.data_type = mace_pb2.DT_INT32
else:
quantized_tensor = quantize_util.quantize(tensor.float_data)
......@@ -1814,6 +1838,8 @@ class Transformer(base_converter.ConverterInterface):
tensor.int32_data.extend(quantized_tensor.data)
tensor.scale = quantized_tensor.scale
tensor.zero_point = quantized_tensor.zero
tensor.minval = quantized_tensor.minval
tensor.maxval = quantized_tensor.maxval
tensor.quantized = True
self._quantized_tensor.update([tensor.name])
......@@ -1828,8 +1854,8 @@ class Transformer(base_converter.ConverterInterface):
return False
def add_quantize_info(self, op, minval, maxval):
scale, zero = quantize_util.adjust_range(minval, maxval,
non_zero=False)
scale, zero, minval, maxval = \
quantize_util.adjust_range(minval, maxval, non_zero=False)
quantize_info = op.quantize_info.add()
quantize_info.minval = minval
quantize_info.maxval = maxval
......@@ -1928,8 +1954,9 @@ class Transformer(base_converter.ConverterInterface):
tensor_name, minmax = line.split("@@")[:2]
min_val, max_val = [float(i) for i in
minmax.strip().split(",")]
scale, zero = quantize_util.adjust_range(min_val, max_val,
non_zero=False)
scale, zero, min_val, max_val = \
quantize_util.adjust_range(
min_val, max_val, non_zero=False)
activation_info = mace_pb2.QuantizeActivationInfo()
activation_info.minval = min_val
activation_info.maxval = max_val
......@@ -1954,6 +1981,7 @@ class Transformer(base_converter.ConverterInterface):
for op in self._model.op:
if op.type in [MaceOp.Pooling.name,
MaceOp.Squeeze.name,
MaceOp.Reshape.name,
MaceOp.ResizeBilinear.name,
MaceOp.BatchToSpaceND.name,
MaceOp.SpaceToBatchND.name]:
......@@ -2012,12 +2040,13 @@ class Transformer(base_converter.ConverterInterface):
if input_node.name not in self._quantize_activation_info:
print("Input range %s: %s" % (input_node.name,
str(input_node.range)))
scale, zero = quantize_util.adjust_range(input_node.range[0],
input_node.range[1],
non_zero=False)
scale, zero, minval, maxval = \
quantize_util.adjust_range(input_node.range[0],
input_node.range[1],
non_zero=False)
quantize_info = mace_pb2.QuantizeActivationInfo()
quantize_info.minval = input_node.range[0]
quantize_info.maxval = input_node.range[1]
quantize_info.minval = minval
quantize_info.maxval = maxval
quantize_info.scale = scale
quantize_info.zero_point = zero
self._quantize_activation_info[input_node.name] = quantize_info
......@@ -2049,3 +2078,32 @@ class Transformer(base_converter.ConverterInterface):
arg.name = MaceKeyword.mace_opencl_mem_type
arg.i = mace_pb2.GPU_IMAGE if self._option.cl_mem_type == "image"\
else mace_pb2.GPU_BUFFER
def add_check_nodes(self):
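# When a check node is given, truncate the graph right after it and append a
# Dequantize output op, so the intermediate activation can be validated.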
if self._option.check_nodes:
mace_check(len(self._option.check_nodes) == 1,
"Only support one check node now.")
check_node = None
for i in six.moves.range(len(self._model.op)):
if self._model.op[i].name in self._option.check_nodes:
check_node = self._model.op[i]
del self._model.op[i+1:]
break
mace_check(check_node is not None, "check node not found.")
output_name = \
MaceKeyword.mace_output_node_name + '_' + check_node.name
op_def = self._model.op.add()
op_def.name = self.normalize_op_name(output_name)
op_def.type = MaceOp.Dequantize.name
op_def.input.extend([check_node.output[0]])
op_def.output.extend([output_name])
output_shape = op_def.output_shape.add()
output_shape.dims.extend(check_node.output_shape[0].dims)
ConverterUtil.add_data_type_arg(op_def, mace_pb2.DT_UINT8)
op_def.output_type.extend([mace_pb2.DT_FLOAT])
del self._model.output_info[:]
output_info = self._model.output_info.add()
output_info.name = check_node.name
output_info.dims.extend(check_node.output_shape[0].dims)
output_info.data_type = mace_pb2.DT_FLOAT
......@@ -63,7 +63,6 @@ def sort_mace_graph(graph_def, output_name):
for node in graph_def.op:
nodes_map[node.name] = node
sort_mace_node(nodes_map[output_name], nodes_map, ordered_nodes_map)
sorted_graph = mace_pb2.NetDef()
sorted_graph.tensors.extend(graph_def.tensors)
sorted_graph.op.extend([node for node in ordered_nodes_map.values()])
return sorted_graph
del graph_def.op[:]
graph_def.op.extend([node for node in ordered_nodes_map.values()])
return graph_def
......@@ -20,6 +20,7 @@ import hashlib
from enum import Enum
from mace.proto import mace_pb2
from mace.python.tools.convert_util import mace_check
from jinja2 import Environment, FileSystemLoader
GENERATED_NAME = set()
......@@ -107,11 +108,9 @@ class TensorInfo:
self.id = id
self.data_type = tensor.data_type
if tensor.data_type == mace_pb2.DT_HALF:
self.data_type = mace_pb2.DT_HALF
self.data = bytearray(
np.array(tensor.float_data).astype(np.float16).tobytes())
elif tensor.data_type == mace_pb2.DT_FLOAT:
self.data_type = mace_pb2.DT_FLOAT
self.data = bytearray(
np.array(tensor.float_data).astype(np.float32).tobytes())
elif tensor.data_type == mace_pb2.DT_INT32:
......@@ -139,7 +138,7 @@ def update_tensor_infos(net_def, runtime, data_type):
tensor_info = TensorInfo(counter, tensor)
tensor_infos.append(tensor_info)
# align
if tensor_info.data_type != 'DT_UINT8' and offset % 4 != 0:
if tensor_info.data_type != mace_pb2.DT_UINT8 and offset % 4 != 0:
padding = 4 - offset % 4
offset += padding
......@@ -162,10 +161,11 @@ def extract_model_data(net_def):
for tensor in net_def.tensors:
tensor_info = TensorInfo(counter, tensor)
# align
if tensor_info.data_type != mace_pb2.DT_UINT8 and offset % 4 != 0:
padding = 4 - offset % 4
model_data.extend(bytearray([0] * padding))
offset += padding
mace_check(offset <= tensor.offset,
"Current offset should be <= tensor.offset")
if offset < tensor.offset:
model_data.extend(bytearray([0] * (tensor.offset - offset)))
offset = tensor.offset
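# Pad up to the offset recorded on the tensor itself so the data blob stays
# consistent with the offsets computed in update_tensor_infos.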
model_data.extend(tensor_info.data)
offset += len(tensor_info.data)
counter += 1
......
......@@ -7,6 +7,8 @@ class QuantizedData(object):
self._data = None
self._scale = 0
self._zero = 0
self._minval = 0.0
self._maxval = 0.0
@property
def data(self):
......@@ -20,6 +22,14 @@ class QuantizedData(object):
def zero(self):
return self._zero
@property
def minval(self):
return self._minval
@property
def maxval(self):
return self._maxval
@data.setter
def data(self, data):
self._data = data
......@@ -32,6 +42,14 @@ class QuantizedData(object):
def zero(self, zero):
self._zero = zero
@minval.setter
def minval(self, minval):
self._minval = minval
@maxval.setter
def maxval(self, maxval):
self._maxval = maxval
def adjust_range(in_min, in_max, non_zero):
out_max = max(0.0, in_max)
......@@ -54,7 +72,7 @@ def adjust_range(in_min, in_max, non_zero):
else:
zero_int = 255
return scale, zero_int
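# Also return the real-valued range implied by (scale, zero_point) so callers
# can record the adjusted min/max.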
return scale, zero_int, -zero_int*scale, (255-zero_int)*scale
def cal_multiplier_and_shift(scale):
......@@ -94,13 +112,34 @@ def quantize(data):
np_data = np.array(data).astype(float)
in_min = np_data.min()
in_max = np_data.max()
scale, zero = adjust_range(in_min, in_max, non_zero=True)
scale, zero, out_min, out_max = adjust_range(in_min, in_max, non_zero=True)
output = np.clip((np.round(zero + np_data / scale).astype(int)), 0, 255)
quantized_data = QuantizedData()
quantized_data.data = output
quantized_data.scale = scale
quantized_data.zero = zero
quantized_data.minval = out_min
quantized_data.maxval = out_max
return quantized_data
def quantize_bias_for_hexagon(data):
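# Hexagon takes the bias as int32 with a symmetric range: the scale spreads
# [-max(|data|), max(|data|)] over 2**32 steps and the zero point is exactly 0.
# E.g. for data [-3.0, 1.5]: scale = 6.0 / 2**32, int32 data [-2**31, 2**30].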
np_data = np.array(data).astype(float)
max_val = max(abs(np_data.min()), abs(np_data.max()))
in_min = -max_val
in_max = max_val
scale = (in_max - in_min) / 2**32
zero = 0
output = np.clip((np.round(zero + np_data / scale).astype(np.int64)),
-2**31, 2**31 - 1)
quantized_data = QuantizedData()
quantized_data.data = output
quantized_data.scale = scale
quantized_data.zero = zero
quantized_data.minval = in_min
quantized_data.maxval = in_max
return quantized_data
......
......@@ -190,6 +190,8 @@ class YAMLKeyword(object):
input_ranges = 'input_ranges'
output_tensors = 'output_tensors'
output_shapes = 'output_shapes'
check_tensors = 'check_tensors'
check_shapes = 'check_shapes'
runtime = 'runtime'
data_type = 'data_type'
input_data_types = 'input_data_types'
......@@ -460,6 +462,16 @@ def format_model_config(flags):
subgraph[key] = [value]
subgraph[key] = [str(v) for v in subgraph[key]]
for key in [YAMLKeyword.check_tensors,
YAMLKeyword.check_shapes]:
value = subgraph.get(key, "")
if value != "":
if not isinstance(value, list):
subgraph[key] = [value]
subgraph[key] = [str(v) for v in subgraph[key]]
else:
subgraph[key] = []
input_data_types = subgraph.get(YAMLKeyword.input_data_types, "")
if input_data_types:
if not isinstance(input_data_types, list):
......@@ -787,10 +799,13 @@ def convert_model(configs, cl_mem_type):
model_config[YAMLKeyword.weight_sha256_checksum],
",".join(subgraphs[0][YAMLKeyword.input_tensors]),
",".join(subgraphs[0][YAMLKeyword.output_tensors]),
",".join(subgraphs[0][YAMLKeyword.check_tensors]),
runtime,
model_name,
":".join(subgraphs[0][YAMLKeyword.input_shapes]),
":".join(subgraphs[0][YAMLKeyword.input_ranges]),
":".join(subgraphs[0][YAMLKeyword.output_shapes]),
":".join(subgraphs[0][YAMLKeyword.check_shapes]),
model_config[YAMLKeyword.nnlib_graph_mode],
embed_model_data,
model_config[YAMLKeyword.winograd],
......@@ -1216,6 +1231,12 @@ def run_specific_target(flags, configs, target_abi,
for runtime in runtime_list:
device_type = parse_device_type(runtime)
# run for specified soc
if not subgraphs[0][YAMLKeyword.check_tensors]:
output_nodes = subgraphs[0][YAMLKeyword.output_tensors]
output_shapes = subgraphs[0][YAMLKeyword.output_shapes]
else:
output_nodes = subgraphs[0][YAMLKeyword.check_tensors]
output_shapes = subgraphs[0][YAMLKeyword.check_shapes]
run_output = sh_commands.tuning_run(
abi=target_abi,
serialno=serial_num,
......@@ -1225,9 +1246,9 @@ def run_specific_target(flags, configs, target_abi,
embed_model_data=embed_model_data,
model_output_dir=model_output_dir,
input_nodes=subgraphs[0][YAMLKeyword.input_tensors],
output_nodes=subgraphs[0][YAMLKeyword.output_tensors],
output_nodes=output_nodes,
input_shapes=subgraphs[0][YAMLKeyword.input_shapes],
output_shapes=subgraphs[0][YAMLKeyword.output_shapes],
output_shapes=output_shapes,
mace_model_dir=mace_model_dir,
model_tag=model_name,
device_type=device_type,
......@@ -1261,7 +1282,8 @@ def run_specific_target(flags, configs, target_abi,
model_config[YAMLKeyword.weight_sha256_checksum])
validate_type = device_type
if model_config[YAMLKeyword.quantize] == 1:
if model_config[YAMLKeyword.quantize] == 1 \
and device_type == DeviceType.CPU:
validate_type = device_type + "_QUANTIZE"
sh_commands.validate_model(
......@@ -1272,9 +1294,9 @@ def run_specific_target(flags, configs, target_abi,
platform=model_config[YAMLKeyword.platform],
device_type=device_type,
input_nodes=subgraphs[0][YAMLKeyword.input_tensors],
output_nodes=subgraphs[0][YAMLKeyword.output_tensors],
output_nodes=output_nodes,
input_shapes=subgraphs[0][YAMLKeyword.input_shapes],
output_shapes=subgraphs[0][YAMLKeyword.output_shapes],
output_shapes=output_shapes,
model_output_dir=model_output_dir,
phone_data_dir=PHONE_DATA_DIR,
input_data_types=subgraphs[0][YAMLKeyword.input_data_types], # noqa
......
......@@ -551,10 +551,13 @@ def gen_model_code(model_codegen_dir,
weight_sha256_checksum,
input_nodes,
output_nodes,
check_nodes,
runtime,
model_tag,
input_shapes,
input_ranges,
output_shapes,
check_shapes,
dsp_mode,
embed_model_data,
winograd,
......@@ -581,11 +584,14 @@ def gen_model_code(model_codegen_dir,
"--weight_checksum=%s" % weight_sha256_checksum,
"--input_node=%s" % input_nodes,
"--output_node=%s" % output_nodes,
"--check_node=%s" % check_nodes,
"--runtime=%s" % runtime,
"--template=%s" % "mace/python/tools",
"--model_tag=%s" % model_tag,
"--input_shape=%s" % input_shapes,
"--input_range=%s" % input_ranges,
"--output_shape=%s" % output_shapes,
"--check_shape=%s" % check_shapes,
"--dsp_mode=%s" % dsp_mode,
"--embed_model_data=%s" % embed_model_data,
"--winograd=%s" % winograd,
......