diff --git a/mace/core/runtime/hexagon/hexagon_control_wrapper.cc b/mace/core/runtime/hexagon/hexagon_control_wrapper.cc index 986fe7614bcf1d40ce7feccc4700de68898dc5f4..470d946105a32711249007c03a9de48ac2f52dd6 100644 --- a/mace/core/runtime/hexagon/hexagon_control_wrapper.cc +++ b/mace/core/runtime/hexagon/hexagon_control_wrapper.cc @@ -324,7 +324,9 @@ void HexagonControlWrapper::GetPerfInfo() { } ++node_type_counters[node_type].first; node_type_counters[node_type].second += node_id_counters[node_id]; - total_duration += node_id_counters[node_id]; + if (node_type.compare("Const") != 0) { + total_duration += node_id_counters[node_id]; + } } for (auto &node_type_counter : node_type_counters) { @@ -332,7 +334,7 @@ void HexagonControlWrapper::GetPerfInfo() { << ", time: " << node_type_counter.second.first << ", duration: " << node_type_counter.second.second; } - LOG(INFO) << "total duration: " << total_duration; + LOG(INFO) << "total duration: " << std::fixed << total_duration; } void HexagonControlWrapper::ResetPerfInfo() { diff --git a/mace/proto/mace.proto b/mace/proto/mace.proto index 11f2a88dac9247d913fbcd574b61ebb3ac46305d..1609cd4d62cb780b61a46100cb9cbd9122722ddf 100644 --- a/mace/proto/mace.proto +++ b/mace/proto/mace.proto @@ -36,7 +36,9 @@ message ConstTensor { optional int64 data_size = 7; optional float scale = 8; optional int32 zero_point = 9; - optional bool quantized = 10 [default = false]; + optional float minval = 10; + optional float maxval = 11; + optional bool quantized = 12 [default = false]; optional uint32 node_id = 100; } diff --git a/mace/python/tools/BUILD b/mace/python/tools/BUILD index 8b6ff7991d576e01020b7dc76c2fc84f7f57c1df..693ed9dea2fecce8df3cdc246fa6d4ff87b47024 100644 --- a/mace/python/tools/BUILD +++ b/mace/python/tools/BUILD @@ -10,18 +10,19 @@ py_library( name = "converter_lib", srcs = [ "convert_util.py", - "graph_util.py", - "converter_tool/tf_dsp_converter.py", "converter_tool/base_converter.py", + "converter_tool/caffe_converter.py", + "converter_tool/hexagon_converter.py", "converter_tool/shape_inference.py", "converter_tool/tensorflow_converter.py", - "converter_tool/caffe_converter.py", + "converter_tool/tf_dsp_converter.py", "converter_tool/transformer.py", + "graph_util.py", ], srcs_version = "PY2AND3", deps = [ - ":quantization_lib", ":memory_optimizer", + ":quantization_lib", "//mace/proto:mace_py", "//third_party/caffe:caffe_py", ], diff --git a/mace/python/tools/converter.py b/mace/python/tools/converter.py index 4fee7dfab6f6f9833a2252a9a02762d0d38d0428..a89e3abdb1e4a75fdf3ee5489439cb7d89cbfcfd 100644 --- a/mace/python/tools/converter.py +++ b/mace/python/tools/converter.py @@ -132,15 +132,30 @@ def main(unused_args): option.add_input_node(input_node) output_node_names = FLAGS.output_node.split(',') + output_node_shapes = FLAGS.output_shape.split(':') + if len(output_node_names) != len(output_node_shapes): + raise Exception('output node count and shape count do not match.') for i in six.moves.range(len(output_node_names)): output_node = cvt.NodeInfo() output_node.name = output_node_names[i] + output_node.shape = parse_int_array_from_str(output_node_shapes[i]) option.add_output_node(output_node) + if FLAGS.check_node != '': + check_node_names = FLAGS.check_node.split(',') + check_node_shapes = FLAGS.check_shape.split(':') + if len(check_node_names) != len(check_node_shapes): + raise Exception('check node count and shape count do not match.') + for i in six.moves.range(len(check_node_names)): + check_node = cvt.NodeInfo() + check_node.name = 
check_node_names[i] + check_node.shape = parse_int_array_from_str(check_node_shapes[i]) + option.add_check_node(check_node) + option.build() print("Transform model to one that can better run on device") - if FLAGS.runtime == 'dsp': + if FLAGS.runtime == 'dsp' and not option.quantize: mace_check(FLAGS.platform == 'tensorflow', 'DSP only supports tensorflow') from mace.python.tools.converter_tool import tf_dsp_converter @@ -172,7 +187,7 @@ def main(unused_args): FLAGS.data_type, cvt.DeviceType.GPU.value) mace_gpu_transformer = transformer.Transformer( option, output_graph_def) - output_graph_def = mace_gpu_transformer.run() + output_graph_def, _ = mace_gpu_transformer.run() six.print_("start optimize gpu memory.") memory_optimizer.optimize_gpu_memory(output_graph_def) six.print_("GPU memory optimization done.") @@ -183,7 +198,7 @@ def main(unused_args): option.disable_transpose_filters() mace_cpu_transformer = transformer.Transformer( option, cpu_graph_def) - cpu_graph_def = mace_cpu_transformer.run() + cpu_graph_def, _ = mace_cpu_transformer.run() print("start optimize cpu memory.") memory_optimizer.optimize_cpu_memory(cpu_graph_def) print("CPU memory optimization done.") @@ -206,13 +221,21 @@ def main(unused_args): FLAGS.data_type, option.device) mace_transformer = transformer.Transformer( option, output_graph_def) - output_graph_def = mace_transformer.run() + output_graph_def, quantize_activation_info = mace_transformer.run() + + if FLAGS.runtime == 'dsp': + from mace.python.tools.converter_tool import hexagon_converter + converter = hexagon_converter.HexagonConverter( + option, output_graph_def, quantize_activation_info) + output_graph_def = converter.run() print("start optimize memory.") if FLAGS.runtime == 'gpu': memory_optimizer.optimize_gpu_memory(output_graph_def) elif FLAGS.runtime == 'cpu': memory_optimizer.optimize_cpu_memory(output_graph_def) + elif FLAGS.runtime == 'dsp': + pass else: mace_check(False, "runtime only support [gpu|cpu|dsp]") @@ -272,6 +295,8 @@ def parse_args(): help="e.g., input_node") parser.add_argument( "--output_node", type=str, default="softmax", help="e.g., softmax") + parser.add_argument( + "--check_node", type=str, default="softmax", help="e.g., softmax") parser.add_argument( "--template_dir", type=str, default="", help="template path") parser.add_argument( @@ -297,6 +322,10 @@ def parse_args(): "--input_shape", type=str, default="", help="input shape.") parser.add_argument( "--input_range", type=str, default="", help="input range.") + parser.add_argument( + "--output_shape", type=str, default="", help="output shape.") + parser.add_argument( + "--check_shape", type=str, default="", help="check shape.") parser.add_argument( "--platform", type=str, default="tensorflow", help="tensorflow/caffe") parser.add_argument( diff --git a/mace/python/tools/converter_tool/base_converter.py b/mace/python/tools/converter_tool/base_converter.py index 5e6c6f8ee0980b3cdbd0159dd236a4d41647af97..a0deec6334ad261857bd1af879ba47edd0d8a8c2 100644 --- a/mace/python/tools/converter_tool/base_converter.py +++ b/mace/python/tools/converter_tool/base_converter.py @@ -40,6 +40,7 @@ class PaddingMode(Enum): VALID = 0 SAME = 1 FULL = 2 + NA = 3 class PoolingType(Enum): @@ -270,6 +271,7 @@ class ConverterOption(object): def __init__(self): self._input_nodes = {} self._output_nodes = {} + self._check_nodes = {} self._data_type = mace_pb2.DT_FLOAT self._device = DeviceType.CPU.value self._winograd = 0 @@ -287,6 +289,10 @@ class ConverterOption(object): def output_nodes(self): return 
self._output_nodes + @property + def check_nodes(self): + return self._check_nodes + @property def data_type(self): return self._data_type @@ -335,6 +341,14 @@ class ConverterOption(object): def add_output_node(self, output_node): self._output_nodes[output_node.name] = output_node + @check_nodes.setter + def check_nodes(self, check_nodes): + for node in check_nodes: + self.check_nodes[node.name] = node + + def add_check_node(self, check_node): + self._check_nodes[check_node.name] = check_node + @data_type.setter def data_type(self, data_type): self._data_type = data_type diff --git a/mace/python/tools/converter_tool/hexagon_converter.py b/mace/python/tools/converter_tool/hexagon_converter.py new file mode 100644 index 0000000000000000000000000000000000000000..478c313c6f5b46d458ce1208d7744643373a0b1f --- /dev/null +++ b/mace/python/tools/converter_tool/hexagon_converter.py @@ -0,0 +1,352 @@ +# Copyright 2018 Xiaomi, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from mace.proto import mace_pb2 +from mace.python.tools.converter_tool import base_converter +from mace.python.tools.converter_tool.base_converter import ConverterUtil +from mace.python.tools.converter_tool.base_converter import EltwiseType +from mace.python.tools.converter_tool.base_converter import MaceKeyword +from mace.python.tools.converter_tool.base_converter import MaceOp +from mace.python.tools.converter_tool.base_converter import PaddingMode +from mace.python.tools.converter_tool.base_converter import PoolingType +from mace.python.tools.convert_util import mace_check +from mace.python.tools import graph_util + +import copy +from operator import mul + + +class HexagonOps(object): + def __init__(self): + self.hexagon_ops = { + 'INPUT': 'INPUT', + 'OUTPUT': 'OUTPUT', + 'Quantize': 'Quantize', + 'Dequantize': 'Dequantize', + 'Concat': 'QuantizedConcat_8', + 'Conv2D': 'Supernode_8x8p32to8', + 'DepthwiseConv2d': 'DepthwiseSupernode_8x8p32to8', + 'ResizeBilinear': 'QuantizedResizeBilinear_8', + 'SpaceToBatchND': 'SpaceToBatchND_8', + 'BatchToSpaceND': 'BatchToSpaceND_8', + 'Softmax': 'QuantizedSoftmax_8', + 'Eltwise': 'Eltwise', + 'Pooling': 'Pooling', + 'Identity': 'Nop', + 'Squeeze': 'Nop', + } + + def has_op(self, tf_op): + return tf_op in self.hexagon_ops + + def map_nn_op(self, tf_op): + if tf_op not in self.hexagon_ops: + raise Exception('Could not map nn op for: ', tf_op) + return self.hexagon_ops[tf_op] + + +padding_mode = { + PaddingMode.NA: 0, + PaddingMode.SAME: 1, + PaddingMode.VALID: 2 +} + + +def get_tensor_name_from_op(op_name, port): + return op_name + ':' + str(port) + + +def get_op_and_port_from_tensor(tensor_name): + if ':' in tensor_name: + op, port = tensor_name.split(':') + port = int(port) + else: + op = tensor_name + port = 0 + return op, port + + +class HexagonConverter(base_converter.ConverterInterface): + def __init__(self, option, model, quantize_activation_info): + self._option = option + self._model = model + self._hexagon_ops = HexagonOps() + self._consts = {} + 
self._quantize_activation_info = quantize_activation_info + + def run(self): + mace_check(len(self._option.input_nodes) == 1 + and len(self._option.output_nodes) == 1, + 'dsp only support single input and output') + + for tensor in self._model.tensors: + self._consts[tensor.name] = tensor + + # convert op node + self.convert_ops() + + self.add_input_output_node() + + self._model = graph_util.sort_mace_graph(self._model, '__output__') + + self.add_node_id() + + return self._model + + def convert_ops(self): + print("Convert mace graph to hexagon.") + for op in self._model.op: + if not self._hexagon_ops.has_op(op.type): + raise Exception('Unsupported op: ', op) + print('Op: ', op.name, op.type) + for i in range(len(op.input)): + if ':' not in op.input[i]: + node_name = op.input[i] + op.input[i] += ':0' + if node_name in self._quantize_activation_info: + self._quantize_activation_info[op.input[i]] = \ + self._quantize_activation_info[node_name] + + if op.type == MaceOp.Conv2D.name \ + or op.type == MaceOp.DepthwiseConv2d.name: + mace_check(len(op.input) == 3, + "Missing bias of Conv or Depthwise Conv.") + bias = op.input.pop() + self.add_min_max_const_node(op, op.input[0]) + self.add_min_max_const_node(op, op.input[1]) + strides_arg = ConverterUtil.get_arg(op, 'strides') + mace_check(strides_arg is not None, + "Missing strides of Conv or Depthwise Conv.") + strides = self.add_shape_const_node( + op, [1, strides_arg.ints[0], strides_arg.ints[1], 1], + MaceKeyword.mace_strides_str) + op.input.extend([strides, bias]) + self.add_min_max_const_node(op, bias) + self.add_min_max_const_node( + op, op.output[0], True, True, False) + elif op.type == MaceOp.Eltwise.name: + self.add_min_max_const_node(op, op.input[0]) + self.add_min_max_const_node(op, op.input[1]) + self.add_min_max_const_node( + op, op.output[0], True, True, False) + elif op.type == MaceOp.BatchToSpaceND.name \ + or op.type == MaceOp.SpaceToBatchND.name: + strides_arg = ConverterUtil.get_arg( + op, MaceKeyword.mace_space_batch_block_shape_str) + strides_tensor = self._model.tensors.add() + strides_tensor.name = op.name + '/strides:0' + strides_tensor.data_type = mace_pb2.DT_INT32 + strides_tensor.dims.extend([1, 1, 1, len(strides_arg.ints)]) + strides_tensor.int32_data.extend(strides_arg.ints) + if op.type == MaceOp.BatchToSpaceND.name: + pad_arg = ConverterUtil.get_arg( + op, MaceKeyword.mace_batch_to_space_crops_str) + else: + pad_arg = ConverterUtil.get_arg( + op, MaceKeyword.mace_paddings_str) + pad_tensor = self._model.tensors.add() + pad_tensor.name = op.name + '/pad:0' + pad_tensor.data_type = mace_pb2.DT_INT32 + pad_tensor.dims.extend([1, 1, len(pad_arg.ints) / 2, 2]) + pad_tensor.int32_data.extend(pad_arg.ints) + op.input.extend([strides_tensor.name, pad_tensor.name]) + self.add_min_max_const_node(op, op.input[0]) + elif op.type == MaceOp.Pooling.name: + self.add_min_max_const_node(op, op.input[0]) + window_arg = ConverterUtil.get_arg( + op, MaceKeyword.mace_kernel_str) + window_tensor = self._model.tensors.add() + window_tensor.name = op.name + '/window:0' + window_tensor.data_type = mace_pb2.DT_INT32 + window_tensor.dims.extend( + [1, window_arg.ints[0], window_arg.ints[1], 1]) + strides_arg = ConverterUtil.get_arg( + op, MaceKeyword.mace_strides_str) + strides_tensor = self._model.tensors.add() + strides_tensor.name = op.name + '/strides:0' + strides_tensor.data_type = mace_pb2.DT_INT32 + strides_tensor.dims.extend( + [1, strides_arg.ints[0], strides_arg.ints[1], 1]) + op.input.extend([window_tensor.name, 
strides_tensor.name]) + elif op.type == MaceOp.ResizeBilinear.name: + newdim_arg = ConverterUtil.get_arg( + op, MaceKeyword.mace_resize_size_str) + newdim_tensor = self._model.tensors.add() + newdim_tensor.name = op.name + '/newdim:0' + newdim_tensor.data_type = mace_pb2.DT_INT32 + newdim_tensor.dims.extend([len(newdim_arg.ints)]) + newdim_tensor.int32_data.extend(newdim_arg.ints) + op.input.extend([newdim_tensor.name]) + self.add_min_max_const_node(op, op.input[0]) + elif op.type == MaceOp.Concat.name: + inputs = copy.deepcopy(op.input) + for ipt in inputs: + self.add_min_max_const_node(op, ipt, True, False) + for ipt in inputs: + self.add_min_max_const_node(op, ipt, False, True) + dim_arg = ConverterUtil.get_arg( + op, MaceKeyword.mace_axis_str) + dim_tensor = self._model.tensors.add() + dim_tensor.name = op.name + '/dim:0' + dim_tensor.data_type = mace_pb2.DT_INT32 + dim_tensor.dims.extend([1]) + dim_tensor.int32_data.extend([dim_arg.i]) + op.input.insert(0, dim_tensor.name) + elif op.type in [MaceOp.Softmax.name, + MaceOp.Dequantize.name]: + self.add_min_max_const_node(op, op.input[0]) + + if op.type != MaceOp.Dequantize.name: + min_output_shape = op.output_shape.add() + min_output_shape.dims.extend([1]) + max_output_shape = op.output_shape.add() + max_output_shape.dims.extend([1]) + op.output_type.extend( + [mace_pb2.DT_UINT8, mace_pb2.DT_FLOAT, mace_pb2.DT_FLOAT]) + for i in range(len(op.output_shape)): + out_max_byte_size = reduce(mul, op.output_shape[i].dims) + if op.output_type[i] == mace_pb2.DT_FLOAT: + out_max_byte_size *= 4 + op.out_max_byte_size.extend([out_max_byte_size]) + + op.padding = padding_mode[PaddingMode.NA] + arg = ConverterUtil.get_arg(op, MaceKeyword.mace_padding_str) + if arg is not None: + op.padding = padding_mode[PaddingMode(arg.i)] + + if (op.type == MaceOp.Eltwise.name + and ConverterUtil.get_arg( + op, MaceKeyword.mace_element_type_str).i + == EltwiseType.SUM.value): + op.type = 'QuantizedAdd_8p8to8' + elif op.type == MaceOp.Pooling.name: + pooling_type_arg = ConverterUtil.get_arg( + op, MaceKeyword.mace_pooling_type_str) + if PoolingType(pooling_type_arg.i) == PoolingType.AVG: + op.type = 'QuantizedAvgPool_8' + else: + op.type = 'QuantizedMaxPool_8' + else: + op.type = self._hexagon_ops.map_nn_op(op.type) + + def add_min_max(self, name, val): + if name not in self._consts: + tensor = self._model.tensors.add() + self._consts[name] = tensor + tensor.name = name + tensor.data_type = mace_pb2.DT_FLOAT + tensor.dims.extend([1]) + tensor.float_data.extend([val]) + + def add_min_max_const_node( + self, this_op, tensor_name, add_min=True, add_max=True, + diff_port=True): + op, port = get_op_and_port_from_tensor(tensor_name) + mace_check(port == 0, 'port should be 0 to add min max tensor then.') + if tensor_name in self._quantize_activation_info: + quantize_info = self._quantize_activation_info[tensor_name] + minval = quantize_info.minval + maxval = quantize_info.maxval + is_activation = True + elif tensor_name in self._consts: + tensor = self._consts[tensor_name] + minval = tensor.minval + maxval = tensor.maxval + is_activation = False + else: + raise Exception('Quantize info not found: ', tensor_name) + + if add_min: + if is_activation and diff_port: + min_tensor_name = op + ':1' + else: + min_tensor_name = op + '_min:0' + self.add_min_max(min_tensor_name, minval) + this_op.input.extend([min_tensor_name]) + if add_max: + if is_activation and diff_port: + max_tensor_name = op + ':2' + else: + max_tensor_name = op + '_max:0' + self.add_min_max(max_tensor_name, 
maxval) + this_op.input.extend([max_tensor_name]) + + def add_shape_const_node(self, op, values, name): + tensor = self._model.tensors.add() + node_name = op.name + '/' + name + tensor.name = node_name + ':0' + tensor.data_type = mace_pb2.DT_INT32 + tensor.dims.extend(values) + return tensor.name + + def add_input_output_node(self): + input_node = self._option.input_nodes.values()[0] + op_def = self._model.op.add() + op_def.name = '__input__' + op_def.type = 'INPUT' + shape = op_def.output_shape.add() + shape.dims.extend(input_node.shape) + op_def.output_type.extend([mace_pb2.DT_FLOAT]) + out_max_byte_size = reduce(mul, shape.dims) + op_def.out_max_byte_size.extend([out_max_byte_size]) + for op in self._model.op: + if op.name == input_node.name: + del op.input[0] + input_name = op_def.name + ':0' + op.input.extend([input_name]) + self._consts[input_name] = \ + self._quantize_activation_info[input_node.name] + self.add_min_max_const_node(op, input_name) + del self._consts[input_name] + break + + output_node = None + if not self._option.check_nodes: + output_name = self._option.output_nodes.values()[0].name + else: + output_name = self._option.check_nodes.values()[0].name + for op in self._model.op: + if op.name.startswith(MaceKeyword.mace_output_node_name) \ + and op.name.find(output_name) != -1: + output_node = op + break + mace_check(output_node is not None, + "mace_output_node_* not found.") + op_def = self._model.op.add() + op_def.name = '__output__' + op_def.type = 'OUTPUT' + op_def.input.extend([get_tensor_name_from_op(output_node.name, 0)]) + + def add_node_id(self): + node_id_counter = 0 + node_id_map = {} + for tensor in self._model.tensors: + tensor.node_id = node_id_counter + node_id_counter += 1 + tensor_op, port = get_op_and_port_from_tensor(tensor.name) + node_id_map[tensor_op] = tensor.node_id + + for op in self._model.op: + op.node_id = node_id_counter + node_id_counter += 1 + node_id_map[op.name] = op.node_id + for ipt in op.input: + if ipt.startswith(MaceKeyword.mace_input_node_name): + ipt = ipt[len(MaceKeyword.mace_input_node_name + '_'):] + op_name, port = get_op_and_port_from_tensor(ipt) + node_id = node_id_map[op_name] + node_input = op.node_input.add() + node_input.node_id = node_id + node_input.output_port = int(port) diff --git a/mace/python/tools/converter_tool/transformer.py b/mace/python/tools/converter_tool/transformer.py index dc7fe58ee62ac88f68e44b5a1d94ab7408f4bbd4..08f8fcaef9cac5aae821c3ac9af7b9ddb5f4148a 100644 --- a/mace/python/tools/converter_tool/transformer.py +++ b/mace/python/tools/converter_tool/transformer.py @@ -134,7 +134,9 @@ class Transformer(base_converter.ConverterInterface): changed = transformer() if not changed: break - return self._model + + self.add_check_nodes() + return self._model, self._quantize_activation_info def filter_format(self): filter_format_value = ConverterUtil.get_arg(self._model, @@ -284,12 +286,20 @@ class Transformer(base_converter.ConverterInterface): input_info = net.input_info.add() input_info.name = input_node.name input_info.dims.extend(input_node.shape) + if self._option.quantize: + input_info.data_type = mace_pb2.DT_FLOAT + else: + input_info.data_type = self._option.data_type for output_node in self._option.output_nodes.values(): output_info = net.output_info.add() output_info.name = output_node.name output_info.dims.extend( self._producer[output_node.name].output_shape[0].dims) + if self._option.quantize: + output_info.data_type = mace_pb2.DT_FLOAT + else: + output_info.data_type = self._option.data_type return 
False @@ -904,6 +914,8 @@ class Transformer(base_converter.ConverterInterface): consumer_op = self._consumers[op.output[0]][0] if consumer_op.type == MaceOp.BiasAdd.name: print("Fold biasadd: %s(%s)" % (op.name, op.type)) + op.name = consumer_op.name + op.output[0] = consumer_op.output[0] op.input.append(consumer_op.input[1]) self.replace_quantize_info(op, consumer_op) self.safe_remove_node(consumer_op, op) @@ -1306,6 +1318,11 @@ class Transformer(base_converter.ConverterInterface): transposed_deconv_filter.add(op.input[1]) self.set_filter_format(FilterFormat.OHWI) + elif self._option.quantize and \ + self._option.device == DeviceType.HEXAGON.value: + print("Transpose filters to HWIO/HWIM") + mace_check(filter_format == FilterFormat.HWIO, + "HEXAGON only support HWIO/HWIM filter format.") else: print("Transpose filters to OIHW/MIHW") # transpose filter to OIHW/MIHW for tensorflow (HWIO/HWIM) @@ -1795,16 +1812,23 @@ class Transformer(base_converter.ConverterInterface): check_deconv = len(ops[0].input) >= 4\ and ops[0].input[3] == tensor.name if check_conv or check_deconv: - conv_op = ops[0] - scale_input = self._quantize_activation_info[ - conv_op.input[0]].scale - if conv_op.input[1] not in self._quantized_tensor: - self.quantize_tensor(self._consts[conv_op.input[1]]) - scale_filter = self._consts[conv_op.input[1]].scale - scale = scale_input * scale_filter - - quantized_tensor = quantize_util.quantize_with_scale_and_zero( - tensor.float_data, scale, 0) + if self._option.device == DeviceType.CPU.value: + conv_op = ops[0] + scale_input = self._quantize_activation_info[ + conv_op.input[0]].scale + if conv_op.input[1] not in self._quantized_tensor: + self.quantize_tensor(self._consts[conv_op.input[1]]) + scale_filter = self._consts[conv_op.input[1]].scale + scale = scale_input * scale_filter + quantized_tensor = \ + quantize_util.quantize_with_scale_and_zero( + tensor.float_data, scale, 0) + elif self._option.device == DeviceType.HEXAGON.value: + quantized_tensor = \ + quantize_util.quantize_bias_for_hexagon( + tensor.float_data) + else: + mace_check(False, "wrong device.") tensor.data_type = mace_pb2.DT_INT32 else: quantized_tensor = quantize_util.quantize(tensor.float_data) @@ -1814,6 +1838,8 @@ class Transformer(base_converter.ConverterInterface): tensor.int32_data.extend(quantized_tensor.data) tensor.scale = quantized_tensor.scale tensor.zero_point = quantized_tensor.zero + tensor.minval = quantized_tensor.minval + tensor.maxval = quantized_tensor.maxval tensor.quantized = True self._quantized_tensor.update([tensor.name]) @@ -1828,8 +1854,8 @@ class Transformer(base_converter.ConverterInterface): return False def add_quantize_info(self, op, minval, maxval): - scale, zero = quantize_util.adjust_range(minval, maxval, - non_zero=False) + scale, zero, minval, maxval = \ + quantize_util.adjust_range(minval, maxval, non_zero=False) quantize_info = op.quantize_info.add() quantize_info.minval = minval quantize_info.maxval = maxval @@ -1928,8 +1954,9 @@ class Transformer(base_converter.ConverterInterface): tensor_name, minmax = line.split("@@")[:2] min_val, max_val = [float(i) for i in minmax.strip().split(",")] - scale, zero = quantize_util.adjust_range(min_val, max_val, - non_zero=False) + scale, zero, min_val, max_val = \ + quantize_util.adjust_range( + min_val, max_val, non_zero=False) activation_info = mace_pb2.QuantizeActivationInfo() activation_info.minval = min_val activation_info.maxval = max_val @@ -1954,6 +1981,7 @@ class Transformer(base_converter.ConverterInterface): for op in 
self._model.op: if op.type in [MaceOp.Pooling.name, MaceOp.Squeeze.name, + MaceOp.Reshape.name, MaceOp.ResizeBilinear.name, MaceOp.BatchToSpaceND.name, MaceOp.SpaceToBatchND.name]: @@ -2012,12 +2040,13 @@ class Transformer(base_converter.ConverterInterface): if input_node.name not in self._quantize_activation_info: print("Input range %s: %s" % (input_node.name, str(input_node.range))) - scale, zero = quantize_util.adjust_range(input_node.range[0], - input_node.range[1], - non_zero=False) + scale, zero, minval, maxval = \ + quantize_util.adjust_range(input_node.range[0], + input_node.range[1], + non_zero=False) quantize_info = mace_pb2.QuantizeActivationInfo() - quantize_info.minval = input_node.range[0] - quantize_info.maxval = input_node.range[1] + quantize_info.minval = minval + quantize_info.maxval = maxval quantize_info.scale = scale quantize_info.zero_point = zero self._quantize_activation_info[input_node.name] = quantize_info @@ -2049,3 +2078,32 @@ class Transformer(base_converter.ConverterInterface): arg.name = MaceKeyword.mace_opencl_mem_type arg.i = mace_pb2.GPU_IMAGE if self._option.cl_mem_type == "image"\ else mace_pb2.GPU_BUFFER + + def add_check_nodes(self): + if self._option.check_nodes: + mace_check(len(self._option.check_nodes) == 1, + "Only support one check node now.") + check_node = None + for i in six.moves.range(len(self._model.op)): + if self._model.op[i].name in self._option.check_nodes: + check_node = self._model.op[i] + del self._model.op[i+1:] + break + mace_check(check_node is not None, "check node not found.") + output_name = \ + MaceKeyword.mace_output_node_name + '_' + check_node.name + op_def = self._model.op.add() + op_def.name = self.normalize_op_name(output_name) + op_def.type = MaceOp.Dequantize.name + op_def.input.extend([check_node.output[0]]) + op_def.output.extend([output_name]) + output_shape = op_def.output_shape.add() + output_shape.dims.extend(check_node.output_shape[0].dims) + ConverterUtil.add_data_type_arg(op_def, mace_pb2.DT_UINT8) + op_def.output_type.extend([mace_pb2.DT_FLOAT]) + + del self._model.output_info[:] + output_info = self._model.output_info.add() + output_info.name = check_node.name + output_info.dims.extend(check_node.output_shape[0].dims) + output_info.data_type = mace_pb2.DT_FLOAT diff --git a/mace/python/tools/graph_util.py b/mace/python/tools/graph_util.py index 22edb43a01c710e0cc2a29fb417f778634d61395..8339b7a55b911e75314adbb4de5270c5bce7286b 100644 --- a/mace/python/tools/graph_util.py +++ b/mace/python/tools/graph_util.py @@ -63,7 +63,6 @@ def sort_mace_graph(graph_def, output_name): for node in graph_def.op: nodes_map[node.name] = node sort_mace_node(nodes_map[output_name], nodes_map, ordered_nodes_map) - sorted_graph = mace_pb2.NetDef() - sorted_graph.tensors.extend(graph_def.tensors) - sorted_graph.op.extend([node for node in ordered_nodes_map.values()]) - return sorted_graph + del graph_def.op[:] + graph_def.op.extend([node for node in ordered_nodes_map.values()]) + return graph_def diff --git a/mace/python/tools/model_saver.py b/mace/python/tools/model_saver.py index 56709ef69c4b65b7c45f93fa4587472fad245115..5169982111ffdd19124bdf5a95a3d5f3e1a1d5a4 100644 --- a/mace/python/tools/model_saver.py +++ b/mace/python/tools/model_saver.py @@ -20,6 +20,7 @@ import hashlib from enum import Enum from mace.proto import mace_pb2 +from mace.python.tools.convert_util import mace_check from jinja2 import Environment, FileSystemLoader GENERATED_NAME = set() @@ -107,11 +108,9 @@ class TensorInfo: self.id = id self.data_type = 
tensor.data_type if tensor.data_type == mace_pb2.DT_HALF: - self.data_type = mace_pb2.DT_HALF self.data = bytearray( np.array(tensor.float_data).astype(np.float16).tobytes()) elif tensor.data_type == mace_pb2.DT_FLOAT: - self.data_type = mace_pb2.DT_FLOAT self.data = bytearray( np.array(tensor.float_data).astype(np.float32).tobytes()) elif tensor.data_type == mace_pb2.DT_INT32: @@ -139,7 +138,7 @@ def update_tensor_infos(net_def, runtime, data_type): tensor_info = TensorInfo(counter, tensor) tensor_infos.append(tensor_info) # align - if tensor_info.data_type != 'DT_UINT8' and offset % 4 != 0: + if tensor_info.data_type != mace_pb2.DT_UINT8 and offset % 4 != 0: padding = 4 - offset % 4 offset += padding @@ -162,10 +161,11 @@ def extract_model_data(net_def): for tensor in net_def.tensors: tensor_info = TensorInfo(counter, tensor) # align - if tensor_info.data_type != mace_pb2.DT_UINT8 and offset % 4 != 0: - padding = 4 - offset % 4 - model_data.extend(bytearray([0] * padding)) - offset += padding + mace_check(offset <= tensor.offset, + "Current offset should be <= tensor.offset") + if offset < tensor.offset: + model_data.extend(bytearray([0] * (tensor.offset - offset))) + offset = tensor.offset model_data.extend(tensor_info.data) offset += len(tensor_info.data) counter += 1 diff --git a/mace/python/tools/quantization/quantize_util.py b/mace/python/tools/quantization/quantize_util.py index 8776cdde9952bf0f1049370fde1032933355ca75..346073bde43fae73b07277ee0bc83c89d99c1381 100644 --- a/mace/python/tools/quantization/quantize_util.py +++ b/mace/python/tools/quantization/quantize_util.py @@ -7,6 +7,8 @@ class QuantizedData(object): self._data = None self._scale = 0 self._zero = 0 + self._minval = 0.0 + self._maxval = 0.0 @property def data(self): @@ -20,6 +22,14 @@ class QuantizedData(object): def zero(self): return self._zero + @property + def minval(self): + return self._minval + + @property + def maxval(self): + return self._maxval + @data.setter def data(self, data): self._data = data @@ -32,6 +42,14 @@ class QuantizedData(object): def zero(self, zero): self._zero = zero + @minval.setter + def minval(self, minval): + self._minval = minval + + @maxval.setter + def maxval(self, maxval): + self._maxval = maxval + def adjust_range(in_min, in_max, non_zero): out_max = max(0.0, in_max) @@ -54,7 +72,7 @@ def adjust_range(in_min, in_max, non_zero): else: zero_int = 255 - return scale, zero_int + return scale, zero_int, -zero_int*scale, (255-zero_int)*scale def cal_multiplier_and_shift(scale): @@ -94,13 +112,34 @@ def quantize(data): np_data = np.array(data).astype(float) in_min = np_data.min() in_max = np_data.max() - scale, zero = adjust_range(in_min, in_max, non_zero=True) + scale, zero, out_min, out_max = adjust_range(in_min, in_max, non_zero=True) output = np.clip((np.round(zero + data / scale).astype(int)), 0, 255) quantized_data = QuantizedData() quantized_data.data = output quantized_data.scale = scale quantized_data.zero = zero + quantized_data.minval = out_min + quantized_data.maxval = out_max + return quantized_data + + +def quantize_bias_for_hexagon(data): + np_data = np.array(data).astype(float) + max_val = max(abs(np_data.min()), abs(np_data.max())) + in_min = -max_val + in_max = max_val + scale = (in_max - in_min) / 2**32 + zero = 0 + output = np.clip((np.round(zero + data / scale).astype(long)), + -2**31, 2**31 - 1) + + quantized_data = QuantizedData() + quantized_data.data = output + quantized_data.scale = scale + quantized_data.zero = zero + quantized_data.minval = in_min + 
quantized_data.maxval = in_max return quantized_data diff --git a/tools/converter.py b/tools/converter.py index 28472012c934e13032152071c67f493124ce9b61..395bedc93220ddd675d5988a9c9a823f008bf081 100644 --- a/tools/converter.py +++ b/tools/converter.py @@ -190,6 +190,8 @@ class YAMLKeyword(object): input_ranges = 'input_ranges' output_tensors = 'output_tensors' output_shapes = 'output_shapes' + check_tensors = 'check_tensors' + check_shapes = 'check_shapes' runtime = 'runtime' data_type = 'data_type' input_data_types = 'input_data_types' @@ -460,6 +462,16 @@ def format_model_config(flags): subgraph[key] = [value] subgraph[key] = [str(v) for v in subgraph[key]] + for key in [YAMLKeyword.check_tensors, + YAMLKeyword.check_shapes]: + value = subgraph.get(key, "") + if value != "": + if not isinstance(value, list): + subgraph[key] = [value] + subgraph[key] = [str(v) for v in subgraph[key]] + else: + subgraph[key] = [] + input_data_types = subgraph.get(YAMLKeyword.input_data_types, "") if input_data_types: if not isinstance(input_data_types, list): @@ -787,10 +799,13 @@ def convert_model(configs, cl_mem_type): model_config[YAMLKeyword.weight_sha256_checksum], ",".join(subgraphs[0][YAMLKeyword.input_tensors]), ",".join(subgraphs[0][YAMLKeyword.output_tensors]), + ",".join(subgraphs[0][YAMLKeyword.check_tensors]), runtime, model_name, ":".join(subgraphs[0][YAMLKeyword.input_shapes]), ":".join(subgraphs[0][YAMLKeyword.input_ranges]), + ":".join(subgraphs[0][YAMLKeyword.output_shapes]), + ":".join(subgraphs[0][YAMLKeyword.check_shapes]), model_config[YAMLKeyword.nnlib_graph_mode], embed_model_data, model_config[YAMLKeyword.winograd], @@ -1216,6 +1231,12 @@ def run_specific_target(flags, configs, target_abi, for runtime in runtime_list: device_type = parse_device_type(runtime) # run for specified soc + if not subgraphs[0][YAMLKeyword.check_tensors]: + output_nodes = subgraphs[0][YAMLKeyword.output_tensors] + output_shapes = subgraphs[0][YAMLKeyword.output_shapes] + else: + output_nodes = subgraphs[0][YAMLKeyword.check_tensors] + output_shapes = subgraphs[0][YAMLKeyword.check_shapes] run_output = sh_commands.tuning_run( abi=target_abi, serialno=serial_num, @@ -1225,9 +1246,9 @@ def run_specific_target(flags, configs, target_abi, embed_model_data=embed_model_data, model_output_dir=model_output_dir, input_nodes=subgraphs[0][YAMLKeyword.input_tensors], - output_nodes=subgraphs[0][YAMLKeyword.output_tensors], + output_nodes=output_nodes, input_shapes=subgraphs[0][YAMLKeyword.input_shapes], - output_shapes=subgraphs[0][YAMLKeyword.output_shapes], + output_shapes=output_shapes, mace_model_dir=mace_model_dir, model_tag=model_name, device_type=device_type, @@ -1261,7 +1282,8 @@ def run_specific_target(flags, configs, target_abi, model_config[YAMLKeyword.weight_sha256_checksum]) validate_type = device_type - if model_config[YAMLKeyword.quantize] == 1: + if model_config[YAMLKeyword.quantize] == 1 \ + and device_type == DeviceType.CPU: validate_type = device_type + "_QUANTIZE" sh_commands.validate_model( @@ -1272,9 +1294,9 @@ def run_specific_target(flags, configs, target_abi, platform=model_config[YAMLKeyword.platform], device_type=device_type, input_nodes=subgraphs[0][YAMLKeyword.input_tensors], - output_nodes=subgraphs[0][YAMLKeyword.output_tensors], + output_nodes=output_nodes, input_shapes=subgraphs[0][YAMLKeyword.input_shapes], - output_shapes=subgraphs[0][YAMLKeyword.output_shapes], + output_shapes=output_shapes, model_output_dir=model_output_dir, phone_data_dir=PHONE_DATA_DIR, 
input_data_types=subgraphs[0][YAMLKeyword.input_data_types], # noqa diff --git a/tools/sh_commands.py b/tools/sh_commands.py index e9c699b9484e76b9883ba029229f8c5da6d37563..b920d43fbe85fc85bdd23831173e31e1e1e282b1 100644 --- a/tools/sh_commands.py +++ b/tools/sh_commands.py @@ -551,10 +551,13 @@ def gen_model_code(model_codegen_dir, weight_sha256_checksum, input_nodes, output_nodes, + check_nodes, runtime, model_tag, input_shapes, input_ranges, + output_shapes, + check_shapes, dsp_mode, embed_model_data, winograd, @@ -581,11 +584,14 @@ def gen_model_code(model_codegen_dir, "--weight_checksum=%s" % weight_sha256_checksum, "--input_node=%s" % input_nodes, "--output_node=%s" % output_nodes, + "--check_node=%s" % check_nodes, "--runtime=%s" % runtime, "--template=%s" % "mace/python/tools", "--model_tag=%s" % model_tag, "--input_shape=%s" % input_shapes, "--input_range=%s" % input_ranges, + "--output_shape=%s" % output_shapes, + "--check_shape=%s" % check_shapes, "--dsp_mode=%s" % dsp_mode, "--embed_model_data=%s" % embed_model_data, "--winograd=%s" % winograd,
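
Note (not part of the patch): the quantize_util.py hunks above change the adjust_range() contract from returning (scale, zero) to returning (scale, zero, minval, maxval), where minval/maxval are the adjusted float range that maps exactly onto the quantized endpoints 0 and 255, and add quantize_bias_for_hexagon() for 32-bit symmetric bias quantization. A minimal standalone sketch of both, assuming only numpy; the helper names here are illustrative and are not defined by the patch (np.int64 stands in for the patch's Python 2 long):

import numpy as np

def adjusted_float_range(scale, zero):
    # Mirrors the new tail of adjust_range():
    #   return scale, zero_int, -zero_int * scale, (255 - zero_int) * scale
    # so that dequantize(0) == minval and dequantize(255) == maxval.
    return -zero * scale, (255 - zero) * scale

def quantize_bias_for_hexagon_sketch(data):
    # Same math as the new quantize_bias_for_hexagon(): a symmetric
    # range around zero, the scale spread over the full 2**32 span,
    # zero point pinned at 0, output clipped to the int32 range.
    # (As in the patch, an all-zero bias would yield scale == 0.)
    np_data = np.asarray(data, dtype=float)
    max_val = max(abs(np_data.min()), abs(np_data.max()))
    scale = (2.0 * max_val) / 2 ** 32
    quantized = np.clip(np.round(np_data / scale), -2 ** 31, 2 ** 31 - 1)
    return quantized.astype(np.int64), scale

For example, a bias of [-3.0, 0.5, 7.25] gives max_val = 7.25 and scale = 14.5 / 2**32; each quantized value q dequantizes back as q * scale, and all values stay well inside the int32 range.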
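
Note (not part of the patch): in the new hexagon_converter.py, add_min_max_const_node() encodes the nn_graph convention that every quantized tensor is accompanied by its float range. For activations it reuses output ports 1 and 2 of the producing node; for constant tensors it materializes dedicated single-element DT_FLOAT tensors. A sketch of the name derivation only, under those assumptions (the tensor-name parse mirrors get_op_and_port_from_tensor() in the patch; the function itself is illustrative):

def min_max_tensor_names(tensor_name, is_activation, diff_port=True):
    # 'conv1:0' -> ('conv1:1', 'conv1:2') for activation tensors,
    # ('conv1_min:0', 'conv1_max:0') for constant tensors.
    op = tensor_name.split(':')[0] if ':' in tensor_name else tensor_name
    if is_activation and diff_port:
        return op + ':1', op + ':2'
    return op + '_min:0', op + '_max:0'

This matches why add_min_max_const_node() asserts port == 0 before appending the range inputs: the derived names only make sense relative to the tensor's primary output port.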