From a68dc2f3990a7829844c058a2ee3adc1ae205f7c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9D=8E=E5=AF=85?=
Date: Mon, 23 Jul 2018 18:10:49 +0800
Subject: [PATCH] Fix TensorFlow DSP converter

---
 docs/user_guide/basic_usage.rst               |  21 ++
 mace/python/tools/BUILD                       |   2 +-
 mace/python/tools/converter.py                |  62 +++--
 .../tf_dsp_converter.py}                      | 220 ++++++++++++------
 mace/python/tools/dsp_ops.py                  |  78 -------
 5 files changed, 202 insertions(+), 181 deletions(-)
 rename mace/python/tools/{tf_dsp_converter_lib.py => converter_tool/tf_dsp_converter.py} (76%)
 delete mode 100644 mace/python/tools/dsp_ops.py

diff --git a/docs/user_guide/basic_usage.rst b/docs/user_guide/basic_usage.rst
index 487aa33a..fac53270 100644
--- a/docs/user_guide/basic_usage.rst
+++ b/docs/user_guide/basic_usage.rst
@@ -122,6 +122,27 @@ MACE now supports models from TensorFlow and Caffe (more frameworks will be supp
                 strip_unused_nodes
                 sort_by_execution_order'
 
+    Usage for DSP:
+
+    .. code:: bash
+
+        # DSP:
+        ./transform_graph \
+            --in_graph=/path/to/your/tf_model.pb \
+            --out_graph=/path/to/your/output/tf_model_opt.pb \
+            --inputs='input node name' \
+            --outputs='output node name' \
+            --transforms='strip_unused_nodes(type=float, shape="1,64,64,3")
+                remove_nodes(op=Identity, op=CheckNumerics)
+                fold_constants(ignore_errors=true)
+                fold_batch_norms
+                fold_old_batch_norms
+                backport_concatv2
+                quantize_weights(minimum_size=2)
+                quantize_nodes
+                strip_unused_nodes
+                sort_by_execution_order'
+
 - Caffe
 
     Caffe 1.0+ models are supported in MACE converter tool.
diff --git a/mace/python/tools/BUILD b/mace/python/tools/BUILD
index f7ccc9dc..acc717d2 100644
--- a/mace/python/tools/BUILD
+++ b/mace/python/tools/BUILD
@@ -3,7 +3,7 @@ py_library(
     srcs = [
         "convert_util.py",
         "graph_util.py",
-        "tf_dsp_converter_lib.py",
+        "converter_tool/tf_dsp_converter.py",
         "converter_tool/base_converter.py",
         "converter_tool/shape_inference.py",
         "converter_tool/tensorflow_converter.py",
diff --git a/mace/python/tools/converter.py b/mace/python/tools/converter.py
index d023358a..2a069c1e 100644
--- a/mace/python/tools/converter.py
+++ b/mace/python/tools/converter.py
@@ -96,39 +96,38 @@ def main(unused_args):
         print ("runtime %s is not supported." % FLAGS.runtime)
         sys.exit(-1)
 
+    if FLAGS.graph_optimize_options:
+        option = cvt.ConverterOption(
+            FLAGS.graph_optimize_options.split(','))
+    else:
+        option = cvt.ConverterOption()
+    option.winograd = FLAGS.winograd
+
+    input_node_names = FLAGS.input_node.split(',')
+    input_node_shapes = FLAGS.input_shape.split(':')
+    if len(input_node_names) != len(input_node_shapes):
+        raise Exception('input node count and shape count do not match.')
+    for i in xrange(len(input_node_names)):
+        input_node = cvt.NodeInfo()
+        input_node.name = input_node_names[i]
+        input_node.shape = parse_int_array_from_str(input_node_shapes[i])
+        option.add_input_node(input_node)
+
+    output_node_names = FLAGS.output_node.split(',')
+    for i in xrange(len(output_node_names)):
+        output_node = cvt.NodeInfo()
+        output_node.name = output_node_names[i]
+        option.add_output_node(output_node)
+
+    print("Transform model to one that can better run on device")
     if FLAGS.runtime == 'dsp':
-        if FLAGS.platform == 'tensorflow':
-            from mace.python.tools import tf_dsp_converter_lib
-            output_graph_def = tf_dsp_converter_lib.convert_to_mace_pb(
-                FLAGS.model_file, FLAGS.input_node, FLAGS.output_node,
-                FLAGS.dsp_mode)
-        else:
-            print("%s does not support dsp runtime yet." 
% FLAGS.platform)
-            sys.exit(-1)
+        mace_check(FLAGS.platform == 'tensorflow',
+                   'DSP only supports tensorflow')
+        from mace.python.tools.converter_tool import tf_dsp_converter
+        converter = tf_dsp_converter.TensorflowDspConverter(
+            option, FLAGS.model_file)
+        output_graph_def = converter.run()
     else:
-        if FLAGS.graph_optimize_options:
-            option = cvt.ConverterOption(
-                FLAGS.graph_optimize_options.split(','))
-        else:
-            option = cvt.ConverterOption()
-        option.winograd = FLAGS.winograd
-
-        input_node_names = FLAGS.input_node.split(',')
-        input_node_shapes = FLAGS.input_shape.split(':')
-        if len(input_node_names) != len(input_node_shapes):
-            raise Exception('input node count and shape count do not match.')
-        for i in xrange(len(input_node_names)):
-            input_node = cvt.NodeInfo()
-            input_node.name = input_node_names[i]
-            input_node.shape = parse_int_array_from_str(input_node_shapes[i])
-            option.add_input_node(input_node)
-
-        output_node_names = FLAGS.output_node.split(',')
-        for i in xrange(len(output_node_names)):
-            output_node = cvt.NodeInfo()
-            output_node.name = output_node_names[i]
-            option.add_output_node(output_node)
-
         if FLAGS.platform == 'tensorflow':
             from mace.python.tools.converter_tool import tensorflow_converter
             converter = tensorflow_converter.TensorflowConverter(
@@ -144,7 +143,6 @@ def main(unused_args):
 
         output_graph_def = converter.run()
 
-    print("Transform model to one that can better run on device")
     if FLAGS.runtime == 'cpu+gpu':
         cpu_graph_def = copy.deepcopy(output_graph_def)
 
diff --git a/mace/python/tools/tf_dsp_converter_lib.py b/mace/python/tools/converter_tool/tf_dsp_converter.py
similarity index 76%
rename from mace/python/tools/tf_dsp_converter_lib.py
rename to mace/python/tools/converter_tool/tf_dsp_converter.py
index 30236d5c..a5a1749e 100644
--- a/mace/python/tools/tf_dsp_converter_lib.py
+++ b/mace/python/tools/converter_tool/tf_dsp_converter.py
@@ -14,16 +14,80 @@
 from mace.proto import mace_pb2
+from mace.python.tools.converter_tool import base_converter
+from mace.python.tools import graph_util
+from mace.python.tools.convert_util import mace_check
+
 import tensorflow as tf
-from tensorflow import gfile
+from tensorflow.core.framework import tensor_shape_pb2
 from operator import mul
-from dsp_ops import DspOps
-from mace.python.tools import graph_util
+import numpy as np
+
+
+class DspOps(object):
+    def __init__(self):
+        self.dsp_ops = {
+            'INPUT': 'INPUT',
+            'OUTPUT': 'OUTPUT',
+            'NoOp': 'Nop',
+            'FLATTEN': 'Flatten',
+            'Identity': 'Nop',
+            'Placeholder': 'INPUT',
+            'Const': 'Const',
+            'QuantizedConv2D': 'QuantizedConv2d_8x8to32',
+            'QuantizedMatMul': 'QuantizedMatMul_8x8to32',
+            'QuantizeDownAndShrinkRange': 'QuantizeDownAndShrinkRange_32to8',
+            'QuantizedRelu': 'QuantizedRelu_8',
+            'QuantizedReluX': 'QuantizedReluX_8',
+            'QuantizedMaxPool': 'QuantizedMaxPool_8',
+            'QuantizedAvgPool': 'QuantizedAvgPool_8',
+            'QuantizedConcat': 'QuantizedConcat_8',
+            'QuantizedBiasAdd': 'QuantizedBiasAdd_8p8to32',
+            'QuantizedResizeBilinear': 'QuantizedResizeBilinear_8',
+            'QuantizedSpaceToBatchND': 'QuantizedSpaceToBatchND_8',
+            'QuantizedBatchToSpaceND': 'QuantizedBatchToSpaceND_8',
+            'QuantizedSoftmax': 'QuantizedSoftmax_8',
+            'QuantizedTanh': 'QuantizedTanh_8',
+            'Min': 'Min_f',
+            'Max': 'Max_f',
+            'QuantizeV2': 'Quantize',
+            'Dequantize': 'Dequantize',
+            'Softmax': 'Softmax_f',
+            'Reshape': 'Reshape',
+            'QuantizedReshape': 'QuantizedReshape',
+            'Sigmoid': 'Sigmoid_f',
+            'Slice': 'Slice_f',
+            'Add': 'Add_f',
+            'Mul': 'Mul_f',
+            'Requantize': 'Requantize_32to8', 
'RequantizationRange': 'RequantizationRange_32', + 'Sub': 'Sub_f', + 'Pack': 'Pack_int32', + 'StridedSlice': 'StridedSlice_f', + 'ExpandDims': 'ExpandDims_f', + 'QuantizedMul': 'QuantizedMul_8x8to32', + 'QuantizedAdd': 'QuantizedAdd_8p8to32', + 'Pad': 'Pad_f', + 'SpaceToBatchND': 'SpaceToBatchND_f', + 'BatchToSpaceND': 'BatchToSpaceND_f', + 'ResizeBilinear': 'ResizeBilinear_f', + 'ConcatV2': 'ConcatV2_f', + 'Conv2DBackpropInput': 'Deconv_f', + 'Tanh': 'Tanh_f', + 'Split': 'Split_f', + 'Transpose': 'Transpose_f', + 'Concat': 'Concat_f', + 'AddN': 'AddN_f', + } + + def has_op(self, tf_op): + return tf_op in self.dsp_ops + + def map_nn_op(self, tf_op): + if tf_op not in self.dsp_ops: + raise Exception('Could not map nn op for: ', tf_op) + return self.dsp_ops[tf_op] -# converter --input ../libcv/quantized_model.pb \ -# --output quantized_model_dsp.pb \ -# --runtime dsp --input_node input_node \ -# --output_node output_node TF_DTYPE_2_MACE_DTYPE_MAP = { tf.float32: mace_pb2.DT_FLOAT, @@ -101,7 +165,6 @@ def get_input_tensor(op, index): def add_shape_const_node(net_def, op, values, name): - print('Add const node: ', op.name + '/' + name) tensor = net_def.tensors.add() node_name = op.name + '/' + name tensor.name = node_name + ':0' @@ -128,7 +191,7 @@ def convert_op_outputs(mace_op_def, tf_op): mace_op_def.output_shape.extend(output_shapes) -def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops): +def convert_ops(unresolved_ops, resolved_ops, net_def, dsp_ops): first_op = unresolved_ops[0] print('Op: ', first_op.name, first_op.type, first_op.outputs[0].shape) @@ -152,7 +215,8 @@ def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops): first_op.outputs[0].dtype == tf.quint8 or \ first_op.outputs[0].dtype == tf.quint16: tensor.int32_data.extend(tf_tensor.astype(int).flat) - + elif first_op.type == 'Shape': + resolved_ops.add(first_op.name) else: op_def = net_def.op.add() op_def.name = first_op.name @@ -162,7 +226,7 @@ def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops): if len(first_op.outputs) > 0 and first_op.type == 'Dequantize' \ and len(first_op.outputs[0].consumers()) > 0 \ and (first_op.outputs[0].consumers()[0].type == 'SpaceToBatchND' or - first_op.outputs[0].consumers()[0].type == 'BatchToSpaceND'): + first_op.outputs[0].consumers()[0].type == 'BatchToSpaceND'): # noqa input_tensor = first_op.inputs[0] min_tensor = first_op.inputs[1] max_tensor = first_op.inputs[2] @@ -183,14 +247,12 @@ def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops): op_def.input.extend([t.name for t in s2b_op.inputs[1:]]) op_def.input.extend([min_tensor.name, max_tensor.name]) convert_op_outputs(op_def, quantize_op) - elif len(first_op.outputs) > 0 and \ - first_op.type == 'QuantizedReshape' and \ - len(first_op.outputs[0].consumers()) > 0 and \ - first_op.outputs[0].consumers()[0].type == 'Dequantize' and \ - len(first_op.outputs[0].consumers()[0].outputs[0].consumers()) \ - > 0 and \ - first_op.outputs[0].consumers()[0].outputs[0].consumers()[0].type \ - == 'Softmax': + elif (len(first_op.outputs) > 0 and + first_op.type == 'QuantizedReshape' and + len(first_op.outputs[0].consumers()) > 0 and + first_op.outputs[0].consumers()[0].type == 'Dequantize' and + len(first_op.outputs[0].consumers()[0].outputs[0].consumers()) > 0 and # noqa + first_op.outputs[0].consumers()[0].outputs[0].consumers()[0].type == 'Softmax'): # noqa input_tensor = first_op.inputs[0] min_tensor = first_op.inputs[2] max_tensor = first_op.inputs[3] @@ 
-216,17 +278,17 @@ def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops): [input_tensor.name, min_tensor.name, max_tensor.name]) convert_op_outputs(op_def, quantize_reshape_op) # remove Squeeze - elif len(first_op.outputs) > 0 and \ - first_op.type == 'Requantize' and \ - len(first_op.outputs[0].consumers()) > 0 and \ - first_op.outputs[0].consumers()[0].type == 'Dequantize' and \ - len(first_op.outputs[0].consumers()[0].outputs[0].consumers()) \ - > 0 and \ - first_op.outputs[0].consumers()[0].outputs[0].consumers()[0].type \ - == 'Squeeze': + elif (len(first_op.outputs) > 0 and + first_op.type == 'Requantize' and + len(first_op.outputs[0].consumers()) > 0 and + first_op.outputs[0].consumers()[0].type == 'Dequantize' and + len(first_op.outputs[0].consumers()[0].outputs[0].consumers()) > 0 and # noqa + first_op.outputs[0].consumers()[0].outputs[0].consumers()[0].type == 'Squeeze'): # noqa dequantize_op = first_op.outputs[0].consumers()[0] squeeze_op = dequantize_op.outputs[0].consumers()[0] reshape_op = squeeze_op.outputs[0].consumers()[0] + if reshape_op.type == 'Shape': + reshape_op = squeeze_op.outputs[0].consumers()[1] min_op = reshape_op.outputs[0].consumers()[0] max_op = reshape_op.outputs[0].consumers()[1] quantize_op = min_op.outputs[0].consumers()[0] @@ -249,7 +311,7 @@ def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops): if next_op and len(next_op.outputs) > 0 and \ next_op.type == 'QuantizedReshape' and \ len(next_op.outputs[0].consumers()) > 0 else None - softmax_op = dequantize_op.outputs[0].consumers()[0]\ + softmax_op = dequantize_op.outputs[0].consumers()[0] \ if dequantize_op and len(dequantize_op.outputs) > 0 and \ dequantize_op.type == 'Dequantize' and \ len(dequantize_op.outputs[0].consumers()) > 0 else None @@ -280,7 +342,7 @@ def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops): convert_op_outputs(softmax_op_def, quantize_reshape_op) elif len(first_op.outputs) > 0 and first_op.type == 'Dequantize' and \ - len(first_op.outputs[0].consumers()) > 0 and \ + len(first_op.outputs[0].consumers()) > 0 and \ first_op.outputs[0].consumers()[0].type == 'Tanh': input_tensor = first_op.inputs[0] min_tensor = first_op.inputs[1] @@ -446,12 +508,10 @@ def reverse_batch_to_space_and_biasadd(net_def): new_follow_op.CopyFrom(follow_op) for i in xrange(len(follow_op.input)): for k in xrange(3): - if new_follow_op.input[ - i] == get_tensor_name_from_op( - biasadd_requantize_op.name, k): - new_follow_op.input[ - i] = get_tensor_name_from_op( - b2s_op.name, k) + if new_follow_op.input[i] == get_tensor_name_from_op( # noqa + biasadd_requantize_op.name, k): + new_follow_op.input[i] = get_tensor_name_from_op( # noqa + b2s_op.name, k) new_ops.append(new_follow_op) skip_ops.add(follow_op.name) visited_ops.add(follow_op.name) @@ -518,7 +578,7 @@ def add_input_output_info(net_def, input_node, output_node, graph, dtype): return net_def -def fuse_quantize(net_def, input_node, output_node): +def fuse_quantize(net_def): tensor_map = {} for tensor in net_def.tensors: tensor_map[tensor.name] = tensor @@ -567,51 +627,71 @@ def fuse_quantize(net_def, input_node, output_node): return new_net_def -def convert_to_mace_pb(model_file, input_node, output_node, dsp_mode): - """ - nnlib does not have batch norm, so use tensorflow optimizer to fold - batch norm with convolution. The fold optimization reorders ops, so - we sort ops first by topology. 
- """ - input_graph_def = tf.GraphDef() - with gfile.Open(model_file, "rb") as f: - data = f.read() - input_graph_def.ParseFromString(data) - - input_graph_def = graph_util.sort_tf_graph(input_graph_def) - net_def = mace_pb2.NetDef() - - with tf.Session() as session: - with session.graph.as_default() as graph: - tf.import_graph_def(input_graph_def, name="") - ops = graph.get_operations() - dsp_ops = DspOps() - resolved_ops = set() - # convert const node - unresolved_ops = [op for op in ops if op.type == 'Const'] +class TensorflowDspConverter(base_converter.ConverterInterface): + def __init__(self, option, src_model_file): + self._option = option + self._mace_net_def = mace_pb2.NetDef() + + # import tensorflow graph + tf_graph_def = tf.GraphDef() + with tf.gfile.Open(src_model_file, 'rb') as f: + tf_graph_def.ParseFromString(f.read()) + + self._placeholders = {} + self.add_shape_info(tf_graph_def) + + with tf.Session() as session: + with session.graph.as_default() as graph: + tf.import_graph_def(tf_graph_def, name='') + self._tf_graph = graph + + def run(self): + ops = self._tf_graph.get_operations() + dsp_ops = DspOps() + resolved_ops = set() + + mace_check(len(self._option.input_nodes) == 1 + and len(self._option.output_nodes) == 1, + 'dsp only support single input and output') + input_node = self._option.input_nodes.values()[0].name + output_node = self._option.output_nodes.values()[0].name + + # convert const node + unresolved_ops = [op for op in ops if op.type == 'Const'] + with tf.Session() as session: while len(unresolved_ops) > 0: - convert_ops(unresolved_ops, resolved_ops, net_def, output_node, + convert_ops(unresolved_ops, resolved_ops, self._mace_net_def, dsp_ops) # convert op node unresolved_ops = [op for op in ops if op.type != 'Const'] while len(unresolved_ops) > 0: - convert_ops(unresolved_ops, resolved_ops, net_def, output_node, + convert_ops(unresolved_ops, resolved_ops, self._mace_net_def, dsp_ops) - add_output_node(net_def, output_node) - net_def = reverse_batch_to_space_and_biasadd(net_def) - net_def = fuse_quantize(net_def, input_node, output_node) + add_output_node(self._mace_net_def, output_node) + net_def = reverse_batch_to_space_and_biasadd(self._mace_net_def) + net_def = fuse_quantize(net_def) sorted_net_def = graph_util.sort_mace_graph(net_def, '__output__') net_def_with_node_id = add_node_id(sorted_net_def) dtype = mace_pb2.DT_FLOAT final_net_def = add_input_output_info( - net_def_with_node_id, input_node, output_node, graph, dtype) - - arg = final_net_def.arg.add() - arg.name = 'dsp_mode' - arg.i = dsp_mode - - return final_net_def + net_def_with_node_id, input_node, output_node, + self._tf_graph, dtype) + + return final_net_def + + def add_shape_info(self, tf_graph_def): + for node in tf_graph_def.node: + for input_node in self._option.input_nodes.values(): + if node.name == input_node.name or \ + node.name + ':0' == input_node.name: + del node.attr['shape'].shape.dim[:] + node.attr['shape'].shape.dim.extend([ + tensor_shape_pb2.TensorShapeProto.Dim(size=i) for i in + input_node.shape + ]) + self._placeholders[node.name + ':0'] = \ + np.zeros(shape=input_node.shape, dtype=float) diff --git a/mace/python/tools/dsp_ops.py b/mace/python/tools/dsp_ops.py deleted file mode 100644 index f5bd7b70..00000000 --- a/mace/python/tools/dsp_ops.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright 2018 Xiaomi, Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -class DspOps(object): - def __init__(self): - self.dsp_ops = { - 'INPUT': 'INPUT"', - 'OUTPUT': 'OUTPUT', - 'NoOp': 'Nop', - 'FLATTEN': 'Flatten', - 'Identity': 'Nop', - 'Placeholder': 'INPUT', - 'Const': 'Const', - 'QuantizedConv2D': 'QuantizedConv2d_8x8to32', - 'QuantizedMatMul': 'QuantizedMatMul_8x8to32', - 'QuantizeDownAndShrinkRange': 'QuantizeDownAndShrinkRange_32to8', - 'QuantizedRelu': 'QuantizedRelu_8', - 'QuantizedReluX': 'QuantizedReluX_8', - 'QuantizedMaxPool': 'QuantizedMaxPool_8', - 'QuantizedAvgPool': 'QuantizedAvgPool_8', - 'QuantizedConcat': 'QuantizedConcat_8', - 'QuantizedBiasAdd': 'QuantizedBiasAdd_8p8to32', - 'QuantizedResizeBilinear': 'QuantizedResizeBilinear_8', - 'QuantizedSpaceToBatchND': 'QuantizedSpaceToBatchND_8', - 'QuantizedBatchToSpaceND': 'QuantizedBatchToSpaceND_8', - 'QuantizedSoftmax': 'QuantizedSoftmax_8', - 'QuantizedTanh': 'QuantizedTanh_8', - 'Min': 'Min_f', - 'Max': 'Max_f', - 'QuantizeV2': 'Quantize', - 'Dequantize': 'Dequantize', - 'Softmax': 'Softmax_f', - 'Reshape': 'Reshape', - 'QuantizedReshape': 'QuantizedReshape', - 'Sigmoid': 'Sigmoid_f', - 'Slice': 'Slice_f', - 'Add': 'Add_f', - 'Mul': 'Mul_f', - 'Requantize': 'Requantize_32to8', - 'RequantizationRange': 'RequantizationRange_32', - 'Sub': 'Sub_f', - 'Pack': 'Pack_int32', - 'StridedSlice': 'StridedSlice_f', - 'ExpandDims': 'ExpandDims_f', - 'QuantizedMul': 'QuantizedMul_8x8to32', - 'QuantizedAdd': 'QuantizedAdd_8p8to32', - 'Pad': 'Pad_f', - 'SpaceToBatchND': 'SpaceToBatchND_f', - 'BatchToSpaceND': 'BatchToSpaceND_f', - 'ResizeBilinear': 'ResizeBilinear_f', - 'ConcatV2': 'ConcatV2_f', - 'Conv2DBackpropInput': 'Deconv_f', - 'Tanh': 'Tanh_f', - 'Split': 'Split_f', - 'Transpose': 'Transpose_f', - 'Concat': 'Concat_f', - 'AddN': 'AddN_f', - } - - def has_op(self, tf_op): - return tf_op in self.dsp_ops - - def map_nn_op(self, tf_op): - if tf_op not in self.dsp_ops: - raise Exception('Could not map nn op for: ', tf_op) - return self.dsp_ops[tf_op] -- GitLab
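
Note on usage: with this change the DSP path shares the ConverterOption
plumbing with CPU/GPU, and the old --dsp_mode argument is gone. Below is a
minimal sketch of invoking the refactored converter. The flag names come
from the FLAGS references in converter.py above; the script entry point and
the --output flag are assumptions not confirmed by this patch, so adjust
them to your build.

    # Hypothetical invocation; --platform, --runtime, --model_file,
    # --input_node, --input_shape and --output_node all appear in
    # converter.py above. The script path and --output flag are assumed.
    python mace/python/tools/converter.py \
        --platform=tensorflow \
        --runtime=dsp \
        --model_file=/path/to/tf_model_opt.pb \
        --input_node=input_node \
        --input_shape=1,64,64,3 \
        --output_node=output_node \
        --output=/path/to/quantized_model_dsp.pb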
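The same path can also be driven programmatically. A minimal sketch follows,
assuming base_converter exposes ConverterOption and NodeInfo exactly as
converter.py uses them above (Python 2, matching the xrange usage in this
patch); the model path and node names are placeholders.

    # Hypothetical driver for the new TensorflowDspConverter class.
    from mace.python.tools.converter_tool import base_converter as cvt
    from mace.python.tools.converter_tool import tf_dsp_converter

    option = cvt.ConverterOption()

    input_node = cvt.NodeInfo()
    input_node.name = 'input_node'
    input_node.shape = [1, 64, 64, 3]  # NHWC, as in the docs example above
    option.add_input_node(input_node)

    output_node = cvt.NodeInfo()
    output_node.name = 'output_node'
    option.add_output_node(output_node)

    # The converter enforces exactly one input and one output (see the
    # mace_check in TensorflowDspConverter.run above).
    converter = tf_dsp_converter.TensorflowDspConverter(
        option, '/path/to/tf_model_opt.pb')
    net_def = converter.run()  # returns a mace_pb2.NetDef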