Commit c2b06975 authored by 李滨

Merge branch 'master' into 'master'

Fix tensorflow dsp converter

See merge request !679
......@@ -122,6 +122,28 @@ MACE now supports models from TensorFlow and Caffe (more frameworks will be supported).
strip_unused_nodes
sort_by_execution_order'
Usage for DSP:

.. code:: bash

    # DSP:
    ./transform_graph \
        --in_graph=/path/to/your/tf_model.pb \
        --out_graph=/path/to/your/output/tf_model_opt.pb \
        --inputs='input node name' \
        --outputs='output node name' \
        --transforms='strip_unused_nodes(type=float, shape="1,64,64,3")
            remove_nodes(op=Identity, op=CheckNumerics)
            fold_constants(ignore_errors=true)
            fold_batch_norms
            fold_old_batch_norms
            backport_concatv2
            quantize_weights(minimum_size=2)
            quantize_nodes
            strip_unused_nodes
            sort_by_execution_order'
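The same optimization pipeline can also be driven from Python through TensorFlow's graph_transforms wrapper, which avoids building the transform_graph binary. A minimal sketch assuming TensorFlow 1.x; the paths and node names are placeholders:

.. code:: python

    # Same DSP transform pipeline via the TF 1.x Python wrapper;
    # paths and node names below are placeholders.
    import tensorflow as tf
    from tensorflow.tools.graph_transforms import TransformGraph

    graph_def = tf.GraphDef()
    with tf.gfile.Open('/path/to/your/tf_model.pb', 'rb') as f:
        graph_def.ParseFromString(f.read())

    transforms = [
        'strip_unused_nodes(type=float, shape="1,64,64,3")',
        'remove_nodes(op=Identity, op=CheckNumerics)',
        'fold_constants(ignore_errors=true)',
        'fold_batch_norms',
        'fold_old_batch_norms',
        'backport_concatv2',
        'quantize_weights(minimum_size=2)',
        'quantize_nodes',
        'strip_unused_nodes',
        'sort_by_execution_order',
    ]
    output_graph_def = TransformGraph(
        graph_def, ['input node name'], ['output node name'], transforms)
    with tf.gfile.Open('/path/to/your/output/tf_model_opt.pb', 'wb') as f:
        f.write(output_graph_def.SerializeToString())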
- Caffe
Caffe 1.0+ models are supported by the MACE converter tool.
......
......@@ -3,7 +3,7 @@ py_library(
srcs = [
"convert_util.py",
"graph_util.py",
"tf_dsp_converter_lib.py",
"converter_tool/tf_dsp_converter.py",
"converter_tool/base_converter.py",
"converter_tool/shape_inference.py",
"converter_tool/tensorflow_converter.py",
......
......@@ -96,39 +96,38 @@ def main(unused_args):
print ("runtime %s is not supported." % FLAGS.runtime)
sys.exit(-1)
if FLAGS.graph_optimize_options:
option = cvt.ConverterOption(
FLAGS.graph_optimize_options.split(','))
else:
option = cvt.ConverterOption()
option.winograd = FLAGS.winograd
input_node_names = FLAGS.input_node.split(',')
input_node_shapes = FLAGS.input_shape.split(':')
if len(input_node_names) != len(input_node_shapes):
raise Exception('input node count and shape count do not match.')
for i in xrange(len(input_node_names)):
input_node = cvt.NodeInfo()
input_node.name = input_node_names[i]
input_node.shape = parse_int_array_from_str(input_node_shapes[i])
option.add_input_node(input_node)
output_node_names = FLAGS.output_node.split(',')
for i in xrange(len(output_node_names)):
output_node = cvt.NodeInfo()
output_node.name = output_node_names[i]
option.add_output_node(output_node)
print("Transform model to one that can better run on device")
if FLAGS.runtime == 'dsp':
if FLAGS.platform == 'tensorflow':
from mace.python.tools import tf_dsp_converter_lib
output_graph_def = tf_dsp_converter_lib.convert_to_mace_pb(
FLAGS.model_file, FLAGS.input_node, FLAGS.output_node,
FLAGS.dsp_mode)
else:
print("%s does not support dsp runtime yet." % FLAGS.platform)
sys.exit(-1)
mace_check(FLAGS.platform == 'tensorflow',
'DSP only supports tensorflow')
from mace.python.tools.converter_tool import tf_dsp_converter
converter = tf_dsp_converter.TensorflowDspConverter(
option, FLAGS.model_file)
output_graph_def = converter.run()
else:
if FLAGS.graph_optimize_options:
option = cvt.ConverterOption(
FLAGS.graph_optimize_options.split(','))
else:
option = cvt.ConverterOption()
option.winograd = FLAGS.winograd
input_node_names = FLAGS.input_node.split(',')
input_node_shapes = FLAGS.input_shape.split(':')
if len(input_node_names) != len(input_node_shapes):
raise Exception('input node count and shape count do not match.')
for i in xrange(len(input_node_names)):
input_node = cvt.NodeInfo()
input_node.name = input_node_names[i]
input_node.shape = parse_int_array_from_str(input_node_shapes[i])
option.add_input_node(input_node)
output_node_names = FLAGS.output_node.split(',')
for i in xrange(len(output_node_names)):
output_node = cvt.NodeInfo()
output_node.name = output_node_names[i]
option.add_output_node(output_node)
if FLAGS.platform == 'tensorflow':
from mace.python.tools.converter_tool import tensorflow_converter
converter = tensorflow_converter.TensorflowConverter(
......@@ -144,7 +143,6 @@ def main(unused_args):
output_graph_def = converter.run()
print("Transform model to one that can better run on device")
if FLAGS.runtime == 'cpu+gpu':
cpu_graph_def = copy.deepcopy(output_graph_def)
......
......@@ -14,16 +14,80 @@
from mace.proto import mace_pb2
from mace.python.tools.converter_tool import base_converter
from mace.python.tools import graph_util
from mace.python.tools.convert_util import mace_check
import tensorflow as tf
from tensorflow import gfile
from tensorflow.core.framework import tensor_shape_pb2
from operator import mul
from dsp_ops import DspOps
from mace.python.tools import graph_util
import numpy as np
class DspOps(object):
def __init__(self):
self.dsp_ops = {
'INPUT': 'INPUT',
'OUTPUT': 'OUTPUT',
'NoOp': 'Nop',
'FLATTEN': 'Flatten',
'Identity': 'Nop',
'Placeholder': 'INPUT',
'Const': 'Const',
'QuantizedConv2D': 'QuantizedConv2d_8x8to32',
'QuantizedMatMul': 'QuantizedMatMul_8x8to32',
'QuantizeDownAndShrinkRange': 'QuantizeDownAndShrinkRange_32to8',
'QuantizedRelu': 'QuantizedRelu_8',
'QuantizedReluX': 'QuantizedReluX_8',
'QuantizedMaxPool': 'QuantizedMaxPool_8',
'QuantizedAvgPool': 'QuantizedAvgPool_8',
'QuantizedConcat': 'QuantizedConcat_8',
'QuantizedBiasAdd': 'QuantizedBiasAdd_8p8to32',
'QuantizedResizeBilinear': 'QuantizedResizeBilinear_8',
'QuantizedSpaceToBatchND': 'QuantizedSpaceToBatchND_8',
'QuantizedBatchToSpaceND': 'QuantizedBatchToSpaceND_8',
'QuantizedSoftmax': 'QuantizedSoftmax_8',
'QuantizedTanh': 'QuantizedTanh_8',
'Min': 'Min_f',
'Max': 'Max_f',
'QuantizeV2': 'Quantize',
'Dequantize': 'Dequantize',
'Softmax': 'Softmax_f',
'Reshape': 'Reshape',
'QuantizedReshape': 'QuantizedReshape',
'Sigmoid': 'Sigmoid_f',
'Slice': 'Slice_f',
'Add': 'Add_f',
'Mul': 'Mul_f',
'Requantize': 'Requantize_32to8',
'RequantizationRange': 'RequantizationRange_32',
'Sub': 'Sub_f',
'Pack': 'Pack_int32',
'StridedSlice': 'StridedSlice_f',
'ExpandDims': 'ExpandDims_f',
'QuantizedMul': 'QuantizedMul_8x8to32',
'QuantizedAdd': 'QuantizedAdd_8p8to32',
'Pad': 'Pad_f',
'SpaceToBatchND': 'SpaceToBatchND_f',
'BatchToSpaceND': 'BatchToSpaceND_f',
'ResizeBilinear': 'ResizeBilinear_f',
'ConcatV2': 'ConcatV2_f',
'Conv2DBackpropInput': 'Deconv_f',
'Tanh': 'Tanh_f',
'Split': 'Split_f',
'Transpose': 'Transpose_f',
'Concat': 'Concat_f',
'AddN': 'AddN_f',
}
def has_op(self, tf_op):
return tf_op in self.dsp_ops
def map_nn_op(self, tf_op):
if tf_op not in self.dsp_ops:
raise Exception('Could not map nn op for: ', tf_op)
return self.dsp_ops[tf_op]
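The DspOps table above is the whole mapping surface between TensorFlow op types and nnlib op names: has_op guards dispatch and map_nn_op resolves the name, raising for anything nnlib cannot run. A minimal usage sketch (the driver lines are illustrative, not part of this merge request):

.. code:: python

    # Illustrative driver for the DspOps table defined above.
    dsp_ops = DspOps()
    if dsp_ops.has_op('QuantizedConv2D'):
        # Maps to the 8-bit nnlib kernel name.
        print(dsp_ops.map_nn_op('QuantizedConv2D'))  # QuantizedConv2d_8x8to32
    # Unmapped op types raise, surfacing unsupported graphs early:
    try:
        dsp_ops.map_nn_op('UnsupportedOp')
    except Exception as e:
        print(e)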
# converter --input ../libcv/quantized_model.pb \
# --output quantized_model_dsp.pb \
# --runtime dsp --input_node input_node \
# --output_node output_node
TF_DTYPE_2_MACE_DTYPE_MAP = {
tf.float32: mace_pb2.DT_FLOAT,
......@@ -101,7 +165,6 @@ def get_input_tensor(op, index):
def add_shape_const_node(net_def, op, values, name):
print('Add const node: ', op.name + '/' + name)
tensor = net_def.tensors.add()
node_name = op.name + '/' + name
tensor.name = node_name + ':0'
......@@ -128,7 +191,7 @@ def convert_op_outputs(mace_op_def, tf_op):
mace_op_def.output_shape.extend(output_shapes)
def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops):
def convert_ops(unresolved_ops, resolved_ops, net_def, dsp_ops):
first_op = unresolved_ops[0]
print('Op: ', first_op.name, first_op.type, first_op.outputs[0].shape)
......@@ -152,7 +215,8 @@ def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops):
first_op.outputs[0].dtype == tf.quint8 or \
first_op.outputs[0].dtype == tf.quint16:
tensor.int32_data.extend(tf_tensor.astype(int).flat)
elif first_op.type == 'Shape':
resolved_ops.add(first_op.name)
else:
op_def = net_def.op.add()
op_def.name = first_op.name
......@@ -162,7 +226,7 @@ def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops):
if len(first_op.outputs) > 0 and first_op.type == 'Dequantize' \
and len(first_op.outputs[0].consumers()) > 0 \
and (first_op.outputs[0].consumers()[0].type == 'SpaceToBatchND' or
first_op.outputs[0].consumers()[0].type == 'BatchToSpaceND'):
first_op.outputs[0].consumers()[0].type == 'BatchToSpaceND'): # noqa
input_tensor = first_op.inputs[0]
min_tensor = first_op.inputs[1]
max_tensor = first_op.inputs[2]
......@@ -183,14 +247,12 @@ def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops):
op_def.input.extend([t.name for t in s2b_op.inputs[1:]])
op_def.input.extend([min_tensor.name, max_tensor.name])
convert_op_outputs(op_def, quantize_op)
elif len(first_op.outputs) > 0 and \
first_op.type == 'QuantizedReshape' and \
len(first_op.outputs[0].consumers()) > 0 and \
first_op.outputs[0].consumers()[0].type == 'Dequantize' and \
len(first_op.outputs[0].consumers()[0].outputs[0].consumers()) \
> 0 and \
first_op.outputs[0].consumers()[0].outputs[0].consumers()[0].type \
== 'Softmax':
elif (len(first_op.outputs) > 0 and
first_op.type == 'QuantizedReshape' and
len(first_op.outputs[0].consumers()) > 0 and
first_op.outputs[0].consumers()[0].type == 'Dequantize' and
len(first_op.outputs[0].consumers()[0].outputs[0].consumers()) > 0 and # noqa
first_op.outputs[0].consumers()[0].outputs[0].consumers()[0].type == 'Softmax'): # noqa
input_tensor = first_op.inputs[0]
min_tensor = first_op.inputs[2]
max_tensor = first_op.inputs[3]
......@@ -216,17 +278,17 @@ def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops):
[input_tensor.name, min_tensor.name, max_tensor.name])
convert_op_outputs(op_def, quantize_reshape_op)
# remove Squeeze
elif len(first_op.outputs) > 0 and \
first_op.type == 'Requantize' and \
len(first_op.outputs[0].consumers()) > 0 and \
first_op.outputs[0].consumers()[0].type == 'Dequantize' and \
len(first_op.outputs[0].consumers()[0].outputs[0].consumers()) \
> 0 and \
first_op.outputs[0].consumers()[0].outputs[0].consumers()[0].type \
== 'Squeeze':
elif (len(first_op.outputs) > 0 and
first_op.type == 'Requantize' and
len(first_op.outputs[0].consumers()) > 0 and
first_op.outputs[0].consumers()[0].type == 'Dequantize' and
len(first_op.outputs[0].consumers()[0].outputs[0].consumers()) > 0 and # noqa
first_op.outputs[0].consumers()[0].outputs[0].consumers()[0].type == 'Squeeze'): # noqa
dequantize_op = first_op.outputs[0].consumers()[0]
squeeze_op = dequantize_op.outputs[0].consumers()[0]
reshape_op = squeeze_op.outputs[0].consumers()[0]
if reshape_op.type == 'Shape':
reshape_op = squeeze_op.outputs[0].consumers()[1]
min_op = reshape_op.outputs[0].consumers()[0]
max_op = reshape_op.outputs[0].consumers()[1]
quantize_op = min_op.outputs[0].consumers()[0]
......@@ -249,7 +311,7 @@ def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops):
if next_op and len(next_op.outputs) > 0 and \
next_op.type == 'QuantizedReshape' and \
len(next_op.outputs[0].consumers()) > 0 else None
softmax_op = dequantize_op.outputs[0].consumers()[0]\
softmax_op = dequantize_op.outputs[0].consumers()[0] \
if dequantize_op and len(dequantize_op.outputs) > 0 and \
dequantize_op.type == 'Dequantize' and \
len(dequantize_op.outputs[0].consumers()) > 0 else None
......@@ -280,7 +342,7 @@ def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops):
convert_op_outputs(softmax_op_def, quantize_reshape_op)
elif len(first_op.outputs) > 0 and first_op.type == 'Dequantize' and \
len(first_op.outputs[0].consumers()) > 0 and \
len(first_op.outputs[0].consumers()) > 0 and \
first_op.outputs[0].consumers()[0].type == 'Tanh':
input_tensor = first_op.inputs[0]
min_tensor = first_op.inputs[1]
......@@ -446,12 +508,10 @@ def reverse_batch_to_space_and_biasadd(net_def):
new_follow_op.CopyFrom(follow_op)
for i in xrange(len(follow_op.input)):
for k in xrange(3):
if new_follow_op.input[
i] == get_tensor_name_from_op(
biasadd_requantize_op.name, k):
new_follow_op.input[
i] = get_tensor_name_from_op(
b2s_op.name, k)
if new_follow_op.input[i] == get_tensor_name_from_op( # noqa
biasadd_requantize_op.name, k):
new_follow_op.input[i] = get_tensor_name_from_op( # noqa
b2s_op.name, k)
new_ops.append(new_follow_op)
skip_ops.add(follow_op.name)
visited_ops.add(follow_op.name)
......@@ -518,7 +578,7 @@ def add_input_output_info(net_def, input_node, output_node, graph, dtype):
return net_def
def fuse_quantize(net_def, input_node, output_node):
def fuse_quantize(net_def):
tensor_map = {}
for tensor in net_def.tensors:
tensor_map[tensor.name] = tensor
......@@ -567,51 +627,71 @@ def fuse_quantize(net_def, input_node, output_node):
return new_net_def
def convert_to_mace_pb(model_file, input_node, output_node, dsp_mode):
"""
nnlib does not have batch norm, so use tensorflow optimizer to fold
batch norm with convolution. The fold optimization reorders ops, so
we sort ops first by topology.
"""
input_graph_def = tf.GraphDef()
with gfile.Open(model_file, "rb") as f:
data = f.read()
input_graph_def.ParseFromString(data)
input_graph_def = graph_util.sort_tf_graph(input_graph_def)
net_def = mace_pb2.NetDef()
with tf.Session() as session:
with session.graph.as_default() as graph:
tf.import_graph_def(input_graph_def, name="")
ops = graph.get_operations()
dsp_ops = DspOps()
resolved_ops = set()
# convert const node
unresolved_ops = [op for op in ops if op.type == 'Const']
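The docstring above captures why the converter sorts first: nnlib has no batch-norm op, the TensorFlow fold_batch_norms optimization compensates by rewriting convolutions, and that rewrite reorders nodes. A rough sketch of the topological re-sort that graph_util.sort_tf_graph performs (illustrative only, assuming a feed-forward GraphDef without control flow; not the MACE implementation):

.. code:: python

    # Kahn-style topological sort of a TF GraphDef; illustrative sketch,
    # not the actual graph_util.sort_tf_graph implementation.
    import collections
    import tensorflow as tf

    def sort_graph_def(graph_def):
        name_to_node = {node.name: node for node in graph_def.node}
        indegree = collections.Counter()
        consumers = collections.defaultdict(list)
        for node in graph_def.node:
            for inp in node.input:
                # Strip control-dependency '^' prefix and ':N' output index.
                producer = inp.lstrip('^').split(':')[0]
                if producer in name_to_node:
                    indegree[node.name] += 1
                    consumers[producer].append(node.name)
        ready = collections.deque(
            node.name for node in graph_def.node if indegree[node.name] == 0)
        sorted_def = tf.GraphDef()
        while ready:
            name = ready.popleft()
            sorted_def.node.extend([name_to_node[name]])
            for consumer in consumers[name]:
                indegree[consumer] -= 1
                if indegree[consumer] == 0:
                    ready.append(consumer)
        return sorted_def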
class TensorflowDspConverter(base_converter.ConverterInterface):
def __init__(self, option, src_model_file):
self._option = option
self._mace_net_def = mace_pb2.NetDef()
# import tensorflow graph
tf_graph_def = tf.GraphDef()
with tf.gfile.Open(src_model_file, 'rb') as f:
tf_graph_def.ParseFromString(f.read())
self._placeholders = {}
self.add_shape_info(tf_graph_def)
with tf.Session() as session:
with session.graph.as_default() as graph:
tf.import_graph_def(tf_graph_def, name='')
self._tf_graph = graph
def run(self):
ops = self._tf_graph.get_operations()
dsp_ops = DspOps()
resolved_ops = set()
mace_check(len(self._option.input_nodes) == 1
and len(self._option.output_nodes) == 1,
'dsp only support single input and output')
input_node = self._option.input_nodes.values()[0].name
output_node = self._option.output_nodes.values()[0].name
# convert const node
unresolved_ops = [op for op in ops if op.type == 'Const']
with tf.Session() as session:
while len(unresolved_ops) > 0:
convert_ops(unresolved_ops, resolved_ops, net_def, output_node,
convert_ops(unresolved_ops, resolved_ops, self._mace_net_def,
dsp_ops)
# convert op node
unresolved_ops = [op for op in ops if op.type != 'Const']
while len(unresolved_ops) > 0:
convert_ops(unresolved_ops, resolved_ops, net_def, output_node,
convert_ops(unresolved_ops, resolved_ops, self._mace_net_def,
dsp_ops)
add_output_node(net_def, output_node)
net_def = reverse_batch_to_space_and_biasadd(net_def)
net_def = fuse_quantize(net_def, input_node, output_node)
add_output_node(self._mace_net_def, output_node)
net_def = reverse_batch_to_space_and_biasadd(self._mace_net_def)
net_def = fuse_quantize(net_def)
sorted_net_def = graph_util.sort_mace_graph(net_def, '__output__')
net_def_with_node_id = add_node_id(sorted_net_def)
dtype = mace_pb2.DT_FLOAT
final_net_def = add_input_output_info(
net_def_with_node_id, input_node, output_node, graph, dtype)
arg = final_net_def.arg.add()
arg.name = 'dsp_mode'
arg.i = dsp_mode
return final_net_def
net_def_with_node_id, input_node, output_node,
self._tf_graph, dtype)
return final_net_def
def add_shape_info(self, tf_graph_def):
for node in tf_graph_def.node:
for input_node in self._option.input_nodes.values():
if node.name == input_node.name or \
node.name + ':0' == input_node.name:
del node.attr['shape'].shape.dim[:]
node.attr['shape'].shape.dim.extend([
tensor_shape_pb2.TensorShapeProto.Dim(size=i) for i in
input_node.shape
])
self._placeholders[node.name + ':0'] = \
np.zeros(shape=input_node.shape, dtype=float)
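With add_shape_info in place, the converter can be driven end to end the way the updated main() does: build a ConverterOption, register NodeInfo for the single input and output that the DSP path allows, then call run() to get a mace_pb2.NetDef. A minimal sketch; the model path and node names are placeholders, and it assumes cvt aliases mace.python.tools.converter_tool.base_converter as in the converter entry point:

.. code:: python

    # Minimal driver sketch for the new DSP converter path; the model
    # path and node names below are placeholders, not values from the MR.
    from mace.python.tools.converter_tool import base_converter as cvt
    from mace.python.tools.converter_tool import tf_dsp_converter

    option = cvt.ConverterOption()
    input_node = cvt.NodeInfo()
    input_node.name = 'input_node'        # placeholder
    input_node.shape = [1, 64, 64, 3]     # must match the frozen graph
    option.add_input_node(input_node)

    output_node = cvt.NodeInfo()
    output_node.name = 'output_node'      # placeholder
    option.add_output_node(output_node)

    converter = tf_dsp_converter.TensorflowDspConverter(
        option, '/path/to/tf_model_opt.pb')  # placeholder path
    net_def = converter.run()  # mace_pb2.NetDef, ready for serialization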
# Copyright 2018 Xiaomi, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
class DspOps(object):
def __init__(self):
self.dsp_ops = {
'INPUT': 'INPUT',
'OUTPUT': 'OUTPUT',
'NoOp': 'Nop',
'FLATTEN': 'Flatten',
'Identity': 'Nop',
'Placeholder': 'INPUT',
'Const': 'Const',
'QuantizedConv2D': 'QuantizedConv2d_8x8to32',
'QuantizedMatMul': 'QuantizedMatMul_8x8to32',
'QuantizeDownAndShrinkRange': 'QuantizeDownAndShrinkRange_32to8',
'QuantizedRelu': 'QuantizedRelu_8',
'QuantizedReluX': 'QuantizedReluX_8',
'QuantizedMaxPool': 'QuantizedMaxPool_8',
'QuantizedAvgPool': 'QuantizedAvgPool_8',
'QuantizedConcat': 'QuantizedConcat_8',
'QuantizedBiasAdd': 'QuantizedBiasAdd_8p8to32',
'QuantizedResizeBilinear': 'QuantizedResizeBilinear_8',
'QuantizedSpaceToBatchND': 'QuantizedSpaceToBatchND_8',
'QuantizedBatchToSpaceND': 'QuantizedBatchToSpaceND_8',
'QuantizedSoftmax': 'QuantizedSoftmax_8',
'QuantizedTanh': 'QuantizedTanh_8',
'Min': 'Min_f',
'Max': 'Max_f',
'QuantizeV2': 'Quantize',
'Dequantize': 'Dequantize',
'Softmax': 'Softmax_f',
'Reshape': 'Reshape',
'QuantizedReshape': 'QuantizedReshape',
'Sigmoid': 'Sigmoid_f',
'Slice': 'Slice_f',
'Add': 'Add_f',
'Mul': 'Mul_f',
'Requantize': 'Requantize_32to8',
'RequantizationRange': 'RequantizationRange_32',
'Sub': 'Sub_f',
'Pack': 'Pack_int32',
'StridedSlice': 'StridedSlice_f',
'ExpandDims': 'ExpandDims_f',
'QuantizedMul': 'QuantizedMul_8x8to32',
'QuantizedAdd': 'QuantizedAdd_8p8to32',
'Pad': 'Pad_f',
'SpaceToBatchND': 'SpaceToBatchND_f',
'BatchToSpaceND': 'BatchToSpaceND_f',
'ResizeBilinear': 'ResizeBilinear_f',
'ConcatV2': 'ConcatV2_f',
'Conv2DBackpropInput': 'Deconv_f',
'Tanh': 'Tanh_f',
'Split': 'Split_f',
'Transpose': 'Transpose_f',
'Concat': 'Concat_f',
'AddN': 'AddN_f',
}
def has_op(self, tf_op):
return tf_op in self.dsp_ops
def map_nn_op(self, tf_op):
if tf_op not in self.dsp_ops:
raise Exception('Could not map nn op for: ', tf_op)
return self.dsp_ops[tf_op]