提交 e6f32c57 编写于 作者: L liuqi

Support caffe model.

上级 0e4a49a8
...@@ -18,3 +18,12 @@ py_proto_library( ...@@ -18,3 +18,12 @@ py_proto_library(
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = ["@com_google_protobuf//:protobuf_python"], deps = ["@com_google_protobuf//:protobuf_python"],
) )
py_proto_library(
name = "caffe_py",
srcs = ["caffe.proto"],
default_runtime = "@com_google_protobuf//:protobuf_python",
protoc = "@com_google_protobuf//:protoc",
srcs_version = "PY2AND3",
deps = ["@com_google_protobuf//:protobuf_python"],
)
此差异已折叠。
...@@ -13,6 +13,18 @@ py_library( ...@@ -13,6 +13,18 @@ py_library(
], ],
) )
py_library(
name = "caffe_converter_lib",
srcs = [
"caffe_converter_lib.py",
],
srcs_version = "PY2AND3",
deps = [
":memory_optimizer",
"//lib/proto:caffe_py",
],
)
py_library( py_library(
name = "source_converter_lib", name = "source_converter_lib",
srcs = [ srcs = [
...@@ -25,11 +37,12 @@ py_library( ...@@ -25,11 +37,12 @@ py_library(
) )
py_binary( py_binary(
name = "tf_converter", name = "converter",
srcs = ["tf_converter.py"], srcs = ["converter.py"],
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
deps = [ deps = [
":tf_converter_lib", ":tf_converter_lib",
":caffe_converter_lib",
":source_converter_lib", ":source_converter_lib",
"@six_archive//:six", "@six_archive//:six",
], ],
......
from lib.proto import mace_pb2
from lib.proto import caffe_pb2
from lib.python.tools import memory_optimizer
import google.protobuf.text_format
import numpy as np
import math
# TODO: support NCHW format, now only support NHWC.

# Caffe padding strategy -> mace padding enum value.
padding_mode = {
  'VALID': 0,
  'SAME': 1,
  'FULL': 2
}

# Pooling kind -> mace pooling_type arg value.
pooling_type_mode = {
  'AvgPool': 1,
  'MaxPool': 2
}

# Buffer-to-image conversion kinds used by the gpu runtime.
buffer_type_map = {
  'CONV2D_FILTER' : 0,
  'IN_OUT_CHANNEL' : 1,
  'ARGUMENT' : 2,
  'IN_OUT_HEIGHT' : 3,
  'IN_OUT_WIDTH' : 4,
  'WINOGRAD_FILTER' : 5,
  'DW_CONV2D_FILTER' : 6,
  'WEIGHT_HEIGHT' : 7,
}

# Command-line data type name -> mace proto enum.
data_type_map = {
  'DT_HALF' : mace_pb2.DT_HALF,
  'DT_FLOAT': mace_pb2.DT_FLOAT
}

# Caffe activation layer type -> mace activation arg value.
activation_name_map = {
  'ReLU' : 'RELU',
  'PReLU' : 'PRELU',
  'Sigmoid' : 'SIGMOID',
  'TanH' : 'TANH',
}

MACE_INPUT_NODE_NAME = "mace_input_node"
MACE_OUTPUT_NODE_NAME = "mace_output_node"

# Maximum OpenCL image dimension (not enforced in this file).
OPENCL_IMAGE_MAX_SIZE = 16384
class Operator(object):
  """A node of the layer graph: a caffe layer plus its edges and weight blobs."""

  def __init__(self, name, type, layer):
    self.name = name
    self.type = type
    self.layer = layer
    # Graph edges and loaded weight arrays, filled in later.
    self.parents, self.children, self.data = [], [], []

  def add_parent(self, parent_op):
    """Register parent_op as an input; keep the reverse edge consistent."""
    assert parent_op not in self.parents
    self.parents.append(parent_op)
    if self not in parent_op.children:
      parent_op.children.append(self)

  def add_child(self, child_op):
    """Register child_op as a consumer; keep the reverse edge consistent."""
    assert child_op not in self.children
    self.children.append(child_op)
    if self not in child_op.parents:
      child_op.parents.append(self)
def BlobToNPArray(blob):
  """Convert a caffe blob proto into a float32 numpy array.

  Legacy blobs describe their shape via (num, channels, height, width);
  newer blobs carry an explicit shape.dim instead (num == 0).
  """
  array = np.asarray(blob.data, dtype=np.float32)
  if blob.num != 0:
    return array.reshape(blob.num, blob.channels, blob.height, blob.width)
  return array.reshape(blob.shape.dim)
def CommonConvert(op, mace_type, dt):
  """Build an OperatorDef pre-filled with the fields every mace op shares.

  Sets the data-type arg 'T', the 'data_format' arg (NHWC), the op name and
  type, and wires the inputs from the op's parents (output port ':0').
  """
  op_def = mace_pb2.OperatorDef()

  dt_arg = op_def.arg.add()
  dt_arg.name = 'T'
  dt_arg.i = dt

  data_format_arg = op_def.arg.add()
  data_format_arg.name = 'data_format'
  data_format_arg.s = 'NHWC'

  op_def.name = op.name
  op_def.type = mace_type
  op_def.input.extend(['%s:0' % parent.name for parent in op.parents])
  return op_def
class CaffeConverter(object):
  """Converts a caffe NetParameter (prototxt + weights) into a mace NetDef.

  Builds an Operator graph from the test-phase layers, topologically sorts
  it, then translates each layer into one or more mace ops.  When the target
  device is 'gpu', BufferToImage/ImageToBuffer transform ops are inserted
  around weights, model input and model output.
  """

  def __init__(self, caffe_net, weights, net_def, dt, device, winograd):
    self.net_def = net_def
    self.caffe_net = caffe_net
    self.weights = weights
    self.dt = dt
    self.device = device
    self.winograd = winograd
    self.resolved_ops = set()

    # Remove train-phase layers and dropout.
    layers = self.remove_unused_layers(caffe_net.layer)

    # Construct the graph.  Only single-output layers are supported;
    # such layers often reuse the same name for their top blob.
    self.ops = [Operator(layer.name, layer.type, layer) for layer in layers]
    self.ops_map = {op.name: op for op in self.ops}
    output_op = {}
    for layer in layers:
      op = self.ops_map[layer.name]
      for input_name in layer.bottom:
        assert input_name != layer.name
        parent_op = output_op.get(input_name)
        if parent_op is None:
          # In-place layers use their own name as the top blob name.
          parent_op = self.ops_map[input_name]
        op.add_parent(parent_op)
      if len(layer.top) > 1:
        raise Exception('Only support single-output layers')
      for output_name in layer.top:
        if output_name == layer.name:
          continue
        output_op[output_name] = op

    # Attach weight blobs (as numpy arrays) to the matching graph ops.
    for layer in weights.layer:
      if not layer.blobs:
        continue
      if layer.name in self.ops_map:
        op = self.ops_map[layer.name]
        op.data = [BlobToNPArray(blob) for blob in layer.blobs]

    # Topologically sort the ops so parents are converted before children.
    self.ops = self.toposort_ops()

  def remove_unused_layers(self, layers):
    """Return the layers to convert: test-phase layers, minus Dropout."""
    phase_map = {0: 'train', 1: 'test'}
    test_layers_names = set()
    test_layers = []
    for layer in layers:
      phase = 'test'
      if len(layer.include):
        phase = phase_map[layer.include[0].phase]
      if len(layer.exclude):
        phase = phase_map[layer.exclude[0].phase]
      if phase == 'test' and layer.type != 'Dropout':
        test_layers.append(layer)
        assert layer.name not in test_layers_names
        test_layers_names.add(layer.name)
    return test_layers

  def toposort_ops(self):
    """DFS topological sort; raises if the graph has a cycle."""
    sorted_ops = []
    temp_visited = set()
    visited = set()

    def search(op):
      if op.name in temp_visited:
        raise Exception("The model is not DAG")
      if op.name in visited:
        return
      temp_visited.add(op.name)
      for parent_op in op.parents:
        search(parent_op)
      temp_visited.remove(op.name)
      sorted_ops.append(op)
      visited.add(op.name)

    for op in self.ops:
      search(op)

    return sorted_ops

  def add_buffer_to_image(self, input_name, input_type):
    """Insert a BufferToImage op for tensor input_name; return its output name."""
    output_name = input_name[:-2] + "_b2i" + input_name[-2:]
    op_def = self.net_def.op.add()
    op_def.name = output_name[:-2]
    op_def.type = 'BufferToImage'
    op_def.input.extend([input_name])
    op_def.output.extend([output_name])

    arg = op_def.arg.add()
    arg.name = 'buffer_type'
    arg.i = buffer_type_map[input_type]
    arg = op_def.arg.add()
    arg.name = 'mode'
    arg.i = 0
    arg = op_def.arg.add()
    arg.name = 'T'
    arg.i = self.dt
    return output_name

  def add_image_to_buffer(self, input_name, input_type):
    """Insert an ImageToBuffer op for tensor input_name; return its output name."""
    output_name = input_name[:-2] + "_i2b" + input_name[-2:]
    op_def = self.net_def.op.add()
    op_def.name = output_name[:-2]
    op_def.type = 'ImageToBuffer'
    op_def.input.extend([input_name])
    op_def.output.extend([output_name])

    arg = op_def.arg.add()
    arg.name = 'buffer_type'
    arg.i = buffer_type_map[input_type]
    arg = op_def.arg.add()
    arg.name = 'T'
    arg.i = self.dt
    return output_name

  def add_input_transform(self, name):
    """Feed the model input through a BufferToImage op (gpu runtime only)."""
    new_input_name = MACE_INPUT_NODE_NAME + ":0"
    op_def = self.net_def.op.add()
    op_def.name = name
    op_def.type = 'BufferToImage'
    op_def.input.extend([new_input_name])
    if name not in self.ops_map:
      raise Exception("Input name not in the model")
    top_name = self.ops_map[name].layer.top[0]
    op_def.output.extend([top_name + ':0'])

    buffer_type_arg = op_def.arg.add()
    buffer_type_arg.name = 'buffer_type'
    buffer_type_arg.i = buffer_type_map['IN_OUT_CHANNEL']

    arg = op_def.arg.add()
    arg.name = 'T'
    arg.i = self.dt

  def add_output_transform(self, name):
    """Feed the model output through an ImageToBuffer op (gpu runtime only)."""
    output_name = MACE_OUTPUT_NODE_NAME + ":0"
    op_def = self.net_def.op.add()
    op_def.name = output_name[:-2]
    op_def.type = 'ImageToBuffer'
    op_def.input.extend([name + ':0'])
    op_def.output.extend([output_name])

    buffer_type_arg = op_def.arg.add()
    buffer_type_arg.name = 'buffer_type'
    buffer_type_arg.i = buffer_type_map['IN_OUT_CHANNEL']

  def add_tensor(self, name, value):
    """Append a float const tensor built from numpy array `value` to the net."""
    tensor = self.net_def.tensors.add()
    tensor.name = name
    tensor.dims.extend(list(value.shape))
    tensor.data_type = mace_pb2.DT_FLOAT
    tensor.float_data.extend(value.flat)

  def add_stride_pad_kernel_arg(self, param, op_def):
    """Translate caffe stride/pad/kernel params into mace op args.

    Handles both the repeated form (stride/pad/kernel_size lists) and the
    explicit *_h/*_w form; kernels are only emitted for Pooling ops.
    """
    try:
      if len(param.stride) > 1 or len(param.kernel_size) > 1 or len(param.pad) > 1:
        raise Exception('Mace does not support multiple stride/kernel_size/pad')
      stride = param.stride[0] if len(param.stride) else 1
      pad = param.pad[0] if len(param.pad) else 0
      kernel = param.kernel_size[0] if len(param.kernel_size) else 0
    except TypeError:
      # PoolingParameter uses scalar (optional) fields instead of repeated ones.
      stride = param.stride
      pad = param.pad
      kernel = param.kernel_size

    strides_arg = op_def.arg.add()
    strides_arg.name = 'strides'
    if param.HasField("stride_h") or param.HasField("stride_w"):
      strides_arg.ints.extend([param.stride_h, param.stride_w])
    else:
      strides_arg.ints.extend([stride, stride])

    padding_arg = op_def.arg.add()
    padding_arg.name = 'padding_values'
    if param.HasField("pad_h") or param.HasField("pad_w"):
      padding_arg.ints.extend([param.pad_h, param.pad_w])
    else:
      padding_arg.ints.extend([pad, pad])

    if op_def.type == 'Pooling':
      kernel_arg = op_def.arg.add()
      kernel_arg.name = 'kernels'
      if param.HasField("kernel_h") or param.HasField("kernel_w"):
        kernel_arg.ints.extend([param.kernel_h, param.kernel_w])
      else:
        kernel_arg.ints.extend([kernel, kernel])

  def convert_conv2d(self, op):
    """Convert a Convolution layer; fuses a directly-following activation."""
    op_def = CommonConvert(op, 'Conv2D', self.dt)
    param = op.layer.convolution_param

    # Filter: caffe stores OIHW, mace expects HWOI here.
    weight_tensor_name = op.name + '_weight:0'
    weight_data = op.data[0].transpose((2, 3, 0, 1))
    self.add_tensor(weight_tensor_name, weight_data)
    if self.device == 'gpu':
      buffer_type = "CONV2D_FILTER"
      output_name = self.add_buffer_to_image(weight_tensor_name, buffer_type)
      op_def.input.extend([output_name])
    else:
      op_def.input.extend([weight_tensor_name])

    # Optional bias blob.
    if len(op.data) == 2:
      bias_tensor_name = op.name + '_bias:0'
      bias_data = op.data[1]
      self.add_tensor(bias_tensor_name, bias_data)
      if self.device == 'gpu':
        output_name = self.add_buffer_to_image(bias_tensor_name, "ARGUMENT")
        op_def.input.extend([output_name])
      else:
        op_def.input.extend([bias_tensor_name])

    self.add_stride_pad_kernel_arg(param, op_def)
    if len(param.dilation) > 0:
      dilation_arg = op_def.arg.add()
      dilation_arg.name = 'dilations'
      if len(param.dilation) == 1:
        dilation_arg.ints.extend([param.dilation[0], param.dilation[0]])
      elif len(param.dilation) == 2:
        dilation_arg.ints.extend([param.dilation[0], param.dilation[1]])

    final_op = op
    self.resolved_ops.add(op.name)

    # Fuse a single following activation layer into the conv op.
    if len(self.ops_map[final_op.name].children) == 1 \
        and self.ops_map[final_op.name].children[0].type in activation_name_map:
      activation_op = self.ops_map[final_op.name].children[0]
      op_def.type = "FusedConv2D"
      fused_act_arg = op_def.arg.add()
      fused_act_arg.name = 'activation'
      fused_act_arg.s = activation_name_map[activation_op.type]
      if activation_op.type == 'PReLU':
        alpha_arg = op_def.arg.add()
        alpha_arg.name = 'alpha'
        alpha_arg.f = activation_op.data[0][0]
      final_op = activation_op
      self.resolved_ops.add(activation_op.name)

    op_def.output.extend([final_op.name + ':0'])
    self.net_def.op.extend([op_def])

  def convert_batchnorm(self, op):
    """Fold BatchNorm+Scale (and optional following activation) into one op."""
    if len(op.children) != 1 or op.children[0].type != 'Scale':
      raise Exception('Now only support BatchNorm+Scale')
    op_def = CommonConvert(op, 'FoldedBatchNorm', self.dt)
    scale_op = op.children[0]

    epsilon_value = op.layer.batch_norm_param.eps
    # data[2][0] is caffe's moving-average scale factor.
    if op.data[2][0] != 0:
      mean_value = (1. / op.data[2][0]) * op.data[0]
      var_value = (1. / op.data[2][0]) * op.data[1]
    else:
      raise RuntimeError('scalar is zero.')

    gamma_value = scale_op.data[0]
    beta_value = np.zeros_like(mean_value)
    if len(scale_op.data) == 2:
      beta_value = scale_op.data[1]

    # Precompute y = scale * x + offset from the bn/scale parameters.
    scale_value = (
        (1.0 / np.vectorize(math.sqrt)(var_value + epsilon_value)) *
        gamma_value)
    offset_value = (-mean_value * scale_value) + beta_value

    input_names = [op.name + '_scale:0', op.name + '_offset:0']
    self.add_tensor(input_names[0], scale_value)
    self.add_tensor(input_names[1], offset_value)

    if self.device == 'gpu':
      for name in input_names:
        output_name = self.add_buffer_to_image(name, "ARGUMENT")
        op_def.input.extend([output_name])
    else:
      op_def.input.extend([name for name in input_names])

    self.resolved_ops.add(op.name)
    self.resolved_ops.add(scale_op.name)
    final_op = scale_op

    if len(self.ops_map[final_op.name].children) == 1 \
        and self.ops_map[final_op.name].children[0].type in activation_name_map:
      activation_op = self.ops_map[final_op.name].children[0]
      fused_act_arg = op_def.arg.add()
      fused_act_arg.name = 'activation'
      fused_act_arg.s = activation_name_map[activation_op.type]
      if activation_op.type == 'PReLU':
        alpha_arg = op_def.arg.add()
        alpha_arg.name = 'alpha'
        alpha_arg.f = activation_op.data[0][0]
      final_op = activation_op
      self.resolved_ops.add(activation_op.name)

    op_def.output.extend([final_op.name + ':0'])
    self.net_def.op.extend([op_def])

  def convert_inner_product(self, op):
    """Convert an InnerProduct layer into a mace FC op."""
    param = op.layer.inner_product_param
    try:
      if param.axis != 1 or param.transpose:
        raise ValueError('Do not support non-default axis and transpose '
                         'case for innner product')
    except AttributeError:
      pass

    op_def = CommonConvert(op, 'FC', self.dt)
    weight_tensor_name = op.name + '_weight:0'
    if op.data[0].ndim not in [2, 4]:
      raise ValueError('Unexpected weigth ndim.')
    # BUG FIX: original compared a tuple against a list inside list(),
    # which always raised TypeError. A 4D weight is only supported when
    # its leading dims are [1, 1].
    if op.data[0].ndim == 4 and list(op.data[0].shape[:2]) != [1, 1]:
      raise ValueError('Only support 4D weight with shape [1, 1, *, *]')
    weight_data = op.data[0].reshape(-1, op.data[0].shape[-1])
    self.add_tensor(weight_tensor_name, weight_data)
    if self.device == 'gpu':
      buffer_type = "WEIGHT_HEIGHT"
      output_name = self.add_buffer_to_image(weight_tensor_name, buffer_type)
      op_def.input.extend([output_name])
    else:
      op_def.input.extend([weight_tensor_name])

    # Optional bias blob.
    if len(op.data) == 2:
      bias_tensor_name = op.name + '_bias:0'
      bias_data = op.data[1]
      self.add_tensor(bias_tensor_name, bias_data)
      if self.device == 'gpu':
        output_name = self.add_buffer_to_image(bias_tensor_name, "ARGUMENT")
        op_def.input.extend([output_name])
      else:
        op_def.input.extend([bias_tensor_name])

    self.resolved_ops.add(op.name)
    op_def.output.extend([op.name + ':0'])
    self.net_def.op.extend([op_def])

  def convert_pooling(self, op):
    """Convert a Pooling layer (MAX or AVE only)."""
    op_def = CommonConvert(op, 'Pooling', self.dt)
    param = op.layer.pooling_param
    self.add_stride_pad_kernel_arg(param, op_def)

    if param.pool == caffe_pb2.PoolingParameter.MAX:
      pooling_type = "MaxPool"
    elif param.pool == caffe_pb2.PoolingParameter.AVE:
      pooling_type = "AvgPool"
    else:
      # BUG FIX: previously fell through with pooling_type unbound (NameError).
      raise Exception('Unsupported pooling method: %s' % param.pool)
    pooling_type_arg = op_def.arg.add()
    pooling_type_arg.name = 'pooling_type'
    pooling_type_arg.i = pooling_type_mode[pooling_type]

    op_def.output.extend([op.name + ':0'])
    self.net_def.op.extend([op_def])
    self.resolved_ops.add(op.name)

  def convert_activation(self, op):
    """Convert a standalone activation layer (ReLU/Sigmoid/TanH)."""
    op_def = CommonConvert(op, 'Activation', self.dt)
    activation_arg = op_def.arg.add()
    activation_arg.name = 'activation'
    activation_arg.s = activation_name_map[op.type]
    op_def.output.extend([op.name + ':0'])
    self.net_def.op.extend([op_def])
    self.resolved_ops.add(op.name)

  def convert_prelu(self, op):
    """Convert a standalone PReLU layer (alpha from its weight blob)."""
    op_def = CommonConvert(op, 'Activation', self.dt)
    activation_arg = op_def.arg.add()
    activation_arg.name = 'activation'
    activation_arg.s = activation_name_map[op.type]
    alpha_arg = op_def.arg.add()
    alpha_arg.name = 'alpha'
    alpha_arg.f = op.data[0][0]
    op_def.output.extend([op.name + ':0'])
    self.net_def.op.extend([op_def])
    self.resolved_ops.add(op.name)

  def convert_add(self, op):
    """Convert an Add layer into an AddN op."""
    op_def = CommonConvert(op, 'AddN', self.dt)
    op_def.output.extend([op.name + ':0'])
    self.net_def.op.extend([op_def])
    self.resolved_ops.add(op.name)

  def convert_concat(self, op):
    """Convert a Concat layer; axis defaults to 3 (channels in NHWC)."""
    op_def = CommonConvert(op, 'Concat', self.dt)
    axis_arg = op_def.arg.add()
    axis_arg.name = 'axis'
    axis_arg.i = 3
    try:
      # BUG FIX: original spelled HasField as 'HasFeild' and read
      # op.concat_param instead of op.layer.concat_param; both raised
      # AttributeError, which was silently swallowed below, so any
      # explicit concat axis was ignored.
      if op.layer.concat_param.HasField('axis'):
        axis_arg.i = op.layer.concat_param.axis
      elif op.layer.concat_param.HasField('concat_dim'):
        axis_arg.i = op.layer.concat_param.concat_dim
    except AttributeError:
      pass
    op_def.output.extend([op.name + ':0'])
    self.net_def.op.extend([op_def])
    self.resolved_ops.add(op.name)

  def convert_eltwise(self, op):
    """Convert an Eltwise layer (operation kind passed through verbatim)."""
    op_def = CommonConvert(op, 'Eltwise', self.dt)
    param = op.layer.eltwise_param
    type_arg = op_def.arg.add()
    type_arg.name = 'type'
    type_arg.i = param.operation
    if len(param.coeff) > 0:
      coeff_arg = op_def.arg.add()
      coeff_arg.name = 'coeff'
      # NOTE(review): caffe's coeff values are floats; storing them in the
      # ints field truncates them — confirm the runtime expects ints here.
      coeff_arg.ints.extend(list(param.coeff))
    op_def.output.extend([op.name + ':0'])
    self.net_def.op.extend([op_def])
    self.resolved_ops.add(op.name)

  def convert_normal_op(self, op):
    """Convert a layer whose mace op type equals its caffe type (e.g. Softmax)."""
    op_def = CommonConvert(op, op.type, self.dt)
    op_def.output.extend([op.name + ':0'])
    self.net_def.op.extend([op_def])
    self.resolved_ops.add(op.name)

  def replace_in_out_name(self, input_name, output_name):
    """Rename the graph's input/output tensors to the mace node names (cpu)."""
    input_name = input_name + ":0"
    output_name = output_name + ":0"
    for op in self.net_def.op:
      if len(op.input) > 0 and op.input[0] == input_name:
        op.input[0] = MACE_INPUT_NODE_NAME + ":0"
      if len(op.output) > 0 and op.output[0] == output_name:
        op.output[0] = MACE_OUTPUT_NODE_NAME + ":0"

  def convert(self, input_node, output_node):
    """Translate every op in topological order; dispatch on layer type."""
    if self.device == 'gpu':
      self.add_input_transform(input_node)

    assert self.ops[0].type == 'Input'
    for op in self.ops:
      if op.name in self.resolved_ops:
        continue
      if op.type == 'Input':
        self.resolved_ops.add(op.name)
      elif op.type == 'Convolution':
        self.convert_conv2d(op)
      elif op.type == 'BatchNorm':
        self.convert_batchnorm(op)
      elif op.type == 'InnerProduct':
        self.convert_inner_product(op)
      elif op.type == 'Pooling':
        self.convert_pooling(op)
      elif op.type == 'PReLU':
        self.convert_prelu(op)
      elif op.type in ['ReLU', 'Sigmoid', 'TanH']:
        self.convert_activation(op)
      elif op.type == 'Add':
        self.convert_add(op)
      elif op.type == 'Concat':
        self.convert_concat(op)
      elif op.type == 'Eltwise':
        self.convert_eltwise(op)
      elif op.type in ['Softmax']:
        self.convert_normal_op(op)
      else:
        raise Exception('Unknown Op: %s, type: %s' % (op.name, op.type))

    if self.device == 'gpu':
      self.add_output_transform(output_node)
    if self.device == 'cpu':
      self.replace_in_out_name(input_node, output_node)

    for op in self.ops:
      if op.name not in self.resolved_ops:
        # py2/py3-compatible single-argument print.
        print('Unresolved Op: %s with type %s' % (op.name, op.type))
def convert_to_mace_pb(model_file, weight_file, input_node, output_node, data_type, device, winograd):
  """Load a caffe prototxt and weights file and convert them to a mace NetDef.

  For the gpu runtime, a memory-optimization pass is run on the result.
  """
  net_def = mace_pb2.NetDef()
  dt = data_type_map[data_type]

  # The prototxt is text-format protobuf; the weights are binary.
  caffe_net = caffe_pb2.NetParameter()
  with open(model_file, "r") as f:
    google.protobuf.text_format.Merge(str(f.read()), caffe_net)

  weights = caffe_pb2.NetParameter()
  with open(weight_file, "rb") as f:
    weights.MergeFromString(f.read())

  converter = CaffeConverter(caffe_net, weights, net_def, dt, device, winograd)
  converter.convert(input_node, output_node)
  print("PB Converted.")

  if device == 'gpu':
    print("start optimize memory.")
    memory_optimizer.MemoryOptimizer(net_def).optimize()
    print("Memory optimization done.")

  return net_def
import argparse
import sys
import hashlib
import os.path
from lib.python.tools import source_converter_lib
# Example usage (binary was renamed from tf_converter to converter):
# ./bazel-bin/mace/python/tools/converter --model_file quantized_test.pb --output quantized_test_dsp.pb --runtime dsp --input_dim input_node,1,28,28,3

# Parsed command-line flags, populated in __main__.
FLAGS = None
def md5(fname):
  """Return the hex md5 digest of the file at `fname`, read in 4 KB chunks."""
  digest = hashlib.md5()
  with open(fname, "rb") as f:
    chunk = f.read(4096)
    while chunk:
      digest.update(chunk)
      chunk = f.read(4096)
  return digest.hexdigest()
def main(unused_args):
  """Convert the model named by FLAGS into mace format.

  Dispatches to the dsp/tensorflow/caffe converter based on FLAGS.runtime
  and FLAGS.platform, then emits either generated source or a protobuf.
  """
  if not os.path.isfile(FLAGS.model_file):
    print("Input graph file '%s' does not exist!" % FLAGS.model_file)
    return -1

  mode_pb_checksum = md5(FLAGS.model_file)

  if FLAGS.runtime == 'dsp':
    from lib.python.tools import tf_dsp_converter_lib
    output_graph_def = tf_dsp_converter_lib.convert_to_mace_pb(
        FLAGS.model_file, FLAGS.input_node, FLAGS.output_node, FLAGS.dsp_mode)
  else:
    input_shape = ([int(dim) for dim in FLAGS.input_shape.split(',')]
                   if FLAGS.input_shape != "" else [])
    if FLAGS.platform == 'tensorflow':
      from lib.python.tools import tf_converter_lib
      output_graph_def = tf_converter_lib.convert_to_mace_pb(
          FLAGS.model_file, FLAGS.input_node, input_shape, FLAGS.output_node,
          FLAGS.data_type, FLAGS.runtime, FLAGS.winograd)
    elif FLAGS.platform == 'caffe':
      from lib.python.tools import caffe_converter_lib
      output_graph_def = caffe_converter_lib.convert_to_mace_pb(
          FLAGS.model_file, FLAGS.weight_file, FLAGS.input_node,
          FLAGS.output_node, FLAGS.data_type, FLAGS.runtime, FLAGS.winograd)

  if FLAGS.output_type == 'source':
    source_converter_lib.convert_to_source(
        output_graph_def, mode_pb_checksum, FLAGS.template, FLAGS.obfuscate,
        FLAGS.model_tag, FLAGS.output, FLAGS.runtime, FLAGS.embed_model_data)
  else:
    with open(FLAGS.output, "wb") as f:
      f.write(output_graph_def.SerializeToString())
    # Also dump a human-readable text form next to the binary pb.
    with open(FLAGS.output + '_txt', "wb") as f:
      f.write(str(output_graph_def))
  print("Model conversion is completed.")
def str2bool(v):
  """Parse a human-friendly boolean string; raise ArgumentTypeError otherwise."""
  lowered = v.lower()
  if lowered in ('yes', 'true', 't', 'y', '1'):
    return True
  if lowered in ('no', 'false', 'f', 'n', '0'):
    return False
  raise argparse.ArgumentTypeError('Boolean value expected.')
def parse_args():
  """Parses command line arguments.

  Returns:
    A (FLAGS, unparsed) pair from argparse.parse_known_args().
  """
  parser = argparse.ArgumentParser()
  parser.register("type", "bool", lambda v: v.lower() == "true")
  parser.add_argument(
      "--model_file",
      type=str,
      default="",
      help="TensorFlow \'GraphDef\' file to load, Caffe prototxt file to load.")
  parser.add_argument(
      "--weight_file",
      type=str,
      default="",
      help="Caffe data file to load.")
  parser.add_argument(
      "--output",
      type=str,
      default="",
      help="File to save the output graph to.")
  parser.add_argument(
      "--runtime",
      type=str,
      default="cpu",
      help="Runtime: cpu/gpu/dsp")
  parser.add_argument(
      "--input_node",
      type=str,
      default="input_node",
      help="e.g., input_node")
  parser.add_argument(
      "--output_node",
      type=str,
      default="softmax",
      help="e.g., softmax")
  parser.add_argument(
      "--data_type",
      type=str,
      default='DT_FLOAT',
      help="e.g., DT_HALF/DT_FLOAT")
  parser.add_argument(
      "--output_type",
      type=str,
      default="pb",
      help="output type: source/pb")
  parser.add_argument(
      "--template",
      type=str,
      default="",
      help="template path")
  parser.add_argument(
      "--obfuscate",
      type=str2bool,
      nargs='?',
      const=False,
      default=False,
      help="obfuscate model names")
  parser.add_argument(
      "--model_tag",
      type=str,
      default="",
      help="model tag for generated function and namespace")
  parser.add_argument(
      "--winograd",
      type=str2bool,
      nargs='?',
      const=False,
      default=False,
      help="open winograd convolution or not")
  parser.add_argument(
      "--dsp_mode",
      type=int,
      default=0,
      # Fixed typo: "defalut=0".
      help="dsp run mode, default=0")
  parser.add_argument(
      "--input_shape",
      type=str,
      default="",
      help="input shape.")
  parser.add_argument(
      "--platform",
      type=str,
      default="tensorflow",
      help="tensorflow/caffe")
  parser.add_argument(
      "--embed_model_data",
      type=str2bool,
      default=True,
      # Fixed copy-pasted help text ("input shape.").
      help="embed model data into the generated source code or not.")
  return parser.parse_known_args()
if __name__ == '__main__':
  # Parse known flags into the module-level FLAGS; forward the program name
  # and any unrecognized arguments to main().
  FLAGS, unparsed = parse_args()
  main(unused_args=[sys.argv[0]] + unparsed)
import struct
import os import os
import uuid import uuid
import numpy as np import numpy as np
import hashlib import hashlib
from tensorflow import gfile
from lib.proto import mace_pb2 from lib.proto import mace_pb2
from jinja2 import Environment, FileSystemLoader from jinja2 import Environment, FileSystemLoader
...@@ -82,7 +80,6 @@ def rename_tensor(net_def): ...@@ -82,7 +80,6 @@ def rename_tensor(net_def):
class TensorInfo: class TensorInfo:
def __init__(self, id, t, runtime): def __init__(self, id, t, runtime):
self.id = id self.id = id
self.name = t.name
self.data_type = mace_pb2.DataType.Name(t.data_type) self.data_type = mace_pb2.DataType.Name(t.data_type)
if t.data_type == mace_pb2.DT_FLOAT: if t.data_type == mace_pb2.DT_FLOAT:
if runtime == 'gpu': if runtime == 'gpu':
...@@ -136,7 +133,7 @@ def convert_to_source(net_def, mode_pb_checksum, template, obfuscate, model_tag, ...@@ -136,7 +133,7 @@ def convert_to_source(net_def, mode_pb_checksum, template, obfuscate, model_tag,
) )
model_data.extend(tensor_info.data) model_data.extend(tensor_info.data)
offset += len(tensor_info.data) offset += len(tensor_info.data)
with gfile.GFile(output_dir + 'tensor' + str(counter) + '.cc', "wb") as f: with open(output_dir + 'tensor' + str(counter) + '.cc', "wb") as f:
f.write(source) f.write(source)
counter += 1 counter += 1
...@@ -148,7 +145,7 @@ def convert_to_source(net_def, mode_pb_checksum, template, obfuscate, model_tag, ...@@ -148,7 +145,7 @@ def convert_to_source(net_def, mode_pb_checksum, template, obfuscate, model_tag,
model_data_size = offset, model_data_size = offset,
model_data = model_data model_data = model_data
) )
with gfile.GFile(output_dir + 'tensor_data' + '.cc', "wb") as f: with open(output_dir + 'tensor_data' + '.cc', "wb") as f:
f.write(source) f.write(source)
if not embed_model_data: if not embed_model_data:
f = open(output_dir + model_tag + '.data', "wb") f = open(output_dir + model_tag + '.data', "wb")
...@@ -167,7 +164,7 @@ def convert_to_source(net_def, mode_pb_checksum, template, obfuscate, model_tag, ...@@ -167,7 +164,7 @@ def convert_to_source(net_def, mode_pb_checksum, template, obfuscate, model_tag,
mode = 2, mode = 2,
runtime = runtime, runtime = runtime,
) )
with gfile.GFile(output_dir + 'op' + str(counter) + '.cc', "wb") as f: with open(output_dir + 'op' + str(counter) + '.cc', "wb") as f:
f.write(source) f.write(source)
counter += 1 counter += 1
...@@ -181,5 +178,5 @@ def convert_to_source(net_def, mode_pb_checksum, template, obfuscate, model_tag, ...@@ -181,5 +178,5 @@ def convert_to_source(net_def, mode_pb_checksum, template, obfuscate, model_tag,
runtime = runtime, runtime = runtime,
model_pb_checksum = mode_pb_checksum model_pb_checksum = mode_pb_checksum
) )
with gfile.GFile(output, "wb") as f: with open(output, "wb") as f:
f.write(source) f.write(source)
...@@ -3,6 +3,7 @@ import tensorflow as tf ...@@ -3,6 +3,7 @@ import tensorflow as tf
import numpy as np import numpy as np
import math import math
import copy import copy
from tensorflow import gfile
from lib.python.tools import memory_optimizer from lib.python.tools import memory_optimizer
from tensorflow.core.framework import graph_pb2 from tensorflow.core.framework import graph_pb2
from tensorflow.core.framework import tensor_shape_pb2 from tensorflow.core.framework import tensor_shape_pb2
...@@ -958,10 +959,15 @@ def add_shape_info(input_graph_def, input_node, input_shape): ...@@ -958,10 +959,15 @@ def add_shape_info(input_graph_def, input_node, input_shape):
return inputs_replaced_graph return inputs_replaced_graph
def convert_to_mace_pb(input_graph_def, input_node, input_shape, output_node, data_type, device, winograd): def convert_to_mace_pb(model_file, input_node, input_shape, output_node, data_type, device, winograd):
net_def = mace_pb2.NetDef() net_def = mace_pb2.NetDef()
dt = data_type_map[data_type] dt = data_type_map[data_type]
input_graph_def = tf.GraphDef()
with gfile.Open(model_file, "rb") as f:
data = f.read()
input_graph_def.ParseFromString(data)
input_graph_def = add_shape_info(input_graph_def, input_node, input_shape) input_graph_def = add_shape_info(input_graph_def, input_node, input_shape)
with tf.Session() as session: with tf.Session() as session:
with session.graph.as_default() as graph: with session.graph.as_default() as graph:
...@@ -971,7 +977,7 @@ def convert_to_mace_pb(input_graph_def, input_node, input_shape, output_node, da ...@@ -971,7 +977,7 @@ def convert_to_mace_pb(input_graph_def, input_node, input_shape, output_node, da
converter.convert(input_node, output_node) converter.convert(input_node, output_node)
optimizer = Optimizer(net_def, device) optimizer = Optimizer(net_def, device)
net_def = optimizer.optimize() net_def = optimizer.optimize()
print "PB Converted." print "Model Converted."
if device == 'gpu': if device == 'gpu':
print "start optimize memory." print "start optimize memory."
mem_optimizer = memory_optimizer.MemoryOptimizer(net_def) mem_optimizer = memory_optimizer.MemoryOptimizer(net_def)
......
from lib.proto import mace_pb2 from lib.proto import mace_pb2
import tensorflow as tf import tensorflow as tf
from tensorflow import gfile
from operator import mul from operator import mul
from dsp_ops import DspOps from dsp_ops import DspOps
from lib.python.tools import graph_util from lib.python.tools import graph_util
...@@ -359,12 +360,17 @@ def fuse_quantize(net_def, input_node, output_node): ...@@ -359,12 +360,17 @@ def fuse_quantize(net_def, input_node, output_node):
new_net_def.op.extend(new_ops) new_net_def.op.extend(new_ops)
return new_net_def return new_net_def
def convert_to_mace_pb(input_graph_def, input_node, output_node, dsp_mode): def convert_to_mace_pb(model_file, input_node, output_node, dsp_mode):
""" """
nnlib does not have batch norm, so use tensorflow optimizer to fold nnlib does not have batch norm, so use tensorflow optimizer to fold
batch norm with convolution. The fold optimization reorders ops, so batch norm with convolution. The fold optimization reorders ops, so
we sort ops first by topology. we sort ops first by topology.
""" """
input_graph_def = tf.GraphDef()
with gfile.Open(model_file, "rb") as f:
data = f.read()
input_graph_def.ParseFromString(data)
input_graph_def = graph_util.sort_tf_graph(input_graph_def) input_graph_def = graph_util.sort_tf_graph(input_graph_def)
net_def = mace_pb2.NetDef() net_def = mace_pb2.NetDef()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册