Commit bfbe2d6f authored by liuqi

Support operations with multiple outputs.

Parent eaae45c3
@@ -85,8 +85,7 @@ message OperatorDef {
   repeated OutputShape output_shape = 6;
   repeated DataType output_type = 7;
-  // Memory optimization: only support one single output op
-  optional int32 mem_id = 10 [default = -1];
+  repeated int32 mem_id = 10;
   // for hexagon mace-nnlib
   optional uint32 node_id = 100;
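Note: with `mem_id` switched from `optional` to `repeated`, an operator can carry one memory-block id per output instead of a single id. A minimal sketch of what a multi-output op can now hold (assuming the generated `mace_pb2` module; names and ids are illustrative):

op = mace_pb2.OperatorDef()  # module path of the generated proto may differ
op.name = 'slice1'
op.output.extend(['slice1_0:0', 'slice1_1:0'])
op.mem_id.extend([3, 4])  # one memory block per output, in output order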
......
@@ -45,19 +45,11 @@ class Operator(object):
     self.parents = []
     self.children = []
     self.data = []
-    self.output_shape = []
+    self.output_shape_map = {}

   def add_parent(self, parent_op):
-    assert parent_op not in self.parents
     self.parents.append(parent_op)
-    if self not in parent_op.children:
-      parent_op.children.append(self)
-
-  def add_child(self, child_op):
-    assert child_op not in self.children
-    self.children.append(child_op)
-    if self not in child_op.parents:
-      child_op.parents.append(self)
+    parent_op.children.append(self)

   def get_single_parent(self):
     if len(self.parents) != 1:
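Note: replacing the single `output_shape` list with `output_shape_map` keys each shape by its Caffe top name, which is what lets one op record shapes for several outputs. A sketch with hypothetical names:

op = Operator('slice1', 'Slice', None)  # (name, type, layer), as used below
op.output_shape_map['slice1_0'] = [1, 28, 28, 32]
op.output_shape_map['slice1_1'] = [1, 28, 28, 32]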
@@ -72,18 +64,6 @@ def BlobToNPArray(blob):
   else:
     return np.asarray(blob.data, dtype=np.float32).reshape(blob.shape.dim)

-def CommonConvert(op, mace_type, dt):
-  op_def = mace_pb2.OperatorDef()
-  arg = op_def.arg.add()
-  arg.name = 'T'
-  arg.i = dt
-  data_format_arg = op_def.arg.add()
-  data_format_arg.name = 'data_format'
-  data_format_arg.s = 'NHWC'
-  op_def.name = op.name
-  op_def.type = mace_type
-  op_def.input.extend([parent.name+':0' for parent in op.parents])
-  return op_def
-
 class Shapes(object):
   @staticmethod
@@ -111,6 +91,10 @@ class Shapes(object):
       output_shape[axis] += input_shape[axis]
     return output_shape

+  @staticmethod
+  def slice_shape(input_shape, num_output):
+    return [input_shape[0], input_shape[1], input_shape[2], input_shape[3]/num_output]
+
 class CaffeConverter(object):
   def __init__(self, caffe_net, weights, net_def, dt, device, winograd):
     self.net_def = net_def
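Note: `slice_shape` splits only the channel dimension of an NHWC shape; `/` is Python 2 integer division here. A worked example with a hypothetical input:

Shapes.slice_shape([1, 56, 56, 96], 3)  # -> [1, 56, 56, 32]: N/H/W kept, channels split evenly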
@@ -121,11 +105,14 @@ class CaffeConverter(object):
     self.winograd = winograd
     self.resolved_ops = set()
     self.ops = []
+    self.inputs_map = {}

     # Add Input operations
+    top_name_map = {}
     inputs = caffe_net.input
     for input in inputs:
       self.ops.extend([Operator(input, 'Input', None)])
+      top_name_map[input] = input

     layers = caffe_net.layer
     # remove train layers and dropout
@@ -137,21 +124,28 @@ class CaffeConverter(object):
     self.ops.extend([Operator(layer.name, layer.type, layer) for layer in layers])

     self.ops_map = {op.name : op for op in self.ops}
-    output_op = {}
+    output_op_map = {}
     for layer in layers:
       op = self.ops_map[layer.name]
       for input_name in layer.bottom:
         assert input_name != layer.name
-        parent_op = output_op.get(input_name)
+        parent_op = output_op_map.get(input_name)
         if parent_op is None:
           parent_op = self.ops_map[input_name]
         op.add_parent(parent_op)
-      if len(layer.top) > 1:
-        raise Exception('Only support single-output layers')
-      for output_name in layer.top:
+        if op.name not in self.inputs_map:
+          self.inputs_map[op.name] = []
+        self.inputs_map[op.name].extend([top_name_map[input_name]])
+      for i in range(len(layer.top)):
+        output_name = layer.top[i]
+        if len(layer.top) == 1:
+          top_name_map[output_name] = op.name
+        else:
+          top_name_map[output_name] = op.name + '_' + str(i)
         if output_name == layer.name:
           continue
-        output_op[output_name] = op
+        output_op_map[output_name] = op

     # Load weights
     weights_layers = weights.layer
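Note: the effect of `top_name_map` is to give every Caffe top a unique MACE tensor name: a single-top layer reuses the layer name, while the i-th top of a multi-top layer becomes `<layer>_<i>`. A self-contained sketch with hypothetical names:

top_name_map = {}
layer_name, tops = 'slice1', ['s0', 's1']  # hypothetical two-top layer
for i, output_name in enumerate(tops):
  top_name_map[output_name] = layer_name if len(tops) == 1 else layer_name + '_' + str(i)
print(top_name_map)  # {'s0': 'slice1_0', 's1': 'slice1_1'}

A consumer of top 's1' then records 'slice1_1' in inputs_map, which is why the CommonConvert method below can emit the input name 'slice1_1:0'.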
@@ -165,6 +159,19 @@ class CaffeConverter(object):
     # toposort ops
     self.ops = self.toposort_ops()

+  def CommonConvert(self, op, mace_type):
+    op_def = mace_pb2.OperatorDef()
+    arg = op_def.arg.add()
+    arg.name = 'T'
+    arg.i = self.dt
+    data_format_arg = op_def.arg.add()
+    data_format_arg.name = 'data_format'
+    data_format_arg.s = 'NHWC'
+    op_def.name = op.name
+    op_def.type = mace_type
+    op_def.input.extend([name+':0' for name in self.inputs_map[op.name]])
+    return op_def
+
   def remove_unused_layers(self, layers):
     phase_map = {0: 'train', 1: 'test'}
     test_layers_names = set()
@@ -325,7 +332,7 @@ class CaffeConverter(object):
     return pad, stride, kernel

   def convert_conv2d(self, op):
-    op_def = CommonConvert(op, 'Conv2D', self.dt)
+    op_def = self.CommonConvert(op, 'Conv2D')
     param = op.layer.convolution_param

     # Add filter
@@ -364,11 +371,11 @@ class CaffeConverter(object):
     final_op = op
     self.resolved_ops.add(op.name)

-    output_shape = Shapes.conv_pool_shape(op.get_single_parent().output_shape,
+    output_shape = Shapes.conv_pool_shape(op.get_single_parent().output_shape_map[op.layer.bottom[0]],
                                           weight_data.shape,
                                           paddings, strides, dilations,
                                           math.floor)
-    op.output_shape = output_shape
+    op.output_shape_map[op.layer.top[0]] = output_shape

     if len(self.ops_map[final_op.name].children) == 1 \
         and self.ops_map[final_op.name].children[0].type in activation_name_map:
@@ -378,7 +385,7 @@ class CaffeConverter(object):
       fused_act_arg.name = 'activation'
       fused_act_arg.s = activation_name_map[activation_op.type]
       final_op = activation_op
-      final_op.output_shape = output_shape
+      final_op.output_shape_map[final_op.layer.top[0]] = output_shape
       self.resolved_ops.add(activation_op.name)

     op_def.output.extend([final_op.name+':0'])
@@ -388,7 +395,7 @@ class CaffeConverter(object):
   def convert_batchnorm(self, op):
     if len(op.children) != 1 or op.children[0].type != 'Scale':
       raise Exception('Now only support BatchNorm+Scale')
-    op_def = CommonConvert(op, 'FoldedBatchNorm', self.dt)
+    op_def = self.CommonConvert(op, 'FoldedBatchNorm')
     scale_op = op.children[0]

     epsilon_value = op.layer.batch_norm_param.eps
@@ -422,7 +429,7 @@ class CaffeConverter(object):
     self.resolved_ops.add(scale_op.name)
     final_op = scale_op

-    output_shape = op.get_single_parent().output_shape
+    output_shape = op.get_single_parent().output_shape_map[op.layer.bottom[0]]

     if len(self.ops_map[final_op.name].children) == 1 \
         and self.ops_map[final_op.name].children[0].type in activation_name_map:
@@ -431,7 +438,7 @@ class CaffeConverter(object):
       fused_act_arg.name = 'activation'
       fused_act_arg.s = activation_name_map[activation_op.type]
       final_op = activation_op
-      final_op.output_shape = output_shape
+      final_op.output_shape_map[final_op.layer.top[0]] = output_shape
       self.resolved_ops.add(activation_op.name)

     op_def.output.extend([final_op.name + ':0'])
@@ -447,13 +454,13 @@ class CaffeConverter(object):
     except AttributeError:
       pass

-    op_def = CommonConvert(op, 'FC', self.dt)
+    op_def = self.CommonConvert(op, 'FC')
     weight_tensor_name = op.name + '_weight:0'
     if op.data[0].ndim not in [2, 4]:
       raise ValueError('Unexpected weight ndim.')
     if op.data[0].ndim == 4 and list(op.data[0].shape[:2] != [1, 1]):
       raise ValueError('Only support 4D weight with shape [1, 1, *, *]')
-    input_shape = op.get_single_parent().output_shape
+    input_shape = op.get_single_parent().output_shape_map[op.layer.bottom[0]]
     weight_data = op.data[0].reshape(-1, op.data[0].shape[-1])
     assert weight_data.shape[1] == (input_shape[1] * input_shape[2] * input_shape[3])
     weight_data = weight_data.reshape(-1, input_shape[3], input_shape[1], input_shape[2])
@@ -479,7 +486,7 @@ class CaffeConverter(object):
     self.resolved_ops.add(op.name)

     output_shape = Shapes.fully_connected_shape(input_shape, weight_data.shape)
-    op.output_shape = output_shape
+    op.output_shape_map[op.layer.top[0]] = output_shape
     final_op = op

     if len(self.ops_map[final_op.name].children) == 1 \
@@ -489,7 +496,7 @@ class CaffeConverter(object):
       fused_act_arg.name = 'activation'
       fused_act_arg.s = activation_name_map[activation_op.type]
       final_op = activation_op
-      final_op.output_shape = output_shape
+      final_op.output_shape_map[final_op.layer.top[0]] = output_shape
       self.resolved_ops.add(activation_op.name)

     op_def.output.extend([final_op.name + ':0'])
@@ -497,7 +504,7 @@ class CaffeConverter(object):
     self.net_def.op.extend([op_def])

   def convert_pooling(self, op):
-    op_def = CommonConvert(op, 'Pooling', self.dt)
+    op_def = self.CommonConvert(op, 'Pooling')

     param = op.layer.pooling_param
     paddings, strides, kernels = self.add_stride_pad_kernel_arg(param, op_def)
@@ -509,11 +516,11 @@ class CaffeConverter(object):
     pooling_type_arg.name = 'pooling_type'
     pooling_type_arg.i = pooling_type_mode[pooling_type]

-    input_shape = op.get_single_parent().output_shape
+    input_shape = op.get_single_parent().output_shape_map[op.layer.bottom[0]]
     filter_shape = [kernels[0], kernels[1], input_shape[3], input_shape[3]]
     output_shape = Shapes.conv_pool_shape(input_shape, filter_shape,
                                           paddings, strides, [1, 1], math.ceil)
-    op.output_shape = output_shape
+    op.output_shape_map[op.layer.top[0]] = output_shape

     op_def.output.extend([op.name + ':0'])
     self.add_output_shape(op_def, output_shape)
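Note: convolution and pooling both compute their spatial output size through `Shapes.conv_pool_shape`, differing only in the rounding function, which mirrors Caffe's rule: convolution rounds down, pooling rounds up. A hedged sketch of that rule (the exact formula lives in `conv_pool_shape`, which is not shown in this diff):

import math

def caffe_out_dim(in_dim, kernel, pad, stride, round_func):
  # standard Caffe spatial-size rule (sketch, ignoring dilation)
  return int(round_func((in_dim + 2 * pad - kernel) / float(stride))) + 1

print(caffe_out_dim(112, 3, 0, 2, math.floor))  # 55, conv-style
print(caffe_out_dim(112, 3, 0, 2, math.ceil))   # 56, pool-style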
@@ -521,19 +528,19 @@ class CaffeConverter(object):
     self.resolved_ops.add(op.name)

   def convert_activation(self, op):
-    op_def = CommonConvert(op, 'Activation', self.dt)
+    op_def = self.CommonConvert(op, 'Activation')
     activation_arg = op_def.arg.add()
     activation_arg.name = 'activation'
     activation_arg.s = activation_name_map[op.type]
     op_def.output.extend([op.name + ':0'])
-    output_shape = op.get_single_parent().output_shape
-    op.output_shape = output_shape
+    output_shape = op.get_single_parent().output_shape_map[op.layer.bottom[0]]
+    op.output_shape_map[op.layer.top[0]] = output_shape
     self.add_output_shape(op_def, output_shape)
     self.net_def.op.extend([op_def])
     self.resolved_ops.add(op.name)

   def convert_prelu(self, op):
-    op_def = CommonConvert(op, 'Activation', self.dt)
+    op_def = self.CommonConvert(op, 'Activation')
     activation_arg = op_def.arg.add()
     activation_arg.name = 'activation'
     activation_arg.s = 'PRELU'
@@ -546,23 +553,23 @@ class CaffeConverter(object):
     else:
       op_def.input.extend([alpha_tensor_name])
     op_def.output.extend([op.name + ':0'])
-    output_shape = op.get_single_parent().output_shape
-    op.output_shape = output_shape
+    output_shape = op.get_single_parent().output_shape_map[op.layer.bottom[0]]
+    op.output_shape_map[op.layer.top[0]] = output_shape
     self.add_output_shape(op_def, output_shape)
     self.net_def.op.extend([op_def])
     self.resolved_ops.add(op.name)

   def convert_add(self, op):
-    op_def = CommonConvert(op, 'AddN', self.dt)
+    op_def = self.CommonConvert(op, 'AddN')
     op_def.output.extend([op.name + ':0'])
-    output_shape = op.parents[0].output_shape
-    op.output_shape = output_shape
+    output_shape = op.parents[0].output_shape_map[op.layer.bottom[0]]
+    op.output_shape_map[op.layer.top[0]] = output_shape
     self.add_output_shape(op_def, output_shape)
     self.net_def.op.extend([op_def])
     self.resolved_ops.add(op.name)

   def convert_concat(self, op):
-    op_def = CommonConvert(op, 'Concat', self.dt)
+    op_def = self.CommonConvert(op, 'Concat')
     axis_arg = op_def.arg.add()
     axis_arg.name = 'axis'
     axis_arg.i = 3
@@ -575,17 +582,17 @@ class CaffeConverter(object):
       pass

     input_shapes = []
-    for parent in op.parents:
-      input_shapes.append(parent.output_shape)
+    for i in range(len(op.parents)):
+      input_shapes.append(op.parents[i].output_shape_map[op.layer.bottom[i]])
     output_shape = Shapes.concat_shape(input_shapes, axis_arg.i)
-    op.output_shape = output_shape
+    op.output_shape_map[op.layer.top[0]] = output_shape
     self.add_output_shape(op_def, output_shape)
     op_def.output.extend([op.name + ':0'])
     self.net_def.op.extend([op_def])
     self.resolved_ops.add(op.name)

   def convert_eltwise(self, op):
-    op_def = CommonConvert(op, 'Eltwise', self.dt)
+    op_def = self.CommonConvert(op, 'Eltwise')
     param = op.layer.eltwise_param
     type_arg = op_def.arg.add()
     type_arg.name = 'type'
@@ -595,17 +602,40 @@ class CaffeConverter(object):
       coeff_arg.name = 'coeff'
       coeff_arg.ints.extend(list(param.coeff))

-    output_shape = op.parents[0].output_shape
-    op.output_shape = output_shape
+    output_shape = op.parents[0].output_shape_map[op.layer.bottom[0]]
+    op.output_shape_map[op.layer.top[0]] = output_shape
     self.add_output_shape(op_def, output_shape)
     op_def.output.extend([op.name + ':0'])
     self.net_def.op.extend([op_def])
     self.resolved_ops.add(op.name)

+  def convert_slice(self, op):
+    op_def = self.CommonConvert(op, 'Slice')
+    if op.layer.HasField('slice_param'):
+      param = op.layer.slice_param
+      if param.HasField('axis') and param.axis != 1:
+        raise Exception('MACE does not support slice with axis ' + str(param.axis))
+      if len(param.slice_point) > 0:
+        raise Exception('MACE does not support slice with slice_point')
+
+    input_shape = op.parents[0].output_shape_map[op.layer.bottom[0]]
+    num_outputs = len(op.layer.top)
+    if (input_shape[3] % num_outputs) != 0 or \
+        (self.device == 'gpu' and ((input_shape[3] / num_outputs) % 4 != 0)):
+      raise Exception('MACE does not support slice with input shape '
+                      + str(input_shape) + ' and number of output ' + str(num_outputs))
+    output_shape = Shapes.slice_shape(input_shape, num_outputs)
+    for i in range(len(op.layer.top)):
+      op.output_shape_map[op.layer.top[i]] = output_shape
+      self.add_output_shape(op_def, output_shape)
+      op_def.output.extend([op.name + '_' + str(i) + ':0'])
+
+    self.net_def.op.extend([op_def])
+    self.resolved_ops.add(op.name)
+
   def convert_normal_op(self, op):
-    op_def = CommonConvert(op, op.type, self.dt)
-    output_shape = op.parents[0].output_shape
-    op.output_shape = output_shape
+    op_def = self.CommonConvert(op, op.type)
+    output_shape = op.parents[0].output_shape_map[op.layer.bottom[0]]
+    op.output_shape_map[op.layer.top[0]] = output_shape
     self.add_output_shape(op_def, output_shape)
     op_def.output.extend([op.name + ':0'])
     self.net_def.op.extend([op_def])
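Note: the GPU branch requires every slice output to keep a channel count divisible by 4, presumably because the OpenCL image layout packs 4 channels per pixel. A worked check with hypothetical shapes:

input_shape = [1, 28, 28, 96]  # NHWC
num_outputs = 3
assert input_shape[3] % num_outputs == 0        # 96 / 3 = 32 channels per output
assert (input_shape[3] / num_outputs) % 4 == 0  # 32 % 4 == 0, so OK on GPU
# [1, 28, 28, 6] with 3 outputs would pass the first check but fail the second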
@@ -631,7 +661,7 @@ class CaffeConverter(object):
     assert len(input_nodes) == len(input_shapes)
     for i in range(len(input_nodes)):
       input_op = self.ops_map[input_nodes[i]]
-      input_op.output_shape = input_shapes[i]
+      input_op.output_shape_map[input_op.name] = input_shapes[i]

   def convert(self, input_nodes, input_shapes, output_nodes):
     is_single = len(input_nodes) == 1 and len(output_nodes) == 1
@@ -666,6 +696,8 @@ class CaffeConverter(object):
         self.convert_eltwise(op)
       elif op.type in ['Softmax']:
         self.convert_normal_op(op)
+      elif op.type == 'Slice':
+        self.convert_slice(op)
       else:
         raise Exception('Unknown Op: %s, type: %s' % (op.name, op.type))
......
@@ -18,26 +18,26 @@ def file_checksum(fname):
 def main(unused_args):
   if not os.path.isfile(FLAGS.model_file):
     print("Input graph file '" + FLAGS.model_file + "' does not exist!")
-    return -1
+    sys.exit(-1)

   model_checksum = file_checksum(FLAGS.model_file)
   if FLAGS.model_checksum != "" and FLAGS.model_checksum != model_checksum:
     print("Model checksum mismatch: %s != %s" % (model_checksum, FLAGS.model_checksum))
-    return -1
+    sys.exit(-1)

   if FLAGS.platform == 'caffe':
     if not os.path.isfile(FLAGS.weight_file):
       print("Input weight file '" + FLAGS.weight_file + "' does not exist!")
-      return -1
+      sys.exit(-1)

     weight_checksum = file_checksum(FLAGS.weight_file)
     if FLAGS.weight_checksum != "" and FLAGS.weight_checksum != weight_checksum:
       print("Weight checksum mismatch: %s != %s" % (weight_checksum, FLAGS.weight_checksum))
-      return -1
+      sys.exit(-1)

     if FLAGS.runtime == 'dsp':
       print("DSP does not support Caffe models yet.")
-      return -1
+      sys.exit(-1)

     from lib.python.tools import caffe_converter_lib
     output_graph_def = caffe_converter_lib.convert_to_mace_pb(
......
@@ -23,11 +23,12 @@ class MemoryOptimizer(object):
     for op in net_def.op:
       if self.is_buffer_image_op(op):
         continue
-      tensor_name = op.output[0]
-      if tensor_name in consumers:
-        self.ref_counter[tensor_name] = len(consumers[tensor_name])
-      else:
-        self.ref_counter[tensor_name] = 0
+      for output in op.output:
+        tensor_name = output
+        if tensor_name in consumers:
+          self.ref_counter[tensor_name] = len(consumers[tensor_name])
+        else:
+          self.ref_counter[tensor_name] = 0

   def is_buffer_image_op(self, op):
     return op.type == 'BufferToImage' or op.type == 'ImageToBuffer'
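Note: reference counting now walks every output of an op rather than only `output[0]`. A condensed, self-contained sketch for a hypothetical two-output op:

consumers = {'slice1_0:0': ['conv2'], 'slice1_1:0': ['conv3', 'concat1']}  # tensor -> consuming ops
ref_counter = {}
for output in ['slice1_0:0', 'slice1_1:0']:
  ref_counter[output] = len(consumers.get(output, []))
print(ref_counter)  # {'slice1_0:0': 1, 'slice1_1:0': 2}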
@@ -46,25 +47,29 @@ class MemoryOptimizer(object):
     for op in self.net_def.op:
       if self.is_buffer_image_op(op):
         continue
-      if len(self.idle_mem) == 0:
-        # allocate new mem
-        mem_id = self.total_mem_count
-        self.total_mem_count += 1
-      else:
-        # reuse mem
-        mem_id = self.idle_mem.pop()
       if not op.output_shape:
         print('WARNING: There is no output shape information to do memory optimization.')
         return
-      op.mem_id = mem_id
-      self.op_mem[op.output[0]] = mem_id
-      if mem_id not in self.mem_block:
-        self.mem_block[mem_id] = [0, 0]
-      mem_size = self.mem_block[mem_id]
-      op_mem_size = self.get_mem_size(op.type, op.output_shape[0].dims)
-      mem_size[0] = max(mem_size[0], op_mem_size[0])
-      mem_size[1] = max(mem_size[1], op_mem_size[1])
+      if len(op.output_shape) != len(op.output):
+        print('WARNING: the number of output shape is not equal to the number of output.')
+        return
+      for i in range(len(op.output)):
+        if len(self.idle_mem) == 0:
+          # allocate new mem
+          mem_id = self.total_mem_count
+          self.total_mem_count += 1
+        else:
+          # reuse mem
+          mem_id = self.idle_mem.pop()
+        op.mem_id.extend([mem_id])
+        self.op_mem[op.output[i]] = mem_id
+        if mem_id not in self.mem_block:
+          self.mem_block[mem_id] = [0, 0]
+        mem_size = self.mem_block[mem_id]
+        op_mem_size = self.get_mem_size(op.type, op.output_shape[i].dims)
+        mem_size[0] = max(mem_size[0], op_mem_size[0])
+        mem_size[1] = max(mem_size[1], op_mem_size[1])

       # de-ref input tensor mem
       for ipt in op.input:
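Note: the allocation loop now runs once per output, so a two-output op claims two memory blocks in a single pass, still preferring idle blocks before growing the pool. A self-contained sketch of the policy (names hypothetical):

idle_mem, total_mem_count, mem_ids = set(), 0, []
for i in range(2):              # one iteration per output
  if len(idle_mem) == 0:
    mem_id = total_mem_count    # allocate a new block
    total_mem_count += 1
  else:
    mem_id = idle_mem.pop()     # or reuse an idle one
  mem_ids.append(mem_id)
print(mem_ids)                  # [0, 1] when no blocks are idle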
......
@@ -19,13 +19,15 @@ void UpdateOp(mace::OperatorDef &op,
               const std::vector<std::string> &inputs,
               const std::vector<std::string> &outputs,
               const std::vector<mace::DataType> &output_types,
-              uint32_t node_id) {
+              uint32_t node_id,
+              const std::vector<int> &mem_ids) {
   op.set_name(name);
   op.set_type(type);
   op.set_input(inputs);
   op.set_output(outputs);
   op.set_output_type(output_types);
   op.set_node_id(node_id);
+  op.set_mem_id(mem_ids);
 }

 } // namespace
@@ -66,10 +68,6 @@ void CreateOperator{{i}}(mace::OperatorDef &op) {
   {% endif %}
   {% endfor %}

-  {% if net.op[i].HasField('mem_id') %}
-  op.set_mem_id({{net.op[i].mem_id}});
-  {% endif %}
-
   {% for shape in net.op[i].output_shape %}
   {% if shape.dims | length > 0 %}
   op.add_output_shape(mace::OutputShape({ {{ shape.dims|join(', ') }} }));
@@ -85,7 +83,8 @@ void CreateOperator{{i}}(mace::OperatorDef &op) {
            { {{ net.op[i].input|stringfy }} },
            { {{ net.op[i].output|stringfy }} },
            output_types,
-           {{ net.op[i].node_id }});
+           {{ net.op[i].node_id }},
+           { {{ net.op[i].mem_id | join(', ') }} });

   {% if runtime == 'dsp' %}
   op.set_padding({{ net.op[i].padding }});
......