提交 32fa5711 编写于 作者: W wuchenghui

fix graph converter

上级 78f9a398
...@@ -399,10 +399,10 @@ struct Conv2dFunctor<DeviceType::CPU, float> : Conv2dFunctorBase { ...@@ -399,10 +399,10 @@ struct Conv2dFunctor<DeviceType::CPU, float> : Conv2dFunctorBase {
transformed_output.Resize(transformed_output_shape); transformed_output.Resize(transformed_output_shape);
const float *transformed_filter_ptr; const float *transformed_filter_ptr;
if (transformed_filter_.dim_size() == 0) { if (transformed_filter_.dim_size() == 0) {
transformed_filter_.Resize(transformed_filter_shape);
if (is_filter_transformed_) { if (is_filter_transformed_) {
transformed_filter_ptr = filter_data; transformed_filter_ptr = filter_data;
} else { } else {
transformed_filter_.Resize(transformed_filter_shape);
switch (winograd_out_tile_size) { switch (winograd_out_tile_size) {
case 2: case 2:
TransformFilter4x4(filter_data, TransformFilter4x4(filter_data,
......
...@@ -303,7 +303,7 @@ class CaffeConverter(object): ...@@ -303,7 +303,7 @@ class CaffeConverter(object):
arg.i = self.dt arg.i = self.dt
return output_name return output_name
def add_input_transform(self, names): def add_gpu_input_transform(self, names):
for name in names: for name in names:
new_input_name = MACE_INPUT_NODE_NAME + '_' + name + ":0" new_input_name = MACE_INPUT_NODE_NAME + '_' + name + ":0"
op_def = self.net_def.op.add() op_def = self.net_def.op.add()
...@@ -327,7 +327,7 @@ class CaffeConverter(object): ...@@ -327,7 +327,7 @@ class CaffeConverter(object):
output_shape = input_op.output_shape_map[input_op.name] output_shape = input_op.output_shape_map[input_op.name]
self.add_output_shape(op_def, output_shape) self.add_output_shape(op_def, output_shape)
def add_output_transform(self, names): def add_gpu_output_transform(self, names):
for name in names: for name in names:
output_name = MACE_OUTPUT_NODE_NAME + '_' + name + ":0" output_name = MACE_OUTPUT_NODE_NAME + '_' + name + ":0"
op_def = self.net_def.op.add() op_def = self.net_def.op.add()
...@@ -420,18 +420,16 @@ class CaffeConverter(object): ...@@ -420,18 +420,16 @@ class CaffeConverter(object):
# OIHW -> HWOI # OIHW -> HWOI
weight_data = op.data[0].transpose((2, 3, 0, 1)) weight_data = op.data[0].transpose((2, 3, 0, 1))
if self.device == 'cpu' and use_winograd: if use_winograd:
self.convert_winograd_conv_filter_cpu(op, op_def) self.convert_winograd_conv_filter_cpu(op, op_def)
else: elif self.device == 'gpu':
self.add_tensor(weight_tensor_name, weight_data)
if self.device == 'gpu':
buffer_type = "DW_CONV2D_FILTER" \ buffer_type = "DW_CONV2D_FILTER" \
if is_depthwise else "CONV2D_FILTER" if is_depthwise else "CONV2D_FILTER"
output_name = self.add_buffer_to_image(weight_tensor_name, output_name = self.add_buffer_to_image(weight_tensor_name,
buffer_type) buffer_type)
op_def.input.extend([output_name]) op_def.input.extend([output_name])
else: else:
self.add_tensor(weight_tensor_name, weight_data)
op_def.input.extend([weight_tensor_name]) op_def.input.extend([weight_tensor_name])
# Add Bias # Add Bias
...@@ -1111,14 +1109,16 @@ class CaffeConverter(object): ...@@ -1111,14 +1109,16 @@ class CaffeConverter(object):
output_shape = input_op.output_shape_map[input_op.layer.top[0]] output_shape = input_op.output_shape_map[input_op.layer.top[0]]
else: else:
output_shape = input_op.output_shape_map[input_op.name] output_shape = input_op.output_shape_map[input_op.name]
self.add_output_shape(op_def, output_shape) self.add_output_shape(op_def,
[output_shape[0], output_shape[2],
output_shape[3], output_shape[1]])
def convert(self, input_nodes, input_shapes, output_nodes): def convert(self, input_nodes, input_shapes, output_nodes):
assert self.ops[0].type == 'Input' assert self.ops[0].type == 'Input'
self.add_input_op_shape(input_nodes, input_shapes) self.add_input_op_shape(input_nodes, input_shapes)
if self.device == 'gpu': if self.device == 'gpu':
self.add_input_transform(input_nodes) self.add_gpu_input_transform(input_nodes)
if self.device == 'cpu': if self.device == 'cpu':
self.add_cpu_input_transform(input_nodes) self.add_cpu_input_transform(input_nodes)
...@@ -1164,7 +1164,7 @@ class CaffeConverter(object): ...@@ -1164,7 +1164,7 @@ class CaffeConverter(object):
op.type)) op.type))
if self.device == 'gpu': if self.device == 'gpu':
self.add_output_transform(output_nodes) self.add_gpu_output_transform(output_nodes)
if self.device == 'cpu': if self.device == 'cpu':
self.add_cpu_output_transform(output_nodes) self.add_cpu_output_transform(output_nodes)
......
...@@ -202,20 +202,39 @@ class TFConverter(object): ...@@ -202,20 +202,39 @@ class TFConverter(object):
dims_arg.name = 'dims' dims_arg.name = 'dims'
dims_arg.ints.extend([0, 2, 3, 1]) dims_arg.ints.extend([0, 2, 3, 1])
self.add_output_shape(self.ops[name].outputs, op_def) output_shapes = []
for output in self.ops[name].outputs:
old_shape = output.shape.as_list()
# NCHW -> NHWC
if len(old_shape) == 2:
new_shape = [old_shape[0], 1, 1, old_shape[1]]
else:
new_shape = [old_shape[0], old_shape[2],
old_shape[3], old_shape[1]]
output_shape = mace_pb2.OutputShape()
output_shape.dims.extend(new_shape)
output_shapes.append(output_shape)
op_def.output_shape.extend(output_shapes)
@staticmethod def add_output_shape(self, outputs, op):
def add_output_shape(outputs, op):
output_shapes = [] output_shapes = []
for output in outputs: for output in outputs:
output_shape = mace_pb2.OutputShape() old_shape = []
if isinstance(output, list): if isinstance(output, list):
output_shape.dims.extend(output) old_shape = output
elif isinstance(output, tf.Tensor): elif isinstance(output, tf.Tensor):
if output.shape.num_elements() is not None: if output.shape.num_elements() is not None:
output_shape.dims.extend(output.shape.as_list()) old_shape = output.shape.as_list()
else: else:
raise ValueError('output type not supported: ', type(output)) raise ValueError('output type not supported: ', type(output))
if len(old_shape) == 2:
old_shape = [old_shape[0], old_shape[1], 1, 1]
if self.device == 'cpu': # NHWC -> NCHW
old_shape = [old_shape[0], old_shape[3],
old_shape[1], old_shape[2]]
output_shape = mace_pb2.OutputShape()
output_shape.dims.extend(old_shape)
output_shapes.append(output_shape) output_shapes.append(output_shape)
op.output_shape.extend(output_shapes) op.output_shape.extend(output_shapes)
...@@ -1089,15 +1108,6 @@ class TFConverter(object): ...@@ -1089,15 +1108,6 @@ class TFConverter(object):
self.add_output_shape(op.outputs, op_def) self.add_output_shape(op.outputs, op_def)
self.resolved_ops[op.name] = 1 self.resolved_ops[op.name] = 1
def replace_in_out_name(self, input_names, output_names):
in_names = set([input_name + ":0" for input_name in input_names])
out_names = set([output_name + ":0" for output_name in output_names])
for op in self.net_def.op:
if op.input[0] in in_names:
op.input[0] = MACE_INPUT_NODE_NAME + '_' + op.input[0]
if op.output[0] in out_names:
op.output[0] = MACE_OUTPUT_NODE_NAME + '_' + op.output[0]
def convert(self, input_nodes, output_nodes): def convert(self, input_nodes, output_nodes):
if self.device == 'gpu': if self.device == 'gpu':
self.add_gpu_input_transform(input_nodes) self.add_gpu_input_transform(input_nodes)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册