提交 32fa5711 编写于 作者: W wuchenghui

fix graph converter

上级 78f9a398
......@@ -399,10 +399,10 @@ struct Conv2dFunctor<DeviceType::CPU, float> : Conv2dFunctorBase {
transformed_output.Resize(transformed_output_shape);
const float *transformed_filter_ptr;
if (transformed_filter_.dim_size() == 0) {
transformed_filter_.Resize(transformed_filter_shape);
if (is_filter_transformed_) {
transformed_filter_ptr = filter_data;
} else {
transformed_filter_.Resize(transformed_filter_shape);
switch (winograd_out_tile_size) {
case 2:
TransformFilter4x4(filter_data,
......
......@@ -303,7 +303,7 @@ class CaffeConverter(object):
arg.i = self.dt
return output_name
def add_input_transform(self, names):
def add_gpu_input_transform(self, names):
for name in names:
new_input_name = MACE_INPUT_NODE_NAME + '_' + name + ":0"
op_def = self.net_def.op.add()
......@@ -327,7 +327,7 @@ class CaffeConverter(object):
output_shape = input_op.output_shape_map[input_op.name]
self.add_output_shape(op_def, output_shape)
def add_output_transform(self, names):
def add_gpu_output_transform(self, names):
for name in names:
output_name = MACE_OUTPUT_NODE_NAME + '_' + name + ":0"
op_def = self.net_def.op.add()
......@@ -420,18 +420,16 @@ class CaffeConverter(object):
# OIHW -> HWOI
weight_data = op.data[0].transpose((2, 3, 0, 1))
if self.device == 'cpu' and use_winograd:
if use_winograd:
self.convert_winograd_conv_filter_cpu(op, op_def)
else:
self.add_tensor(weight_tensor_name, weight_data)
if self.device == 'gpu':
elif self.device == 'gpu':
buffer_type = "DW_CONV2D_FILTER" \
if is_depthwise else "CONV2D_FILTER"
output_name = self.add_buffer_to_image(weight_tensor_name,
buffer_type)
op_def.input.extend([output_name])
else:
self.add_tensor(weight_tensor_name, weight_data)
op_def.input.extend([weight_tensor_name])
# Add Bias
......@@ -1111,14 +1109,16 @@ class CaffeConverter(object):
output_shape = input_op.output_shape_map[input_op.layer.top[0]]
else:
output_shape = input_op.output_shape_map[input_op.name]
self.add_output_shape(op_def, output_shape)
self.add_output_shape(op_def,
[output_shape[0], output_shape[2],
output_shape[3], output_shape[1]])
def convert(self, input_nodes, input_shapes, output_nodes):
assert self.ops[0].type == 'Input'
self.add_input_op_shape(input_nodes, input_shapes)
if self.device == 'gpu':
self.add_input_transform(input_nodes)
self.add_gpu_input_transform(input_nodes)
if self.device == 'cpu':
self.add_cpu_input_transform(input_nodes)
......@@ -1164,7 +1164,7 @@ class CaffeConverter(object):
op.type))
if self.device == 'gpu':
self.add_output_transform(output_nodes)
self.add_gpu_output_transform(output_nodes)
if self.device == 'cpu':
self.add_cpu_output_transform(output_nodes)
......
......@@ -202,20 +202,39 @@ class TFConverter(object):
dims_arg.name = 'dims'
dims_arg.ints.extend([0, 2, 3, 1])
self.add_output_shape(self.ops[name].outputs, op_def)
output_shapes = []
for output in self.ops[name].outputs:
old_shape = output.shape.as_list()
# NCHW -> NHWC
if len(old_shape) == 2:
new_shape = [old_shape[0], 1, 1, old_shape[1]]
else:
new_shape = [old_shape[0], old_shape[2],
old_shape[3], old_shape[1]]
output_shape = mace_pb2.OutputShape()
output_shape.dims.extend(new_shape)
output_shapes.append(output_shape)
op_def.output_shape.extend(output_shapes)
@staticmethod
def add_output_shape(outputs, op):
def add_output_shape(self, outputs, op):
output_shapes = []
for output in outputs:
output_shape = mace_pb2.OutputShape()
old_shape = []
if isinstance(output, list):
output_shape.dims.extend(output)
old_shape = output
elif isinstance(output, tf.Tensor):
if output.shape.num_elements() is not None:
output_shape.dims.extend(output.shape.as_list())
old_shape = output.shape.as_list()
else:
raise ValueError('output type not supported: ', type(output))
if len(old_shape) == 2:
old_shape = [old_shape[0], old_shape[1], 1, 1]
if self.device == 'cpu': # NHWC -> NCHW
old_shape = [old_shape[0], old_shape[3],
old_shape[1], old_shape[2]]
output_shape = mace_pb2.OutputShape()
output_shape.dims.extend(old_shape)
output_shapes.append(output_shape)
op.output_shape.extend(output_shapes)
......@@ -1089,15 +1108,6 @@ class TFConverter(object):
self.add_output_shape(op.outputs, op_def)
self.resolved_ops[op.name] = 1
def replace_in_out_name(self, input_names, output_names):
    """Prefix the graph's boundary tensor names with the MACE node markers.

    For every op in ``self.net_def``, if its first input tensor matches one
    of ``input_names`` it is prefixed with ``MACE_INPUT_NODE_NAME``, and if
    its first output tensor matches one of ``output_names`` it is prefixed
    with ``MACE_OUTPUT_NODE_NAME``. Only index 0 of each op's input/output
    lists is examined.
    """
    # Tensor names carry the ":0" port suffix, so append it before matching.
    in_names = {name + ":0" for name in input_names}
    out_names = {name + ":0" for name in output_names}
    for op in self.net_def.op:
        if op.input[0] in in_names:
            op.input[0] = MACE_INPUT_NODE_NAME + '_' + op.input[0]
        if op.output[0] in out_names:
            op.output[0] = MACE_OUTPUT_NODE_NAME + '_' + op.output[0]
def convert(self, input_nodes, output_nodes):
if self.device == 'gpu':
self.add_gpu_input_transform(input_nodes)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册