diff --git a/mace/kernels/conv_2d.h b/mace/kernels/conv_2d.h
index acd05b016838f8df2bf28f2709b13fee47fa89f7..b52d8e6359ffffc629e7fba156d7920800c3dd36 100644
--- a/mace/kernels/conv_2d.h
+++ b/mace/kernels/conv_2d.h
@@ -399,10 +399,10 @@ struct Conv2dFunctor : Conv2dFunctorBase {
       transformed_output.Resize(transformed_output_shape);
       const float *transformed_filter_ptr;
       if (transformed_filter_.dim_size() == 0) {
-        transformed_filter_.Resize(transformed_filter_shape);
         if (is_filter_transformed_) {
           transformed_filter_ptr = filter_data;
         } else {
+          transformed_filter_.Resize(transformed_filter_shape);
           switch (winograd_out_tile_size) {
             case 2:
               TransformFilter4x4(filter_data,
diff --git a/mace/python/tools/caffe_converter_lib.py b/mace/python/tools/caffe_converter_lib.py
index 7bde783f440c02b062b65012b3ad36ca1ea67602..9513b1fe74b7e4bb4a4942db8efe39f7ac538390 100644
--- a/mace/python/tools/caffe_converter_lib.py
+++ b/mace/python/tools/caffe_converter_lib.py
@@ -303,7 +303,7 @@ class CaffeConverter(object):
             arg.i = self.dt
         return output_name
 
-    def add_input_transform(self, names):
+    def add_gpu_input_transform(self, names):
         for name in names:
             new_input_name = MACE_INPUT_NODE_NAME + '_' + name + ":0"
             op_def = self.net_def.op.add()
@@ -327,7 +327,7 @@ class CaffeConverter(object):
                 output_shape = input_op.output_shape_map[input_op.name]
             self.add_output_shape(op_def, output_shape)
 
-    def add_output_transform(self, names):
+    def add_gpu_output_transform(self, names):
         for name in names:
             output_name = MACE_OUTPUT_NODE_NAME + '_' + name + ":0"
             op_def = self.net_def.op.add()
@@ -420,18 +420,16 @@ class CaffeConverter(object):
                 # OIHW -> HWOI
                 weight_data = op.data[0].transpose((2, 3, 0, 1))
 
-        if self.device == 'cpu' and use_winograd:
+        if use_winograd:
             self.convert_winograd_conv_filter_cpu(op, op_def)
-        else:
-            self.add_tensor(weight_tensor_name, weight_data)
-
-        if self.device == 'gpu':
+        elif self.device == 'gpu':
             buffer_type = "DW_CONV2D_FILTER" \
                 if is_depthwise else "CONV2D_FILTER"
             output_name = self.add_buffer_to_image(weight_tensor_name,
                                                    buffer_type)
             op_def.input.extend([output_name])
         else:
+            self.add_tensor(weight_tensor_name, weight_data)
             op_def.input.extend([weight_tensor_name])
 
         # Add Bias
@@ -1111,14 +1109,16 @@ class CaffeConverter(object):
             output_shape = input_op.output_shape_map[input_op.layer.top[0]]
         else:
             output_shape = input_op.output_shape_map[input_op.name]
-        self.add_output_shape(op_def, output_shape)
+        self.add_output_shape(op_def,
+                              [output_shape[0], output_shape[2],
+                               output_shape[3], output_shape[1]])
 
     def convert(self, input_nodes, input_shapes, output_nodes):
         assert self.ops[0].type == 'Input'
         self.add_input_op_shape(input_nodes, input_shapes)
 
         if self.device == 'gpu':
-            self.add_input_transform(input_nodes)
+            self.add_gpu_input_transform(input_nodes)
 
         if self.device == 'cpu':
             self.add_cpu_input_transform(input_nodes)
@@ -1164,7 +1164,7 @@ class CaffeConverter(object):
                     op.type))
 
         if self.device == 'gpu':
-            self.add_output_transform(output_nodes)
+            self.add_gpu_output_transform(output_nodes)
 
         if self.device == 'cpu':
             self.add_cpu_output_transform(output_nodes)
diff --git a/mace/python/tools/tf_converter_lib.py b/mace/python/tools/tf_converter_lib.py
index 354c07b3d370d9c5dac9b1e42e8bb5ef88144a95..4ee1ab280c84445a1d7df1c3e406b8cfde440889 100644
--- a/mace/python/tools/tf_converter_lib.py
+++ b/mace/python/tools/tf_converter_lib.py
@@ -202,20 +202,39 @@ class TFConverter(object):
             dims_arg.name = 'dims'
             dims_arg.ints.extend([0, 2, 3, 1])
 
-            self.add_output_shape(self.ops[name].outputs, op_def)
-
-    @staticmethod
-    def add_output_shape(outputs, op):
+            output_shapes = []
+            for output in self.ops[name].outputs:
+                old_shape = output.shape.as_list()
+                # NCHW -> NHWC
+                if len(old_shape) == 2:
+                    new_shape = [old_shape[0], 1, 1, old_shape[1]]
+                else:
+                    new_shape = [old_shape[0], old_shape[2],
+                                 old_shape[3], old_shape[1]]
+                output_shape = mace_pb2.OutputShape()
+                output_shape.dims.extend(new_shape)
+                output_shapes.append(output_shape)
+            op_def.output_shape.extend(output_shapes)
+
+    def add_output_shape(self, outputs, op):
         output_shapes = []
         for output in outputs:
-            output_shape = mace_pb2.OutputShape()
+            old_shape = []
             if isinstance(output, list):
-                output_shape.dims.extend(output)
+                old_shape = output
             elif isinstance(output, tf.Tensor):
                 if output.shape.num_elements() is not None:
-                    output_shape.dims.extend(output.shape.as_list())
+                    old_shape = output.shape.as_list()
             else:
                 raise ValueError('output type not supported: ', type(output))
+            if len(old_shape) == 2:
+                old_shape = [old_shape[0], old_shape[1], 1, 1]
+
+            if self.device == 'cpu':  # NHWC -> NCHW
+                old_shape = [old_shape[0], old_shape[3],
+                             old_shape[1], old_shape[2]]
+            output_shape = mace_pb2.OutputShape()
+            output_shape.dims.extend(old_shape)
             output_shapes.append(output_shape)
         op.output_shape.extend(output_shapes)
 
@@ -1089,15 +1108,6 @@ class TFConverter(object):
         self.add_output_shape(op.outputs, op_def)
         self.resolved_ops[op.name] = 1
 
-    def replace_in_out_name(self, input_names, output_names):
-        in_names = set([input_name + ":0" for input_name in input_names])
-        out_names = set([output_name + ":0" for output_name in output_names])
-        for op in self.net_def.op:
-            if op.input[0] in in_names:
-                op.input[0] = MACE_INPUT_NODE_NAME + '_' + op.input[0]
-            if op.output[0] in out_names:
-                op.output[0] = MACE_OUTPUT_NODE_NAME + '_' + op.output[0]
-
     def convert(self, input_nodes, output_nodes):
         if self.device == 'gpu':
             self.add_gpu_input_transform(input_nodes)