Caffe converter: add support for depthwise convolution.

875ed7a5 · liuqi · a1c6ba92 · 875ed7a5 · 875ed7a5
隐藏空白更改
内联并排

Showing 2 changed files with 25 additions and 12 deletions

mace/python/tools/caffe_converter_lib.py mace/python/tools/caffe_converter_lib.py +23 -11

mace/python/tools/tf_converter_lib.py mace/python/tools/tf_converter_lib.py +2 -1

未找到文件。
--- a/mace/python/tools/caffe_converter_lib.py
+++ b/mace/python/tools/caffe_converter_lib.py
@@ -72,9 +72,9 @@ class Shapes(object):
    output_shape = np.zeros_like(input_shape)
    output_shape[0] = input_shape[0]
    output_shape[1] = int(round_func((input_shape[1] + paddings[0] - filter_shape[0]
-                       - (filter_shape[0] - 1) * (dilations[0] - 1)) / float(strides[0]))) + 1
+                                      - (filter_shape[0] - 1) * (dilations[0] - 1)) / float(strides[0]))) + 1
    output_shape[2] = int(round_func((input_shape[2] + paddings[1] - filter_shape[1]
-                       - (filter_shape[1] - 1) * (dilations[1] - 1)) / float(strides[1]))) + 1
+                                      - (filter_shape[1] - 1) * (dilations[1] - 1)) / float(strides[1]))) + 1
    output_shape[3] = filter_shape[2]
    return output_shape
@@ -333,8 +333,18 @@ class CaffeConverter(object):
    return pad, stride, kernel
  def convert_conv2d(self, op):
-    op_def = self.CommonConvert(op, 'Conv2D')
    param = op.layer.convolution_param
+    is_depthwise = False
+    if param.HasField('group'):
+      if param.group == op.data[0].shape[0] and op.data[0].shape[1] == 1:
+        is_depthwise = True
+      else:
+        raise Exception("Mace do not support group convolution yet")
+    if is_depthwise:
+      op_def = self.CommonConvert(op, 'DepthwiseConv2d')
+    else:
+      op_def = self.CommonConvert(op, 'Conv2D')
    # Add filter
    weight_tensor_name = op.name + '_weight:0'
@@ -342,7 +352,7 @@ class CaffeConverter(object):
    self.add_tensor(weight_tensor_name, weight_data)
    if self.device == 'gpu':
-      buffer_type = "CONV2D_FILTER"
+      buffer_type = "DW_CONV2D_FILTER" if is_depthwise else "CONV2D_FILTER"
      output_name = self.add_buffer_to_image(weight_tensor_name, buffer_type)
      op_def.input.extend([output_name])
    else:
@@ -373,15 +383,16 @@ class CaffeConverter(object):
    self.resolved_ops.add(op.name)
    output_shape = Shapes.conv_pool_shape(op.get_single_parent().output_shape_map[op.layer.bottom[0]],
-                                          weight_data.shape,
+      weight_data.shape,
-                                          paddings, strides, dilations,
+      paddings, strides, dilations,
-                                          math.floor)
+      math.floor)
    op.output_shape_map[op.layer.top[0]] = output_shape
    if len(self.ops_map[final_op.name].children) == 1 \
        and self.ops_map[final_op.name].children[0].type in activation_name_map:
      activation_op = self.ops_map[final_op.name].children[0]
-      op_def.type = "FusedConv2D"
+      if not is_depthwise:
+        op_def.type = "FusedConv2D"
      fused_act_arg = op_def.arg.add()
      fused_act_arg.name = 'activation'
      fused_act_arg.s = activation_name_map[activation_op.type]
@@ -412,7 +423,7 @@ class CaffeConverter(object):
    width = output_shape[0] * ((output_shape[1] + 1)/2) * ((output_shape[2]+1)/2)
    return self.winograd and self.device == 'gpu' and \
           filter_shape[0] == 3 and (filter_shape[0] == filter_shape[1]) and \
-           dilations[0] == 1 and (dilations[0] == dilations[1]) and\
+           dilations[0] == 1 and (dilations[0] == dilations[1]) and \
           (strides[0] == 1) and (strides[0] == strides[1]) and \
           (16 * filter_shape[2] < OPENCL_IMAGE_MAX_SIZE) and \
           (16 * filter_shape[3] < OPENCL_IMAGE_MAX_SIZE) and \
@@ -662,7 +673,7 @@ class CaffeConverter(object):
    filter_shape = [kernels[0], kernels[1], input_shape[3], input_shape[3]]
    output_shape = Shapes.conv_pool_shape(input_shape, filter_shape,
-                                          paddings, strides, [1, 1], math.ceil)
+      paddings, strides, [1, 1], math.ceil)
    op.output_shape_map[op.layer.top[0]] = output_shape
    op_def.output.extend([op.name + ':0'])
@@ -764,7 +775,7 @@ class CaffeConverter(object):
    input_shape = op.parents[0].output_shape_map[op.layer.bottom[0]]
    num_outputs = len(op.layer.top)
    if (input_shape[3] % num_outputs) != 0 or \
-      (self.device == 'gpu' and ((input_shape[3] / num_outputs) % 4 != 0)) :
+        (self.device == 'gpu' and ((input_shape[3] / num_outputs) % 4 != 0)) :
      raise Exception('Mace do not support slice with input shape '
                      + str(input_shape) + ' and number of output ' + str(num_outputs))
    output_shape = Shapes.slice_shape(input_shape, num_outputs)
@@ -966,3 +977,4 @@ def convert_to_mace_pb(model_file, weight_file, input_node_str, input_shape_str,
    print "Memory optimization done."
  return net_def
--- a/mace/python/tools/tf_converter_lib.py
+++ b/mace/python/tools/tf_converter_lib.py
@@ -362,7 +362,8 @@ class TFConverter(object):
    if len(self.tf_graph.get(final_op.name, [])) == 1 \
        and self.tf_graph[final_op.name][0].type in activation_name_map:
      activation_op = self.tf_graph[final_op.name][0]
-      op_def.type = "FusedConv2D"
+      if op_def.type == "Conv2D":
+        op_def.type = "FusedConv2D"
      fused_act_arg = op_def.arg.add()
      fused_act_arg.name = 'activation'
      fused_act_arg.s = activation_name_map[activation_op.type]