提交 e988e951 编写于 作者: 刘琦

Merge branch 'support_caffe_scale_and_fix_fold_bn' into 'master'

support caffe scale op, and fix fold_batchnorm op in transformer

See merge request !870
......@@ -46,7 +46,7 @@ due to high memory usage or fragmentation. Several solutions can be tried:
Why is the performance worse than the official result for the same model?
-------------------------------------------------------------------------
The power options may not set properly, see `mace/public/mace_runtime.h` for
The power options may not be set properly; see `mace/public/mace.h` for
details.
Why is the UI getting poor responsiveness when running model with GPU runtime?
......@@ -64,4 +64,4 @@ Running models on Hexagon DSP need a few prerequisites for DSP developers:
* You need to sign your phone using the testsig provided by Qualcomm. (Download the Qualcomm Hexagon SDK first, plug your phone into the PC, and run scripts/testsig.py)
* You need to install the Hexagon nnlib backend by following the nnlib README (https://github.com/XiaoMi/nnlib).
Then, there you go. You can run Mace on Hexagon DSP.
\ No newline at end of file
Then, there you go. You can run Mace on Hexagon DSP.
......@@ -299,8 +299,7 @@ header files.
├── include
│   └── mace
│   └── public
│   ├── mace.h
│   └── mace_runtime.h
│   └── mace.h
├── lib
│   ├── arm64-v8a
│   │ └── cpu_gpu
......
......@@ -186,6 +186,7 @@ class CaffeConverter(base_converter.ConverterInterface):
'InnerProduct': self.convert_fully_connected,
'BatchNorm': self.convert_folded_batchnorm,
'Crop': self.convert_crop,
'Scale': self.convert_scale,
}
self._option = option
self._mace_net_def = mace_pb2.NetDef()
......@@ -604,3 +605,49 @@ class CaffeConverter(base_converter.ConverterInterface):
mace_pb2.DT_FLOAT,
bias_data)
op.input.extend([bias_tensor_name])
def convert_scale(self, caffe_op):
    """Convert a Caffe `Scale` layer into MACE ops.

    The multiplicative part becomes an Eltwise PROD op whose second
    input is the scale blob. When the layer also carries a bias blob
    (``len(caffe_op.blobs) == 2``), a separate BiasAdd op is chained
    after the PROD op through an intermediate output tensor.
    """
    op = self.convert_general_op(caffe_op)
    base_name = op.name
    op.type = MaceOp.Eltwise.name
    op.name = base_name + '_prod'

    element_arg = op.arg.add()
    element_arg.name = MaceKeyword.mace_element_type_str
    element_arg.i = EltwiseType.PROD.value

    # First blob holds the per-channel scale factors.
    scale_blob = caffe_op.blobs[0]
    scale_name = base_name + '_scale'
    self.add_tensor(scale_name, scale_blob.shape,
                    mace_pb2.DT_FLOAT, scale_blob)
    op.input.extend([scale_name])

    if len(caffe_op.blobs) != 2:
        return

    # Second blob is the bias. Old caffe versions store it as a
    # 4-D blob, so register the tensor with a flattened shape.
    bias_blob = caffe_op.blobs[1]
    bias_name = base_name + '_offset'
    self.add_tensor(bias_name, bias_blob.reshape(-1).shape,
                    mace_pb2.DT_FLOAT, bias_blob)
    op.input.extend([bias_name])

    # Chain a BiasAdd op after the PROD op: BiasAdd takes over the
    # layer's original outputs, while the PROD op is rerouted through
    # an intermediate '<output>_prod_output' tensor. The bias input
    # (index 2) is moved from the PROD op onto the BiasAdd op.
    biasadd_op = self._mace_net_def.op.add()
    biasadd_op.name = base_name + '_biasadd'
    biasadd_op.type = MaceOp.BiasAdd.name
    biasadd_op.output.extend(op.output)
    op.output[:] = [op.output[0] + '_prod_output']
    biasadd_op.input.extend(op.output)
    biasadd_op.input.extend([op.input[2]])
    biasadd_op.output_shape.extend(op.output_shape)
    del op.input[2]

    dtype_arg = biasadd_op.arg.add()
    dtype_arg.name = 'T'
    dtype_arg.i = self._option.data_type
    ConverterUtil.add_data_format_arg(biasadd_op,
                                      DataFormat.NCHW)
......@@ -47,6 +47,7 @@ class ShapeInference(object):
MaceOp.Softmax.name: self.infer_shape_general,
MaceOp.FullyConnected.name: self.infer_shape_fully_connected,
MaceOp.Crop.name: self.infer_shape_crop,
MaceOp.BiasAdd.name: self.infer_shape_general,
}
self._net = net
......
......@@ -344,12 +344,14 @@ class Transformer(base_converter.ConverterInterface):
== EltwiseType.PROD.value) \
and len(op.input) == 2 \
and op.input[1] in self._consts \
and op.output_shape[0].dims[-1:] == \
self._consts[op.input[1]].dims \
and self.consumer_count(op.output[0]) == 1 \
and not self.is_op_output_node(op):
consumer_op = self._consumers[op.output[0]][0]
if (consumer_op.type == MaceOp.Eltwise.name
and ConverterUtil.get_arg(
op, MaceKeyword.mace_element_type_str).i
consumer_op, MaceKeyword.mace_element_type_str).i
== EltwiseType.SUM.value
or consumer_op.type == MaceOp.BiasAdd.name) \
and len(consumer_op.input) == 2 \
......@@ -359,10 +361,8 @@ class Transformer(base_converter.ConverterInterface):
consumer_op.type = MaceOp.BatchNorm.name
consumer_op.input[:] = [op.input[0], op.input[1],
consumer_op.input[1]]
self.safe_remove_node(op, None)
net.op.remove(op)
return True
return False
def fold_squared_diff_mean(self):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册