diff --git a/docs/faq.md b/docs/faq.md index bb66e9a436301b36274388042dc2ec72ba6dcf8c..d0f8b3953412f1378acdff18f067eaaeadc237b3 100644 --- a/docs/faq.md +++ b/docs/faq.md @@ -46,7 +46,7 @@ due to high memory usage or fragmentation. Several solutions can be tried: Why is the performance worse than the official result for the same model? ------------------------------------------------------------------------- -The power options may not set properly, see `mace/public/mace_runtime.h` for +The power options may not be set properly, see `mace/public/mace.h` for details. Why is the UI getting poor responsiveness when running model with GPU runtime? @@ -64,4 +64,4 @@ Running models on Hexagon DSP need a few prerequisites for DSP developers: * You need sign your phone by using testsig provided by Qualcomm. (Download Qualcomm Hexagon SDK first, plugin your phone to PC, run scripts/testsig.py) * You need install Hexagon nnlib backend by following nnlib README (https://github.com/XiaoMi/nnlib). -Then, there you go. You can run Mace on Hexagon DSP. \ No newline at end of file +Then, there you go. You can run Mace on Hexagon DSP. diff --git a/docs/user_guide/basic_usage.rst b/docs/user_guide/basic_usage.rst index eb067c49a5b42fc2f33d8a614da6f067eb5452ff..15a4d5168ebebef494156184991c4640624d227c 100644 --- a/docs/user_guide/basic_usage.rst +++ b/docs/user_guide/basic_usage.rst @@ -299,8 +299,7 @@ header files. 
├── include │   └── mace │   └── public - │   ├── mace.h - │   └── mace_runtime.h + │   └── mace.h ├── lib │   ├── arm64-v8a │   │ └── cpu_gpu diff --git a/mace/python/tools/converter_tool/caffe_converter.py b/mace/python/tools/converter_tool/caffe_converter.py index a2e89a6d45b1fc397e5e888d0368676d28234824..8ce0a0741d6b791aa16c4137b297521280e93109 100644 --- a/mace/python/tools/converter_tool/caffe_converter.py +++ b/mace/python/tools/converter_tool/caffe_converter.py @@ -186,6 +186,7 @@ class CaffeConverter(base_converter.ConverterInterface): 'InnerProduct': self.convert_fully_connected, 'BatchNorm': self.convert_folded_batchnorm, 'Crop': self.convert_crop, + 'Scale': self.convert_scale, } self._option = option self._mace_net_def = mace_pb2.NetDef() @@ -604,3 +605,49 @@ class CaffeConverter(base_converter.ConverterInterface): mace_pb2.DT_FLOAT, bias_data) op.input.extend([bias_tensor_name]) + + def convert_scale(self, caffe_op): + op = self.convert_general_op(caffe_op) + op.type = MaceOp.Eltwise.name + + scale_op_name = op.name + op.name = scale_op_name + '_prod' + + type_arg = op.arg.add() + type_arg.name = MaceKeyword.mace_element_type_str + type_arg.i = EltwiseType.PROD.value + + scale_tensor_name = scale_op_name + '_scale' + scale_data = caffe_op.blobs[0] + self.add_tensor(scale_tensor_name, scale_data.shape, + mace_pb2.DT_FLOAT, scale_data) + op.input.extend([scale_tensor_name]) + + if len(caffe_op.blobs) == 2: + bias_tensor_name = scale_op_name + '_offset' + bias_data = caffe_op.blobs[1] + # caffe of old version has 4-dimension bias, so reshape it + # to single dimension + self.add_tensor(bias_tensor_name, bias_data.reshape(-1).shape, + mace_pb2.DT_FLOAT, + bias_data) + op.input.extend([bias_tensor_name]) + + biasadd_op = self._mace_net_def.op.add() + biasadd_op.name = scale_op_name + '_biasadd' + biasadd_op.type = MaceOp.BiasAdd.name + biasadd_op.output.extend(op.output) + op.output[:] = [op.output[0] + '_prod_output'] + biasadd_op.input.extend(op.output) 
+ biasadd_op.input.extend([op.input[2]]) + + biasadd_op.output_shape.extend(op.output_shape) + + del op.input[2] + + data_type_arg = biasadd_op.arg.add() + data_type_arg.name = 'T' + data_type_arg.i = self._option.data_type + + ConverterUtil.add_data_format_arg(biasadd_op, + DataFormat.NCHW) diff --git a/mace/python/tools/converter_tool/shape_inference.py b/mace/python/tools/converter_tool/shape_inference.py index fbc22783ce09156f34cf859c82019d882c9c2dd7..9478a3e545f9dc203ea01f03b1d32a4452ad964b 100644 --- a/mace/python/tools/converter_tool/shape_inference.py +++ b/mace/python/tools/converter_tool/shape_inference.py @@ -47,6 +47,7 @@ class ShapeInference(object): MaceOp.Softmax.name: self.infer_shape_general, MaceOp.FullyConnected.name: self.infer_shape_fully_connected, MaceOp.Crop.name: self.infer_shape_crop, + MaceOp.BiasAdd.name: self.infer_shape_general, } self._net = net diff --git a/mace/python/tools/converter_tool/transformer.py b/mace/python/tools/converter_tool/transformer.py index 7d6893442acf982529ea5c2b38425bf33795eb5a..d24823fd61adfb6c73fd8433151a8901fb00fc45 100644 --- a/mace/python/tools/converter_tool/transformer.py +++ b/mace/python/tools/converter_tool/transformer.py @@ -344,12 +344,14 @@ class Transformer(base_converter.ConverterInterface): == EltwiseType.PROD.value) \ and len(op.input) == 2 \ and op.input[1] in self._consts \ + and op.output_shape[0].dims[-1:] == \ + self._consts[op.input[1]].dims \ and self.consumer_count(op.output[0]) == 1 \ and not self.is_op_output_node(op): consumer_op = self._consumers[op.output[0]][0] if (consumer_op.type == MaceOp.Eltwise.name and ConverterUtil.get_arg( - op, MaceKeyword.mace_element_type_str).i + consumer_op, MaceKeyword.mace_element_type_str).i == EltwiseType.SUM.value or consumer_op.type == MaceOp.BiasAdd.name) \ and len(consumer_op.input) == 2 \ @@ -359,10 +361,8 @@ class Transformer(base_converter.ConverterInterface): consumer_op.type = MaceOp.BatchNorm.name consumer_op.input[:] = [op.input[0], 
op.input[1], consumer_op.input[1]] - - self.safe_remove_node(op, None) + net.op.remove(op) return True - return False def fold_squared_diff_mean(self):