提交 e44b5dd6 编写于 作者: Y yejianwu

support caffe scale op, and fix fold_batchnorm op in transformer

上级 fbc1d019
...@@ -46,7 +46,7 @@ due to high memory usage or fragmentation. Several solutions can be tried: ...@@ -46,7 +46,7 @@ due to high memory usage or fragmentation. Several solutions can be tried:
Why is the performance worse than the official result for the same model? Why is the performance worse than the official result for the same model?
------------------------------------------------------------------------- -------------------------------------------------------------------------
The power options may not set properly, see `mace/public/mace_runtime.h` for The power options may not set properly, see `mace/public/mace.h` for
details. details.
Why is the UI getting poor responsiveness when running model with GPU runtime? Why is the UI getting poor responsiveness when running model with GPU runtime?
...@@ -64,4 +64,4 @@ Running models on Hexagon DSP need a few prerequisites for DSP developers: ...@@ -64,4 +64,4 @@ Running models on Hexagon DSP need a few prerequisites for DSP developers:
* You need sign your phone by using testsig provided by Qualcomm. (Download Qualcomm Hexagon SDK first, plugin your phone to PC, run scripts/testsig.py) * You need sign your phone by using testsig provided by Qualcomm. (Download Qualcomm Hexagon SDK first, plugin your phone to PC, run scripts/testsig.py)
* You need install Hexagon nnlib backend by following nnlib README (https://github.com/XiaoMi/nnlib). * You need install Hexagon nnlib backend by following nnlib README (https://github.com/XiaoMi/nnlib).
Then, there you go. You can run Mace on Hexagon DSP. Then, there you go. You can run Mace on Hexagon DSP.
\ No newline at end of file
...@@ -299,8 +299,7 @@ header files. ...@@ -299,8 +299,7 @@ header files.
├── include ├── include
│   └── mace │   └── mace
│   └── public │   └── public
│   ├── mace.h │   └── mace.h
│   └── mace_runtime.h
├── lib ├── lib
│   ├── arm64-v8a │   ├── arm64-v8a
│   │ └── cpu_gpu │   │ └── cpu_gpu
......
...@@ -186,6 +186,7 @@ class CaffeConverter(base_converter.ConverterInterface): ...@@ -186,6 +186,7 @@ class CaffeConverter(base_converter.ConverterInterface):
'InnerProduct': self.convert_fully_connected, 'InnerProduct': self.convert_fully_connected,
'BatchNorm': self.convert_folded_batchnorm, 'BatchNorm': self.convert_folded_batchnorm,
'Crop': self.convert_crop, 'Crop': self.convert_crop,
'Scale': self.convert_scale,
} }
self._option = option self._option = option
self._mace_net_def = mace_pb2.NetDef() self._mace_net_def = mace_pb2.NetDef()
...@@ -604,3 +605,49 @@ class CaffeConverter(base_converter.ConverterInterface): ...@@ -604,3 +605,49 @@ class CaffeConverter(base_converter.ConverterInterface):
mace_pb2.DT_FLOAT, mace_pb2.DT_FLOAT,
bias_data) bias_data)
op.input.extend([bias_tensor_name]) op.input.extend([bias_tensor_name])
def convert_scale(self, caffe_op):
    """Convert a Caffe `Scale` layer into MACE ops.

    The Scale layer is lowered to an Eltwise(PROD) op that multiplies the
    input by the learned scale blob.  When the layer also carries a bias
    blob (``len(caffe_op.blobs) == 2``), a second BiasAdd op is appended
    after the product.

    NOTE(review): the graph rewiring below is strictly order-dependent —
    do not reorder the mutations of ``op.output`` / ``biasadd_op``.
    """
    op = self.convert_general_op(caffe_op)
    op.type = MaceOp.Eltwise.name

    # Keep the layer's original name for deriving tensor/op names;
    # the eltwise op itself is suffixed so the optional BiasAdd can
    # take over the original output position.
    scale_op_name = op.name
    op.name = scale_op_name + '_prod'

    # Mark the eltwise op as an element-wise product.
    type_arg = op.arg.add()
    type_arg.name = MaceKeyword.mace_element_type_str
    type_arg.i = EltwiseType.PROD.value

    # blobs[0] holds the learned scale factors (hence the '_scale' suffix).
    scale_tensor_name = scale_op_name + '_scale'
    scale_data = caffe_op.blobs[0]
    self.add_tensor(scale_tensor_name, scale_data.shape,
                    mace_pb2.DT_FLOAT, scale_data)
    op.input.extend([scale_tensor_name])

    # Optional bias term: blobs[1] is present only when the Scale layer
    # was configured with bias_term.
    if len(caffe_op.blobs) == 2:
        bias_tensor_name = scale_op_name + '_offset'
        bias_data = caffe_op.blobs[1]
        # caffe of old version has 4-dimension bias, so reshape it
        # to single dimension
        self.add_tensor(bias_tensor_name, bias_data.reshape(-1).shape,
                        mace_pb2.DT_FLOAT,
                        bias_data)
        # Temporarily appended here (becomes op.input[2]); it is moved
        # to the BiasAdd op further down.
        op.input.extend([bias_tensor_name])

        biasadd_op = self._mace_net_def.op.add()
        biasadd_op.name = scale_op_name + '_biasadd'
        biasadd_op.type = MaceOp.BiasAdd.name

        # 1) BiasAdd takes over the layer's original output name...
        biasadd_op.output.extend(op.output)
        # 2) ...the eltwise op is renamed to an intermediate output...
        op.output[:] = [op.output[0] + '_prod_output']
        # 3) ...and BiasAdd consumes that intermediate output plus the
        #    bias tensor appended above (op.input[2]).
        biasadd_op.input.extend(op.output)
        biasadd_op.input.extend([op.input[2]])
        biasadd_op.output_shape.extend(op.output_shape)

        # Remove the bias tensor from the eltwise op now that BiasAdd
        # owns it (must happen after the read at step 3).
        del op.input[2]

        data_type_arg = biasadd_op.arg.add()
        data_type_arg.name = 'T'
        data_type_arg.i = self._option.data_type

        # presumably the Caffe frontend is NCHW throughout — matches the
        # DataFormat.NCHW used here; TODO confirm against the converter.
        ConverterUtil.add_data_format_arg(biasadd_op,
                                          DataFormat.NCHW)
...@@ -47,6 +47,7 @@ class ShapeInference(object): ...@@ -47,6 +47,7 @@ class ShapeInference(object):
MaceOp.Softmax.name: self.infer_shape_general, MaceOp.Softmax.name: self.infer_shape_general,
MaceOp.FullyConnected.name: self.infer_shape_fully_connected, MaceOp.FullyConnected.name: self.infer_shape_fully_connected,
MaceOp.Crop.name: self.infer_shape_crop, MaceOp.Crop.name: self.infer_shape_crop,
MaceOp.BiasAdd.name: self.infer_shape_general,
} }
self._net = net self._net = net
......
...@@ -344,12 +344,14 @@ class Transformer(base_converter.ConverterInterface): ...@@ -344,12 +344,14 @@ class Transformer(base_converter.ConverterInterface):
== EltwiseType.PROD.value) \ == EltwiseType.PROD.value) \
and len(op.input) == 2 \ and len(op.input) == 2 \
and op.input[1] in self._consts \ and op.input[1] in self._consts \
and op.output_shape[0].dims[-1:] == \
self._consts[op.input[1]].dims \
and self.consumer_count(op.output[0]) == 1 \ and self.consumer_count(op.output[0]) == 1 \
and not self.is_op_output_node(op): and not self.is_op_output_node(op):
consumer_op = self._consumers[op.output[0]][0] consumer_op = self._consumers[op.output[0]][0]
if (consumer_op.type == MaceOp.Eltwise.name if (consumer_op.type == MaceOp.Eltwise.name
and ConverterUtil.get_arg( and ConverterUtil.get_arg(
op, MaceKeyword.mace_element_type_str).i consumer_op, MaceKeyword.mace_element_type_str).i
== EltwiseType.SUM.value == EltwiseType.SUM.value
or consumer_op.type == MaceOp.BiasAdd.name) \ or consumer_op.type == MaceOp.BiasAdd.name) \
and len(consumer_op.input) == 2 \ and len(consumer_op.input) == 2 \
...@@ -359,10 +361,8 @@ class Transformer(base_converter.ConverterInterface): ...@@ -359,10 +361,8 @@ class Transformer(base_converter.ConverterInterface):
consumer_op.type = MaceOp.BatchNorm.name consumer_op.type = MaceOp.BatchNorm.name
consumer_op.input[:] = [op.input[0], op.input[1], consumer_op.input[:] = [op.input[0], op.input[1],
consumer_op.input[1]] consumer_op.input[1]]
net.op.remove(op)
self.safe_remove_node(op, None)
return True return True
return False return False
def fold_squared_diff_mean(self): def fold_squared_diff_mean(self):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册