Unverified · Commit 63feaf50 authored by Yi-Kai-Chen, committed by GitHub

Integrate MediaTek APU Support on mt6873, mt6885 and mt6853 (#662)

* Margaux dev.

* Support Pad, Activation, Mul operators for mnasnet

* Support the document enhancement model

* Apply add-init-cache-and-preemption patch

* Refine code

* Add APU int16 quantization and fix macro bugs

* Refine code
Co-authored-by: YungChien Hsu <yungchien.hsu@mediatek.com>
Co-authored-by: Eric Chen <eric-yk.chen@mediatek.com>
Parent fb59018e
......@@ -21,7 +21,8 @@
namespace mace {
ApuWrapper::ApuWrapper(Device *device)
: quantize_util_(&device->cpu_runtime()->thread_pool()) {
: quantize_util_uint8_(&device->cpu_runtime()->thread_pool()),
quantize_util_int16_(&device->cpu_runtime()->thread_pool()) {
}
apu_data_type ApuWrapper::MapToApuDataType(DataType mace_type) {
......@@ -270,7 +271,7 @@ bool ApuWrapper::Run(const std::map<std::string, Tensor *> &input_tensors,
"Wrong outputs num");
// prepare input
for (int i = 0 ; i < static_cast<int>(input_tensors.size()) ; i++) {
Tensor *tensor = input_tensors.at(input_infos[i].name);
Tensor* tensor = input_tensors.at(input_infos[i].name);
// check size
int element_size = input_infos[i].size;
......@@ -279,18 +280,18 @@ bool ApuWrapper::Run(const std::map<std::string, Tensor *> &input_tensors,
"Wrong input size");
// quantize
if (input_infos[i].data_type == APU_DATA_TYPE_INT16) {
quantize_util_.QuantizeWithScaleAndZeropoint(
quantize_util_int16_.QuantizeWithScaleAndZeropoint(
(const float*)tensor->raw_data(),
element_size,
input_infos[i].scale,
input_infos[i].zero_point,
reinterpret_cast<int16_t*>(input_infos[i].buf.get()));
} else if (input_infos[i].data_type == APU_DATA_TYPE_FLOAT) {
std::memcpy(input_infos[i].buf.get(),
std::memcpy(input_infos[i].buf.get(),
(const float*)tensor->raw_data(),
element_size * byte_per_element);
} else {
quantize_util_.QuantizeWithScaleAndZeropoint(
quantize_util_uint8_.QuantizeWithScaleAndZeropoint(
(const float*)tensor->raw_data(),
element_size,
input_infos[i].scale,
......@@ -304,8 +305,8 @@ bool ApuWrapper::Run(const std::map<std::string, Tensor *> &input_tensors,
MACE_CHECK(ret == true, "neuron run model failed");
// process output
for (int i = 0; i < static_cast<int>(output_tensors->size()); i++) {
Tensor *tensor = output_tensors->at(output_infos[i].name);
for (int i = 0 ; i < static_cast<int>(output_tensors->size()) ; i++) {
Tensor* tensor = output_tensors->at(output_infos[i].name);
// prepare out buffer
tensor->SetDtype(DT_FLOAT);
......@@ -316,7 +317,7 @@ bool ApuWrapper::Run(const std::map<std::string, Tensor *> &input_tensors,
"Wrong output size");
// dequantize
if (output_infos[i].data_type == APU_DATA_TYPE_INT16) {
quantize_util_.Dequantize(
quantize_util_int16_.Dequantize(
reinterpret_cast<int16_t*>(output_infos[i].buf.get()),
element_size,
output_infos[i].scale,
......@@ -327,7 +328,7 @@ bool ApuWrapper::Run(const std::map<std::string, Tensor *> &input_tensors,
output_infos[i].buf.get(),
element_size * byte_per_element);
} else {
quantize_util_.Dequantize(
quantize_util_uint8_.Dequantize(
output_infos[i].buf.get(),
element_size,
output_infos[i].scale,
......@@ -348,19 +349,19 @@ bool ApuWrapper::Uninit() {
}
int ApuWrapper::GetByteNum(apu_data_type data_type) {
int byte_per_element;
if (data_type == APU_DATA_TYPE_FLOAT || data_type == APU_DATA_TYPE_INT32) {
byte_per_element = 4;
} else if (data_type == APU_DATA_TYPE_HALF ||
data_type == APU_DATA_TYPE_INT16) {
byte_per_element = 2;
} else if (data_type == APU_DATA_TYPE_UINT8) {
byte_per_element = 1;
} else {
byte_per_element = 1;
MACE_CHECK(false, "unsupport data type");
}
return byte_per_element;
}
} // namespace mace
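A note on the quantize_util_uint8_ / quantize_util_int16_ calls in this file: both apply the usual affine quantization defined by a scale and a zero point. Below is a minimal NumPy sketch of that idea; it is illustrative only (not MACE's QuantizeUtil implementation), and the example scale/zero-point values are assumptions.

```python
import numpy as np

def quantize(x, scale, zero_point, dtype):
    # q = round(x / scale) + zero_point, clamped to the target type's range
    info = np.iinfo(dtype)
    q = np.round(np.asarray(x, dtype=np.float32) / scale) + zero_point
    return np.clip(q, info.min, info.max).astype(dtype)

def dequantize(q, scale, zero_point):
    # x ~= (q - zero_point) * scale
    return (np.asarray(q, dtype=np.float32) - zero_point) * scale

x = np.array([-1.0, 0.0, 0.5, 1.0], dtype=np.float32)
q8 = quantize(x, scale=1.0 / 127, zero_point=128, dtype=np.uint8)   # uint8 path
q16 = quantize(x, scale=1.0 / 32767, zero_point=0, dtype=np.int16)  # int16 path
print(q8, dequantize(q8, 1.0 / 127, 128))
print(q16, dequantize(q16, 1.0 / 32767, 0))
```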
......@@ -59,7 +59,8 @@ class ApuWrapper {
ApuFrontend *frontend;
std::vector<ApuTensorInfo> input_infos;
std::vector<ApuTensorInfo> output_infos;
QuantizeUtil<float, uint8_t> quantize_util_;
QuantizeUtil<float, uint8_t> quantize_util_uint8_;
QuantizeUtil<float, int16_t> quantize_util_int16_;
};
} // namespace mace
......
......@@ -304,15 +304,14 @@ MaceStatus MaceEngineConfig::Impl::SetCPUThreadPolicy(
return MaceStatus::MACE_SUCCESS;
}
#ifdef MACE_ENABLE_HEXAGON
MaceStatus MaceEngineConfig::Impl::SetHexagonToUnsignedPD() {
bool ret = false;
#ifdef MACE_ENABLE_HEXAGON
ret = HexagonDSPWrapper::RequestUnsignedPD();
#endif
return ret ? MaceStatus::MACE_SUCCESS : MaceStatus::MACE_RUNTIME_ERROR;
}
#endif
#ifdef MACE_ENABLE_HEXAGON
MaceStatus MaceEngineConfig::Impl::SetHexagonPower(
HexagonNNCornerType corner,
bool dcvs_enable,
......@@ -321,12 +320,12 @@ MaceStatus MaceEngineConfig::Impl::SetHexagonPower(
hexagon_dcvs_enable_ = dcvs_enable;
hexagon_latency_ = latency;
bool ret = false;
#ifdef MACE_ENABLE_HEXAGON
ret = HexagonDSPWrapper::SetPower(corner, dcvs_enable, latency);
#endif
return ret ? MaceStatus::MACE_SUCCESS : MaceStatus::MACE_RUNTIME_ERROR;
}
#endif
#ifdef MACE_ENABLE_APU
MaceStatus MaceEngineConfig::Impl::SetAPUCache(
APUCachePolicy policy,
const std::string &binary_file,
......@@ -335,10 +334,11 @@ MaceStatus MaceEngineConfig::Impl::SetAPUCache(
apu_cache_policy_ = policy;
apu_binary_file_ = binary_file;
apu_storage_file_ = storage_file;
#ifdef MACE_ENABLE_APU
ret = true;
#endif
return ret ? MaceStatus::MACE_SUCCESS : MaceStatus::MACE_RUNTIME_ERROR;
}
#endif
MaceEngineConfig::MaceEngineConfig(
const DeviceType device_type)
......
......@@ -37,9 +37,12 @@ ApuSupportedOps = [
'Concat',
'Conv2D',
'DepthwiseConv2d',
'Deconv2D',
'Eltwise',
'FullyConnected',
'Pad',
'Pooling',
'PRelu',
'Reduce',
'ResizeBilinear',
'Reshape',
......@@ -56,7 +59,9 @@ class ApuOps(object):
MaceOp.Concat.name: ApuOp.Concat.name,
MaceOp.Conv2D.name: ApuOp.Conv2D.name,
MaceOp.DepthwiseConv2d.name: ApuOp.DepthwiseConv2d.name,
MaceOp.Deconv2D.name: ApuOp.Deconv2D.name,
MaceOp.Eltwise.name: ApuOp.Eltwise.name,
MaceOp.FullyConnected.name: ApuOp.FullyConnected.name,
MaceOp.Pad.name: ApuOp.Pad.name,
MaceOp.Pooling.name: ApuOp.Pooling.name,
MaceOp.Reduce.name: ApuOp.Reduce.name,
......@@ -135,7 +140,8 @@ class ApuConverter(base_converter.ConverterInterface):
act_mode_arg = ConverterUtil.get_arg(
op, MaceKeyword.mace_activation_type_str)
if act_mode_arg is not None:
mace_check(act_mode_arg.s == b'RELU'
mace_check(act_mode_arg.s == b'PRELU'
or act_mode_arg.s == b'RELU'
or act_mode_arg.s == b'RELUX'
or act_mode_arg.s == b'TANH'
or act_mode_arg.s == b'SIGMOID',
......@@ -179,6 +185,15 @@ class ApuConverter(base_converter.ConverterInterface):
multiplier.int32_data.extend([tensor.dims[0]])
break
op.input.extend([multiplier.name])
elif op.type == MaceOp.Deconv2D.name:
mace_check(len(op.input) == 4,
op.name + ': apu only support ' + op.type + ' op'
' with 4 input')
self.add_size_tensor_from_arg(
op, MaceKeyword.mace_strides_str)
self.add_padding_value_tensor_from_arg(op)
self.add_size_tensor_from_list(
op, MaceKeyword.mace_dilations_str, [1, 1])
elif op.type == MaceOp.Eltwise.name:
eltwise_type = ConverterUtil.get_arg(
op, MaceKeyword.mace_element_type_str).i
......@@ -276,8 +291,8 @@ class ApuConverter(base_converter.ConverterInterface):
op.name + ': apu only support squeeze op with 1'
' input')
self.add_shape_tensor_from_axis_arg(op)
op.type = self._apu_ops.map_nn_op(op.type)
self.change_activation_to_prelu()
def add_op_output_type(self):
type_map = {}
......@@ -371,6 +386,14 @@ class ApuConverter(base_converter.ConverterInterface):
size_value_tensor.int32_data.extend(size_value_arg.ints)
op.input.extend([size_value_tensor.name])
def add_size_tensor_from_list(self, op, keyword, list_value):
size_value_tensor = self._model.tensors.add()
size_value_tensor.name = op.name + '/' + keyword + ':0'
size_value_tensor.data_type = mace_pb2.DT_INT32
size_value_tensor.dims.extend([len(list_value)])
size_value_tensor.int32_data.extend(list_value)
op.input.extend([size_value_tensor.name])
def add_int_tensor_from_arg(self, op, keyword):
int_value_arg = ConverterUtil.get_arg(op, keyword)
mace_check(int_value_arg.i is not None,
......@@ -420,7 +443,6 @@ class ApuConverter(base_converter.ConverterInterface):
op, MaceKeyword.mace_padding_str)
if padding_type is None:
continue
padding_arg = op.arg.add()
padding_arg.name = MaceKeyword.mace_padding_values_str
if padding_type.i == PaddingMode.VALID.value:
......@@ -431,7 +453,8 @@ class ApuConverter(base_converter.ConverterInterface):
kernel = []
dilation = [1, 1]
if op.type == MaceOp.Conv2D.name or \
op.type == MaceOp.DepthwiseConv2d.name:
op.type == MaceOp.DepthwiseConv2d.name or \
op.type == MaceOp.Deconv2D.name:
if ConverterUtil.get_arg(
op, MaceKeyword.mace_dilations_str) is not None:
dilation = ConverterUtil.get_arg(
......@@ -456,22 +479,37 @@ class ApuConverter(base_converter.ConverterInterface):
if len(in_size) > 0:
break
out_size = op.output_shape[0].dims[1:3]
h = (out_size[0] - 1) * stride[0] \
+ ((kernel[0] - 1) * dilation[0] + 1) - in_size[0]
w = (out_size[1] - 1) * stride[1] \
+ ((kernel[1] - 1) * dilation[1] + 1) - in_size[1]
if(op.type == MaceOp.Deconv2D.name):
h = (in_size[0] - 1) * stride[0] + kernel[0] - out_size[0]
w = (in_size[1] - 1) * stride[1] + kernel[1] - out_size[1]
else:
h = (out_size[0] - 1) * stride[0] \
+ ((kernel[0] - 1) * dilation[0] + 1) - in_size[0]
w = (out_size[1] - 1) * stride[1] \
+ ((kernel[1] - 1) * dilation[1] + 1) - in_size[1]
top = int(np.floor(h/2))
left = int(np.floor(w/2))
bottom = h - top
right = w - left
padding_arg.ints.extend([top, right, bottom, left])
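The two padding formulas above can be sanity-checked on their own. A standalone sketch follows; the sizes are made-up examples, not taken from any model in this change.

```python
import numpy as np

def total_pad(in_size, out_size, kernel, stride, dilation=1, deconv=False):
    if deconv:
        # Deconv2D: pad = (in - 1) * stride + kernel - out
        return (in_size - 1) * stride + kernel - out_size
    # Conv2D / DepthwiseConv2d: pad = (out - 1) * stride + (kernel - 1) * dilation + 1 - in
    return (out_size - 1) * stride + (kernel - 1) * dilation + 1 - in_size

# Conv2D with SAME padding: 112 -> 56, 3x3 kernel, stride 2  =>  total pad = 1
h = total_pad(112, 56, kernel=3, stride=2)
# Deconv2D with SAME padding: 56 -> 112, 4x4 kernel, stride 2  =>  total pad = 2
hd = total_pad(56, 112, kernel=4, stride=2, deconv=True)
top = int(np.floor(h / 2))   # 0
bottom = h - top             # 1
print(h, hd, top, bottom)    # 1 2 0 1
```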
def change_activation_to_prelu(self):
for op in self._model.op:
if op.type == ApuOp.Activation.name and \
ConverterUtil.get_arg(
op, MaceKeyword.mace_activation_type_str).s == b'PRELU':
op.type = ApuOp.PRelu.name
def ensure_bias_vector(self):
for _op in self._model.op:
if _op.type != MaceOp.Conv2D.name and \
_op.type != MaceOp.DepthwiseConv2d.name:
continue
if len(_op.input) != 2:
ensure_input = -1
if _op.type == MaceOp.Conv2D.name or \
_op.type == MaceOp.DepthwiseConv2d.name or \
_op.type == MaceOp.FullyConnected.name:
ensure_input = 3
if _op.type == MaceOp.Deconv2D.name:
ensure_input = 4
if ensure_input == -1 or len(_op.input) != ensure_input - 1:
continue
tensor = self._model.tensors.add()
......@@ -522,15 +560,14 @@ class ApuConverter(base_converter.ConverterInterface):
const_tensor.name = _op.name + '/' + \
MaceKeyword.mace_scalar_input_str + ':0'
const_tensor.dims.extend([1])
const_tensor.data_type = _op.output_type[0]
if _op.output_type[0] == mace_pb2.DT_UINT8 or \
_op.output_type[0] == mace_pb2.DT_INT16:
const_tensor.data_type = _op.output_type[0]
const_tensor.scale = scalar
const_tensor.zero_point = 0
const_tensor.quantized = True
const_tensor.int32_data.extend([1])
elif _op.output_type[0] == mace_pb2.DT_FLOAT:
const_tensor.data_type = mace_pb2.DT_FLOAT
const_tensor.float_data.extend([scalar])
_op.input.extend([const_tensor.name])
ConverterUtil.del_arg(
......
......@@ -340,6 +340,8 @@ class TransformerRule(Enum):
QUANTIZE_LARGE_WEIGHTS = 43
TRANSPOSE_SHAPE_TENSOR_TO_PARAM = 44
TRANSFORM_SINGLE_BN_TO_DEPTHWISE_CONV = 45
TRANSFORM_MUL_MAX_TO_PRELU = 46
TRANSFORM_EXPAND_DIMS_TO_RESHAPE = 47
class ConverterInterface(object):
......@@ -610,6 +612,8 @@ class ConverterOption(object):
if self._device == DeviceType.APU.value:
self._transformer_option = self._transformer_option + [
TransformerRule.TRANSFORM_SINGLE_BN_TO_DEPTHWISE_CONV,
TransformerRule.TRANSFORM_MUL_MAX_TO_PRELU,
TransformerRule.TRANSFORM_EXPAND_DIMS_TO_RESHAPE,
]
if self.quantize_large_weights:
self._transformer_option = self._transformer_option + [
......
......@@ -117,6 +117,10 @@ class Transformer(base_converter.ConverterInterface):
self.quantize_large_weights,
TransformerRule.TRANSFORM_SINGLE_BN_TO_DEPTHWISE_CONV:
self.transform_single_bn_to_depthwise_conv,
TransformerRule.TRANSFORM_MUL_MAX_TO_PRELU:
self.transform_mul_max_to_prelu,
TransformerRule.TRANSFORM_EXPAND_DIMS_TO_RESHAPE:
self.transform_expand_dims_to_reshape,
}
self._option = option
......@@ -962,17 +966,23 @@ class Transformer(base_converter.ConverterInterface):
or op.type == MaceOp.BatchNorm.name) \
and len(self._consumers.get(op.output[0], [])) == 1:
consumer_op = self._consumers[op.output[0]][0]
fold_consumer = False
if consumer_op.type == MaceOp.Activation.name:
act_type_arg = ConverterUtil.get_arg(
consumer_op, MaceKeyword.mace_activation_type_str)
act_type = act_type_arg.s.decode()
if act_type == ActivationType.PRELU.name:
continue
if self._option.device == DeviceType.APU.value:
fold_consumer = (act_type in
[ActivationType.RELU.name,
ActivationType.RELUX.name])
else:
fold_consumer = (act_type != ActivationType.PRELU.name)
# during quantization, only fold relu/relux
if (self._option.quantize_stat or self._option.quantize) \
and act_type not in [ActivationType.RELU.name,
ActivationType.RELUX.name]:
continue
if fold_consumer:
print("Fold activation: %s(%s)" % (op.name, op.type))
op.name = consumer_op.name
op.output[0] = consumer_op.output[0]
......@@ -1032,6 +1042,8 @@ class Transformer(base_converter.ConverterInterface):
return False
def reshape_fc_weight(self):
if self._option.device == DeviceType.APU.value:
return
net = self._model
filter_format = self.filter_format()
for op in net.op:
......@@ -1348,6 +1360,36 @@ class Transformer(base_converter.ConverterInterface):
weight.dims[:] = [1, 1] + list(weight_data.shape)
return True
if self._option.device == DeviceType.APU.value:
if op.type == MaceOp.MatMul.name:
transpose_a_arg = ConverterUtil.get_arg(op, MaceKeyword.mace_transpose_a_str) # noqa
transpose_b_arg = ConverterUtil.get_arg(op, MaceKeyword.mace_transpose_b_str) # noqa
transpose_a = transpose_a_arg is not None and transpose_a_arg.i == 1 # noqa
transpose_b = transpose_b_arg is not None and transpose_b_arg.i == 1 # noqa
if transpose_a is False and transpose_b is False and \
op.input[1] in self._consts and \
len(self.get_tensor_shape(op.input[0])) == 2 and \
len(self.get_tensor_shape(op.input[1])) == 2:
op.type = MaceOp.FullyConnected.name
del op.arg[:]
rhs = op.input[1]
if rhs in self._consts and \
len(self._consts[rhs].dims) == 2:
arg = ConverterUtil.get_arg(op, MaceKeyword.mace_transpose_b_str) # noqa
if arg is None:
arg = op.arg.add()
arg.name = MaceKeyword.mace_transpose_b_str
arg.i = 0
if arg.i == 0:
arg.i = 1
filter = self._consts[rhs]
filter_data = np.array(filter.float_data) \
.reshape(filter.dims)
filter_data = filter_data.transpose(1, 0)
filter.float_data[:] = filter_data.flat
filter.dims[:] = filter_data.shape
six.print_('Transpose matmul weight to shape:',
filter.dims)
return False
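On the MatMul-to-FullyConnected rewrite above: the constant right-hand side is transposed so it is stored in the [out_channels, in_channels] layout. Assuming the FullyConnected kernel multiplies the input by the transposed weight, the rewrite preserves the result; a NumPy sketch with illustrative shapes:

```python
import numpy as np

rng = np.random.default_rng(0)
x = rng.standard_normal((8, 16)).astype(np.float32)   # MatMul lhs: [batch, in]
w = rng.standard_normal((16, 32)).astype(np.float32)  # MatMul rhs (const): [in, out]

matmul_out = x @ w               # original MatMul output
fc_weight = w.transpose(1, 0)    # weight stored as [out, in] after the transform
fc_out = x @ fc_weight.T         # FullyConnected: y = x @ weight.T

assert np.allclose(matmul_out, fc_out)
```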
def update_float_op_data_type(self):
......@@ -2476,5 +2518,68 @@ class Transformer(base_converter.ConverterInterface):
tensor.dims[:] = [1, 1, 1, tensor.dims[0]]
break
return True
return False
def transform_mul_max_to_prelu(self):
if self._option.device != DeviceType.APU.value:
return False
net = self._model
for op in net.op:
if op.type != MaceOp.Eltwise.name or \
ConverterUtil.get_arg(
op, MaceKeyword.mace_element_type_str).i \
!= EltwiseType.PROD.value or \
op.output[0] not in self._consumers:
continue
if len(op.input) != 1:
continue
consumer_op = self._consumers[op.output[0]][0]
if consumer_op.type != MaceOp.Eltwise.name or \
ConverterUtil.get_arg(
consumer_op, MaceKeyword.mace_element_type_str).i \
!= EltwiseType.MAX.value:
continue
if op.input[0] not in consumer_op.input:
continue
float_value_arg = ConverterUtil.get_arg(
op, MaceKeyword.mace_scalar_input_str)
mace_check(float_value_arg is not None,
op.name + ': ' + MaceKeyword.mace_scalar_input_str +
' value float should not be None')
scalar = float_value_arg.f
if scalar < 0:
continue
if scalar > 1:
scalar = 1
# Change Mul op to Prelu
print("Change mul and max to prelu: %s(%s)" % (op.name, op.type))
op.name = consumer_op.name
op.output[0] = consumer_op.output[0]
alpha_tensor = net.tensors.add()
alpha_tensor.name = op.name + '_alpha'
alpha_tensor.dims.append(1)
alpha_tensor.data_type = mace_pb2.DT_FLOAT
alpha_tensor.float_data.extend([scalar])
op.input.extend([alpha_tensor.name])
ConverterUtil.del_arg(op, MaceKeyword.mace_scalar_input_str)
ConverterUtil.del_arg(
op, MaceKeyword.mace_scalar_input_index_str)
op.type = MaceOp.Activation.name
type_arg = op.arg.add()
type_arg.name = MaceKeyword.mace_activation_type_str
type_arg.s = six.b(ActivationType.PRELU.name)
self.replace_quantize_info(op, consumer_op)
self.safe_remove_node(consumer_op, op)
return True
return False
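Why the Mul/Max pair above can become a PRelu: for 0 <= alpha <= 1, max(x, alpha * x) is exactly PReLU(x, alpha), which is also why the pass skips negative scalars. A quick standalone NumPy check:

```python
import numpy as np

def prelu(x, alpha):
    # PReLU: x for x > 0, alpha * x otherwise
    return np.where(x > 0, x, alpha * x)

x = np.linspace(-3.0, 3.0, 13).astype(np.float32)
alpha = 0.25
assert np.allclose(np.maximum(x, alpha * x), prelu(x, alpha))
```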
def transform_expand_dims_to_reshape(self):
if self._option.device != DeviceType.APU.value:
return False
net = self._model
for op in net.op:
if op.type == MaceOp.ExpandDims.name:
op.type = MaceOp.Reshape.name
del op.arg[:]
return True
return False