diff --git a/mace/core/runtime/apu/apu_wrapper.cc b/mace/core/runtime/apu/apu_wrapper.cc
index 6feac5c699cab4503904037833b925a5ddb0f545..d4e2ba01dd4f9f42277706b3b8ba86ad7c46a7bf 100644
--- a/mace/core/runtime/apu/apu_wrapper.cc
+++ b/mace/core/runtime/apu/apu_wrapper.cc
@@ -21,7 +21,8 @@ namespace mace {
 
 ApuWrapper::ApuWrapper(Device *device)
-    : quantize_util_(&device->cpu_runtime()->thread_pool()) {
+    : quantize_util_uint8_(&device->cpu_runtime()->thread_pool()),
+      quantize_util_int16_(&device->cpu_runtime()->thread_pool()) {
 }
 
 apu_data_type ApuWrapper::MapToApuDataType(DataType mace_type) {
@@ -270,7 +271,7 @@ bool ApuWrapper::Run(const std::map<std::string, Tensor *> &input_tensors,
              "Wrong outputs num");
   // prepare input
   for (int i = 0 ; i < static_cast<int>(input_tensors.size()) ; i++) {
-    Tensor *tensor = input_tensors.at(input_infos[i].name);
+    Tensor* tensor = input_tensors.at(input_infos[i].name);
 
     // check size
     int element_size = input_infos[i].size;
@@ -279,18 +280,18 @@ bool ApuWrapper::Run(const std::map<std::string, Tensor *> &input_tensors,
                "Wrong input size");
     // quantize
     if (input_infos[i].data_type == APU_DATA_TYPE_INT16) {
-      quantize_util_.QuantizeWithScaleAndZeropoint(
+      quantize_util_int16_.QuantizeWithScaleAndZeropoint(
           (const float*)tensor->raw_data(),
           element_size,
           input_infos[i].scale,
           input_infos[i].zero_point,
           reinterpret_cast<int16_t*>(input_infos[i].buf.get()));
     } else if (input_infos[i].data_type == APU_DATA_TYPE_FLOAT) {
-      std::memcpy(input_infos[i].buf.get(),
+      std::memcpy(input_infos[i].buf.get(),
                   (const float*)tensor->raw_data(),
                   element_size * byte_per_element);
     } else {
-      quantize_util_.QuantizeWithScaleAndZeropoint(
+      quantize_util_uint8_.QuantizeWithScaleAndZeropoint(
           (const float*)tensor->raw_data(),
           element_size,
           input_infos[i].scale,
@@ -304,8 +305,8 @@ bool ApuWrapper::Run(const std::map<std::string, Tensor *> &input_tensors,
   MACE_CHECK(ret == true, "neuron run model failed");
 
   // process output
-  for (int i = 0; i < static_cast<int>(output_tensors->size()); i++) {
-    Tensor *tensor = output_tensors->at(output_infos[i].name);
+  for (int i = 0 ; i < static_cast<int>(output_tensors->size()) ; i++) {
+    Tensor* tensor = output_tensors->at(output_infos[i].name);
 
     // prepare out buffer
     tensor->SetDtype(DT_FLOAT);
@@ -316,7 +317,7 @@ bool ApuWrapper::Run(const std::map<std::string, Tensor *> &input_tensors,
                "Wrong output size");
     // dequantize
     if (output_infos[i].data_type == APU_DATA_TYPE_INT16) {
-      quantize_util_.Dequantize(
+      quantize_util_int16_.Dequantize(
           reinterpret_cast<int16_t*>(output_infos[i].buf.get()),
           element_size,
           output_infos[i].scale,
@@ -327,7 +328,7 @@ bool ApuWrapper::Run(const std::map<std::string, Tensor *> &input_tensors,
                   output_infos[i].buf.get(),
                   element_size * byte_per_element);
     } else {
-      quantize_util_.Dequantize(
+      quantize_util_uint8_.Dequantize(
           output_infos[i].buf.get(),
           element_size,
           output_infos[i].scale,
@@ -348,19 +349,19 @@ bool ApuWrapper::Uninit() {
 }
 
 int ApuWrapper::GetByteNum(apu_data_type data_type) {
-    int byte_per_element;
-    if (data_type == APU_DATA_TYPE_FLOAT || data_type == APU_DATA_TYPE_INT32) {
-        byte_per_element = 4;
-    } else if (data_type == APU_DATA_TYPE_HALF ||
-               data_type == APU_DATA_TYPE_INT16) {
-        byte_per_element = 2;
-    } else if (data_type == APU_DATA_TYPE_UINT8) {
-        byte_per_element = 1;
-    } else {
-        byte_per_element = 1;
-        MACE_CHECK(false, "unsupport data type");
-    }
-    return byte_per_element;
+  int byte_per_element;
+  if (data_type == APU_DATA_TYPE_FLOAT || data_type == APU_DATA_TYPE_INT32) {
+    byte_per_element = 4;
+  } else if (data_type == APU_DATA_TYPE_HALF ||
+             data_type == APU_DATA_TYPE_INT16) {
+    byte_per_element = 2;
+  } else if (data_type == APU_DATA_TYPE_UINT8) {
+    byte_per_element = 1;
+  } else {
+    byte_per_element = 1;
+    MACE_CHECK(false, "unsupport data type");
+  }
+  return byte_per_element;
 }
 
 }  // namespace mace
diff --git a/mace/core/runtime/apu/apu_wrapper.h b/mace/core/runtime/apu/apu_wrapper.h
index a18694edd0681f0b9a65a2ceaa922a1e6bf0582c..46d5d32e921fec157011b50d9d2d279dc3c4fac6 100644
--- a/mace/core/runtime/apu/apu_wrapper.h
+++ b/mace/core/runtime/apu/apu_wrapper.h
@@ -59,7 +59,8 @@ class ApuWrapper {
   ApuFrontend *frontend;
   std::vector input_infos;
   std::vector output_infos;
-  QuantizeUtil quantize_util_;
+  QuantizeUtil quantize_util_uint8_;
+  QuantizeUtil quantize_util_int16_;
 };
 
 }  // namespace mace
diff --git a/mace/libmace/mace.cc b/mace/libmace/mace.cc
index d31f9eb56e1415f3691da8593926263abfb6b846..44e025d09703600e315753bc0e71fd6940da066b 100644
--- a/mace/libmace/mace.cc
+++ b/mace/libmace/mace.cc
@@ -304,15 +304,14 @@ MaceStatus MaceEngineConfig::Impl::SetCPUThreadPolicy(
   return MaceStatus::MACE_SUCCESS;
 }
 
-#ifdef MACE_ENABLE_HEXAGON
 MaceStatus MaceEngineConfig::Impl::SetHexagonToUnsignedPD() {
   bool ret = false;
+#ifdef MACE_ENABLE_HEXAGON
   ret = HexagonDSPWrapper::RequestUnsignedPD();
+#endif
   return ret ? MaceStatus::MACE_SUCCESS : MaceStatus::MACE_RUNTIME_ERROR;
 }
-#endif
 
-#ifdef MACE_ENABLE_HEXAGON
 MaceStatus MaceEngineConfig::Impl::SetHexagonPower(
     HexagonNNCornerType corner,
     bool dcvs_enable,
@@ -321,12 +320,12 @@ MaceStatus MaceEngineConfig::Impl::SetHexagonPower(
   hexagon_dcvs_enable_ = dcvs_enable;
   hexagon_latency_ = latency;
   bool ret = false;
+#ifdef MACE_ENABLE_HEXAGON
   ret = HexagonDSPWrapper::SetPower(corner, dcvs_enable, latency);
+#endif
   return ret ? MaceStatus::MACE_SUCCESS : MaceStatus::MACE_RUNTIME_ERROR;
 }
-#endif
 
-#ifdef MACE_ENABLE_APU
 MaceStatus MaceEngineConfig::Impl::SetAPUCache(
     APUCachePolicy policy,
     const std::string &binary_file,
@@ -335,10 +334,11 @@ MaceStatus MaceEngineConfig::Impl::SetAPUCache(
   apu_cache_policy_ = policy;
   apu_binary_file_ = binary_file;
   apu_storage_file_ = storage_file;
+#ifdef MACE_ENABLE_APU
   ret = true;
+#endif
   return ret ? MaceStatus::MACE_SUCCESS : MaceStatus::MACE_RUNTIME_ERROR;
 }
-#endif
 
 MaceEngineConfig::MaceEngineConfig(
     const DeviceType device_type)
diff --git a/third_party/apu/mt6853/libapu-frontend.so b/third_party/apu/mt6853/libapu-frontend.so
index 2144858555caa0c0926de15a16eeab9ce3aabf46..81568bbf07ac6eb10de10229df13bd23bbf8bb3d 100644
Binary files a/third_party/apu/mt6853/libapu-frontend.so and b/third_party/apu/mt6853/libapu-frontend.so differ
diff --git a/third_party/apu/mt6853/libapu-platform.so b/third_party/apu/mt6853/libapu-platform.so
index 7537371553ec0daf3c97c6277a2ba16a3275b173..47873fae1fd5e7daa32b832decd275bd7fc69677 100644
Binary files a/third_party/apu/mt6853/libapu-platform.so and b/third_party/apu/mt6853/libapu-platform.so differ
diff --git a/third_party/apu/mt6873/libapu-frontend.so b/third_party/apu/mt6873/libapu-frontend.so
index 453f5388c1986bd749fec9d0249dc7c0fbe7e530..aa71b41c9aca644cab5c414be4b555f4c207b9c3 100644
Binary files a/third_party/apu/mt6873/libapu-frontend.so and b/third_party/apu/mt6873/libapu-frontend.so differ
diff --git a/third_party/apu/mt6873/libapu-platform.so b/third_party/apu/mt6873/libapu-platform.so
index af29cee6a9f6554595fd8c9066dbaff12a4fe07c..e9ee3a1771dd0039c020cdd58c7c4f1f2e858cde 100644
Binary files a/third_party/apu/mt6873/libapu-platform.so and b/third_party/apu/mt6873/libapu-platform.so differ
diff --git a/third_party/apu/mt6885/libapu-frontend.so b/third_party/apu/mt6885/libapu-frontend.so
index 453f5388c1986bd749fec9d0249dc7c0fbe7e530..aa71b41c9aca644cab5c414be4b555f4c207b9c3 100644
Binary files a/third_party/apu/mt6885/libapu-frontend.so and b/third_party/apu/mt6885/libapu-frontend.so differ
diff --git a/third_party/apu/mt6885/libapu-platform.so b/third_party/apu/mt6885/libapu-platform.so
index af29cee6a9f6554595fd8c9066dbaff12a4fe07c..e9ee3a1771dd0039c020cdd58c7c4f1f2e858cde 100644
Binary files a/third_party/apu/mt6885/libapu-platform.so and b/third_party/apu/mt6885/libapu-platform.so differ
diff --git a/tools/python/transform/apu_converter.py b/tools/python/transform/apu_converter.py
index faeb0be688010cbbe776f635f4b9545b4444e931..7a9c2068b3e4c0ebe9dc670c18ed4cc265c90376 100644
--- a/tools/python/transform/apu_converter.py
+++ b/tools/python/transform/apu_converter.py
@@ -37,9 +37,12 @@ ApuSupportedOps = [
     'Concat',
     'Conv2D',
     'DepthwiseConv2d',
+    'Deconv2D',
     'Eltwise',
+    'FullyConnected',
     'Pad',
     'Pooling',
+    'PRelu',
     'Reduce',
     'ResizeBilinear',
     'Reshape',
@@ -56,7 +59,9 @@ class ApuOps(object):
             MaceOp.Concat.name: ApuOp.Concat.name,
             MaceOp.Conv2D.name: ApuOp.Conv2D.name,
             MaceOp.DepthwiseConv2d.name: ApuOp.DepthwiseConv2d.name,
+            MaceOp.Deconv2D.name: ApuOp.Deconv2D.name,
             MaceOp.Eltwise.name: ApuOp.Eltwise.name,
+            MaceOp.FullyConnected.name: ApuOp.FullyConnected.name,
             MaceOp.Pad.name: ApuOp.Pad.name,
             MaceOp.Pooling.name: ApuOp.Pooling.name,
             MaceOp.Reduce.name: ApuOp.Reduce.name,
@@ -135,7 +140,8 @@ class ApuConverter(base_converter.ConverterInterface):
             act_mode_arg = ConverterUtil.get_arg(
                 op, MaceKeyword.mace_activation_type_str)
             if act_mode_arg is not None:
-                mace_check(act_mode_arg.s == b'RELU'
+                mace_check(act_mode_arg.s == b'PRELU'
+                           or act_mode_arg.s == b'RELU'
                            or act_mode_arg.s == b'RELUX'
                            or act_mode_arg.s == b'TANH'
                            or act_mode_arg.s == b'SIGMOID',
@@ -179,6 +185,15 @@ class ApuConverter(base_converter.ConverterInterface):
                         multiplier.int32_data.extend([tensor.dims[0]])
                         break
                 op.input.extend([multiplier.name])
+            elif op.type == MaceOp.Deconv2D.name:
+                mace_check(len(op.input) == 4,
+                           op.name + ': apu only support ' + op.type + ' op'
+                           ' with 4 input')
+                self.add_size_tensor_from_arg(
+                    op, MaceKeyword.mace_strides_str)
+                self.add_padding_value_tensor_from_arg(op)
+                self.add_size_tensor_from_list(
+                    op, MaceKeyword.mace_dilations_str, [1, 1])
             elif op.type == MaceOp.Eltwise.name:
                 eltwise_type = ConverterUtil.get_arg(
                     op, MaceKeyword.mace_element_type_str).i
@@ -276,8 +291,8 @@ class ApuConverter(base_converter.ConverterInterface):
                            op.name + ': apu only support squeeze op with 1'
                            ' input')
                 self.add_shape_tensor_from_axis_arg(op)
-
             op.type = self._apu_ops.map_nn_op(op.type)
+        self.change_activation_to_prelu()
 
     def add_op_output_type(self):
         type_map = {}
@@ -371,6 +386,14 @@ class ApuConverter(base_converter.ConverterInterface):
         size_value_tensor.int32_data.extend(size_value_arg.ints)
         op.input.extend([size_value_tensor.name])
 
+    def add_size_tensor_from_list(self, op, keyword, list_value):
+        size_value_tensor = self._model.tensors.add()
+        size_value_tensor.name = op.name + '/' + keyword + ':0'
+        size_value_tensor.data_type = mace_pb2.DT_INT32
+        size_value_tensor.dims.extend([len(list_value)])
+        size_value_tensor.int32_data.extend(list_value)
+        op.input.extend([size_value_tensor.name])
+
     def add_int_tensor_from_arg(self, op, keyword):
         int_value_arg = ConverterUtil.get_arg(op, keyword)
         mace_check(int_value_arg.i is not None,
@@ -420,7 +443,6 @@ class ApuConverter(base_converter.ConverterInterface):
                 op, MaceKeyword.mace_padding_str)
             if padding_type is None:
                 continue
-
             padding_arg = op.arg.add()
             padding_arg.name = MaceKeyword.mace_padding_values_str
             if padding_type.i == PaddingMode.VALID.value:
@@ -431,7 +453,8 @@ class ApuConverter(base_converter.ConverterInterface):
             kernel = []
             dilation = [1, 1]
             if op.type == MaceOp.Conv2D.name or \
-                    op.type == MaceOp.DepthwiseConv2d.name:
+                    op.type == MaceOp.DepthwiseConv2d.name or \
+                    op.type == MaceOp.Deconv2D.name:
                 if ConverterUtil.get_arg(
                         op, MaceKeyword.mace_dilations_str) is not None:
                     dilation = ConverterUtil.get_arg(
@@ -456,22 +479,37 @@ class ApuConverter(base_converter.ConverterInterface):
                 if len(in_size) > 0:
                     break
             out_size = op.output_shape[0].dims[1:3]
-            h = (out_size[0] - 1) * stride[0] \
-                + ((kernel[0] - 1) * dilation[0] + 1) - in_size[0]
-            w = (out_size[1] - 1) * stride[1] \
-                + ((kernel[1] - 1) * dilation[1] + 1) - in_size[1]
+            if(op.type == MaceOp.Deconv2D.name):
+                h = (in_size[0] - 1) * stride[0] + kernel[0] - out_size[0]
+                w = (in_size[1] - 1) * stride[1] + kernel[1] - out_size[1]
+            else:
+                h = (out_size[0] - 1) * stride[0] \
+                    + ((kernel[0] - 1) * dilation[0] + 1) - in_size[0]
+                w = (out_size[1] - 1) * stride[1] \
+                    + ((kernel[1] - 1) * dilation[1] + 1) - in_size[1]
             top = int(np.floor(h/2))
             left = int(np.floor(w/2))
             bottom = h - top
             right = w - left
             padding_arg.ints.extend([top, right, bottom, left])
 
+    def change_activation_to_prelu(self):
+        for op in self._model.op:
+            if op.type == ApuOp.Activation.name and \
+                    ConverterUtil.get_arg(
+                        op, MaceKeyword.mace_activation_type_str).s == b'PRELU':
+                op.type = ApuOp.PRelu.name
+
     def ensure_bias_vector(self):
         for _op in self._model.op:
-            if _op.type != MaceOp.Conv2D.name and \
-                    _op.type != MaceOp.DepthwiseConv2d.name:
-                continue
-            if len(_op.input) != 2:
+            ensure_input = -1
+            if _op.type == MaceOp.Conv2D.name or \
+                    _op.type == MaceOp.DepthwiseConv2d.name or \
+                    _op.type == MaceOp.FullyConnected.name:
+                ensure_input = 3
+            if _op.type == MaceOp.Deconv2D.name:
+                ensure_input = 4
+            if ensure_input == -1 or len(_op.input) != ensure_input - 1:
                 continue
 
             tensor = self._model.tensors.add()
@@ -522,15 +560,14 @@ class ApuConverter(base_converter.ConverterInterface):
             const_tensor.name = _op.name + '/' + \
                 MaceKeyword.mace_scalar_input_str + ':0'
             const_tensor.dims.extend([1])
+            const_tensor.data_type = _op.output_type[0]
             if _op.output_type[0] == mace_pb2.DT_UINT8 or \
                     _op.output_type[0] == mace_pb2.DT_INT16:
-                const_tensor.data_type = _op.output_type[0]
                 const_tensor.scale = scalar
                 const_tensor.zero_point = 0
                 const_tensor.quantized = True
                 const_tensor.int32_data.extend([1])
             elif _op.output_type[0] == mace_pb2.DT_FLOAT:
-                const_tensor.data_type = mace_pb2.DT_FLOAT
                 const_tensor.float_data.extend([scalar])
             _op.input.extend([const_tensor.name])
             ConverterUtil.del_arg(
diff --git a/tools/python/transform/base_converter.py b/tools/python/transform/base_converter.py
index 4fce320aee7e0a0c67935eccb205efb4bd2df220..696a59551701d12274a0f26d0dc0d95c5820ec60 100644
--- a/tools/python/transform/base_converter.py
+++ b/tools/python/transform/base_converter.py
@@ -340,6 +340,8 @@ class TransformerRule(Enum):
     QUANTIZE_LARGE_WEIGHTS = 43
     TRANSPOSE_SHAPE_TENSOR_TO_PARAM = 44
     TRANSFORM_SINGLE_BN_TO_DEPTHWISE_CONV = 45
+    TRANSFORM_MUL_MAX_TO_PRELU = 46
+    TRANSFORM_EXPAND_DIMS_TO_RESHAPE = 47
 
 
 class ConverterInterface(object):
@@ -610,6 +612,8 @@ class ConverterOption(object):
         if self._device == DeviceType.APU.value:
             self._transformer_option = self._transformer_option + [
                 TransformerRule.TRANSFORM_SINGLE_BN_TO_DEPTHWISE_CONV,
+                TransformerRule.TRANSFORM_MUL_MAX_TO_PRELU,
+                TransformerRule.TRANSFORM_EXPAND_DIMS_TO_RESHAPE,
             ]
         if self.quantize_large_weights:
             self._transformer_option = self._transformer_option + [
diff --git a/tools/python/transform/transformer.py b/tools/python/transform/transformer.py
index 73f6b66b7eb455d6b82cf7bc72b6e8afdc555b61..8f032acefbc352b94cd488f8653a3a1392ce5e94 100644
--- a/tools/python/transform/transformer.py
+++ b/tools/python/transform/transformer.py
@@ -117,6 +117,10 @@ class Transformer(base_converter.ConverterInterface):
                 self.quantize_large_weights,
             TransformerRule.TRANSFORM_SINGLE_BN_TO_DEPTHWISE_CONV:
                 self.transform_single_bn_to_depthwise_conv,
+            TransformerRule.TRANSFORM_MUL_MAX_TO_PRELU:
+                self.transform_mul_max_to_prelu,
+            TransformerRule.TRANSFORM_EXPAND_DIMS_TO_RESHAPE:
+                self.transform_expand_dims_to_reshape,
         }
 
         self._option = option
@@ -962,17 +966,23 @@ class Transformer(base_converter.ConverterInterface):
                     or op.type == MaceOp.BatchNorm.name) \
                     and len(self._consumers.get(op.output[0], [])) == 1:
                 consumer_op = self._consumers[op.output[0]][0]
+                fold_consumer = False
                 if consumer_op.type == MaceOp.Activation.name:
                     act_type_arg = ConverterUtil.get_arg(
                         consumer_op, MaceKeyword.mace_activation_type_str)
                     act_type = act_type_arg.s.decode()
-                    if act_type == ActivationType.PRELU.name:
-                        continue
+                    if self._option.device == DeviceType.APU.value:
+                        fold_consumer = (act_type in
                                          [ActivationType.RELU.name,
                                           ActivationType.RELUX.name])
+                    else:
+                        fold_consumer = (act_type != ActivationType.PRELU.name)
                     # during quantization, only fold relu/relux
                     if (self._option.quantize_stat or self._option.quantize) \
                             and act_type not in [ActivationType.RELU.name,
                                                  ActivationType.RELUX.name]:
                         continue
+                if fold_consumer:
                     print("Fold activation: %s(%s)" % (op.name, op.type))
                     op.name = consumer_op.name
                     op.output[0] = consumer_op.output[0]
@@ -1032,6 +1042,8 @@ class Transformer(base_converter.ConverterInterface):
         return False
 
     def reshape_fc_weight(self):
+        if self._option.device == DeviceType.APU.value:
+            return
         net = self._model
         filter_format = self.filter_format()
         for op in net.op:
@@ -1348,6 +1360,36 @@ class Transformer(base_converter.ConverterInterface):
                 weight.dims[:] = [1, 1] + list(weight_data.shape)
                 return True
 
+            if self._option.device == DeviceType.APU.value:
+                if op.type == MaceOp.MatMul.name:
+                    transpose_a_arg = ConverterUtil.get_arg(op, MaceKeyword.mace_transpose_a_str)  # noqa
+                    transpose_b_arg = ConverterUtil.get_arg(op, MaceKeyword.mace_transpose_b_str)  # noqa
+                    transpose_a = transpose_a_arg is not None and transpose_a_arg.i == 1  # noqa
+                    transpose_b = transpose_b_arg is not None and transpose_b_arg.i == 1  # noqa
+                    if transpose_a is False and transpose_b is False and \
+                            op.input[1] in self._consts and \
+                            len(self.get_tensor_shape(op.input[0])) == 2 and \
+                            len(self.get_tensor_shape(op.input[1])) == 2:
+                        op.type = MaceOp.FullyConnected.name
+                        del op.arg[:]
+                        rhs = op.input[1]
+                        if rhs in self._consts and \
+                                len(self._consts[rhs].dims) == 2:
+                            arg = ConverterUtil.get_arg(op, MaceKeyword.mace_transpose_b_str)  # noqa
+                            if arg is None:
+                                arg = op.arg.add()
+                                arg.name = MaceKeyword.mace_transpose_b_str
+                                arg.i = 0
+                            if arg.i == 0:
+                                arg.i = 1
+                                filter = self._consts[rhs]
+                                filter_data = np.array(filter.float_data) \
+                                    .reshape(filter.dims)
+                                filter_data = filter_data.transpose(1, 0)
+                                filter.float_data[:] = filter_data.flat
+                                filter.dims[:] = filter_data.shape
+                                six.print_('Transpose matmul weight to shape:',
+                                           filter.dims)
 
         return False
 
     def update_float_op_data_type(self):
@@ -2476,5 +2518,68 @@ class Transformer(base_converter.ConverterInterface):
                     tensor.dims[:] = [1, 1, 1, tensor.dims[0]]
                     break
             return True
+        return False
+
+    def transform_mul_max_to_prelu(self):
+        if self._option.device != DeviceType.APU.value:
+            return False
+        net = self._model
+        for op in net.op:
+            if op.type != MaceOp.Eltwise.name or \
+                    ConverterUtil.get_arg(
+                        op, MaceKeyword.mace_element_type_str).i \
+                    != EltwiseType.PROD.value or \
+                    op.output[0] not in self._consumers:
+                continue
+            if len(op.input) != 1:
+                continue
+            consumer_op = self._consumers[op.output[0]][0]
+            if consumer_op.type != MaceOp.Eltwise.name or \
+                    ConverterUtil.get_arg(
+                        consumer_op, MaceKeyword.mace_element_type_str).i \
+                    != EltwiseType.MAX.value:
+                continue
+            if op.input[0] not in consumer_op.input:
+                continue
+            float_value_arg = ConverterUtil.get_arg(
+                op, MaceKeyword.mace_scalar_input_str)
+            mace_check(float_value_arg is not None,
+                       op.name + ': ' + MaceKeyword.mace_scalar_input_str
+                       + ' value float should not be None')
+            scalar = float_value_arg.f
+            if scalar < 0:
+                continue
+            if scalar > 1:
+                scalar = 1
+            # Change Mul op to Prelu
+            print("Change mul and max to prelu: %s(%s)" % (op.name, op.type))
+            op.name = consumer_op.name
+            op.output[0] = consumer_op.output[0]
+            alpha_tensor = net.tensors.add()
+            alpha_tensor.name = op.name + '_alpha'
+            alpha_tensor.dims.append(1)
+            alpha_tensor.data_type = mace_pb2.DT_FLOAT
+            alpha_tensor.float_data.extend([scalar])
+            op.input.extend([alpha_tensor.name])
+            ConverterUtil.del_arg(op, MaceKeyword.mace_scalar_input_str)
+            ConverterUtil.del_arg(
+                op, MaceKeyword.mace_scalar_input_index_str)
+            op.type = MaceOp.Activation.name
+            type_arg = op.arg.add()
+            type_arg.name = MaceKeyword.mace_activation_type_str
+            type_arg.s = six.b(ActivationType.PRELU.name)
+            self.replace_quantize_info(op, consumer_op)
+            self.safe_remove_node(consumer_op, op)
+            return True
+        return False
+
+    def transform_expand_dims_to_reshape(self):
+        if self._option.device != DeviceType.APU.value:
+            return False
+        net = self._model
+        for op in net.op:
+            if op.type == MaceOp.ExpandDims.name:
+                op.type = MaceOp.Reshape.name
+                del op.arg[:]
+                return True
         return False