diff --git a/mace/core/runtime/hexagon/hexagon_dsp_wrapper.cc b/mace/core/runtime/hexagon/hexagon_dsp_wrapper.cc
index e76c20f4bc48671091429432cdd13d929f3ccf85..9d17ab44c18f406c86d1c23fcd60ca22041cb462 100644
--- a/mace/core/runtime/hexagon/hexagon_dsp_wrapper.cc
+++ b/mace/core/runtime/hexagon/hexagon_dsp_wrapper.cc
@@ -114,7 +114,9 @@ HexagonDSPWrapper::HexagonDSPWrapper() {
   if (env_log_execute_time_str.empty()) {
     log_execute_time_ = false;
   } else {
-    log_execute_time_ = static_cast<bool>(std::stoi(env_log_execute_time_str));
+    // strtol, unlike atoi, has defined behavior for out-of-range input.
+    log_execute_time_ =
+        std::strtol(env_log_execute_time_str.c_str(), nullptr, 10) != 0;
   }
 }
 
diff --git a/mace/core/runtime/opencl/opencl_allocator.cc b/mace/core/runtime/opencl/opencl_allocator.cc
index b0b4e9f477d51fea46b90f5f8f75dd236958983f..d999416959f96b58d9fc4c5a288a5cadb6065910 100644
--- a/mace/core/runtime/opencl/opencl_allocator.cc
+++ b/mace/core/runtime/opencl/opencl_allocator.cc
@@ -74,8 +74,8 @@ MaceStatus OpenCLAllocator::NewImage(const std::vector<size_t> &image_shape,
                                      const DataType dt,
                                      void **result) const {
   MACE_CHECK(image_shape.size() == 2, "Image shape's size must equal 2");
-  VLOG(3) << "Allocate OpenCL image: " << image_shape[0] << ", "
-          << image_shape[1];
+  MACE_LATENCY_LOGGER(1, "Allocate OpenCL image: ",
+                      image_shape[0], ", ", image_shape[1]);
 
   if (ShouldMockRuntimeFailure()) {
     return MaceStatus::MACE_OUT_OF_RESOURCES;
@@ -109,7 +109,7 @@ MaceStatus OpenCLAllocator::NewImage(const std::vector<size_t> &image_shape,
 }
 
 void OpenCLAllocator::Delete(void *buffer) const {
-  VLOG(3) << "Free OpenCL buffer";
+  MACE_LATENCY_LOGGER(1, "Free OpenCL buffer");
   if (buffer != nullptr) {
     cl::Buffer *cl_buffer = static_cast<cl::Buffer *>(buffer);
     delete cl_buffer;
@@ -117,7 +117,7 @@ void OpenCLAllocator::Delete(void *buffer) const {
 }
 
 void OpenCLAllocator::DeleteImage(void *buffer) const {
-  VLOG(3) << "Free OpenCL image";
+  MACE_LATENCY_LOGGER(1, "Free OpenCL image");
   if (buffer != nullptr) {
     cl::Image2D *cl_image = static_cast<cl::Image2D *>(buffer);
     delete cl_image;
@@ -125,7 +125,7 @@ void OpenCLAllocator::DeleteImage(void *buffer) const {
 }
 
 void *OpenCLAllocator::Map(void *buffer, size_t offset, size_t nbytes) const {
-  VLOG(3) << "Map OpenCL buffer";
+  MACE_LATENCY_LOGGER(1, "Map OpenCL buffer");
   auto cl_buffer = static_cast<cl::Buffer *>(buffer);
   auto queue = opencl_runtime_->command_queue();
   // TODO(heliangliang) Non-blocking call
@@ -144,7 +144,7 @@ void *OpenCLAllocator::Map(void *buffer, size_t offset, size_t nbytes) const {
 void *OpenCLAllocator::MapImage(void *buffer,
                                 const std::vector<size_t> &image_shape,
                                 std::vector<size_t> *mapped_image_pitch) const {
-  VLOG(3) << "Map OpenCL Image";
+  MACE_LATENCY_LOGGER(1, "Map OpenCL Image");
   MACE_CHECK(image_shape.size() == 2) << "Just support map 2d image";
   auto cl_image = static_cast<cl::Image2D *>(buffer);
   std::array<size_t, 3> origin = {{0, 0, 0}};
@@ -164,7 +164,7 @@ void *OpenCLAllocator::MapImage(void *buffer,
 }
 
 void OpenCLAllocator::Unmap(void *buffer, void *mapped_ptr) const {
-  VLOG(3) << "Unmap OpenCL buffer/Image";
+  MACE_LATENCY_LOGGER(1, "Unmap OpenCL buffer/Image");
   auto cl_buffer = static_cast<cl::Buffer *>(buffer);
   auto queue = opencl_runtime_->command_queue();
   cl_int error = queue.enqueueUnmapMemObject(*cl_buffer, mapped_ptr,
diff --git a/mace/ops/deconv_2d.cc b/mace/ops/deconv_2d.cc
index 3779174d063d29c14e0c33bb101305d88392826a..98298e0c9b709e51c9c8bda1a260bdd6dc8ed6e5 100644
--- a/mace/ops/deconv_2d.cc
+++ b/mace/ops/deconv_2d.cc
@@ -170,8 +170,8 @@ class Deconv2dOp<DeviceType::CPU, float> : public Deconv2dOpBase {
 template<>
 class Deconv2dOp<DeviceType::GPU, float> : public Deconv2dOpBase {
  public:
-  explicit Deconv2dOp(OpConstructContext *context)
-      : Deconv2dOpBase(context) {
+  explicit Deconv2dOp(OpConstructContext *context) : Deconv2dOpBase(context),
+      dim_(Operation::GetRepeatedArgs<index_t>("dim")) {
     MemoryType mem_type = MemoryType::GPU_IMAGE;
     if (context->GetOpMemoryType() == MemoryType::GPU_IMAGE) {
       kernel_ = make_unique<opencl::image::Deconv2dKernel>();
@@ -219,12 +219,16 @@ class Deconv2dOp<DeviceType::GPU, float> : public Deconv2dOpBase {
 
     std::vector<index_t> out_shape;
     if (output_shape_tensor) {
-      Tensor::MappingGuard out_shape_guard(output_shape_tensor);
-      MACE_CHECK(output_shape_tensor->size() == 4,
-                 "output shape should be 4-dims");
-      out_shape =
-          std::vector<index_t>(output_shape_tensor->data<int32_t>(),
-                               output_shape_tensor->data<int32_t>() + 4);
+      if (dim_.size() != 4) {  // no usable "dim" arg: read the shape tensor
+        Tensor::MappingGuard out_shape_guard(output_shape_tensor);
+        MACE_CHECK(output_shape_tensor->size() == 4,
+                   "output shape should be 4-dims");
+        out_shape =
+            std::vector<index_t>(output_shape_tensor->data<int32_t>(),
+                                 output_shape_tensor->data<int32_t>() + 4);
+      } else {  // "dim" has the required 4 entries
+        out_shape = dim_;
+      }
     }
     std::vector<int> in_paddings;
     std::vector<int> out_paddings;
@@ -249,6 +253,7 @@ class Deconv2dOp<DeviceType::GPU, float> : public Deconv2dOpBase {
   }
 
  private:
+  std::vector<index_t> dim_;
   std::unique_ptr<OpenCLDeconv2dKernel> kernel_;
 };
 #endif  // MACE_ENABLE_OPENCL
diff --git a/mace/ops/opencl/image/resize_nearest_neighbor.cc b/mace/ops/opencl/image/resize_nearest_neighbor.cc
index afb4b151d4ed0ea6ad17030025bf82123adf5d3d..9f9dd1c8d6a29a5c4f24ba33a350134c282c9a75 100644
--- a/mace/ops/opencl/image/resize_nearest_neighbor.cc
+++ b/mace/ops/opencl/image/resize_nearest_neighbor.cc
@@ -25,14 +25,22 @@ MaceStatus ResizeNearestNeighborKernel::Compute(
     OpContext *context,
     const Tensor *input,
     const Tensor *size,
+    const std::vector<index_t> &dims,
     Tensor *output) {
   const index_t batch = input->dim(0);
   const index_t in_height = input->dim(1);
   const index_t in_width = input->dim(2);
   const index_t channels = input->dim(3);
-  Tensor::MappingGuard size_mapper(size);
-  const index_t out_height = size->data<int32_t>()[0];
-  const index_t out_width = size->data<int32_t>()[1];
+  index_t out_height = 0;
+  index_t out_width = 0;
+  if (dims.size() >= 2) {  // output size supplied through |dims|
+    out_height = dims[0];
+    out_width = dims[1];
+  } else {  // otherwise map |size| and read the two values from it
+    Tensor::MappingGuard size_guard(size);
+    out_height = size->data<int32_t>()[0];
+    out_width = size->data<int32_t>()[1];
+  }
   const index_t channel_blocks = RoundUpDiv4(channels);
 
   const uint32_t gws[3] = {static_cast<uint32_t>(channel_blocks),
diff --git a/mace/ops/opencl/image/resize_nearest_neighbor.h b/mace/ops/opencl/image/resize_nearest_neighbor.h
index 98ef37b28944521123996fbb38f6688d90a277c0..9e2cec61a822e4e86e139e6bfe299771a94794d6 100644
--- a/mace/ops/opencl/image/resize_nearest_neighbor.h
+++ b/mace/ops/opencl/image/resize_nearest_neighbor.h
@@ -73,6 +73,7 @@ class ResizeNearestNeighborKernel : public OpenCLResizeNearestNeighborKernel {
       OpContext *context,
       const Tensor *input,
       const Tensor *size,
+      const std::vector<index_t> &dims,
       Tensor *output) override;
 
  private:
diff --git a/mace/ops/opencl/resize_nearest_neighbor.h b/mace/ops/opencl/resize_nearest_neighbor.h
index b0178827ac6190d413b179b4a98c367d1e5f9c37..c98fc955e476e25db9b7a312fec93d5d13879544 100644
--- a/mace/ops/opencl/resize_nearest_neighbor.h
+++ b/mace/ops/opencl/resize_nearest_neighbor.h
@@ -15,6 +15,8 @@
 #ifndef MACE_OPS_OPENCL_RESIZE_NEAREST_NEIGHBOR_H_
 #define MACE_OPS_OPENCL_RESIZE_NEAREST_NEIGHBOR_H_
 
+#include <vector>
+
 #include "mace/core/types.h"
 #include "mace/public/mace.h"
 #include "mace/utils/math.h"
@@ -31,6 +33,7 @@ class OpenCLResizeNearestNeighborKernel {
       OpContext *context,
       const Tensor *input,
       const Tensor *size,
+      const std::vector<index_t> &dims,
       Tensor *output) = 0;
   MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLResizeNearestNeighborKernel);
 };
diff --git a/mace/ops/resize_nearest_neighbor.cc b/mace/ops/resize_nearest_neighbor.cc
index 89ed473c44e43c5dd4c6415fe2badfd9f738c844..6ac6b9e7157684805a7faf5a45ce9be169ba2af3 100644
--- a/mace/ops/resize_nearest_neighbor.cc
+++ b/mace/ops/resize_nearest_neighbor.cc
@@ -145,7 +145,7 @@ template<>
 class ResizeNearestNeighborOp<DeviceType::GPU, float> : public Operation {
  public:
   explicit ResizeNearestNeighborOp(OpConstructContext *context)
-      : Operation(context) {
+      : Operation(context), dim_(Operation::GetRepeatedArgs<index_t>("dim")) {
     bool align_corners = Operation::GetOptionalArg<bool>(
         "align_corners", false);
     if (context->GetOpMemoryType() == MemoryType::GPU_IMAGE) {
@@ -163,10 +163,11 @@ class ResizeNearestNeighborOp<DeviceType::GPU, float> : public Operation {
                "input must be 4-dimensional and size must be 1-dimensional.",
                input->dim_size(), size->dim_size());
 
-    return kernel_->Compute(context, input, size, output);
+    return kernel_->Compute(context, input, size, dim_, output);
   }
 
  private:
+  std::vector<index_t> dim_;
   std::unique_ptr<OpenCLResizeNearestNeighborKernel> kernel_;
 };
 #endif  // MACE_ENABLE_OPENCL
diff --git a/tools/layers_validate.py b/tools/layers_validate.py
index 893db5e4b4188433f733d24c7cc64a881a55b91b..b6a63b85b284c14bc84487ed3e19483cac493fc8 100644
--- a/tools/layers_validate.py
+++ b/tools/layers_validate.py
@@ -144,8 +144,9 @@ def convert(model_file, output_dir, layers):
             output_info.data_format = data_format
             output_info.dims.extend(op.output_shape[i].dims)
             output_info.data_type = mace_pb2.DT_FLOAT
-            output_info.scale = op.quantize_info[0].scale
-            output_info.zero_point = op.quantize_info[0].zero_point
+            if is_quantize:
+                output_info.scale = op.quantize_info[0].scale
+                output_info.zero_point = op.quantize_info[0].zero_point
             # modify output op
             if is_quantize:
                 output_name = op.output[i]
diff --git a/tools/python/transform/base_converter.py b/tools/python/transform/base_converter.py
index 6db141cedc91e4141984bf2cf2fddbac6524af14..524027999983fcfb544182815e2217a56fe0d6e4 100644
--- a/tools/python/transform/base_converter.py
+++ b/tools/python/transform/base_converter.py
@@ -324,6 +324,7 @@ class TransformerRule(Enum):
     FP16_MATMUL_WEIGHT = 41
     FP16_GATHER_WEIGHT = 42
     QUANTIZE_LARGE_WEIGHTS = 43
+    TRANSPOSE_SHAPE_TENSOR_TO_PARAM = 44
 
 
 class ConverterInterface(object):
@@ -534,6 +535,7 @@ class ConverterOption(object):
                 TransformerRule.TRANSFORM_LSTMCELL_ZEROSTATE,
                 TransformerRule.TRANSFORM_BASIC_LSTMCELL,
                 TransformerRule.TRANSPOSE_RESHAPE_AND_FLATTEN,
+                TransformerRule.TRANSPOSE_SHAPE_TENSOR_TO_PARAM,
                 TransformerRule.FOLD_RESHAPE,
                 TransformerRule.TRANSFORM_MATMUL_TO_FC,
                 # For StoB -> conv -> BtoS -> BN pattern
diff --git a/tools/python/transform/transformer.py b/tools/python/transform/transformer.py
index 2b67b1c9678332e4ccaa13fb4f40ecec44f2e9a1..4b5b9811344b985dff83b0e1f59fd6139c0b26e1 100644
--- a/tools/python/transform/transformer.py
+++ b/tools/python/transform/transformer.py
@@ -99,6 +99,8 @@ class Transformer(base_converter.ConverterInterface):
             TransformerRule.UPDATE_DATA_FORMAT: self.update_data_format,
             TransformerRule.TRANSPOSE_RESHAPE_AND_FLATTEN:
                 self.transform_reshape_and_flatten,
+            TransformerRule.TRANSPOSE_SHAPE_TENSOR_TO_PARAM:
+                self.transform_shape_tensor_to_param,
             TransformerRule.TRANSPOSE_DATA_FORMAT: self.transpose_data_format,
             TransformerRule.CHECK_QUANTIZE_INFO:
                 self.check_quantize_info,
@@ -2119,9 +2121,30 @@ class Transformer(base_converter.ConverterInterface):
                     mace_check(False, "Only support reshape and flatten")
                 shape_tensor.int32_data.extend(dims)
                 op.input.append(shape_tensor.name)
-            if len(op.input) == 2 and dim_arg is None:
-                if shape_tensor is None and op.input[1] in self._consts:
-                    shape_tensor = self._consts[op.input[1]]
+
+    def transform_shape_tensor_to_param(self):
+        """Add a `dim` argument to ops whose shape input is in self._consts.
+
+        kOpTypeInputIdxMap maps each handled op type to the index of its
+        shape input. Ops that already carry a `dim` argument, or whose
+        shape input is not a key of self._consts, are left untouched.
+        """
+        kOpTypeInputIdxMap = {
+            MaceOp.ResizeNearestNeighbor.name: 1,
+            MaceOp.Deconv2D.name: 2,
+            MaceOp.Reshape.name: 1,
+        }
+        net = self._model
+        for op in net.op:
+            if op.type not in kOpTypeInputIdxMap:
+                continue
+            shape_idx = kOpTypeInputIdxMap[op.type]
+            dim_arg = ConverterUtil.get_arg(op, MaceKeyword.mace_dim_str)
+            # Test membership before indexing: a shape input that is not a
+            # key of self._consts would otherwise raise KeyError.
+            if len(op.input) > shape_idx and dim_arg is None and \
+                    op.input[shape_idx] in self._consts:
+                shape_tensor = self._consts[op.input[shape_idx]]
                 if shape_tensor is not None:
                     dim_arg = op.arg.add()
                     dim_arg.name = MaceKeyword.mace_dim_str