diff --git a/mace/core/memory_optimizer.cc b/mace/core/memory_optimizer.cc
index eacdddd964fb8dcfe19bd08e5984bd08837a571c..39020384a003d9bee4f9700869c9068c3e9e7d15 100644
--- a/mace/core/memory_optimizer.cc
+++ b/mace/core/memory_optimizer.cc
@@ -222,7 +222,11 @@ void MemoryOptimizer::Optimize(
           idle_blocks_.insert(mem_id);
         }
       } else {
-        MACE_CHECK(tensor_ref_count_.at(input_name) >= 0);
+        MACE_CHECK(tensor_ref_count_.at(input_name) >= 0,
+                   "Reference count of tensor ",
+                   input_name,
+                   " is ",
+                   tensor_ref_count_.at(input_name));
       }
     }
   }
diff --git a/mace/core/tensor.h b/mace/core/tensor.h
index 2db8c8ffae5bda5f0d1fe33ee3bc79e155c81a3e..ae999b05df7b7cc1df91cf4a716ea1b48da1b7e8 100644
--- a/mace/core/tensor.h
+++ b/mace/core/tensor.h
@@ -348,9 +348,10 @@ class Tensor {
       MACE_CHECK(image_shape[0] <= buffer_->shape()[0] &&
                  image_shape[1] <= buffer_->shape()[1],
                  "tensor (source op ", name_,
-                 "): current physical image shape: ", buffer_->shape()[0],
-                 ", ", buffer_->shape()[1], " < logical image shape: ",
-                 image_shape[0], ", ", image_shape[1]);
+                 "): current logical image shape:",
+                 image_shape[0], ", ", image_shape[1],
+                 " > physical image shape: ",
+                 buffer_->shape()[0], ", ", buffer_->shape()[1]);
       return MaceStatus::MACE_SUCCESS;
     }
   }
diff --git a/mace/ops/opencl/buffer_transformer.h b/mace/ops/opencl/buffer_transformer.h
index acefd6abdec2e3cfda6b9a25c13a64f2ed87e7b0..e65ae3701efe51068bb81a39e533f170502c792e 100644
--- a/mace/ops/opencl/buffer_transformer.h
+++ b/mace/ops/opencl/buffer_transformer.h
@@ -66,7 +66,7 @@ class OpenCLBufferTransformer {
       VLOG(2) << "Transform CPU Buffer " << input->name()
               << " to GPU Buffer " << internal_tensor->name()
               << " with data type " << dt;
-      if (data_format == DataFormat::NCHW && input->shape().size() == 4) {
+      if (data_format == DataFormat::NHWC && input->shape().size() == 4) {
         // 1. (NCHW -> NHWC)
         std::vector<int> dst_dims = {0, 2, 3, 1};
         std::vector<index_t> output_shape =
diff --git a/mace/ops/reshape.cc b/mace/ops/reshape.cc
index bb05a065e4828e90869a4b222625b5cae0e2a369..e0748343288394eaa6b99b904ee468d74471cb3c 100644
--- a/mace/ops/reshape.cc
+++ b/mace/ops/reshape.cc
@@ -77,13 +77,6 @@ class ReshapeOp : public Operation {
     }
 
     Tensor *output = this->Output(OUTPUT);
-    // NCHW -> NHWC
-    if (D == DeviceType::GPU && out_shape.size() == 4) {
-      std::vector<int> dst_dims = {0, 2, 3, 1};
-      std::vector<index_t> out_shape_gpu = TransposeShape<index_t, index_t>(
-          out_shape, dst_dims);
-      out_shape = out_shape_gpu;
-    }
 
     output->ReuseTensorBuffer(*input);
     output->Reshape(out_shape);
diff --git a/mace/python/tools/converter_tool/caffe_converter.py b/mace/python/tools/converter_tool/caffe_converter.py
index bc3e092e91716def277d29309dc5ec0b05389822..6c46f35dd4a09aa3981240750f7c8ce1b5cfb854 100644
--- a/mace/python/tools/converter_tool/caffe_converter.py
+++ b/mace/python/tools/converter_tool/caffe_converter.py
@@ -137,7 +137,6 @@ class CaffeNet(object):
                 layer.top[i] = new_name
                 self._alias_op_output_name[old_name] = new_name
                 self._used_op_output_name.update([new_name])
-
             for input_tensor in layer.bottom:
                 if input_tensor not in self._consumers:
                     self._consumers[input_tensor] = []
@@ -248,7 +247,8 @@ class CaffeConverter(base_converter.ConverterInterface):
         for op in ops:
            for i in six.moves.range(len(op.output)):
                original_output_name = op.output[i].split('#')[0]
-               if original_output_name not in visited:
+               if original_output_name not in visited and\
+                       original_output_name not in self._option.input_nodes:
                    self.replace_input_name(
                        consumers.get(op.output[i], []),
                        op.output[i],
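
Note on the {0, 2, 3, 1} permutation that appears in the buffer_transformer.h and reshape.cc hunks above: it reorders a 4-D NCHW shape into NHWC. Below is a minimal standalone sketch of that shape permutation; PermuteShape and the index_t alias are illustrative stand-ins, not MACE's actual TransposeShape API.

// Sketch only: permute a 4-D shape with the {0, 2, 3, 1} index map (NCHW -> NHWC).
#include <cstdint>
#include <iostream>
#include <vector>

using index_t = int64_t;  // assumed 64-bit shape type, mirroring MACE's index_t

std::vector<index_t> PermuteShape(const std::vector<index_t> &shape,
                                  const std::vector<int> &dst_dims) {
  std::vector<index_t> out(dst_dims.size());
  for (size_t i = 0; i < dst_dims.size(); ++i) {
    out[i] = shape[dst_dims[i]];  // output dim i is taken from input dim dst_dims[i]
  }
  return out;
}

int main() {
  const std::vector<index_t> nchw = {1, 3, 224, 224};
  const std::vector<index_t> nhwc = PermuteShape(nchw, {0, 2, 3, 1});
  for (index_t d : nhwc) std::cout << d << ' ';  // prints: 1 224 224 3
  std::cout << '\n';
  return 0;
}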