diff --git a/mace/core/tensor.h b/mace/core/tensor.h
index a72d9461cb82a03f701a6fdda105ee921c76b83d..93f3e93d2064f86d09aaa23207b3798642d5de2f 100644
--- a/mace/core/tensor.h
+++ b/mace/core/tensor.h
@@ -212,7 +212,7 @@ class Tensor {
     image_shape_.clear();
     if (buffer_ != nullptr) {
       MACE_CHECK(!has_opencl_image(), "Cannot resize image, use ResizeImage.");
-      buffer_->Resize(raw_size());
+      if (raw_size() > buffer_->size()) buffer_->Resize(raw_size());
     } else {
       MACE_CHECK(is_buffer_owner_);
       buffer_ = new Buffer(allocator_, raw_size());
diff --git a/mace/core/workspace.cc b/mace/core/workspace.cc
index c369b15cd00d4cde99bbb172d468d1e4b0147c9f..14caa64931d9eee19f92c0c6f13c05dc7f77765e 100644
--- a/mace/core/workspace.cc
+++ b/mace/core/workspace.cc
@@ -170,7 +170,6 @@ void Workspace::CreateOutputTensorBuffer(const NetDef &net_def,
         std::unique_ptr<Tensor> tensor
             (new Tensor(preallocated_allocator_.GetBuffer(mem_ids[i]), dtype));
         tensor->SetSourceOpName(op.name());
-        tensor_map_[op.output(i)] = std::move(tensor);
         if (device_type == DeviceType::OPENCL) {
           VLOG(3) << "Tensor: " << op.name() << "(" << op.type() << ")"
                   << " Mem: " << mem_ids[i]
@@ -180,7 +179,12 @@ void Workspace::CreateOutputTensorBuffer(const NetDef &net_def,
                   << ", "
                   << dynamic_cast<Image *>(tensor->UnderlyingBuffer())
                       ->image_shape()[1];
+        } else if (device_type == DeviceType::CPU) {
+          VLOG(3) << "Tensor: " << op.name() << "(" << op.type() << ")"
+                  << " Mem: " << mem_ids[i]
+                  << ", Buffer size: " << tensor->UnderlyingBuffer()->size();
         }
+        tensor_map_[op.output(i)] = std::move(tensor);
       }
     }
   }
diff --git a/mace/python/tools/caffe_converter_lib.py b/mace/python/tools/caffe_converter_lib.py
index cc961c36f334ab2c5080b34cfec41c6e210bbf98..7bde783f440c02b062b65012b3ad36ca1ea67602 100644
--- a/mace/python/tools/caffe_converter_lib.py
+++ b/mace/python/tools/caffe_converter_lib.py
@@ -529,7 +529,8 @@ class CaffeConverter(object):
     # Add filter
     weight_tensor_name = op.name + '_weight:0'
     weight_data = op.data[0]  # OIHW
-    input_shape = op.data[1].shape
+    input_shape = op.get_single_parent().output_shape_map[
+        op.layer.bottom[0]]
     if input_shape[2] > 16 and input_shape[3] > 16:
       G = np.array([
         [1.0, 0.0, 0.0],
@@ -1085,6 +1086,13 @@ class CaffeConverter(object):
       arg.name = 'T'
       arg.i = self.dt
 
+      input_op = self.ops_map[name]
+      if input_op.layer is not None:
+        output_shape = input_op.output_shape_map[input_op.layer.top[0]]
+      else:
+        output_shape = input_op.output_shape_map[input_op.name]
+      self.add_output_shape(op_def, output_shape)
+
   def add_cpu_output_transform(self, names):
     for name in names:
       output_name = MACE_OUTPUT_NODE_NAME + '_' + name + ":0"
@@ -1098,6 +1106,13 @@ class CaffeConverter(object):
       dims_arg.name = 'dims'
       dims_arg.ints.extend([0, 2, 3, 1])  # NCHW -> NHWC
 
+      input_op = self.ops_map[name]
+      if input_op.layer is not None:
+        output_shape = input_op.output_shape_map[input_op.layer.top[0]]
+      else:
+        output_shape = input_op.output_shape_map[input_op.name]
+      self.add_output_shape(op_def, output_shape)
+
   def convert(self, input_nodes, input_shapes, output_nodes):
     assert self.ops[0].type == 'Input'
     self.add_input_op_shape(input_nodes, input_shapes)
diff --git a/mace/python/tools/tf_converter_lib.py b/mace/python/tools/tf_converter_lib.py
index fc057dd0f29b8509ca403ea158f5f228066eca28..354c07b3d370d9c5dac9b1e42e8bb5ef88144a95 100644
--- a/mace/python/tools/tf_converter_lib.py
+++ b/mace/python/tools/tf_converter_lib.py
@@ -174,6 +174,8 @@ class TFConverter(object):
       arg.name = 'T'
      arg.i = self.dt
 
+      self.add_output_shape(self.ops[name].outputs, op_def)
+
   def add_gpu_output_transform(self, names):
     for name in names:
       output_name = MACE_OUTPUT_NODE_NAME + '_' + name + ":0"
@@ -200,6 +202,8 @@ class TFConverter(object):
       dims_arg.name = 'dims'
       dims_arg.ints.extend([0, 2, 3, 1])
 
+      self.add_output_shape(self.ops[name].outputs, op_def)
+
   @staticmethod
   def add_output_shape(outputs, op):
     output_shapes = []