From 78f9a3985e6d300eb19675ffa6eeefc56aa7187c Mon Sep 17 00:00:00 2001
From: wuchenghui
Date: Thu, 26 Apr 2018 20:51:52 +0800
Subject: [PATCH] fix memory optimization

---
 mace/core/tensor.h                       |  2 +-
 mace/core/workspace.cc                   |  6 +++++-
 mace/python/tools/caffe_converter_lib.py | 17 ++++++++++++++++-
 mace/python/tools/tf_converter_lib.py    |  4 ++++
 4 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/mace/core/tensor.h b/mace/core/tensor.h
index a72d9461..93f3e93d 100644
--- a/mace/core/tensor.h
+++ b/mace/core/tensor.h
@@ -212,7 +212,7 @@ class Tensor {
     image_shape_.clear();
     if (buffer_ != nullptr) {
       MACE_CHECK(!has_opencl_image(), "Cannot resize image, use ResizeImage.");
-      buffer_->Resize(raw_size());
+      if (raw_size() > buffer_->size()) buffer_->Resize(raw_size());
     } else {
       MACE_CHECK(is_buffer_owner_);
       buffer_ = new Buffer(allocator_, raw_size());
diff --git a/mace/core/workspace.cc b/mace/core/workspace.cc
index c369b15c..14caa649 100644
--- a/mace/core/workspace.cc
+++ b/mace/core/workspace.cc
@@ -170,7 +170,6 @@ void Workspace::CreateOutputTensorBuffer(const NetDef &net_def,
       std::unique_ptr<Tensor> tensor
           (new Tensor(preallocated_allocator_.GetBuffer(mem_ids[i]), dtype));
       tensor->SetSourceOpName(op.name());
-      tensor_map_[op.output(i)] = std::move(tensor);
       if (device_type == DeviceType::OPENCL) {
         VLOG(3) << "Tensor: " << op.name() << "(" << op.type() << ")"
                 << " Mem: " << mem_ids[i]
@@ -180,7 +179,12 @@ void Workspace::CreateOutputTensorBuffer(const NetDef &net_def,
                 << ", "
                 << dynamic_cast<Image *>(tensor->UnderlyingBuffer())
                     ->image_shape()[1];
+      } else if (device_type == DeviceType::CPU) {
+        VLOG(3) << "Tensor: " << op.name() << "(" << op.type() << ")"
+                << " Mem: " << mem_ids[i]
+                << ", Buffer size: " << tensor->UnderlyingBuffer()->size();
       }
+      tensor_map_[op.output(i)] = std::move(tensor);
     }
   }
 }
diff --git a/mace/python/tools/caffe_converter_lib.py b/mace/python/tools/caffe_converter_lib.py
index cc961c36..7bde783f 100644
--- a/mace/python/tools/caffe_converter_lib.py
+++ b/mace/python/tools/caffe_converter_lib.py
@@ -529,7 +529,8 @@ class CaffeConverter(object):
         # Add filter
         weight_tensor_name = op.name + '_weight:0'
         weight_data = op.data[0]  # OIHW
-        input_shape = op.data[1].shape
+        input_shape = op.get_single_parent().output_shape_map[
+            op.layer.bottom[0]]
         if input_shape[2] > 16 and input_shape[3] > 16:
             G = np.array([
                 [1.0, 0.0, 0.0],
@@ -1085,6 +1086,13 @@ class CaffeConverter(object):
             arg.name = 'T'
             arg.i = self.dt
 
+            input_op = self.ops_map[name]
+            if input_op.layer is not None:
+                output_shape = input_op.output_shape_map[input_op.layer.top[0]]
+            else:
+                output_shape = input_op.output_shape_map[input_op.name]
+            self.add_output_shape(op_def, output_shape)
+
     def add_cpu_output_transform(self, names):
         for name in names:
             output_name = MACE_OUTPUT_NODE_NAME + '_' + name + ":0"
@@ -1098,6 +1106,13 @@ class CaffeConverter(object):
             dims_arg.name = 'dims'
             dims_arg.ints.extend([0, 2, 3, 1])  # NCHW -> NHWC
 
+            input_op = self.ops_map[name]
+            if input_op.layer is not None:
+                output_shape = input_op.output_shape_map[input_op.layer.top[0]]
+            else:
+                output_shape = input_op.output_shape_map[input_op.name]
+            self.add_output_shape(op_def, output_shape)
+
     def convert(self, input_nodes, input_shapes, output_nodes):
         assert self.ops[0].type == 'Input'
         self.add_input_op_shape(input_nodes, input_shapes)
diff --git a/mace/python/tools/tf_converter_lib.py b/mace/python/tools/tf_converter_lib.py
index fc057dd0..354c07b3 100644
--- a/mace/python/tools/tf_converter_lib.py
+++ b/mace/python/tools/tf_converter_lib.py
@@ -174,6 +174,8 @@ class TFConverter(object):
             arg.name = 'T'
             arg.i = self.dt
 
+            self.add_output_shape(self.ops[name].outputs, op_def)
+
     def add_gpu_output_transform(self, names):
         for name in names:
             output_name = MACE_OUTPUT_NODE_NAME + '_' + name + ":0"
@@ -200,6 +202,8 @@ class TFConverter(object):
             dims_arg.name = 'dims'
             dims_arg.ints.extend([0, 2, 3, 1])
 
+            self.add_output_shape(self.ops[name].outputs, op_def)
+
     @staticmethod
     def add_output_shape(outputs, op):
         output_shapes = []
--
GitLab
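
Note on the mace/core/tensor.h hunk: Tensor::Resize previously called
buffer_->Resize(raw_size()) unconditionally, so resizing a tensor that sits on a
preallocated, shared buffer could reallocate that buffer; with this patch the
buffer is reallocated only when the requested raw_size() exceeds the current
buffer size. The standalone sketch below illustrates this grow-only policy;
ScratchBuffer and ResizeGrowOnly are illustrative stand-ins, not the actual MACE
Buffer/Tensor API.

#include <cstddef>
#include <cstdio>
#include <vector>

// Illustrative stand-in for a reusable byte buffer (not the MACE Buffer class).
class ScratchBuffer {
 public:
  std::size_t size() const { return data_.size(); }
  void Resize(std::size_t bytes) {  // reallocates the backing storage
    std::printf("reallocating to %zu bytes\n", bytes);
    data_.resize(bytes);
  }

 private:
  std::vector<char> data_;
};

// Grow-only resize, mirroring the patched check in Tensor::Resize:
// only reallocate when the requested size exceeds the current capacity.
void ResizeGrowOnly(ScratchBuffer *buffer, std::size_t wanted_bytes) {
  if (wanted_bytes > buffer->size()) {
    buffer->Resize(wanted_bytes);
  }
}

int main() {
  ScratchBuffer buf;
  ResizeGrowOnly(&buf, 1024);  // reallocates: 0 -> 1024 bytes
  ResizeGrowOnly(&buf, 256);   // no reallocation, the 1024-byte buffer is reused
  ResizeGrowOnly(&buf, 4096);  // reallocates: 1024 -> 4096 bytes
  return 0;
}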
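
Note on the mace/core/workspace.cc hunks: before the patch, `tensor` was moved
into tensor_map_ first, and the OpenCL logging branch then called
tensor->UnderlyingBuffer() on the moved-from std::unique_ptr, which is null
after the move; the patch moves the map insertion below the logging and adds a
CPU logging branch. The generic sketch below shows the hazard and the fixed
ordering; Payload and registry are made-up names, not MACE code.

#include <cassert>
#include <map>
#include <memory>
#include <string>

struct Payload {
  int size_bytes = 42;
};

int main() {
  std::map<std::string, std::unique_ptr<Payload>> registry;
  auto tensor = std::make_unique<Payload>();

  // Buggy ordering (what the old code did): moving into the map first leaves
  // `tensor` null, so a later `tensor->size_bytes` would dereference a
  // moved-from unique_ptr, which is undefined behavior.
  //   registry["op:0"] = std::move(tensor);
  //   int logged = tensor->size_bytes;  // UB: moved-from (null) unique_ptr

  // Fixed ordering (what the patch does): use the pointer for logging first,
  // then move it into the map as the last step.
  int logged = tensor->size_bytes;
  registry["op:0"] = std::move(tensor);

  assert(logged == 42);
  assert(registry["op:0"] != nullptr);
  return 0;
}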