fix memory optimization

78f9a398 · wuchenghui · 773ff815 · 78f9a398 · 78f9a398 · 78f9a398
4 changed files
--- a/mace/core/tensor.h
+++ b/mace/core/tensor.h
@@ -212,7 +212,7 @@ class Tensor {
    image_shape_.clear();
    if (buffer_ != nullptr) {
      MACE_CHECK(!has_opencl_image(), "Cannot resize image, use ResizeImage.");
-      buffer_->Resize(raw_size());
+      if (raw_size() > buffer_->size()) buffer_->Resize(raw_size());
    } else {
      MACE_CHECK(is_buffer_owner_);
      buffer_ = new Buffer(allocator_, raw_size());

--- a/mace/core/workspace.cc
+++ b/mace/core/workspace.cc
@@ -170,7 +170,6 @@ void Workspace::CreateOutputTensorBuffer(const NetDef &net_def,
        std::unique_ptr<Tensor> tensor
            (new Tensor(preallocated_allocator_.GetBuffer(mem_ids[i]), dtype));
        tensor->SetSourceOpName(op.name());
-        tensor_map_[op.output(i)] = std::move(tensor);
        if (device_type == DeviceType::OPENCL) {
          VLOG(3) << "Tensor: " << op.name() << "(" << op.type() << ")"
                  << " Mem: "  << mem_ids[i]
@@ -180,7 +179,12 @@ void Workspace::CreateOutputTensorBuffer(const NetDef &net_def,
                  << ", "
                  << dynamic_cast<Image *>(tensor->UnderlyingBuffer())
                      ->image_shape()[1];
+        } else if (device_type == DeviceType::CPU) {
+          VLOG(3) << "Tensor: " << op.name() << "(" << op.type() << ")"
+                  << " Mem: "  << mem_ids[i]
+                  << ", Buffer size: " << tensor->UnderlyingBuffer()->size();
        }
+        tensor_map_[op.output(i)] = std::move(tensor);
      }
    }
  }

--- a/mace/python/tools/caffe_converter_lib.py
+++ b/mace/python/tools/caffe_converter_lib.py
@@ -529,7 +529,8 @@ class CaffeConverter(object):
        # Add filter
        weight_tensor_name = op.name + '_weight:0'
        weight_data = op.data[0]  # OIHW
-        input_shape = op.data[1].shape
+        input_shape = op.get_single_parent().output_shape_map[
+            op.layer.bottom[0]]
        if input_shape[2] > 16 and input_shape[3] > 16:
            G = np.array([
                [1.0, 0.0, 0.0],
@@ -1085,6 +1086,13 @@ class CaffeConverter(object):
            arg.name = 'T'
            arg.i = self.dt
+            input_op = self.ops_map[name]
+            if input_op.layer is not None:
+                output_shape = input_op.output_shape_map[input_op.layer.top[0]]
+            else:
+                output_shape = input_op.output_shape_map[input_op.name]
+            self.add_output_shape(op_def, output_shape)
    def add_cpu_output_transform(self, names):
        for name in names:
            output_name = MACE_OUTPUT_NODE_NAME + '_' + name + ":0"
@@ -1098,6 +1106,13 @@ class CaffeConverter(object):
            dims_arg.name = 'dims'
            dims_arg.ints.extend([0, 2, 3, 1])  # NCHW -> NHWC
+            input_op = self.ops_map[name]
+            if input_op.layer is not None:
+                output_shape = input_op.output_shape_map[input_op.layer.top[0]]
+            else:
+                output_shape = input_op.output_shape_map[input_op.name]
+            self.add_output_shape(op_def, output_shape)
    def convert(self, input_nodes, input_shapes, output_nodes):
        assert self.ops[0].type == 'Input'
        self.add_input_op_shape(input_nodes, input_shapes)

--- a/mace/python/tools/tf_converter_lib.py
+++ b/mace/python/tools/tf_converter_lib.py
@@ -174,6 +174,8 @@ class TFConverter(object):
            arg.name = 'T'
            arg.i = self.dt
+            self.add_output_shape(self.ops[name].outputs, op_def)
    def add_gpu_output_transform(self, names):
        for name in names:
            output_name = MACE_OUTPUT_NODE_NAME + '_' + name + ":0"
@@ -200,6 +202,8 @@ class TFConverter(object):
            dims_arg.name = 'dims'
            dims_arg.ints.extend([0, 2, 3, 1])
+            self.add_output_shape(self.ops[name].outputs, op_def)
    @staticmethod
    def add_output_shape(outputs, op):
        output_shapes = []