提交 78f9a398 作者: wuchenghui

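Fix memory optimization: Tensor resize now grows an existing buffer only when the requested raw size exceeds the buffer's current size; Workspace::CreateOutputTensorBuffer stores the tensor in tensor_map_ only after the logging that dereferences it (and logs the buffer size on CPU); the Caffe converter reads the convolution input shape from the parent op's output shape; and the Caffe/TF converters attach output shapes to the generated transform ops.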

上级 773ff815
...@@ -212,7 +212,7 @@ class Tensor { ...@@ -212,7 +212,7 @@ class Tensor {
image_shape_.clear(); image_shape_.clear();
if (buffer_ != nullptr) { if (buffer_ != nullptr) {
MACE_CHECK(!has_opencl_image(), "Cannot resize image, use ResizeImage."); MACE_CHECK(!has_opencl_image(), "Cannot resize image, use ResizeImage.");
buffer_->Resize(raw_size()); if (raw_size() > buffer_->size()) buffer_->Resize(raw_size());
} else { } else {
MACE_CHECK(is_buffer_owner_); MACE_CHECK(is_buffer_owner_);
buffer_ = new Buffer(allocator_, raw_size()); buffer_ = new Buffer(allocator_, raw_size());
......
...@@ -170,7 +170,6 @@ void Workspace::CreateOutputTensorBuffer(const NetDef &net_def, ...@@ -170,7 +170,6 @@ void Workspace::CreateOutputTensorBuffer(const NetDef &net_def,
std::unique_ptr<Tensor> tensor std::unique_ptr<Tensor> tensor
(new Tensor(preallocated_allocator_.GetBuffer(mem_ids[i]), dtype)); (new Tensor(preallocated_allocator_.GetBuffer(mem_ids[i]), dtype));
tensor->SetSourceOpName(op.name()); tensor->SetSourceOpName(op.name());
tensor_map_[op.output(i)] = std::move(tensor);
if (device_type == DeviceType::OPENCL) { if (device_type == DeviceType::OPENCL) {
VLOG(3) << "Tensor: " << op.name() << "(" << op.type() << ")" VLOG(3) << "Tensor: " << op.name() << "(" << op.type() << ")"
<< " Mem: " << mem_ids[i] << " Mem: " << mem_ids[i]
...@@ -180,7 +179,12 @@ void Workspace::CreateOutputTensorBuffer(const NetDef &net_def, ...@@ -180,7 +179,12 @@ void Workspace::CreateOutputTensorBuffer(const NetDef &net_def,
<< ", " << ", "
<< dynamic_cast<Image *>(tensor->UnderlyingBuffer()) << dynamic_cast<Image *>(tensor->UnderlyingBuffer())
->image_shape()[1]; ->image_shape()[1];
} else if (device_type == DeviceType::CPU) {
VLOG(3) << "Tensor: " << op.name() << "(" << op.type() << ")"
<< " Mem: " << mem_ids[i]
<< ", Buffer size: " << tensor->UnderlyingBuffer()->size();
} }
tensor_map_[op.output(i)] = std::move(tensor);
} }
} }
} }
......
...@@ -529,7 +529,8 @@ class CaffeConverter(object): ...@@ -529,7 +529,8 @@ class CaffeConverter(object):
# Add filter # Add filter
weight_tensor_name = op.name + '_weight:0' weight_tensor_name = op.name + '_weight:0'
weight_data = op.data[0] # OIHW weight_data = op.data[0] # OIHW
input_shape = op.data[1].shape input_shape = op.get_single_parent().output_shape_map[
op.layer.bottom[0]]
if input_shape[2] > 16 and input_shape[3] > 16: if input_shape[2] > 16 and input_shape[3] > 16:
G = np.array([ G = np.array([
[1.0, 0.0, 0.0], [1.0, 0.0, 0.0],
...@@ -1085,6 +1086,13 @@ class CaffeConverter(object): ...@@ -1085,6 +1086,13 @@ class CaffeConverter(object):
arg.name = 'T' arg.name = 'T'
arg.i = self.dt arg.i = self.dt
input_op = self.ops_map[name]
if input_op.layer is not None:
output_shape = input_op.output_shape_map[input_op.layer.top[0]]
else:
output_shape = input_op.output_shape_map[input_op.name]
self.add_output_shape(op_def, output_shape)
def add_cpu_output_transform(self, names): def add_cpu_output_transform(self, names):
for name in names: for name in names:
output_name = MACE_OUTPUT_NODE_NAME + '_' + name + ":0" output_name = MACE_OUTPUT_NODE_NAME + '_' + name + ":0"
...@@ -1098,6 +1106,13 @@ class CaffeConverter(object): ...@@ -1098,6 +1106,13 @@ class CaffeConverter(object):
dims_arg.name = 'dims' dims_arg.name = 'dims'
dims_arg.ints.extend([0, 2, 3, 1]) # NCHW -> NHWC dims_arg.ints.extend([0, 2, 3, 1]) # NCHW -> NHWC
input_op = self.ops_map[name]
if input_op.layer is not None:
output_shape = input_op.output_shape_map[input_op.layer.top[0]]
else:
output_shape = input_op.output_shape_map[input_op.name]
self.add_output_shape(op_def, output_shape)
def convert(self, input_nodes, input_shapes, output_nodes): def convert(self, input_nodes, input_shapes, output_nodes):
assert self.ops[0].type == 'Input' assert self.ops[0].type == 'Input'
self.add_input_op_shape(input_nodes, input_shapes) self.add_input_op_shape(input_nodes, input_shapes)
......
...@@ -174,6 +174,8 @@ class TFConverter(object): ...@@ -174,6 +174,8 @@ class TFConverter(object):
arg.name = 'T' arg.name = 'T'
arg.i = self.dt arg.i = self.dt
self.add_output_shape(self.ops[name].outputs, op_def)
def add_gpu_output_transform(self, names): def add_gpu_output_transform(self, names):
for name in names: for name in names:
output_name = MACE_OUTPUT_NODE_NAME + '_' + name + ":0" output_name = MACE_OUTPUT_NODE_NAME + '_' + name + ":0"
...@@ -200,6 +202,8 @@ class TFConverter(object): ...@@ -200,6 +202,8 @@ class TFConverter(object):
dims_arg.name = 'dims' dims_arg.name = 'dims'
dims_arg.ints.extend([0, 2, 3, 1]) dims_arg.ints.extend([0, 2, 3, 1])
self.add_output_shape(self.ops[name].outputs, op_def)
@staticmethod @staticmethod
def add_output_shape(outputs, op): def add_output_shape(outputs, op):
output_shapes = [] output_shapes = []
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册