提交 7bb7468c,作者: 李寅

Merge branch 'feature_wuch' into 'master'

fix memory optimization

See merge request !430
......@@ -212,7 +212,7 @@ class Tensor {
image_shape_.clear();
if (buffer_ != nullptr) {
MACE_CHECK(!has_opencl_image(), "Cannot resize image, use ResizeImage.");
buffer_->Resize(raw_size());
if (raw_size() > buffer_->size()) buffer_->Resize(raw_size());
} else {
MACE_CHECK(is_buffer_owner_);
buffer_ = new Buffer(allocator_, raw_size());
......
......@@ -170,7 +170,6 @@ void Workspace::CreateOutputTensorBuffer(const NetDef &net_def,
std::unique_ptr<Tensor> tensor
(new Tensor(preallocated_allocator_.GetBuffer(mem_ids[i]), dtype));
tensor->SetSourceOpName(op.name());
tensor_map_[op.output(i)] = std::move(tensor);
if (device_type == DeviceType::OPENCL) {
VLOG(3) << "Tensor: " << op.name() << "(" << op.type() << ")"
<< " Mem: " << mem_ids[i]
......@@ -180,7 +179,12 @@ void Workspace::CreateOutputTensorBuffer(const NetDef &net_def,
<< ", "
<< dynamic_cast<Image *>(tensor->UnderlyingBuffer())
->image_shape()[1];
} else if (device_type == DeviceType::CPU) {
VLOG(3) << "Tensor: " << op.name() << "(" << op.type() << ")"
<< " Mem: " << mem_ids[i]
<< ", Buffer size: " << tensor->UnderlyingBuffer()->size();
}
tensor_map_[op.output(i)] = std::move(tensor);
}
}
}
......
......@@ -529,7 +529,8 @@ class CaffeConverter(object):
# Add filter
weight_tensor_name = op.name + '_weight:0'
weight_data = op.data[0] # OIHW
input_shape = op.data[1].shape
input_shape = op.get_single_parent().output_shape_map[
op.layer.bottom[0]]
if input_shape[2] > 16 and input_shape[3] > 16:
G = np.array([
[1.0, 0.0, 0.0],
......@@ -1085,6 +1086,13 @@ class CaffeConverter(object):
arg.name = 'T'
arg.i = self.dt
input_op = self.ops_map[name]
if input_op.layer is not None:
output_shape = input_op.output_shape_map[input_op.layer.top[0]]
else:
output_shape = input_op.output_shape_map[input_op.name]
self.add_output_shape(op_def, output_shape)
def add_cpu_output_transform(self, names):
for name in names:
output_name = MACE_OUTPUT_NODE_NAME + '_' + name + ":0"
......@@ -1098,6 +1106,13 @@ class CaffeConverter(object):
dims_arg.name = 'dims'
dims_arg.ints.extend([0, 2, 3, 1]) # NCHW -> NHWC
input_op = self.ops_map[name]
if input_op.layer is not None:
output_shape = input_op.output_shape_map[input_op.layer.top[0]]
else:
output_shape = input_op.output_shape_map[input_op.name]
self.add_output_shape(op_def, output_shape)
def convert(self, input_nodes, input_shapes, output_nodes):
assert self.ops[0].type == 'Input'
self.add_input_op_shape(input_nodes, input_shapes)
......
......@@ -174,6 +174,8 @@ class TFConverter(object):
arg.name = 'T'
arg.i = self.dt
self.add_output_shape(self.ops[name].outputs, op_def)
def add_gpu_output_transform(self, names):
for name in names:
output_name = MACE_OUTPUT_NODE_NAME + '_' + name + ":0"
......@@ -200,6 +202,8 @@ class TFConverter(object):
dims_arg.name = 'dims'
dims_arg.ints.extend([0, 2, 3, 1])
self.add_output_shape(self.ops[name].outputs, op_def)
@staticmethod
def add_output_shape(outputs, op):
output_shapes = []
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册