提交 bec34bb7 编写于 作者: L liuqi

Fix memory optimization bug and add some tuning parameters.

上级 bc576436
......@@ -54,6 +54,7 @@ static void AddN(const std::vector<const Tensor *> &input_tensors,
local_ws[0] = std::min<uint32_t>(width_pixels, kwg_size);
local_ws[1] = std::min<uint32_t>(batch_height_pixels, kwg_size / local_ws[0]);
return {{local_ws[0], local_ws[1]},
+ {local_ws[1], local_ws[0]},
{kwg_size / 16, 16},
{kwg_size / 32, 32},
{kwg_size / 64, 64},
......
......@@ -67,6 +67,7 @@ void BatchNormFunctor<DeviceType::OPENCL, T>::operator()(
local_ws[2] = std::min<uint32_t>(height * batch, kwg_size / (local_ws[0] * local_ws[1]));
return {{8, 128, 1}, //SNPE size
{local_ws[0], local_ws[1], local_ws[2]},
+ {local_ws[2], local_ws[1], local_ws[0]},
{kwg_size / 16, 4, 4},
{kwg_size / 32, 4, 8},
{kwg_size / 32, 8, 4},
......
......@@ -57,6 +57,7 @@ static void Concat2(const Tensor *input0,
local_ws[2] = std::min<uint32_t>(height * batch, kwg_size / (local_ws[0] * local_ws[1]));
return {{4, 15, 8}, //SNPE size
{local_ws[0], local_ws[1], local_ws[2]},
+ {local_ws[2], local_ws[1], local_ws[0]},
{kwg_size / 16, 4, 4},
{kwg_size / 32, 4, 8},
{kwg_size / 32, 8, 4},
......
......@@ -74,6 +74,7 @@ void Conv1x1(const Tensor *input,
local_ws[2] = std::min<uint32_t>(height * batch, kwg_size / (local_ws[0] * local_ws[1]));
return {{4, 15, 8}, //SNPE size
{local_ws[0], local_ws[1], local_ws[2]},
+ {local_ws[2], local_ws[1], local_ws[0]},
{kwg_size/16, 4, 4},
{kwg_size/32, 4, 8},
{kwg_size/32, 8, 4},
......
......@@ -68,6 +68,7 @@ static void Conv2d3x3S12(const Tensor *input, const Tensor *filter,
local_ws[2] = std::min<uint32_t>(height * batch, kwg_size / (local_ws[0] * local_ws[1]));
return {{4, 15, 8}, //SNPE size
{local_ws[0], local_ws[1], local_ws[2]},
+ {local_ws[2], local_ws[1], local_ws[0]},
{kwg_size / 16, 4, 4},
{kwg_size / 32, 4, 8},
{kwg_size / 32, 8, 4},
......
......@@ -70,6 +70,7 @@ void Conv2dOpencl(const Tensor *input, const Tensor *filter,
local_ws[2] = std::min<uint32_t>(height * batch, kwg_size / (local_ws[0] * local_ws[1]));
return {{4, 15, 8}, //SNPE size
{local_ws[0], local_ws[1], local_ws[2]},
+ {local_ws[2], local_ws[1], local_ws[0]},
{kwg_size / 16, 4, 4},
{kwg_size / 32, 4, 8},
{kwg_size / 32, 8, 4},
......
......@@ -69,6 +69,7 @@ static void Pooling(const Tensor *input,
local_ws[2] = std::min<uint32_t>(out_height * batch, kwg_size / (local_ws[0] * local_ws[1]));
return {{4, 15, 8}, //SNPE size
{local_ws[0], local_ws[1], local_ws[2]},
+ {local_ws[2], local_ws[1], local_ws[0]},
{kwg_size / 16, 4, 4},
{kwg_size / 32, 4, 8},
{kwg_size / 32, 8, 4},
......
......@@ -57,6 +57,7 @@ void ReluFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
local_ws[2] = std::min<uint32_t>(height * batch, kwg_size / (local_ws[0] * local_ws[1]));
return {{4, 15, 8}, //SNPE size
{local_ws[0], local_ws[1], local_ws[2]},
+ {local_ws[2], local_ws[1], local_ws[0]},
{kwg_size / 16, 4, 4},
{kwg_size / 32, 4, 8},
{kwg_size / 32, 8, 4},
......
......@@ -66,6 +66,7 @@ void ResizeBilinearFunctor<DeviceType::OPENCL, T>::operator()(
local_ws[2] = std::min<uint32_t>(out_height * batch, kwg_size / (local_ws[0] * local_ws[1]));
return {{4, 15, 8}, //SNPE size
{local_ws[0], local_ws[1], local_ws[2]},
+ {local_ws[2], local_ws[1], local_ws[0]},
{kwg_size / 16, 4, 4},
{kwg_size / 32, 4, 8},
{kwg_size / 32, 8, 4},
......
......@@ -48,6 +48,7 @@ void SoftmaxFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *logits,
local_ws[2] = std::min<uint32_t>(height * batch, kwg_size / (local_ws[0] * local_ws[1]));
return {{4, 15, 8}, //SNPE size
{local_ws[0], local_ws[1], local_ws[2]},
+ {local_ws[2], local_ws[1], local_ws[0]},
{kwg_size / 16, 4, 4},
{kwg_size / 32, 4, 8},
{kwg_size / 32, 8, 4},
......
......@@ -23,15 +23,12 @@ class MemoryOptimizer(object):
for op in net_def.op:
if self.is_buffer_image_op(op):
continue
- tensor_name = self._op_to_tensor(op)
+ tensor_name = op.output[0]
if tensor_name in consumers:
self.ref_counter[tensor_name] = len(consumers[tensor_name])
else:
self.ref_counter[tensor_name] = 0
- def _op_to_tensor(self, op):
- return op.name + ':0'
def is_buffer_image_op(self, op):
return op.type == 'BufferToImage' or op.type == 'ImageToBuffer'
......@@ -51,7 +48,7 @@ class MemoryOptimizer(object):
print('WARNING: There is no output shape information to do memory optimization.')
return
op.mem_id = mem_id
- self.op_mem[self._op_to_tensor(op)] = mem_id
+ self.op_mem[op.output[0]] = mem_id
if mem_id not in self.mem_block:
self.mem_block[mem_id] = [0, 0]
mem_size = self.mem_block[mem_id]
......
TF_INPUT_NODE=input
- TF_OUTPUT_NODE=GCN/br_result_2/fcn_br
\ No newline at end of file
+ TF_OUTPUT_NODE=softmax/Reshape_1
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册