diff --git a/mace/kernels/opencl/addn.cc b/mace/kernels/opencl/addn.cc index 514f0d2adc8f790074de789f34b12a7f8baef131..42359f45e9b3cd4eded47aa4ef15efe75bccaf79 100644 --- a/mace/kernels/opencl/addn.cc +++ b/mace/kernels/opencl/addn.cc @@ -54,6 +54,7 @@ static void AddN(const std::vector &input_tensors, local_ws[0] = std::min(width_pixels, kwg_size); local_ws[1] = std::min(batch_height_pixels, kwg_size / local_ws[0]); return {{local_ws[0], local_ws[1]}, + {local_ws[1], local_ws[0]}, {kwg_size / 16, 16}, {kwg_size / 32, 32}, {kwg_size / 64, 64}, diff --git a/mace/kernels/opencl/batch_norm_opencl.cc b/mace/kernels/opencl/batch_norm_opencl.cc index c3dc8445f80e7b51b6956d7d789c5a46e4052dc7..513d73665f2d16e45393464b7faa7765a73763a6 100644 --- a/mace/kernels/opencl/batch_norm_opencl.cc +++ b/mace/kernels/opencl/batch_norm_opencl.cc @@ -67,6 +67,7 @@ void BatchNormFunctor::operator()( local_ws[2] = std::min(height * batch, kwg_size / (local_ws[0] * local_ws[1])); return {{8, 128, 1}, //SNPE size {local_ws[0], local_ws[1], local_ws[2]}, + {local_ws[2], local_ws[1], local_ws[0]}, {kwg_size / 16, 4, 4}, {kwg_size / 32, 4, 8}, {kwg_size / 32, 8, 4}, diff --git a/mace/kernels/opencl/concat.cc b/mace/kernels/opencl/concat.cc index bea536b69bd1191c410abaa348b2cf4ca4108c0b..921b34ce0350aac8647986584d3a8f68bceb248c 100644 --- a/mace/kernels/opencl/concat.cc +++ b/mace/kernels/opencl/concat.cc @@ -57,6 +57,7 @@ static void Concat2(const Tensor *input0, local_ws[2] = std::min(height * batch, kwg_size / (local_ws[0] * local_ws[1])); return {{4, 15, 8}, //SNPE size {local_ws[0], local_ws[1], local_ws[2]}, + {local_ws[2], local_ws[1], local_ws[0]}, {kwg_size / 16, 4, 4}, {kwg_size / 32, 4, 8}, {kwg_size / 32, 8, 4}, diff --git a/mace/kernels/opencl/conv_2d_opencl_1x1.cc b/mace/kernels/opencl/conv_2d_opencl_1x1.cc index d56926a85357978f1857768b55362bc44ed1a5c3..49eea13d6420a066fd7fb3d8bb2cf5ba5fc7a348 100644 --- a/mace/kernels/opencl/conv_2d_opencl_1x1.cc +++ b/mace/kernels/opencl/conv_2d_opencl_1x1.cc @@ -74,6 +74,7 @@ void Conv1x1(const Tensor *input, local_ws[2] = std::min(height * batch, kwg_size / (local_ws[0] * local_ws[1])); return {{4, 15, 8}, //SNPE size {local_ws[0], local_ws[1], local_ws[2]}, + {local_ws[2], local_ws[1], local_ws[0]}, {kwg_size/16, 4, 4}, {kwg_size/32, 4, 8}, {kwg_size/32, 8, 4}, diff --git a/mace/kernels/opencl/conv_2d_opencl_3x3.cc b/mace/kernels/opencl/conv_2d_opencl_3x3.cc index f48c7fc3450928a3245da17e09fa20e3aac9c326..d108dea19f22ebdeda897c750b693792d6943d73 100644 --- a/mace/kernels/opencl/conv_2d_opencl_3x3.cc +++ b/mace/kernels/opencl/conv_2d_opencl_3x3.cc @@ -68,6 +68,7 @@ static void Conv2d3x3S12(const Tensor *input, const Tensor *filter, local_ws[2] = std::min(height * batch, kwg_size / (local_ws[0] * local_ws[1])); return {{4, 15, 8}, //SNPE size {local_ws[0], local_ws[1], local_ws[2]}, + {local_ws[2], local_ws[1], local_ws[0]}, {kwg_size / 16, 4, 4}, {kwg_size / 32, 4, 8}, {kwg_size / 32, 8, 4}, diff --git a/mace/kernels/opencl/conv_2d_opencl_general.cc b/mace/kernels/opencl/conv_2d_opencl_general.cc index 2bd897f0a7eaf9203466016e9308d344fbe2889e..89026e83b489a27bf894496adcec907e2597cb5a 100644 --- a/mace/kernels/opencl/conv_2d_opencl_general.cc +++ b/mace/kernels/opencl/conv_2d_opencl_general.cc @@ -70,6 +70,7 @@ void Conv2dOpencl(const Tensor *input, const Tensor *filter, local_ws[2] = std::min(height * batch, kwg_size / (local_ws[0] * local_ws[1])); return {{4, 15, 8}, //SNPE size {local_ws[0], local_ws[1], local_ws[2]}, + {local_ws[2], local_ws[1], local_ws[0]}, {kwg_size / 16, 4, 4}, {kwg_size / 32, 4, 8}, {kwg_size / 32, 8, 4}, diff --git a/mace/kernels/opencl/pooling_opencl.cc b/mace/kernels/opencl/pooling_opencl.cc index 991dcac36dd3bd4c6ba24cb899d23e567f1f7bf8..6835af69df39236ce545c743410cc9fcf81a0258 100644 --- a/mace/kernels/opencl/pooling_opencl.cc +++ b/mace/kernels/opencl/pooling_opencl.cc @@ -69,6 +69,7 @@ static void Pooling(const Tensor *input, local_ws[2] = std::min(out_height * batch, kwg_size / (local_ws[0] * local_ws[1])); return {{4, 15, 8}, //SNPE size {local_ws[0], local_ws[1], local_ws[2]}, + {local_ws[2], local_ws[1], local_ws[0]}, {kwg_size / 16, 4, 4}, {kwg_size / 32, 4, 8}, {kwg_size / 32, 8, 4}, diff --git a/mace/kernels/opencl/relu_opencl.cc b/mace/kernels/opencl/relu_opencl.cc index 7561b1fa5a59d9b5f351f0f5c1a4403d401b8c1c..831197f132d0afd8dd754c734ec881850d7c1eb7 100644 --- a/mace/kernels/opencl/relu_opencl.cc +++ b/mace/kernels/opencl/relu_opencl.cc @@ -57,6 +57,7 @@ void ReluFunctor::operator()(const Tensor *input, local_ws[2] = std::min(height * batch, kwg_size / (local_ws[0] * local_ws[1])); return {{4, 15, 8}, //SNPE size {local_ws[0], local_ws[1], local_ws[2]}, + {local_ws[2], local_ws[1], local_ws[0]}, {kwg_size / 16, 4, 4}, {kwg_size / 32, 4, 8}, {kwg_size / 32, 8, 4}, diff --git a/mace/kernels/opencl/resize_bilinear_opencl.cc b/mace/kernels/opencl/resize_bilinear_opencl.cc index 588d83c603426a9e23f9e050bdd6568924e28d6f..7d3af2233b4ae70044c687187434711072544531 100644 --- a/mace/kernels/opencl/resize_bilinear_opencl.cc +++ b/mace/kernels/opencl/resize_bilinear_opencl.cc @@ -66,6 +66,7 @@ void ResizeBilinearFunctor::operator()( local_ws[2] = std::min(out_height * batch, kwg_size / (local_ws[0] * local_ws[1])); return {{4, 15, 8}, //SNPE size {local_ws[0], local_ws[1], local_ws[2]}, + {local_ws[2], local_ws[1], local_ws[0]}, {kwg_size / 16, 4, 4}, {kwg_size / 32, 4, 8}, {kwg_size / 32, 8, 4}, diff --git a/mace/kernels/opencl/softmax_opencl.cc b/mace/kernels/opencl/softmax_opencl.cc index 147e53d54af4b155917b1b8bd065e7766bc324f0..407de210abde1bfd38c3bde06d9c294a0d98f587 100644 --- a/mace/kernels/opencl/softmax_opencl.cc +++ b/mace/kernels/opencl/softmax_opencl.cc @@ -48,6 +48,7 @@ void SoftmaxFunctor::operator()(const Tensor *logits, local_ws[2] = std::min(height * batch, kwg_size / (local_ws[0] * local_ws[1])); return {{4, 15, 8}, //SNPE size {local_ws[0], local_ws[1], local_ws[2]}, + {local_ws[2], local_ws[1], local_ws[0]}, {kwg_size / 16, 4, 4}, {kwg_size / 32, 4, 8}, {kwg_size / 32, 8, 4}, diff --git a/mace/python/tools/memory_optimizer.py b/mace/python/tools/memory_optimizer.py index ac507145dc08889682acf214bbffa4d4e0e0b546..109ec4c389eb51adc9771c5f81beba804d7f8699 100644 --- a/mace/python/tools/memory_optimizer.py +++ b/mace/python/tools/memory_optimizer.py @@ -23,15 +23,12 @@ class MemoryOptimizer(object): for op in net_def.op: if self.is_buffer_image_op(op): continue - tensor_name = self._op_to_tensor(op) + tensor_name = op.output[0] if tensor_name in consumers: self.ref_counter[tensor_name] = len(consumers[tensor_name]) else: self.ref_counter[tensor_name] = 0 - def _op_to_tensor(self, op): - return op.name + ':0' - def is_buffer_image_op(self, op): return op.type == 'BufferToImage' or op.type == 'ImageToBuffer' @@ -51,7 +48,7 @@ class MemoryOptimizer(object): print('WARNING: There is no output shape information to do memory optimization.') return op.mem_id = mem_id - self.op_mem[self._op_to_tensor(op)] = mem_id + self.op_mem[op.output[0]] = mem_id if mem_id not in self.mem_block: self.mem_block[mem_id] = [0, 0] mem_size = self.mem_block[mem_id] diff --git a/tools/gcn.config b/tools/gcn.config index 304d7a2931ee288619cb08d99193828d2cd2cc9a..85ea36b3d706f4bc7518bee5c5e484d88dda9566 100644 --- a/tools/gcn.config +++ b/tools/gcn.config @@ -1,2 +1,2 @@ TF_INPUT_NODE=input -TF_OUTPUT_NODE=GCN/br_result_2/fcn_br \ No newline at end of file +TF_OUTPUT_NODE=softmax/Reshape_1 \ No newline at end of file