From 5f19524cbf269d61146f6f7fa5a85662a83a67b9 Mon Sep 17 00:00:00 2001 From: NazgulLee Date: Tue, 10 Sep 2019 12:30:25 +0800 Subject: [PATCH] optimize instancenorm local workgroup computation. test=develop (#2001) --- .../kernel/cl/instancenorm_kernel.cpp | 18 +++--------------- .../kernel/cl/instancenorm_relu_kernel.cpp | 18 +++--------------- 2 files changed, 6 insertions(+), 30 deletions(-) diff --git a/mobile/src/operators/kernel/cl/instancenorm_kernel.cpp b/mobile/src/operators/kernel/cl/instancenorm_kernel.cpp index a8307d05d5..ffd0e6bc92 100644 --- a/mobile/src/operators/kernel/cl/instancenorm_kernel.cpp +++ b/mobile/src/operators/kernel/cl/instancenorm_kernel.cpp @@ -49,22 +49,10 @@ void InstanceNormKernel::Compute( DLOG << local_work_size_info.max_work_item_size1; DLOG << local_work_size_info.max_work_item_size2; - const int max_work_group_size = - std::min(256, static_cast<int>(local_work_size_info.max_work_group_size)); - int local_work_size1 = 1; + int local_work_size1 = + std::min(static_cast<int>(local_work_size_info.max_work_item_size1), + std::min(256, w)); int local_work_size2 = 1; - for (int i = 1; i <= local_work_size_info.max_work_item_size1 && i <= w; - i++) { - for (int j = 1; j <= local_work_size_info.max_work_item_size2 && j <= h; - j++) { - if (i * j <= max_work_group_size) { - if (i * j > local_work_size1 * local_work_size2) { - local_work_size1 = i; - local_work_size2 = j; - } - } - } - } const size_t work_size[3] = {(size_t)(n * c_group), (size_t)local_work_size1, (size_t)local_work_size2}; const size_t local_work_size[3] = {(size_t)1, (size_t)local_work_size1, diff --git a/mobile/src/operators/kernel/cl/instancenorm_relu_kernel.cpp b/mobile/src/operators/kernel/cl/instancenorm_relu_kernel.cpp index 47ab258876..eb4c13fde1 100644 --- a/mobile/src/operators/kernel/cl/instancenorm_relu_kernel.cpp +++ b/mobile/src/operators/kernel/cl/instancenorm_relu_kernel.cpp @@ -52,22 +52,10 @@ void InstanceNormReluKernel::Compute( DLOG << 
local_work_size_info.max_work_item_size1; DLOG << local_work_size_info.max_work_item_size2; - const int max_work_group_size = - std::min(256, static_cast<int>(local_work_size_info.max_work_group_size)); - int local_work_size1 = 1; + int local_work_size1 = + std::min(static_cast<int>(local_work_size_info.max_work_item_size1), + std::min(256, w)); int local_work_size2 = 1; - for (int i = 1; i <= local_work_size_info.max_work_item_size1 && i <= w; - i++) { - for (int j = 1; j <= local_work_size_info.max_work_item_size2 && j <= h; - j++) { - if (i * j <= max_work_group_size) { - if (i * j > local_work_size1 * local_work_size2) { - local_work_size1 = i; - local_work_size2 = j; - } - } - } - } const size_t work_size[3] = {(size_t)(n * c_group), (size_t)local_work_size1, (size_t)local_work_size2}; const size_t local_work_size[3] = {(size_t)1, (size_t)local_work_size1, -- GitLab