Add more strategies for the default OpenCL local work size (lws) of convolution kernels.

1794dae4 · liuqi · cf5cae14 · 1794dae4 · 1794dae4 · 1794dae4
7 changed files
--- a/mace/kernels/opencl/concat.cc
+++ b/mace/kernels/opencl/concat.cc
--- a/mace/kernels/opencl/conv_2d_1x1.cc
+++ b/mace/kernels/opencl/conv_2d_1x1.cc
@@ -23,6 +23,8 @@ namespace kernels {
 namespace {
 // (inputs + weights + outputs) * array_size * sizeof(float)
 const uint32_t kernel_cache_size = (4 + 4 + 4) * 4 * 4;
+// TODO(liuqi): Fix the specific value.
+const uint32_t lws_limit = 128;
 std::vector<uint32_t> LocalWS(const uint32_t *gws,
                              const uint32_t kwg_size) {
  std::vector<uint32_t> lws(4, 0);
@@ -33,6 +35,8 @@ std::vector<uint32_t> LocalWS(const uint32_t *gws,
  lws[1] = std::min<uint32_t>(gws[1], kwg_size);
  if (lws[1] >= base) {
    lws[0] = std::min<uint32_t>(gws[0], base);
+  } else if ((1 < lws[1] && lws[1] < base) && gws[0] >= lws_limit) {
+    lws[0] = std::min<uint32_t>(gws[0], base);
  } else {
    lws[0] = gws[0] / 8;
    if (lws[0] < base) {

--- a/mace/kernels/opencl/conv_2d_3x3.cc
+++ b/mace/kernels/opencl/conv_2d_3x3.cc
@@ -21,7 +21,6 @@
 namespace mace {
 namespace kernels {
 namespace {
 // (inputs + weights + outputs) * array_size * sizeof(float)
 const uint32_t kernel_cache_size = (5 + 4 + 5) * 4 * 4;
@@ -157,7 +156,7 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel,
    *prev_input_shape = input->shape();
  }
-  const std::vector<uint32_t> lws = LocalWS(gws, *kwg_size);
+  std::vector<uint32_t> lws = LocalWS(gws, *kwg_size);
  std::string tuning_key =
      Concat("conv2d_3x3_opencl_kernel", output->dim(0),
             output->dim(1), output->dim(2), output->dim(3));

--- a/mace/kernels/opencl/conv_2d_general.cc
+++ b/mace/kernels/opencl/conv_2d_general.cc
--- a/mace/kernels/opencl/depthwise_conv.cc
+++ b/mace/kernels/opencl/depthwise_conv.cc
--- a/mace/kernels/opencl/helper.cc
+++ b/mace/kernels/opencl/helper.cc
@@ -215,7 +215,6 @@ std::vector<uint32_t> Default2DLocalWS(const uint32_t *gws,
  lws[0] = std::min<uint32_t>(base, kwg_size);
  lws[1] = kwg_size / lws[1];
  return lws;
 }
 std::vector<uint32_t> Default3DLocalWS(const uint32_t *gws,

--- a/mace/kernels/opencl/helper.h
+++ b/mace/kernels/opencl/helper.h
@@ -118,7 +118,6 @@ std::vector<uint32_t> Default2DLocalWS(const uint32_t *gws,
                                       const uint32_t kwg_size);
 std::vector<uint32_t> Default3DLocalWS(const uint32_t *gws,
                                       const uint32_t kwg_size);
 }  // namespace kernels
 }  // namespace mace
 #endif  // MACE_KERNELS_OPENCL_HELPER_H_