From 9f3431568429950d51d1f809dc2cf1377f459508 Mon Sep 17 00:00:00 2001 From: xiebaiyuan Date: Tue, 9 Jun 2020 15:44:22 +0800 Subject: [PATCH] [OPENCL] use depthwise3x3d1 to replace s1 enhance hn performance ,test=develop --- .../opencl/cl_kernel/image/depthwise_conv2d_kernel.cl | 2 +- lite/kernels/opencl/conv_image_compute.cc | 8 ++++---- lite/kernels/opencl/conv_image_compute.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lite/backends/opencl/cl_kernel/image/depthwise_conv2d_kernel.cl b/lite/backends/opencl/cl_kernel/image/depthwise_conv2d_kernel.cl index 5626fe6be7..8410262af6 100755 --- a/lite/backends/opencl/cl_kernel/image/depthwise_conv2d_kernel.cl +++ b/lite/backends/opencl/cl_kernel/image/depthwise_conv2d_kernel.cl @@ -244,7 +244,7 @@ __kernel void depth_conv2d_3x3( WRITE_IMG_TYPE(CL_DTYPE_CHAR, output_image, output_pos, output); } -__kernel void depth_conv2d_3x3s1(__private const int ou_ch_blk, +__kernel void depth_conv2d_3x3d1(__private const int ou_ch_blk, __private const int ou_w_blk, __private const int ou_nh, __read_only image2d_t input, diff --git a/lite/kernels/opencl/conv_image_compute.cc b/lite/kernels/opencl/conv_image_compute.cc index fed8171cc2..d60be36322 100644 --- a/lite/kernels/opencl/conv_image_compute.cc +++ b/lite/kernels/opencl/conv_image_compute.cc @@ -149,9 +149,9 @@ void ConvImageCompute::PrepareForRun() { } else if (filter_dims[1] == 1 && x_dims[1] == output_dims[1] && kernel_h == 3 && kernel_w == 3 && groups > 1) { // depth_conv2d_3x3s1, depth_conv2d_3x3 - if (stride_h == 1 && dilations[0] == 1) { - kernel_func_names_.push_back("depth_conv2d_3x3s1"); - impl_ = &ConvImageCompute::DepthwiseConv2d3x3s1; + if (dilations[0] == 1) { + kernel_func_names_.push_back("depth_conv2d_3x3d1"); + impl_ = &ConvImageCompute::DepthwiseConv2d3x3d1; { // depthwise spl gws s1 int c_block = (output_dims[1] + 3) / 4; @@ -1462,7 +1462,7 @@ void ConvImageCompute::Conv2d7x7opt(bool is_turn) { CLRuntime::Global()->command_queue().finish(); } } -void ConvImageCompute::DepthwiseConv2d3x3s1(bool is_turn) { +void ConvImageCompute::DepthwiseConv2d3x3d1(bool is_turn) { auto& context = ctx_->As(); CHECK(context.cl_context() != nullptr); const auto& param = *param_.get_mutable(); diff --git a/lite/kernels/opencl/conv_image_compute.h b/lite/kernels/opencl/conv_image_compute.h index 64276a5721..bdb2a8d8c3 100644 --- a/lite/kernels/opencl/conv_image_compute.h +++ b/lite/kernels/opencl/conv_image_compute.h @@ -63,7 +63,7 @@ class ConvImageCompute : public KernelLite