From 312239cce34fbc71679099b3526069aed599a859 Mon Sep 17 00:00:00 2001 From: Yuan Shuai Date: Tue, 3 Mar 2020 22:00:11 +0800 Subject: [PATCH] fix kernel of conv1x1, fc OOM in opencl buffer kernel. test=develop (#3062) --- .../opencl/cl_kernel/buffer/fc_kernel.cl | 8 ++------ lite/kernels/opencl/conv_buffer_compute.cc | 18 ++++++++++++++++-- .../kernels/opencl/conv_buffer_compute_test.cc | 4 ++-- 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/lite/backends/opencl/cl_kernel/buffer/fc_kernel.cl b/lite/backends/opencl/cl_kernel/buffer/fc_kernel.cl index b8dbf62c06..c24457558b 100644 --- a/lite/backends/opencl/cl_kernel/buffer/fc_kernel.cl +++ b/lite/backends/opencl/cl_kernel/buffer/fc_kernel.cl @@ -91,11 +91,7 @@ void gemm_batch_naive(__global const CL_DTYPE* a, c0 += a0 * b0; } -#ifdef RELU cur_c[row * N + col] = activation(c0); -#else - cur_c[row * N + col] = c0; -#endif } @@ -103,7 +99,7 @@ void gemm_batch_naive(__global const CL_DTYPE* a, // a: filter_d // b: x_d // c: output_d - +#if 0 // TODO(ysh239): cause CL_OUT_OF_HOST_MEMORY on some devices(such as snapdragon 855) //#define PRINT_KERNEL __kernel void gemm_batch(__global const CL_DTYPE* Aptr, @@ -213,7 +209,7 @@ void gemm_batch(__global const CL_DTYPE* Aptr, } } } - +#endif // fc_gemv_naive: keep for check // used for fc with M = 1 diff --git a/lite/kernels/opencl/conv_buffer_compute.cc b/lite/kernels/opencl/conv_buffer_compute.cc index 0fa607c938..65477e89c7 100644 --- a/lite/kernels/opencl/conv_buffer_compute.cc +++ b/lite/kernels/opencl/conv_buffer_compute.cc @@ -71,7 +71,11 @@ void ConvCompute::PrepareForRun() { if (kernel_h == 1 && kernel_w == 1 && stride_h == 1 && stride_w == 1 && zero_pad && no_dilation && pad_equal) { // conv2d_1x1 + /* TODO(ysh329): CL_OUT_OF_MEMORY when use gemm_batched OpenCL kernel, + use gemm_batched_naive instead. kernel_func_names_.push_back("gemm_batch"); + */ + kernel_func_names_.push_back("gemm_batch_naive"); kernel_func_paths_.push_back("buffer/fc_kernel.cl"); if (relu_fused) { build_options_.push_back("-DCL_DTYPE_float -DRELU"); @@ -84,7 +88,11 @@ void ConvCompute::PrepareForRun() { impl_ = &ConvCompute::Conv2d1x1; } else if (pad_equal) { kernel_func_names_.push_back("im2col"); + /* TODO(ysh329): CL_OUT_OF_MEMORY when use gemm_batched OpenCL kernel, + use gemm_batched_naive instead. kernel_func_names_.push_back("gemm_batch"); + */ + kernel_func_names_.push_back("gemm_batch_naive"); kernel_func_paths_.push_back("buffer/im2col_kernel.cl"); kernel_func_paths_.push_back("buffer/fc_kernel.cl"); build_options_.push_back("-DCL_DTYPE_float"); @@ -258,8 +266,14 @@ void ConvCompute::GemmBatched(cl::Kernel& kernel, const int m, const int n, const int k) { - auto global_work_size = cl::NDRange{static_cast((m + 7) / 8), - static_cast((n + 3) / 4), + /* TODO(ysh329): CL_OUT_OF_MEMORY when use gemm_batch OpenCL kernel, + use gemm_batch_naive instead. + auto global_work_size = cl::NDRange{static_cast((m + 7) / 8), + static_cast((n + 3) / 4), + static_cast(batch_size)}; + */ + auto global_work_size = cl::NDRange{static_cast(m), + static_cast(n), static_cast(batch_size)}; auto local_work_size = cl::NDRange{16, 16}; // cl::NullRange; diff --git a/lite/kernels/opencl/conv_buffer_compute_test.cc b/lite/kernels/opencl/conv_buffer_compute_test.cc index 4d4715fbee..2060bd1f83 100644 --- a/lite/kernels/opencl/conv_buffer_compute_test.cc +++ b/lite/kernels/opencl/conv_buffer_compute_test.cc @@ -168,7 +168,7 @@ void PrintData(std::string name, // buffer // #define PRINT_RESULT -#define LOOP_TEST +// #define LOOP_TEST TEST(conv2d, compute_conv2d_1x1) { // conv2d 1x1 note // kernel/filter size = 1x1, group = 1, pad = 0, stride = 1, dilation = 1 @@ -199,7 +199,7 @@ TEST(conv2d, compute_conv2d_1x1) { // output_dims:1 64 112 112 // filter_dims:64 32 1 1 const bool bias_flag = true; - const bool relu_flag = true; + const std::string relu_flag = "relu"; const int batch_size = 8; const int oc = 64; const int ih = 112; -- GitLab