From b55b537bfc70ca3f3b213131ae796ddf94eb5ae3 Mon Sep 17 00:00:00 2001 From: liuqi Date: Mon, 16 Apr 2018 16:23:44 +0800 Subject: [PATCH] Fix depth_to_space opencl global work size bug. --- mace/kernels/depth_to_space.h | 4 +-- mace/kernels/opencl/depth_to_space_opencl.cc | 33 ++++++++++---------- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/mace/kernels/depth_to_space.h b/mace/kernels/depth_to_space.h index ab557c91..76f3c80e 100644 --- a/mace/kernels/depth_to_space.h +++ b/mace/kernels/depth_to_space.h @@ -108,11 +108,11 @@ struct DepthToSpaceOpFunctor { : block_size_(block_size), d2s_(d2s) {} void operator()(const Tensor *input, Tensor *output, StatsFuture *future); + const int block_size_; + bool d2s_; cl::Kernel kernel_; uint32_t kwg_size_; std::unique_ptr kernel_error_; - const int block_size_; - bool d2s_; std::vector input_shape_; }; diff --git a/mace/kernels/opencl/depth_to_space_opencl.cc b/mace/kernels/opencl/depth_to_space_opencl.cc index d2ea128a..bd7e723d 100644 --- a/mace/kernels/opencl/depth_to_space_opencl.cc +++ b/mace/kernels/opencl/depth_to_space_opencl.cc @@ -22,16 +22,31 @@ void DepthToSpaceOpFunctor::operator()( const char *kernel_name = nullptr; + uint32_t gws[3]; + std::stringstream ss; index_t output_height, output_width, output_depth; - if (d2s_) { output_height = input_height * block_size_; + if (d2s_) { + output_height = input_height * block_size_; output_width = input_width * block_size_; output_depth = input_depth / (block_size_ * block_size_); kernel_name = "depth_to_space"; + + gws[0] = static_cast(RoundUpDiv4(output_depth)); + gws[1] = static_cast(output_width); + gws[2] = static_cast(output_height * batch); + ss << "depth_to_space_opencl_kernel_" << batch << "_" + << output_height << "_" << output_width << "_" << output_depth; } else { output_height = input_height / block_size_; output_width = input_width / block_size_; output_depth = input_depth * block_size_ * block_size_; kernel_name = "space_to_depth"; + + gws[0] = static_cast(RoundUpDiv4(input_depth)); + gws[1] = static_cast(input_width); + gws[2] = static_cast(input_height * batch); + ss << "space_to_depth_opencl_kernel_" << input->dim(0) << "_" + << input->dim(1) << "_" << input->dim(2) << "_" << input->dim(3); } const index_t input_depth_blocks = RoundUpDiv4(input_depth); const index_t output_depth_blocks = RoundUpDiv4(output_depth); @@ -73,23 +88,7 @@ void DepthToSpaceOpFunctor::operator()( static_cast(runtime->GetKernelMaxWorkGroupSize(kernel_)); } - uint32_t gws[3]; - std::stringstream ss; if (!IsVecEqual(input_shape_, input->shape())) { - if (d2s_) { - gws[0] = static_cast(output_depth_blocks); - gws[1] = static_cast(output_width); - gws[2] = static_cast(output_height * batch); - ss << "depth_to_space_opencl_kernel_" << output->dim(0) << "_" - << output->dim(1) << "_" << output->dim(2) << "_" << output->dim(3); - } else { - gws[0] = static_cast(input_depth_blocks); - gws[1] = static_cast(input_width); - gws[2] = static_cast(input_height * batch); - ss << "space_to_depth_opencl_kernel_" << input->dim(0) << "_" - << input->dim(1) << "_" << input->dim(2) << "_" << input->dim(3); - } - uint32_t idx = 0; if (runtime->IsOutOfRangeCheckEnabled()) { kernel_.setArg(idx++, -- GitLab