From 201983c64e64a98f59041eb142806d37113cd11e Mon Sep 17 00:00:00 2001 From: liuqi Date: Wed, 21 Mar 2018 10:21:10 +0800 Subject: [PATCH] Change some OpenCL runtime API return types to uint64_t --- mace/core/runtime/opencl/opencl_runtime.cc | 12 ++++++------ mace/core/runtime/opencl/opencl_runtime.h | 6 +++--- mace/kernels/opencl/fully_connected_opencl.cc | 6 ++++-- mace/kernels/opencl/helper.cc | 6 ++++-- 4 files changed, 17 insertions(+), 13 deletions(-) diff --git a/mace/core/runtime/opencl/opencl_runtime.cc b/mace/core/runtime/opencl/opencl_runtime.cc index b39e28b8..f2f5be1b 100644 --- a/mace/core/runtime/opencl/opencl_runtime.cc +++ b/mace/core/runtime/opencl/opencl_runtime.cc @@ -323,21 +323,21 @@ void OpenCLRuntime::GetCallStats(const cl::Event &event, CallStats *stats) { } } -uint32_t OpenCLRuntime::GetDeviceMaxWorkGroupSize() { +uint64_t OpenCLRuntime::GetDeviceMaxWorkGroupSize() { uint64_t size = 0; device_->getInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE, &size); - return static_cast<uint32_t>(size); + return size; } -uint32_t OpenCLRuntime::GetKernelMaxWorkGroupSize(const cl::Kernel &kernel) { +uint64_t OpenCLRuntime::GetKernelMaxWorkGroupSize(const cl::Kernel &kernel) { uint64_t size = 0; kernel.getWorkGroupInfo(*device_, CL_KERNEL_WORK_GROUP_SIZE, &size); - return static_cast<uint32_t>(size); + return size; } // TODO(liuqi): not compatible with mali gpu. 
-uint32_t OpenCLRuntime::GetKernelWaveSize(const cl::Kernel &kernel) { - uint32_t size = 0; +uint64_t OpenCLRuntime::GetKernelWaveSize(const cl::Kernel &kernel) { + uint64_t size = 0; kernel.getWorkGroupInfo(*device_, CL_KERNEL_WAVE_SIZE_QCOM, &size); return size; } diff --git a/mace/core/runtime/opencl/opencl_runtime.h b/mace/core/runtime/opencl/opencl_runtime.h index 69ea4233..f5e2c25b 100644 --- a/mace/core/runtime/opencl/opencl_runtime.h +++ b/mace/core/runtime/opencl/opencl_runtime.h @@ -46,9 +46,9 @@ class OpenCLRuntime { cl::CommandQueue &command_queue(); void GetCallStats(const cl::Event &event, CallStats *stats); - uint32_t GetDeviceMaxWorkGroupSize(); - uint32_t GetKernelMaxWorkGroupSize(const cl::Kernel &kernel); - uint32_t GetKernelWaveSize(const cl::Kernel &kernel); + uint64_t GetDeviceMaxWorkGroupSize(); + uint64_t GetKernelMaxWorkGroupSize(const cl::Kernel &kernel); + uint64_t GetKernelWaveSize(const cl::Kernel &kernel); cl::Kernel BuildKernel(const std::string &program_name, const std::string &kernel_name, const std::set<std::string> &build_options); diff --git a/mace/kernels/opencl/fully_connected_opencl.cc b/mace/kernels/opencl/fully_connected_opencl.cc index 772a6d8d..f4b7b222 100644 --- a/mace/kernels/opencl/fully_connected_opencl.cc +++ b/mace/kernels/opencl/fully_connected_opencl.cc @@ -62,11 +62,13 @@ void FCWXKernel(cl::Kernel *kernel, const index_t batch = output->dim(0); const index_t output_size = output->dim(3); const index_t output_blocks = RoundUpDiv4(output_size); - const uint32_t wave_size = runtime->GetKernelWaveSize(*kernel); + const uint32_t wave_size = + static_cast<uint32_t>(runtime->GetKernelWaveSize(*kernel)); *gws = {4, (wave_size / 4), static_cast<uint32_t>(batch * output_blocks)}; - const uint32_t kwg_size = runtime->GetKernelMaxWorkGroupSize(*kernel); + const uint32_t kwg_size = + static_cast<uint32_t>(runtime->GetKernelMaxWorkGroupSize(*kernel)); const uint32_t inter_local_blks = kwg_size / ((*gws)[0] * (*gws)[1]); *lws = {(*gws)[0], (*gws)[1], inter_local_blks}; 
} diff --git a/mace/kernels/opencl/helper.cc b/mace/kernels/opencl/helper.cc index e3cadbc6..ee52625a 100644 --- a/mace/kernels/opencl/helper.cc +++ b/mace/kernels/opencl/helper.cc @@ -201,7 +201,8 @@ void TuningOrRun3DKernel(const cl::Kernel &kernel, StatsFuture *future) { auto runtime = OpenCLRuntime::Global(); auto params_generator = [&]() -> std::vector<std::vector<uint32_t>> { - const uint32_t kwg_size = runtime->GetKernelMaxWorkGroupSize(kernel); + const uint32_t kwg_size = + static_cast<uint32_t>(runtime->GetKernelMaxWorkGroupSize(kernel)); std::vector<uint32_t> local_ws(3, 0); local_ws[0] = std::min(gws[0], kwg_size); local_ws[1] = std::min(gws[1], kwg_size / local_ws[0]); @@ -304,7 +305,8 @@ void TuningOrRun2DKernel(const cl::Kernel &kernel, StatsFuture *future) { auto runtime = OpenCLRuntime::Global(); auto params_generator = [&]() -> std::vector<std::vector<uint32_t>> { - const uint32_t kwg_size = runtime->GetKernelMaxWorkGroupSize(kernel); + const uint32_t kwg_size = + static_cast<uint32_t>(runtime->GetKernelMaxWorkGroupSize(kernel)); uint32_t local_ws[2]; local_ws[0] = std::min(gws[0], kwg_size); local_ws[1] = std::min(gws[1], kwg_size / local_ws[0]); -- GitLab