From 201983c64e64a98f59041eb142806d37113cd11e Mon Sep 17 00:00:00 2001
From: liuqi <liuqi10@xiaomi.com>
Date: Wed, 21 Mar 2018 10:21:10 +0800
Subject: [PATCH] Change some OpenCL runtime API return types to uint64_t

---
 mace/core/runtime/opencl/opencl_runtime.cc    | 12 ++++++------
 mace/core/runtime/opencl/opencl_runtime.h     |  6 +++---
 mace/kernels/opencl/fully_connected_opencl.cc |  6 ++++--
 mace/kernels/opencl/helper.cc                 |  6 ++++--
 4 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/mace/core/runtime/opencl/opencl_runtime.cc b/mace/core/runtime/opencl/opencl_runtime.cc
index b39e28b8..f2f5be1b 100644
--- a/mace/core/runtime/opencl/opencl_runtime.cc
+++ b/mace/core/runtime/opencl/opencl_runtime.cc
@@ -323,21 +323,21 @@ void OpenCLRuntime::GetCallStats(const cl::Event &event, CallStats *stats) {
   }
 }
 
-uint32_t OpenCLRuntime::GetDeviceMaxWorkGroupSize() {
+uint64_t OpenCLRuntime::GetDeviceMaxWorkGroupSize() {
   uint64_t size = 0;
   device_->getInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE, &size);
-  return static_cast<uint32_t>(size);
+  return size;
 }
 
-uint32_t OpenCLRuntime::GetKernelMaxWorkGroupSize(const cl::Kernel &kernel) {
+uint64_t OpenCLRuntime::GetKernelMaxWorkGroupSize(const cl::Kernel &kernel) {
   uint64_t size = 0;
   kernel.getWorkGroupInfo(*device_, CL_KERNEL_WORK_GROUP_SIZE, &size);
-  return static_cast<uint32_t>(size);
+  return size;
 }
 
 // TODO(liuqi): not compatible with mali gpu.
-uint32_t OpenCLRuntime::GetKernelWaveSize(const cl::Kernel &kernel) {
-  uint32_t size = 0;
+uint64_t OpenCLRuntime::GetKernelWaveSize(const cl::Kernel &kernel) {
+  uint64_t size = 0;
   kernel.getWorkGroupInfo(*device_, CL_KERNEL_WAVE_SIZE_QCOM, &size);
   return size;
 }
diff --git a/mace/core/runtime/opencl/opencl_runtime.h b/mace/core/runtime/opencl/opencl_runtime.h
index 69ea4233..f5e2c25b 100644
--- a/mace/core/runtime/opencl/opencl_runtime.h
+++ b/mace/core/runtime/opencl/opencl_runtime.h
@@ -46,9 +46,9 @@ class OpenCLRuntime {
   cl::CommandQueue &command_queue();
 
   void GetCallStats(const cl::Event &event, CallStats *stats);
-  uint32_t GetDeviceMaxWorkGroupSize();
-  uint32_t GetKernelMaxWorkGroupSize(const cl::Kernel &kernel);
-  uint32_t GetKernelWaveSize(const cl::Kernel &kernel);
+  uint64_t GetDeviceMaxWorkGroupSize();
+  uint64_t GetKernelMaxWorkGroupSize(const cl::Kernel &kernel);
+  uint64_t GetKernelWaveSize(const cl::Kernel &kernel);
   cl::Kernel BuildKernel(const std::string &program_name,
                          const std::string &kernel_name,
                          const std::set<std::string> &build_options);
diff --git a/mace/kernels/opencl/fully_connected_opencl.cc b/mace/kernels/opencl/fully_connected_opencl.cc
index 772a6d8d..f4b7b222 100644
--- a/mace/kernels/opencl/fully_connected_opencl.cc
+++ b/mace/kernels/opencl/fully_connected_opencl.cc
@@ -62,11 +62,13 @@ void FCWXKernel(cl::Kernel *kernel,
     const index_t batch = output->dim(0);
     const index_t output_size = output->dim(3);
     const index_t output_blocks = RoundUpDiv4(output_size);
-    const uint32_t wave_size = runtime->GetKernelWaveSize(*kernel);
+    const uint32_t wave_size =
+        static_cast<uint32_t>(runtime->GetKernelWaveSize(*kernel));
 
     *gws = {4, (wave_size / 4), static_cast<uint32_t>(batch * output_blocks)};
 
-    const uint32_t kwg_size = runtime->GetKernelMaxWorkGroupSize(*kernel);
+    const uint32_t kwg_size =
+        static_cast<uint32_t>(runtime->GetKernelMaxWorkGroupSize(*kernel));
     const uint32_t inter_local_blks = kwg_size / ((*gws)[0] * (*gws)[1]);
     *lws = {(*gws)[0], (*gws)[1], inter_local_blks};
   }
diff --git a/mace/kernels/opencl/helper.cc b/mace/kernels/opencl/helper.cc
index e3cadbc6..ee52625a 100644
--- a/mace/kernels/opencl/helper.cc
+++ b/mace/kernels/opencl/helper.cc
@@ -201,7 +201,8 @@ void TuningOrRun3DKernel(const cl::Kernel &kernel,
                          StatsFuture *future) {
   auto runtime = OpenCLRuntime::Global();
   auto params_generator = [&]() -> std::vector<std::vector<uint32_t>> {
-    const uint32_t kwg_size = runtime->GetKernelMaxWorkGroupSize(kernel);
+    const uint32_t kwg_size =
+        static_cast<uint32_t>(runtime->GetKernelMaxWorkGroupSize(kernel));
     std::vector<uint32_t> local_ws(3, 0);
     local_ws[0] = std::min<uint32_t>(gws[0], kwg_size);
     local_ws[1] = std::min<uint32_t>(gws[1], kwg_size / local_ws[0]);
@@ -304,7 +305,8 @@ void TuningOrRun2DKernel(const cl::Kernel &kernel,
                          StatsFuture *future) {
   auto runtime = OpenCLRuntime::Global();
   auto params_generator = [&]() -> std::vector<std::vector<uint32_t>> {
-    const uint32_t kwg_size = runtime->GetKernelMaxWorkGroupSize(kernel);
+    const uint32_t kwg_size =
+        static_cast<uint32_t>(runtime->GetKernelMaxWorkGroupSize(kernel));
     uint32_t local_ws[2];
     local_ws[0] = std::min<uint32_t>(gws[0], kwg_size);
     local_ws[1] = std::min<uint32_t>(gws[1], kwg_size / local_ws[0]);
-- 
GitLab