From ea3ed9ae74c3e31c6a3fd56c92090b69eb5c4158 Mon Sep 17 00:00:00 2001
From: liuqi <liuqi10@xiaomi.com>
Date: Mon, 16 Apr 2018 19:08:39 +0800
Subject: [PATCH] Fix max pooling OpenCL bug for Mali GPU.

---
 mace/core/runtime/opencl/opencl_runtime.cc | 24 ++++++++++++++++++++--
 mace/kernels/opencl/cl/pooling.cl          |  4 ----
 mace/ops/conv_2d_test.cc                   |  6 +++---
 3 files changed, 25 insertions(+), 9 deletions(-)
diff --git a/mace/core/runtime/opencl/opencl_runtime.cc b/mace/core/runtime/opencl/opencl_runtime.cc
index 3f931466..b6d51f9a 100644
--- a/mace/core/runtime/opencl/opencl_runtime.cc
+++ b/mace/core/runtime/opencl/opencl_runtime.cc
@@ -154,6 +154,15 @@ const std::string OpenCLErrorToString(cl_int error) {
   }
 }
 
+namespace {
+// Printf callback installed through CL_PRINTF_CALLBACK_ARM: writes the
+// `length` bytes of kernel printf output held in `buffer` to stdout.
+void CLCallback(const char *buffer, size_t length,
+                size_t /* final */, void * /* user_data */) {
+  fwrite(buffer, 1, length, stdout);
+}
+}  // namespace
+
 void OpenCLProfilingTimer::StartTiming() {}
 
 void OpenCLProfilingTimer::StopTiming() {
@@ -299,8 +308,19 @@ OpenCLRuntime::OpenCLRuntime(GPUPerfHint gpu_perf_hint,
         new cl::Context({*device_}, context_properties.data(),
                         nullptr, nullptr, &err));
   } else {
-    context_ = std::shared_ptr<cl::Context>(
-        new cl::Context({*device_}, nullptr, nullptr, nullptr, &err));
+    if (is_profiling_enabled_ && gpu_type_ == GPUType::MALI) {
+      std::vector<cl_context_properties> context_properties = {
+          CL_CONTEXT_PLATFORM, (cl_context_properties)default_platform(),
+          CL_PRINTF_CALLBACK_ARM, (cl_context_properties)CLCallback,
+          CL_PRINTF_BUFFERSIZE_ARM, 0x1000, 0
+      };
+      context_ = std::shared_ptr<cl::Context>(
+          new cl::Context({*device_}, context_properties.data(),
+                          nullptr, nullptr, &err));
+    } else {
+      context_ = std::shared_ptr<cl::Context>(
+          new cl::Context({*device_}, nullptr, nullptr, nullptr, &err));
+    }
   }
   MACE_CHECK_CL_SUCCESS(err);
 
diff --git a/mace/kernels/opencl/cl/pooling.cl b/mace/kernels/opencl/cl/pooling.cl
index c76d055f..b0faddb0 100644
--- a/mace/kernels/opencl/cl/pooling.cl
+++ b/mace/kernels/opencl/cl/pooling.cl
@@ -1,10 +1,6 @@
 #include <common.h>
 
-#ifdef FP16
-#define MIN_VALUE -USHRT_MAX
-#else
 #define MIN_VALUE -FLT_MAX
-#endif
 
 inline int calculate_avg_block_size(const int pool_size,
                                     const int pos_h,
diff --git a/mace/ops/conv_2d_test.cc b/mace/ops/conv_2d_test.cc
index a9707ab8..e0edd2a9 100644
--- a/mace/ops/conv_2d_test.cc
+++ b/mace/ops/conv_2d_test.cc
@@ -416,7 +416,7 @@ void TestComplexConvNxNS12(const std::vector<index_t> &shape,
     ImageToBuffer<D, T>(&net, "OutputImage", "OPENCLOutput",
                         kernels::BufferType::IN_OUT_CHANNEL);
     ExpectTensorNear<float>(expected,
-                            *net.GetOutput("OPENCLOutput"), 1e-5, 1e-4);
+                            *net.GetOutput("OPENCLOutput"), 1e-4, 1e-4);
   };
 
   for (int kernel_size : {1, 3, 7}) {
@@ -644,7 +644,7 @@ void TestDilationConvNxN(const std::vector<index_t> &shape,
     ImageToBuffer<D, T>(&net, "OutputImage", "OPENCLOutput",
                         kernels::BufferType::IN_OUT_CHANNEL);
     ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"),
-                            1e-5, 1e-4);
+                            1e-4, 1e-4);
   };
 
   for (int kernel_size : {3}) {
@@ -729,7 +729,7 @@ void TestArbitraryPadConvNxN(const std::vector<index_t> &shape,
     ImageToBuffer<D, T>(&net, "OutputImage", "OPENCLOutput",
                         kernels::BufferType::IN_OUT_CHANNEL);
     ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"),
-                            1e-5, 1e-4);
+                            1e-4, 1e-4);
   };
 
   for (int kernel_size : {3, 5}) {
-- 
GitLab