From ea3ed9ae74c3e31c6a3fd56c92090b69eb5c4158 Mon Sep 17 00:00:00 2001 From: liuqi Date: Mon, 16 Apr 2018 19:08:39 +0800 Subject: [PATCH] Fix max pooling opencl bug for mali gpu. --- mace/core/runtime/opencl/opencl_runtime.cc | 24 ++++++++++++++++++++-- mace/kernels/opencl/cl/pooling.cl | 4 ---- mace/ops/conv_2d_test.cc | 6 +++--- 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/mace/core/runtime/opencl/opencl_runtime.cc b/mace/core/runtime/opencl/opencl_runtime.cc index 3f931466..b6d51f9a 100644 --- a/mace/core/runtime/opencl/opencl_runtime.cc +++ b/mace/core/runtime/opencl/opencl_runtime.cc @@ -154,6 +154,15 @@ const std::string OpenCLErrorToString(cl_int error) { } } +namespace { +void CLCallback(const char *buffer, + size_t length, + size_t final, + void *user_data) { + fwrite(buffer, 1, length, stdout); +} +} + void OpenCLProfilingTimer::StartTiming() {} void OpenCLProfilingTimer::StopTiming() { @@ -299,8 +308,19 @@ OpenCLRuntime::OpenCLRuntime(GPUPerfHint gpu_perf_hint, new cl::Context({*device_}, context_properties.data(), nullptr, nullptr, &err)); } else { - context_ = std::shared_ptr( - new cl::Context({*device_}, nullptr, nullptr, nullptr, &err)); + if (is_profiling_enabled_ && gpu_type_ == GPUType::MALI) { + std::vector context_properties = { + CL_CONTEXT_PLATFORM, (cl_context_properties)default_platform(), + CL_PRINTF_CALLBACK_ARM, (cl_context_properties)CLCallback, + CL_PRINTF_BUFFERSIZE_ARM, 0x1000, 0 + }; + context_ = std::shared_ptr( + new cl::Context({*device_}, context_properties.data(), + nullptr, nullptr, &err)); + } else { + context_ = std::shared_ptr( + new cl::Context({*device_}, nullptr, nullptr, nullptr, &err)); + } } MACE_CHECK_CL_SUCCESS(err); diff --git a/mace/kernels/opencl/cl/pooling.cl b/mace/kernels/opencl/cl/pooling.cl index c76d055f..b0faddb0 100644 --- a/mace/kernels/opencl/cl/pooling.cl +++ b/mace/kernels/opencl/cl/pooling.cl @@ -1,10 +1,6 @@ #include -#ifdef FP16 -#define MIN_VALUE -USHRT_MAX -#else #define MIN_VALUE -FLT_MAX -#endif inline int calculate_avg_block_size(const int pool_size, const int pos_h, diff --git a/mace/ops/conv_2d_test.cc b/mace/ops/conv_2d_test.cc index a9707ab8..e0edd2a9 100644 --- a/mace/ops/conv_2d_test.cc +++ b/mace/ops/conv_2d_test.cc @@ -416,7 +416,7 @@ void TestComplexConvNxNS12(const std::vector &shape, ImageToBuffer(&net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(expected, - *net.GetOutput("OPENCLOutput"), 1e-5, 1e-4); + *net.GetOutput("OPENCLOutput"), 1e-4, 1e-4); }; for (int kernel_size : {1, 3, 7}) { @@ -644,7 +644,7 @@ void TestDilationConvNxN(const std::vector &shape, ImageToBuffer(&net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), - 1e-5, 1e-4); + 1e-4, 1e-4); }; for (int kernel_size : {3}) { @@ -729,7 +729,7 @@ void TestArbitraryPadConvNxN(const std::vector &shape, ImageToBuffer(&net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), - 1e-5, 1e-4); + 1e-4, 1e-4); }; for (int kernel_size : {3, 5}) { -- GitLab