提交 8f5a79b7，作者：Liangliang He

Merge branch 'max-pooling' into 'master'

Fix max pooling opencl bug for mali gpu.

See merge request !387
...@@ -154,6 +154,15 @@ const std::string OpenCLErrorToString(cl_int error) {
} }
} }
namespace {
// Host-side callback registered through CL_PRINTF_CALLBACK_ARM so that
// printf output emitted by OpenCL kernels (Mali GPUs) is forwarded to the
// host process's stdout.
// The |final| and |user_data| parameters are required by the ARM callback
// signature but are unused here.
void OpenCLPrintfCallback(const char *buffer,
                          size_t length,
                          size_t final,
                          void *user_data) {
  // Emit exactly |length| bytes from the callback buffer.
  fwrite(buffer, sizeof(char), length, stdout);
}
}  // namespace
void OpenCLProfilingTimer::StartTiming() {} void OpenCLProfilingTimer::StartTiming() {}
void OpenCLProfilingTimer::StopTiming() { void OpenCLProfilingTimer::StopTiming() {
...@@ -298,10 +307,21 @@ OpenCLRuntime::OpenCLRuntime(GPUPerfHint gpu_perf_hint,
context_ = std::shared_ptr<cl::Context>( context_ = std::shared_ptr<cl::Context>(
new cl::Context({*device_}, context_properties.data(), new cl::Context({*device_}, context_properties.data(),
nullptr, nullptr, &err)); nullptr, nullptr, &err));
} else {
if (is_profiling_enabled_ && gpu_type_ == GPUType::MALI) {
std::vector<cl_context_properties> context_properties = {
CL_CONTEXT_PLATFORM, (cl_context_properties)default_platform(),
CL_PRINTF_CALLBACK_ARM, (cl_context_properties)OpenCLPrintfCallback,
CL_PRINTF_BUFFERSIZE_ARM, 0x1000, 0
};
context_ = std::shared_ptr<cl::Context>(
new cl::Context({*device_}, context_properties.data(),
nullptr, nullptr, &err));
} else { } else {
context_ = std::shared_ptr<cl::Context>( context_ = std::shared_ptr<cl::Context>(
new cl::Context({*device_}, nullptr, nullptr, nullptr, &err)); new cl::Context({*device_}, nullptr, nullptr, nullptr, &err));
} }
}
MACE_CHECK_CL_SUCCESS(err); MACE_CHECK_CL_SUCCESS(err);
command_queue_ = std::make_shared<cl::CommandQueue>(*context_, command_queue_ = std::make_shared<cl::CommandQueue>(*context_,
......
#include <common.h> #include <common.h>
/* Sentinel used to initialize the max-pooling accumulator: a value low
 * enough that any real input compares greater.
 * NOTE(review): under FP16 the accumulation type is half, whose range is
 * far smaller than float's — presumably -FLT_MAX would overflow to -inf
 * or misbehave on some GPUs, hence the half-safe -USHRT_MAX bound;
 * confirm against the cl_khr_fp16 half range (max finite ~65504). */
#ifdef FP16
#define MIN_VALUE -USHRT_MAX
#else
#define MIN_VALUE -FLT_MAX
#endif
inline int calculate_avg_block_size(const int pool_size, inline int calculate_avg_block_size(const int pool_size,
const int pos_h, const int pos_h,
......
...@@ -416,7 +416,7 @@ void TestComplexConvNxNS12(const std::vector<index_t> &shape,
ImageToBuffer<D, T>(&net, "OutputImage", "OPENCLOutput", ImageToBuffer<D, T>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, ExpectTensorNear<float>(expected,
*net.GetOutput("OPENCLOutput"), 1e-5, 1e-4); *net.GetOutput("OPENCLOutput"), 1e-4, 1e-4);
}; };
for (int kernel_size : {1, 3, 7}) { for (int kernel_size : {1, 3, 7}) {
...@@ -644,7 +644,7 @@ void TestDilationConvNxN(const std::vector<index_t> &shape,
ImageToBuffer<D, T>(&net, "OutputImage", "OPENCLOutput", ImageToBuffer<D, T>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"),
1e-5, 1e-4); 1e-4, 1e-4);
}; };
for (int kernel_size : {3}) { for (int kernel_size : {3}) {
...@@ -729,7 +729,7 @@ void TestArbitraryPadConvNxN(const std::vector<index_t> &shape,
ImageToBuffer<D, T>(&net, "OutputImage", "OPENCLOutput", ImageToBuffer<D, T>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL); kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"),
1e-5, 1e-4); 1e-4, 1e-4);
}; };
for (int kernel_size : {3, 5}) { for (int kernel_size : {3, 5}) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册