提交 ea3ed9ae 编写于 作者: L liuqi

Fix max pooling OpenCL bug for Mali GPU.

上级 47bff05a
......@@ -154,6 +154,15 @@ const std::string OpenCLErrorToString(cl_int error) {
}
}
namespace {

// Printf callback installed on the OpenCL context through
// CL_PRINTF_CALLBACK_ARM. Writes `length` bytes starting at `buffer`
// to stdout unchanged.
//
//   buffer    - bytes to emit
//   length    - number of bytes of `buffer` to write
//   final     - not used by this implementation
//   user_data - not used by this implementation
void CLCallback(const char *buffer,
                size_t length,
                size_t final,
                void *user_data) {
  // Both trailing arguments are part of the callback signature only.
  (void)final;
  (void)user_data;

  fwrite(buffer, sizeof(char), length, stdout);
}

}  // namespace
// No-op: this implementation of StartTiming has an empty body.
void OpenCLProfilingTimer::StartTiming() {}
void OpenCLProfilingTimer::StopTiming() {
......@@ -298,10 +307,21 @@ OpenCLRuntime::OpenCLRuntime(GPUPerfHint gpu_perf_hint,
context_ = std::shared_ptr<cl::Context>(
new cl::Context({*device_}, context_properties.data(),
nullptr, nullptr, &err));
} else {
if (is_profiling_enabled_ && gpu_type_ == GPUType::MALI) {
std::vector<cl_context_properties> context_properties = {
CL_CONTEXT_PLATFORM, (cl_context_properties)default_platform(),
CL_PRINTF_CALLBACK_ARM, (cl_context_properties)CLCallback,
CL_PRINTF_BUFFERSIZE_ARM, 0x1000, 0
};
context_ = std::shared_ptr<cl::Context>(
new cl::Context({*device_}, context_properties.data(),
nullptr, nullptr, &err));
} else {
context_ = std::shared_ptr<cl::Context>(
new cl::Context({*device_}, nullptr, nullptr, nullptr, &err));
}
}
MACE_CHECK_CL_SUCCESS(err);
command_queue_ = std::make_shared<cl::CommandQueue>(*context_,
......
#include <common.h>
// MIN_VALUE expands to -USHRT_MAX when FP16 is defined and to -FLT_MAX
// otherwise.
// NOTE(review): presumably used as the starting value of the max-pooling
// reduction -- confirm against the kernel body, which is not visible here.
#ifdef FP16
#define MIN_VALUE -USHRT_MAX
#else
#define MIN_VALUE -FLT_MAX
#endif
inline int calculate_avg_block_size(const int pool_size,
const int pos_h,
......
......@@ -416,7 +416,7 @@ void TestComplexConvNxNS12(const std::vector<index_t> &shape,
ImageToBuffer<D, T>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected,
*net.GetOutput("OPENCLOutput"), 1e-5, 1e-4);
*net.GetOutput("OPENCLOutput"), 1e-4, 1e-4);
};
for (int kernel_size : {1, 3, 7}) {
......@@ -644,7 +644,7 @@ void TestDilationConvNxN(const std::vector<index_t> &shape,
ImageToBuffer<D, T>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"),
1e-5, 1e-4);
1e-4, 1e-4);
};
for (int kernel_size : {3}) {
......@@ -729,7 +729,7 @@ void TestArbitraryPadConvNxN(const std::vector<index_t> &shape,
ImageToBuffer<D, T>(&net, "OutputImage", "OPENCLOutput",
kernels::BufferType::IN_OUT_CHANNEL);
ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"),
1e-5, 1e-4);
1e-4, 1e-4);
};
for (int kernel_size : {3, 5}) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册