提交 7efde524 编写于 作者: 刘琦

Merge branch 'improve-opencl-errorcode' into 'master'

Improve opencl error code

See merge request !340
...@@ -40,7 +40,7 @@ void *OpenCLAllocator::New(size_t nbytes) const { ...@@ -40,7 +40,7 @@ void *OpenCLAllocator::New(size_t nbytes) const {
cl::Buffer *buffer = new cl::Buffer(OpenCLRuntime::Global()->context(), cl::Buffer *buffer = new cl::Buffer(OpenCLRuntime::Global()->context(),
CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
nbytes, nullptr, &error); nbytes, nullptr, &error);
MACE_CHECK(error == CL_SUCCESS) << "error code: " << error; MACE_CHECK_CL_SUCCESS(error);
return static_cast<void *>(buffer); return static_cast<void *>(buffer);
} }
...@@ -57,7 +57,7 @@ void *OpenCLAllocator::NewImage(const std::vector<size_t> &image_shape, ...@@ -57,7 +57,7 @@ void *OpenCLAllocator::NewImage(const std::vector<size_t> &image_shape,
new cl::Image2D(OpenCLRuntime::Global()->context(), new cl::Image2D(OpenCLRuntime::Global()->context(),
CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, img_format, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, img_format,
image_shape[0], image_shape[1], 0, nullptr, &error); image_shape[0], image_shape[1], 0, nullptr, &error);
MACE_CHECK(error == CL_SUCCESS) << error << " with image shape: [" MACE_CHECK_CL_SUCCESS(error) << " with image shape: ["
<< image_shape[0] << ", " << image_shape[1] << image_shape[0] << ", " << image_shape[1]
<< "]"; << "]";
...@@ -88,7 +88,7 @@ void *OpenCLAllocator::Map(void *buffer, size_t offset, size_t nbytes) const { ...@@ -88,7 +88,7 @@ void *OpenCLAllocator::Map(void *buffer, size_t offset, size_t nbytes) const {
void *mapped_ptr = void *mapped_ptr =
queue.enqueueMapBuffer(*cl_buffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, queue.enqueueMapBuffer(*cl_buffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
offset, nbytes, nullptr, nullptr, &error); offset, nbytes, nullptr, nullptr, &error);
MACE_CHECK(error == CL_SUCCESS); MACE_CHECK_CL_SUCCESS(error);
return mapped_ptr; return mapped_ptr;
} }
...@@ -107,7 +107,7 @@ void *OpenCLAllocator::MapImage(void *buffer, ...@@ -107,7 +107,7 @@ void *OpenCLAllocator::MapImage(void *buffer,
*cl_image, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, origin, region, *cl_image, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, origin, region,
mapped_image_pitch->data(), mapped_image_pitch->data() + 1, nullptr, mapped_image_pitch->data(), mapped_image_pitch->data() + 1, nullptr,
nullptr, &error); nullptr, &error);
MACE_CHECK(error == CL_SUCCESS) << error; MACE_CHECK_CL_SUCCESS(error);
return mapped_ptr; return mapped_ptr;
} }
...@@ -115,8 +115,9 @@ void *OpenCLAllocator::MapImage(void *buffer, ...@@ -115,8 +115,9 @@ void *OpenCLAllocator::MapImage(void *buffer,
void OpenCLAllocator::Unmap(void *buffer, void *mapped_ptr) const { void OpenCLAllocator::Unmap(void *buffer, void *mapped_ptr) const {
auto cl_buffer = static_cast<cl::Buffer *>(buffer); auto cl_buffer = static_cast<cl::Buffer *>(buffer);
auto queue = OpenCLRuntime::Global()->command_queue(); auto queue = OpenCLRuntime::Global()->command_queue();
MACE_CHECK(queue.enqueueUnmapMemObject(*cl_buffer, mapped_ptr, nullptr, cl_int error = queue.enqueueUnmapMemObject(*cl_buffer, mapped_ptr,
nullptr) == CL_SUCCESS); nullptr, nullptr);
MACE_CHECK_CL_SUCCESS(error);
} }
bool OpenCLAllocator::OnHost() const { return false; } bool OpenCLAllocator::OnHost() const { return false; }
......
...@@ -40,6 +40,135 @@ bool WriteFile(const std::string &filename, ...@@ -40,6 +40,135 @@ bool WriteFile(const std::string &filename,
} // namespace } // namespace
// Emits one switch arm that returns the status code's own identifier as text.
// The stringizing operator is applied to the unexpanded macro argument, so the
// returned text is the symbolic name rather than its numeric value.
#define MACE_CL_ERROR_NAME_CASE(code) \
  case code:                          \
    return #code

// Converts an OpenCL status code into a printable string.
//
// Every status code listed below maps to its symbolic name (for example
// CL_OUT_OF_RESOURCES yields "CL_OUT_OF_RESOURCES"). Any other value is
// rendered through MakeString as "UNKNOWN: " followed by the numeric code.
const std::string OpenCLErrorToString(cl_int error) {
  switch (error) {
    // Success and runtime / compile-time failures.
    MACE_CL_ERROR_NAME_CASE(CL_SUCCESS);
    MACE_CL_ERROR_NAME_CASE(CL_DEVICE_NOT_FOUND);
    MACE_CL_ERROR_NAME_CASE(CL_DEVICE_NOT_AVAILABLE);
    MACE_CL_ERROR_NAME_CASE(CL_COMPILER_NOT_AVAILABLE);
    MACE_CL_ERROR_NAME_CASE(CL_MEM_OBJECT_ALLOCATION_FAILURE);
    MACE_CL_ERROR_NAME_CASE(CL_OUT_OF_RESOURCES);
    MACE_CL_ERROR_NAME_CASE(CL_OUT_OF_HOST_MEMORY);
    MACE_CL_ERROR_NAME_CASE(CL_PROFILING_INFO_NOT_AVAILABLE);
    MACE_CL_ERROR_NAME_CASE(CL_MEM_COPY_OVERLAP);
    MACE_CL_ERROR_NAME_CASE(CL_IMAGE_FORMAT_MISMATCH);
    MACE_CL_ERROR_NAME_CASE(CL_IMAGE_FORMAT_NOT_SUPPORTED);
    MACE_CL_ERROR_NAME_CASE(CL_BUILD_PROGRAM_FAILURE);
    MACE_CL_ERROR_NAME_CASE(CL_MAP_FAILURE);
    MACE_CL_ERROR_NAME_CASE(CL_MISALIGNED_SUB_BUFFER_OFFSET);
    MACE_CL_ERROR_NAME_CASE(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST);
    MACE_CL_ERROR_NAME_CASE(CL_COMPILE_PROGRAM_FAILURE);
    MACE_CL_ERROR_NAME_CASE(CL_LINKER_NOT_AVAILABLE);
    MACE_CL_ERROR_NAME_CASE(CL_LINK_PROGRAM_FAILURE);
    MACE_CL_ERROR_NAME_CASE(CL_DEVICE_PARTITION_FAILED);
    MACE_CL_ERROR_NAME_CASE(CL_KERNEL_ARG_INFO_NOT_AVAILABLE);

    // Invalid-argument style errors.
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_VALUE);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_DEVICE_TYPE);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_PLATFORM);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_DEVICE);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_CONTEXT);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_QUEUE_PROPERTIES);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_COMMAND_QUEUE);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_HOST_PTR);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_MEM_OBJECT);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_IMAGE_SIZE);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_SAMPLER);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_BINARY);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_BUILD_OPTIONS);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_PROGRAM);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_PROGRAM_EXECUTABLE);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_KERNEL_NAME);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_KERNEL_DEFINITION);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_KERNEL);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_ARG_INDEX);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_ARG_VALUE);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_ARG_SIZE);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_KERNEL_ARGS);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_WORK_DIMENSION);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_WORK_GROUP_SIZE);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_WORK_ITEM_SIZE);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_GLOBAL_OFFSET);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_EVENT_WAIT_LIST);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_EVENT);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_OPERATION);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_GL_OBJECT);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_BUFFER_SIZE);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_MIP_LEVEL);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_GLOBAL_WORK_SIZE);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_PROPERTY);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_IMAGE_DESCRIPTOR);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_COMPILER_OPTIONS);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_LINKER_OPTIONS);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_DEVICE_PARTITION_COUNT);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_PIPE_SIZE);
    MACE_CL_ERROR_NAME_CASE(CL_INVALID_DEVICE_QUEUE);

    // Anything not listed above: report the raw numeric value.
    default:
      return MakeString("UNKNOWN: ", error);
  }
}

// The helper is only meaningful inside the switch above; keep it local.
#undef MACE_CL_ERROR_NAME_CASE
void OpenCLProfilingTimer::StartTiming() {} void OpenCLProfilingTimer::StartTiming() {}
void OpenCLProfilingTimer::StopTiming() { void OpenCLProfilingTimer::StopTiming() {
...@@ -183,13 +312,13 @@ OpenCLRuntime::OpenCLRuntime(GPUPerfHint gpu_perf_hint, ...@@ -183,13 +312,13 @@ OpenCLRuntime::OpenCLRuntime(GPUPerfHint gpu_perf_hint,
context_ = std::shared_ptr<cl::Context>( context_ = std::shared_ptr<cl::Context>(
new cl::Context({*device_}, nullptr, nullptr, nullptr, &err)); new cl::Context({*device_}, nullptr, nullptr, nullptr, &err));
} }
MACE_CHECK(err == CL_SUCCESS) << "error code: " << err; MACE_CHECK_CL_SUCCESS(err);
command_queue_ = std::make_shared<cl::CommandQueue>(*context_, command_queue_ = std::make_shared<cl::CommandQueue>(*context_,
*device_, *device_,
properties, properties,
&err); &err);
MACE_CHECK(err == CL_SUCCESS) << "error code: " << err; MACE_CHECK_CL_SUCCESS(err);
const char *kernel_path = getenv("MACE_KERNEL_PATH"); const char *kernel_path = getenv("MACE_KERNEL_PATH");
this->kernel_path_ = this->kernel_path_ =
...@@ -278,7 +407,7 @@ void OpenCLRuntime::BuildProgram(const std::string &program_name, ...@@ -278,7 +407,7 @@ void OpenCLRuntime::BuildProgram(const std::string &program_name,
cl_int err = clGetProgramInfo((*program)(), CL_PROGRAM_BINARY_SIZES, cl_int err = clGetProgramInfo((*program)(), CL_PROGRAM_BINARY_SIZES,
sizeof(size_t) * device_list_size, sizeof(size_t) * device_list_size,
program_binary_sizes.get(), nullptr); program_binary_sizes.get(), nullptr);
MACE_CHECK(err == CL_SUCCESS) << "Error code: " << err; MACE_CHECK_CL_SUCCESS(err);
std::unique_ptr<std::unique_ptr<unsigned char[]>[]> program_binaries( std::unique_ptr<std::unique_ptr<unsigned char[]>[]> program_binaries(
new std::unique_ptr<unsigned char[]>[device_list_size]); new std::unique_ptr<unsigned char[]>[device_list_size]);
for (cl_uint i = 0; i < device_list_size; ++i) { for (cl_uint i = 0; i < device_list_size; ++i) {
...@@ -289,7 +418,7 @@ void OpenCLRuntime::BuildProgram(const std::string &program_name, ...@@ -289,7 +418,7 @@ void OpenCLRuntime::BuildProgram(const std::string &program_name,
err = clGetProgramInfo((*program)(), CL_PROGRAM_BINARIES, err = clGetProgramInfo((*program)(), CL_PROGRAM_BINARIES,
sizeof(unsigned char *) * device_list_size, sizeof(unsigned char *) * device_list_size,
program_binaries.get(), nullptr); program_binaries.get(), nullptr);
MACE_CHECK(err == CL_SUCCESS) << "Error code: " << err; MACE_CHECK_CL_SUCCESS(err);
std::vector<unsigned char> content( std::vector<unsigned char> content(
reinterpret_cast<unsigned char const *>(program_binaries[0].get()), reinterpret_cast<unsigned char const *>(program_binaries[0].get()),
reinterpret_cast<unsigned char const *>(program_binaries[0].get()) + reinterpret_cast<unsigned char const *>(program_binaries[0].get()) +
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/opencl_wrapper.h" #include "mace/core/runtime/opencl/opencl_wrapper.h"
#include "mace/public/mace_runtime.h" #include "mace/public/mace_runtime.h"
#include "mace/utils/string_util.h"
#include "mace/utils/timer.h" #include "mace/utils/timer.h"
namespace mace { namespace mace {
...@@ -26,6 +27,12 @@ enum GPUType { ...@@ -26,6 +27,12 @@ enum GPUType {
UNKNOWN, UNKNOWN,
}; };
const std::string OpenCLErrorToString(cl_int error);
// Checks, via MACE_CHECK, that `error` equals CL_SUCCESS and appends
// "error: " plus the text produced by OpenCLErrorToString(error) to the check's
// message stream. Callers may chain further `<<` context after the macro.
//
// The parameter is parenthesized in the comparison so that an argument built
// from an operator with lower precedence than `==` (for example `a & b`) is
// compared as a whole instead of being re-associated.
//
// NOTE: `error` appears twice in the expansion; pass a plain variable rather
// than an expression with side effects.
#define MACE_CHECK_CL_SUCCESS(error) \
  MACE_CHECK((error) == CL_SUCCESS) << "error: " << OpenCLErrorToString(error)
class OpenCLProfilingTimer : public Timer { class OpenCLProfilingTimer : public Timer {
public: public:
explicit OpenCLProfilingTimer(const cl::Event *event) explicit OpenCLProfilingTimer(const cl::Event *event)
......
...@@ -76,7 +76,7 @@ void BiasAddFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input, ...@@ -76,7 +76,7 @@ void BiasAddFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
cl::NDRange(roundup_gws[0], roundup_gws[1], roundup_gws[2]), cl::NDRange(roundup_gws[0], roundup_gws[1], roundup_gws[2]),
cl::NDRange(lws[0], lws[1], lws[2]), nullptr, &event); cl::NDRange(lws[0], lws[1], lws[2]), nullptr, &event);
} }
MACE_CHECK(error == CL_SUCCESS); MACE_CHECK_CL_SUCCESS(error);
if (future != nullptr) { if (future != nullptr) {
future->wait_fn = [runtime, event](CallStats *stats) { future->wait_fn = [runtime, event](CallStats *stats) {
event.wait(); event.wait();
......
...@@ -134,7 +134,7 @@ void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()( ...@@ -134,7 +134,7 @@ void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()(
b2f_kernel, cl::NullRange, cl::NDRange(roundup_gws[0], roundup_gws[1]), b2f_kernel, cl::NullRange, cl::NDRange(roundup_gws[0], roundup_gws[1]),
cl::NDRange(lws[0], lws[1]), nullptr, &event); cl::NDRange(lws[0], lws[1]), nullptr, &event);
} }
MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; MACE_CHECK_CL_SUCCESS(error);
if (future != nullptr) { if (future != nullptr) {
future->wait_fn = [runtime, event](CallStats *stats) { future->wait_fn = [runtime, event](CallStats *stats) {
event.wait(); event.wait();
......
...@@ -6,7 +6,6 @@ ...@@ -6,7 +6,6 @@
#define MACE_KERNELS_OPENCL_CL_COMMON_H_ #define MACE_KERNELS_OPENCL_CL_COMMON_H_
#pragma OPENCL EXTENSION cl_khr_fp16 : enable #pragma OPENCL EXTENSION cl_khr_fp16 : enable
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
#define VEC_DATA_TYPE_STR(data_type, size) data_type##size #define VEC_DATA_TYPE_STR(data_type, size) data_type##size
#define VEC_DATA_TYPE(data_type, size) VEC_DATA_TYPE_STR(data_type, size) #define VEC_DATA_TYPE(data_type, size) VEC_DATA_TYPE_STR(data_type, size)
......
...@@ -99,7 +99,7 @@ void FCWXKernel(cl::Kernel *kernel, ...@@ -99,7 +99,7 @@ void FCWXKernel(cl::Kernel *kernel,
cl_int error = runtime->command_queue().enqueueNDRangeKernel( cl_int error = runtime->command_queue().enqueueNDRangeKernel(
*kernel, cl::NullRange, cl::NDRange((*gws)[0], (*gws)[1], (*gws)[2]), *kernel, cl::NullRange, cl::NDRange((*gws)[0], (*gws)[1], (*gws)[2]),
cl::NDRange((*lws)[0], (*lws)[1], (*lws)[2]), nullptr, &event); cl::NDRange((*lws)[0], (*lws)[1], (*lws)[2]), nullptr, &event);
MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; MACE_CHECK_CL_SUCCESS(error);
if (future != nullptr) { if (future != nullptr) {
future->wait_fn = [runtime, event](CallStats *stats) { future->wait_fn = [runtime, event](CallStats *stats) {
......
...@@ -262,7 +262,7 @@ void TuningOrRun3DKernel(const cl::Kernel &kernel, ...@@ -262,7 +262,7 @@ void TuningOrRun3DKernel(const cl::Kernel &kernel,
cl::NDRange(roundup_gws[0], roundup_gws[1], roundup_gws2), cl::NDRange(roundup_gws[0], roundup_gws[1], roundup_gws2),
cl::NDRange(params[0], params[1], params[2]), nullptr, &event); cl::NDRange(params[0], params[1], params[2]), nullptr, &event);
} }
MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; MACE_CHECK_CL_SUCCESS(error);
} }
} else { } else {
timer->ClearTiming(); timer->ClearTiming();
...@@ -276,7 +276,7 @@ void TuningOrRun3DKernel(const cl::Kernel &kernel, ...@@ -276,7 +276,7 @@ void TuningOrRun3DKernel(const cl::Kernel &kernel,
cl::NDRange(roundup_gws[0], roundup_gws[1], roundup_gws[2]), cl::NDRange(roundup_gws[0], roundup_gws[1], roundup_gws[2]),
cl::NDRange(params[0], params[1], params[2]), nullptr, &event); cl::NDRange(params[0], params[1], params[2]), nullptr, &event);
} }
MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; MACE_CHECK_CL_SUCCESS(error);
timer->AccumulateTiming(); timer->AccumulateTiming();
tuning_result->assign(params.begin(), params.end()); tuning_result->assign(params.begin(), params.end());
...@@ -303,7 +303,7 @@ void TuningOrRun3DKernel(const cl::Kernel &kernel, ...@@ -303,7 +303,7 @@ void TuningOrRun3DKernel(const cl::Kernel &kernel,
cl::NDRange(roundup_gws[0], roundup_gws[1], roundup_gws2), cl::NDRange(roundup_gws[0], roundup_gws[1], roundup_gws2),
cl::NDRange(params[0], params[1], params[2]), nullptr, &event); cl::NDRange(params[0], params[1], params[2]), nullptr, &event);
} }
MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; MACE_CHECK_CL_SUCCESS(error);
timer->AccumulateTiming(); timer->AccumulateTiming();
} }
} }
...@@ -379,7 +379,7 @@ void TuningOrRun2DKernel(const cl::Kernel &kernel, ...@@ -379,7 +379,7 @@ void TuningOrRun2DKernel(const cl::Kernel &kernel,
cl::NDRange(roundup_gws[0], roundup_gws1), cl::NDRange(roundup_gws[0], roundup_gws1),
cl::NDRange(params[0], params[1]), nullptr, &event); cl::NDRange(params[0], params[1]), nullptr, &event);
} }
MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; MACE_CHECK_CL_SUCCESS(error);
} }
} else { } else {
timer->ClearTiming(); timer->ClearTiming();
...@@ -392,7 +392,7 @@ void TuningOrRun2DKernel(const cl::Kernel &kernel, ...@@ -392,7 +392,7 @@ void TuningOrRun2DKernel(const cl::Kernel &kernel,
kernel, cl::NullRange, cl::NDRange(roundup_gws[0], roundup_gws[1]), kernel, cl::NullRange, cl::NDRange(roundup_gws[0], roundup_gws[1]),
cl::NDRange(params[0], params[1]), nullptr, &event); cl::NDRange(params[0], params[1]), nullptr, &event);
} }
MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; MACE_CHECK_CL_SUCCESS(error);
timer->AccumulateTiming(); timer->AccumulateTiming();
tuning_result->assign(params.begin(), params.end()); tuning_result->assign(params.begin(), params.end());
...@@ -419,7 +419,7 @@ void TuningOrRun2DKernel(const cl::Kernel &kernel, ...@@ -419,7 +419,7 @@ void TuningOrRun2DKernel(const cl::Kernel &kernel,
cl::NDRange(roundup_gws[0], roundup_gws1), cl::NDRange(roundup_gws[0], roundup_gws1),
cl::NDRange(params[0], params[1]), nullptr, &event); cl::NDRange(params[0], params[1]), nullptr, &event);
} }
MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; MACE_CHECK_CL_SUCCESS(error);
timer->AccumulateTiming(); timer->AccumulateTiming();
} }
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册