diff --git a/mace/core/runtime/opencl/opencl_allocator.cc b/mace/core/runtime/opencl/opencl_allocator.cc index a9eb73e8dfbd00706ae569af48745abcdfd97d15..e0f63b8689b6838d67c088e98f85247198fe361b 100644 --- a/mace/core/runtime/opencl/opencl_allocator.cc +++ b/mace/core/runtime/opencl/opencl_allocator.cc @@ -40,7 +40,7 @@ void *OpenCLAllocator::New(size_t nbytes) const { cl::Buffer *buffer = new cl::Buffer(OpenCLRuntime::Global()->context(), CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, nbytes, nullptr, &error); - MACE_CHECK(error == CL_SUCCESS) << "error code: " << error; + MACE_CHECK_CL_SUCCESS(error); return static_cast(buffer); } @@ -57,9 +57,9 @@ void *OpenCLAllocator::NewImage(const std::vector &image_shape, new cl::Image2D(OpenCLRuntime::Global()->context(), CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, img_format, image_shape[0], image_shape[1], 0, nullptr, &error); - MACE_CHECK(error == CL_SUCCESS) << error << " with image shape: [" - << image_shape[0] << ", " << image_shape[1] - << "]"; + MACE_CHECK_CL_SUCCESS(error) << " with image shape: [" + << image_shape[0] << ", " << image_shape[1] + << "]"; return cl_image; } @@ -88,7 +88,7 @@ void *OpenCLAllocator::Map(void *buffer, size_t offset, size_t nbytes) const { void *mapped_ptr = queue.enqueueMapBuffer(*cl_buffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, offset, nbytes, nullptr, nullptr, &error); - MACE_CHECK(error == CL_SUCCESS); + MACE_CHECK_CL_SUCCESS(error); return mapped_ptr; } @@ -107,7 +107,7 @@ void *OpenCLAllocator::MapImage(void *buffer, *cl_image, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, origin, region, mapped_image_pitch->data(), mapped_image_pitch->data() + 1, nullptr, nullptr, &error); - MACE_CHECK(error == CL_SUCCESS) << error; + MACE_CHECK_CL_SUCCESS(error); return mapped_ptr; } @@ -115,8 +115,9 @@ void *OpenCLAllocator::MapImage(void *buffer, void OpenCLAllocator::Unmap(void *buffer, void *mapped_ptr) const { auto cl_buffer = static_cast(buffer); auto queue = OpenCLRuntime::Global()->command_queue(); - MACE_CHECK(queue.enqueueUnmapMemObject(*cl_buffer, mapped_ptr, nullptr, - nullptr) == CL_SUCCESS); + cl_int error = queue.enqueueUnmapMemObject(*cl_buffer, mapped_ptr, + nullptr, nullptr); + MACE_CHECK_CL_SUCCESS(error); } bool OpenCLAllocator::OnHost() const { return false; } diff --git a/mace/core/runtime/opencl/opencl_runtime.cc b/mace/core/runtime/opencl/opencl_runtime.cc index dcd7fab4a019c5a49772b7794c1aa17e0bbd4e26..331ae89677acf0cca5059c84ffe9bdfc0e913d01 100644 --- a/mace/core/runtime/opencl/opencl_runtime.cc +++ b/mace/core/runtime/opencl/opencl_runtime.cc @@ -40,6 +40,135 @@ bool WriteFile(const std::string &filename, } // namespace +const std::string OpenCLErrorToString(cl_int error) { + switch(error) { + case CL_SUCCESS: + return "CL_SUCCESS"; + case CL_DEVICE_NOT_FOUND: + return "CL_DEVICE_NOT_FOUND"; + case CL_DEVICE_NOT_AVAILABLE: + return "CL_DEVICE_NOT_AVAILABLE"; + case CL_COMPILER_NOT_AVAILABLE: + return "CL_COMPILER_NOT_AVAILABLE"; + case CL_MEM_OBJECT_ALLOCATION_FAILURE: + return "CL_MEM_OBJECT_ALLOCATION_FAILURE"; + case CL_OUT_OF_RESOURCES: + return "CL_OUT_OF_RESOURCES"; + case CL_OUT_OF_HOST_MEMORY: + return "CL_OUT_OF_HOST_MEMORY"; + case CL_PROFILING_INFO_NOT_AVAILABLE: + return "CL_PROFILING_INFO_NOT_AVAILABLE"; + case CL_MEM_COPY_OVERLAP: + return "CL_MEM_COPY_OVERLAP"; + case CL_IMAGE_FORMAT_MISMATCH: + return "CL_IMAGE_FORMAT_MISMATCH"; + case CL_IMAGE_FORMAT_NOT_SUPPORTED: + return "CL_IMAGE_FORMAT_NOT_SUPPORTED"; + case CL_BUILD_PROGRAM_FAILURE: + return "CL_BUILD_PROGRAM_FAILURE"; + case CL_MAP_FAILURE: + return "CL_MAP_FAILURE"; + case CL_MISALIGNED_SUB_BUFFER_OFFSET: + return "CL_MISALIGNED_SUB_BUFFER_OFFSET"; + case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: + return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"; + case CL_COMPILE_PROGRAM_FAILURE: + return "CL_COMPILE_PROGRAM_FAILURE"; + case CL_LINKER_NOT_AVAILABLE: + return "CL_LINKER_NOT_AVAILABLE"; + case CL_LINK_PROGRAM_FAILURE: + return "CL_LINK_PROGRAM_FAILURE"; + case CL_DEVICE_PARTITION_FAILED: + return "CL_DEVICE_PARTITION_FAILED"; + case CL_KERNEL_ARG_INFO_NOT_AVAILABLE: + return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE"; + case CL_INVALID_VALUE: + return "CL_INVALID_VALUE"; + case CL_INVALID_DEVICE_TYPE: + return "CL_INVALID_DEVICE_TYPE"; + case CL_INVALID_PLATFORM: + return "CL_INVALID_PLATFORM"; + case CL_INVALID_DEVICE: + return "CL_INVALID_DEVICE"; + case CL_INVALID_CONTEXT: + return "CL_INVALID_CONTEXT"; + case CL_INVALID_QUEUE_PROPERTIES: + return "CL_INVALID_QUEUE_PROPERTIES"; + case CL_INVALID_COMMAND_QUEUE: + return "CL_INVALID_COMMAND_QUEUE"; + case CL_INVALID_HOST_PTR: + return "CL_INVALID_HOST_PTR"; + case CL_INVALID_MEM_OBJECT: + return "CL_INVALID_MEM_OBJECT"; + case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: + return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"; + case CL_INVALID_IMAGE_SIZE: + return "CL_INVALID_IMAGE_SIZE"; + case CL_INVALID_SAMPLER: + return "CL_INVALID_SAMPLER"; + case CL_INVALID_BINARY: + return "CL_INVALID_BINARY"; + case CL_INVALID_BUILD_OPTIONS: + return "CL_INVALID_BUILD_OPTIONS"; + case CL_INVALID_PROGRAM: + return "CL_INVALID_PROGRAM"; + case CL_INVALID_PROGRAM_EXECUTABLE: + return "CL_INVALID_PROGRAM_EXECUTABLE"; + case CL_INVALID_KERNEL_NAME: + return "CL_INVALID_KERNEL_NAME"; + case CL_INVALID_KERNEL_DEFINITION: + return "CL_INVALID_KERNEL_DEFINITION"; + case CL_INVALID_KERNEL: + return "CL_INVALID_KERNEL"; + case CL_INVALID_ARG_INDEX: + return "CL_INVALID_ARG_INDEX"; + case CL_INVALID_ARG_VALUE: + return "CL_INVALID_ARG_VALUE"; + case CL_INVALID_ARG_SIZE: + return "CL_INVALID_ARG_SIZE"; + case CL_INVALID_KERNEL_ARGS: + return "CL_INVALID_KERNEL_ARGS"; + case CL_INVALID_WORK_DIMENSION: + return "CL_INVALID_WORK_DIMENSION"; + case CL_INVALID_WORK_GROUP_SIZE: + return "CL_INVALID_WORK_GROUP_SIZE"; + case CL_INVALID_WORK_ITEM_SIZE: + return "CL_INVALID_WORK_ITEM_SIZE"; + case CL_INVALID_GLOBAL_OFFSET: + return "CL_INVALID_GLOBAL_OFFSET"; + case CL_INVALID_EVENT_WAIT_LIST: + return "CL_INVALID_EVENT_WAIT_LIST"; + case CL_INVALID_EVENT: + return "CL_INVALID_EVENT"; + case CL_INVALID_OPERATION: + return "CL_INVALID_OPERATION"; + case CL_INVALID_GL_OBJECT: + return "CL_INVALID_GL_OBJECT"; + case CL_INVALID_BUFFER_SIZE: + return "CL_INVALID_BUFFER_SIZE"; + case CL_INVALID_MIP_LEVEL: + return "CL_INVALID_MIP_LEVEL"; + case CL_INVALID_GLOBAL_WORK_SIZE: + return "CL_INVALID_GLOBAL_WORK_SIZE"; + case CL_INVALID_PROPERTY: + return "CL_INVALID_PROPERTY"; + case CL_INVALID_IMAGE_DESCRIPTOR: + return "CL_INVALID_IMAGE_DESCRIPTOR"; + case CL_INVALID_COMPILER_OPTIONS: + return "CL_INVALID_COMPILER_OPTIONS"; + case CL_INVALID_LINKER_OPTIONS: + return "CL_INVALID_LINKER_OPTIONS"; + case CL_INVALID_DEVICE_PARTITION_COUNT: + return "CL_INVALID_DEVICE_PARTITION_COUNT"; + case CL_INVALID_PIPE_SIZE: + return "CL_INVALID_PIPE_SIZE"; + case CL_INVALID_DEVICE_QUEUE: + return "CL_INVALID_DEVICE_QUEUE"; + default: + return MakeString("UNKNOWN: ", error); + } +} + void OpenCLProfilingTimer::StartTiming() {} void OpenCLProfilingTimer::StopTiming() { @@ -183,13 +312,13 @@ OpenCLRuntime::OpenCLRuntime(GPUPerfHint gpu_perf_hint, context_ = std::shared_ptr( new cl::Context({*device_}, nullptr, nullptr, nullptr, &err)); } - MACE_CHECK(err == CL_SUCCESS) << "error code: " << err; + MACE_CHECK_CL_SUCCESS(err); command_queue_ = std::make_shared(*context_, *device_, properties, &err); - MACE_CHECK(err == CL_SUCCESS) << "error code: " << err; + MACE_CHECK_CL_SUCCESS(err); const char *kernel_path = getenv("MACE_KERNEL_PATH"); this->kernel_path_ = @@ -278,7 +407,7 @@ void OpenCLRuntime::BuildProgram(const std::string &program_name, cl_int err = clGetProgramInfo((*program)(), CL_PROGRAM_BINARY_SIZES, sizeof(size_t) * device_list_size, program_binary_sizes.get(), nullptr); - MACE_CHECK(err == CL_SUCCESS) << "Error code: " << err; + MACE_CHECK_CL_SUCCESS(err); std::unique_ptr[]> program_binaries( new std::unique_ptr[device_list_size]); for (cl_uint i = 0; i < device_list_size; ++i) { @@ -289,7 +418,7 @@ void OpenCLRuntime::BuildProgram(const std::string &program_name, err = clGetProgramInfo((*program)(), CL_PROGRAM_BINARIES, sizeof(unsigned char *) * device_list_size, program_binaries.get(), nullptr); - MACE_CHECK(err == CL_SUCCESS) << "Error code: " << err; + MACE_CHECK_CL_SUCCESS(err); std::vector content( reinterpret_cast(program_binaries[0].get()), reinterpret_cast(program_binaries[0].get()) + diff --git a/mace/core/runtime/opencl/opencl_runtime.h b/mace/core/runtime/opencl/opencl_runtime.h index 414fa7ed91fa205cbbb5b3d3b06d6d7c91d59fcf..3814eb41e8551363150f3ed3fb777f8d6ea73300 100644 --- a/mace/core/runtime/opencl/opencl_runtime.h +++ b/mace/core/runtime/opencl/opencl_runtime.h @@ -15,6 +15,7 @@ #include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/runtime/opencl/opencl_wrapper.h" #include "mace/public/mace_runtime.h" +#include "mace/utils/string_util.h" #include "mace/utils/timer.h" namespace mace { @@ -26,6 +27,12 @@ enum GPUType { UNKNOWN, }; + +const std::string OpenCLErrorToString(cl_int error); + +#define MACE_CHECK_CL_SUCCESS(error) \ + MACE_CHECK(error == CL_SUCCESS) << "error: " << OpenCLErrorToString(error) + class OpenCLProfilingTimer : public Timer { public: explicit OpenCLProfilingTimer(const cl::Event *event) diff --git a/mace/kernels/opencl/bias_add_opencl.cc b/mace/kernels/opencl/bias_add_opencl.cc index 1197a3590599b4aac96ac97ce0c8fc58915d0046..684474be64632d47bd0b4ae488c22bf687b5146b 100644 --- a/mace/kernels/opencl/bias_add_opencl.cc +++ b/mace/kernels/opencl/bias_add_opencl.cc @@ -76,7 +76,7 @@ void BiasAddFunctor::operator()(const Tensor *input, cl::NDRange(roundup_gws[0], roundup_gws[1], roundup_gws[2]), cl::NDRange(lws[0], lws[1], lws[2]), nullptr, &event); } - MACE_CHECK(error == CL_SUCCESS); + MACE_CHECK_CL_SUCCESS(error); if (future != nullptr) { future->wait_fn = [runtime, event](CallStats *stats) { event.wait(); diff --git a/mace/kernels/opencl/buffer_to_image.cc b/mace/kernels/opencl/buffer_to_image.cc index 8b570b3369aca067d53bb0286c3bf9c354f8f74a..f652696f64b70444e41a5e50c793e7b093fe91c3 100644 --- a/mace/kernels/opencl/buffer_to_image.cc +++ b/mace/kernels/opencl/buffer_to_image.cc @@ -134,7 +134,7 @@ void BufferToImageFunctor::operator()( b2f_kernel, cl::NullRange, cl::NDRange(roundup_gws[0], roundup_gws[1]), cl::NDRange(lws[0], lws[1]), nullptr, &event); } - MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; + MACE_CHECK_CL_SUCCESS(error); if (future != nullptr) { future->wait_fn = [runtime, event](CallStats *stats) { event.wait(); diff --git a/mace/kernels/opencl/cl/common.h b/mace/kernels/opencl/cl/common.h index 30aad065ac12da29c3eb661bcf17069a023b16e6..6e698b5c61c7a7940f58440baf9e48f13c9f34da 100644 --- a/mace/kernels/opencl/cl/common.h +++ b/mace/kernels/opencl/cl/common.h @@ -6,7 +6,6 @@ #define MACE_KERNELS_OPENCL_CL_COMMON_H_ #pragma OPENCL EXTENSION cl_khr_fp16 : enable -#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable #define VEC_DATA_TYPE_STR(data_type, size) data_type##size #define VEC_DATA_TYPE(data_type, size) VEC_DATA_TYPE_STR(data_type, size) diff --git a/mace/kernels/opencl/fully_connected_opencl.cc b/mace/kernels/opencl/fully_connected_opencl.cc index f4b7b2223349e45b9f7d02976a0d4184ea3ee0ad..70af952ee8da6f7cd475f9c9aaa4020ac44f20e5 100644 --- a/mace/kernels/opencl/fully_connected_opencl.cc +++ b/mace/kernels/opencl/fully_connected_opencl.cc @@ -99,7 +99,7 @@ void FCWXKernel(cl::Kernel *kernel, cl_int error = runtime->command_queue().enqueueNDRangeKernel( *kernel, cl::NullRange, cl::NDRange((*gws)[0], (*gws)[1], (*gws)[2]), cl::NDRange((*lws)[0], (*lws)[1], (*lws)[2]), nullptr, &event); - MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; + MACE_CHECK_CL_SUCCESS(error); if (future != nullptr) { future->wait_fn = [runtime, event](CallStats *stats) { diff --git a/mace/kernels/opencl/helper.cc b/mace/kernels/opencl/helper.cc index 2ab8cde257de73b8dee80300afa89122dd3cf126..1d294462d1cd615ee45836b325e5f8d5d8051161 100644 --- a/mace/kernels/opencl/helper.cc +++ b/mace/kernels/opencl/helper.cc @@ -262,7 +262,7 @@ void TuningOrRun3DKernel(const cl::Kernel &kernel, cl::NDRange(roundup_gws[0], roundup_gws[1], roundup_gws2), cl::NDRange(params[0], params[1], params[2]), nullptr, &event); } - MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; + MACE_CHECK_CL_SUCCESS(error); } } else { timer->ClearTiming(); @@ -276,7 +276,7 @@ void TuningOrRun3DKernel(const cl::Kernel &kernel, cl::NDRange(roundup_gws[0], roundup_gws[1], roundup_gws[2]), cl::NDRange(params[0], params[1], params[2]), nullptr, &event); } - MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; + MACE_CHECK_CL_SUCCESS(error); timer->AccumulateTiming(); tuning_result->assign(params.begin(), params.end()); @@ -303,7 +303,7 @@ void TuningOrRun3DKernel(const cl::Kernel &kernel, cl::NDRange(roundup_gws[0], roundup_gws[1], roundup_gws2), cl::NDRange(params[0], params[1], params[2]), nullptr, &event); } - MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; + MACE_CHECK_CL_SUCCESS(error); timer->AccumulateTiming(); } } @@ -379,7 +379,7 @@ void TuningOrRun2DKernel(const cl::Kernel &kernel, cl::NDRange(roundup_gws[0], roundup_gws1), cl::NDRange(params[0], params[1]), nullptr, &event); } - MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; + MACE_CHECK_CL_SUCCESS(error); } } else { timer->ClearTiming(); @@ -392,7 +392,7 @@ void TuningOrRun2DKernel(const cl::Kernel &kernel, kernel, cl::NullRange, cl::NDRange(roundup_gws[0], roundup_gws[1]), cl::NDRange(params[0], params[1]), nullptr, &event); } - MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; + MACE_CHECK_CL_SUCCESS(error); timer->AccumulateTiming(); tuning_result->assign(params.begin(), params.end()); @@ -419,7 +419,7 @@ void TuningOrRun2DKernel(const cl::Kernel &kernel, cl::NDRange(roundup_gws[0], roundup_gws1), cl::NDRange(params[0], params[1]), nullptr, &event); } - MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; + MACE_CHECK_CL_SUCCESS(error); timer->AccumulateTiming(); } }