From 77e021517822b424f67e51cce499d0954926c5fa Mon Sep 17 00:00:00 2001 From: Liangliang He Date: Wed, 11 Oct 2017 20:21:03 +0800 Subject: [PATCH] Add OpenCL runtime smoke tests --- mace/core/BUILD | 13 +- mace/core/platform/opencl/opencl_smoketest.cc | 125 ++++++++++++++ mace/core/platform/opencl/opencl_wrapper.cc | 156 +++++++++++++++++- 3 files changed, 290 insertions(+), 4 deletions(-) create mode 100644 mace/core/platform/opencl/opencl_smoketest.cc diff --git a/mace/core/BUILD b/mace/core/BUILD index b6a087ca..b37f0cba 100644 --- a/mace/core/BUILD +++ b/mace/core/BUILD @@ -19,11 +19,22 @@ cc_library( ]), copts = ["-std=c++11"], deps = [ - "@opencl_headers//:opencl12_headers", + "@opencl_headers//:opencl20_headers", "core", ], ) +cc_binary( + name = "opencl_smoketest", + srcs = glob([ + "platform/opencl/opencl_smoketest.cc", + ]), + copts = ["-std=c++11"], + deps = [ + "opencl_runtime", + ], +) + cc_library( name = "core", srcs = glob([ diff --git a/mace/core/platform/opencl/opencl_smoketest.cc b/mace/core/platform/opencl/opencl_smoketest.cc new file mode 100644 index 00000000..377b1828 --- /dev/null +++ b/mace/core/platform/opencl/opencl_smoketest.cc @@ -0,0 +1,125 @@ +// +// Copyright (c) 2017 XiaoMi All rights reserved. +// + +#define CL_HPP_MINIMUM_OPENCL_VERSION 200 +#define CL_HPP_TARGET_OPENCL_VERSION 200 + +#include "mace/core/logging.h" +#include "mace/core/platform/opencl/cl2.hpp" +#include "mace/core/platform/opencl/opencl_wrapper.h" + +int main() { + LOG(INFO) << "OpenCL support: " << mace::OpenCLSupported(); + if (!mace::OpenCLSupported()) return 1; + LOG(INFO) << "Start OpenCL test"; + + // get all platforms (drivers) + std::vector all_platforms; + cl::Platform::get(&all_platforms); + + if (all_platforms.size() == 0) { + LOG(INFO) << " No OpenCL platforms found"; + return 1; + } + LOG(INFO) << "Platform sizes: " << all_platforms.size(); + cl::Platform default_platform = all_platforms[0]; + LOG(INFO) << "Using platform: " + << default_platform.getInfo() << ", " + << default_platform.getInfo() << ", " + << default_platform.getInfo(); + + // get default device (CPUs, GPUs) of the default platform + std::vector all_devices; + default_platform.getDevices(CL_DEVICE_TYPE_ALL, &all_devices); + if (all_devices.size() == 0) { + LOG(INFO) << "No OpenCL devices found"; + return 1; + } + + // Use the last device + cl::Device default_device = *all_devices.rbegin(); + LOG(INFO) << "Using device: " << default_device.getInfo() + << ", " << default_device.getInfo(); + + // a context is like a "runtime link" to the device and platform; + // i.e. communication is possible + cl::Context context({default_device}); + + // create the program that we want to execute on the device + cl::Program::Sources sources; + + // calculates for each element; C = A + B + std::string kernel_code = + " void kernel simple_add(global const int* A, global const int* B, " + "global int* C, " + " global const int* N) {" + " int ID, Nthreads, n, ratio, start, stop;" + "" + " ID = get_global_id(0);" + " Nthreads = get_global_size(0);" + " n = N[0];" + "" + " ratio = (n / Nthreads);" // number of elements for each thread + " start = ratio * ID;" + " stop = ratio * (ID + 1);" + "" + " for (int i=start; i(default_device); + return 1; + } + + // apparently OpenCL only likes arrays ... + // N holds the number of elements in the vectors we want to add + int N[1] = {1000}; + int n = N[0]; + + // create buffers on device (allocate space on GPU) + cl::Buffer buffer_A(context, CL_MEM_READ_WRITE, sizeof(int) * n); + cl::Buffer buffer_B(context, CL_MEM_READ_WRITE, sizeof(int) * n); + cl::Buffer buffer_C(context, CL_MEM_READ_WRITE, sizeof(int) * n); + cl::Buffer buffer_N(context, CL_MEM_READ_ONLY, sizeof(int)); + + // create things on here (CPU) + int A[n], B[n]; + for (int i = 0; i < n; i++) { + A[i] = i; + B[i] = 2 * i; + } + // create a queue (a queue of commands that the GPU will execute) + cl::CommandQueue queue(context, default_device); + + // push write commands to queue + queue.enqueueWriteBuffer(buffer_A, CL_TRUE, 0, sizeof(int) * n, A); + queue.enqueueWriteBuffer(buffer_B, CL_TRUE, 0, sizeof(int) * n, B); + queue.enqueueWriteBuffer(buffer_N, CL_TRUE, 0, sizeof(int), N); + + auto simple_add = + cl::KernelFunctor( + program, "simple_add"); + cl_int error; + simple_add(cl::EnqueueArgs(queue, cl::NDRange(100), cl::NDRange(10)), + buffer_A, buffer_B, buffer_C, buffer_N, error); + if (error != 0) { + LOG(ERROR) << "Failed to execute kernel " << error; + } + + int C[n]; + // read result from GPU to here + queue.enqueueReadBuffer(buffer_C, CL_TRUE, 0, sizeof(int) * n, C); + + bool correct = true; + for (int i = 0; i < n; i++) { + if (C[i] != A[i] + B[i]) correct = false; + } + LOG(INFO) << "OpenCL test result: " << (correct ? "correct" : "incorrect"); + + return 0; +} diff --git a/mace/core/platform/opencl/opencl_wrapper.cc b/mace/core/platform/opencl/opencl_wrapper.cc index 79b319b0..15693119 100644 --- a/mace/core/platform/opencl/opencl_wrapper.cc +++ b/mace/core/platform/opencl/opencl_wrapper.cc @@ -11,7 +11,7 @@ #include /** - * Wrapper of OpenCL 1.2 + * Wrapper of OpenCL 2.0 (based on 1.2) */ namespace mace { class OpenCLStub final { @@ -19,6 +19,9 @@ class OpenCLStub final { static OpenCLStub &Get(); bool loaded() { return loaded_; } + using clGetPlatformIDsFunc = cl_int (*)(cl_uint, cl_platform_id *, cl_uint *); + using clGetPlatformInfoFunc = + cl_int (*)(cl_platform_id, cl_platform_info, size_t, void *, size_t *); using clBuildProgramFunc = cl_int (*)(cl_program, cl_uint, const cl_device_id *, @@ -43,7 +46,21 @@ class OpenCLStub final { using clEnqueueUnmapMemObjectFunc = cl_int (*)( cl_command_queue, cl_mem, void *, cl_uint, const cl_event *, cl_event *); using clRetainCommandQueueFunc = cl_int (*)(cl_command_queue command_queue); + using clCreateContextFunc = cl_context (*)( + const cl_context_properties *, + cl_uint, + const cl_device_id *, + void(CL_CALLBACK *)(const char *, const void *, size_t, void *), + void *, + cl_int *); + using clCreateContextFromTypeFunc = cl_context (*)( + const cl_context_properties *, + cl_device_type, + void(CL_CALLBACK *)(const char *, const void *, size_t, void *), + void *, + cl_int *); using clReleaseContextFunc = cl_int (*)(cl_context); + using clWaitForEventsFunc = cl_int (*)(cl_uint, const cl_event *); using clReleaseEventFunc = cl_int (*)(cl_event); using clEnqueueWriteBufferFunc = cl_int (*)(cl_command_queue, cl_mem, @@ -80,6 +97,11 @@ class OpenCLStub final { const cl_event *, cl_event *, cl_int *); + using clCreateCommandQueueWithPropertiesFunc = + cl_command_queue (*)(cl_context /* context */, + cl_device_id /* device */, + const cl_queue_properties * /* properties */, + cl_int * /* errcode_ret */); using clReleaseCommandQueueFunc = cl_int (*)(cl_command_queue); using clCreateProgramWithBinaryFunc = cl_program (*)(cl_context, cl_uint, @@ -89,6 +111,8 @@ class OpenCLStub final { cl_int *, cl_int *); using clRetainContextFunc = cl_int (*)(cl_context context); + using clGetContextInfoFunc = + cl_int (*)(cl_context, cl_context_info, size_t, void *, size_t *); using clReleaseProgramFunc = cl_int (*)(cl_program program); using clFlushFunc = cl_int (*)(cl_command_queue command_queue); using clFinishFunc = cl_int (*)(cl_command_queue command_queue); @@ -105,10 +129,14 @@ class OpenCLStub final { cl_int (*)(cl_device_id, cl_device_info, size_t, void *, size_t *); using clGetDeviceIDsFunc = cl_int (*)( cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *); + using clRetainDeviceFunc = cl_int (*)(cl_device_id); + using clReleaseDeviceFunc = cl_int (*)(cl_device_id); using clRetainEventFunc = cl_int (*)(cl_event); #define DEFINE_FUNC_PTR(func) func##Func func = nullptr + DEFINE_FUNC_PTR(clGetPlatformIDs); + DEFINE_FUNC_PTR(clGetPlatformInfo); DEFINE_FUNC_PTR(clBuildProgram); DEFINE_FUNC_PTR(clEnqueueNDRangeKernel); DEFINE_FUNC_PTR(clSetKernelArg); @@ -122,14 +150,19 @@ class OpenCLStub final { DEFINE_FUNC_PTR(clFinish); DEFINE_FUNC_PTR(clReleaseProgram); DEFINE_FUNC_PTR(clRetainContext); + DEFINE_FUNC_PTR(clGetContextInfo); DEFINE_FUNC_PTR(clCreateProgramWithBinary); + DEFINE_FUNC_PTR(clCreateCommandQueueWithProperties); DEFINE_FUNC_PTR(clReleaseCommandQueue); DEFINE_FUNC_PTR(clEnqueueMapBuffer); DEFINE_FUNC_PTR(clRetainProgram); DEFINE_FUNC_PTR(clGetProgramBuildInfo); DEFINE_FUNC_PTR(clEnqueueReadBuffer); DEFINE_FUNC_PTR(clEnqueueWriteBuffer); + DEFINE_FUNC_PTR(clWaitForEvents); DEFINE_FUNC_PTR(clReleaseEvent); + DEFINE_FUNC_PTR(clCreateContext); + DEFINE_FUNC_PTR(clCreateContextFromType); DEFINE_FUNC_PTR(clReleaseContext); DEFINE_FUNC_PTR(clRetainCommandQueue); DEFINE_FUNC_PTR(clEnqueueUnmapMemObject); @@ -137,6 +170,8 @@ class OpenCLStub final { DEFINE_FUNC_PTR(clReleaseMemObject); DEFINE_FUNC_PTR(clGetDeviceInfo); DEFINE_FUNC_PTR(clGetDeviceIDs); + DEFINE_FUNC_PTR(clRetainDevice); + DEFINE_FUNC_PTR(clReleaseDevice); DEFINE_FUNC_PTR(clRetainEvent); #undef DEFINE_FUNC_PTR @@ -199,7 +234,7 @@ bool OpenCLStub::Load(const std::string &path) { void *ptr = dlsym(handle, #func); \ if (ptr == nullptr) { \ LOG(ERROR) << "Failed to load " << #func << " from " << path; \ - loaded_ = false; \ + loaded_ = false; \ dlclose(handle); \ return false; \ } \ @@ -207,6 +242,8 @@ bool OpenCLStub::Load(const std::string &path) { VLOG(2) << "Loaded " << #func << " from " << path; \ } while (false) + ASSIGN_FROM_DLSYM(clGetPlatformIDs); + ASSIGN_FROM_DLSYM(clGetPlatformInfo); ASSIGN_FROM_DLSYM(clBuildProgram); ASSIGN_FROM_DLSYM(clEnqueueNDRangeKernel); ASSIGN_FROM_DLSYM(clSetKernelArg); @@ -220,14 +257,19 @@ bool OpenCLStub::Load(const std::string &path) { ASSIGN_FROM_DLSYM(clFinish); ASSIGN_FROM_DLSYM(clReleaseProgram); ASSIGN_FROM_DLSYM(clRetainContext); + ASSIGN_FROM_DLSYM(clGetContextInfo); ASSIGN_FROM_DLSYM(clCreateProgramWithBinary); + ASSIGN_FROM_DLSYM(clCreateCommandQueueWithProperties); ASSIGN_FROM_DLSYM(clReleaseCommandQueue); ASSIGN_FROM_DLSYM(clEnqueueMapBuffer); ASSIGN_FROM_DLSYM(clRetainProgram); ASSIGN_FROM_DLSYM(clGetProgramBuildInfo); ASSIGN_FROM_DLSYM(clEnqueueReadBuffer); ASSIGN_FROM_DLSYM(clEnqueueWriteBuffer); + ASSIGN_FROM_DLSYM(clWaitForEvents); ASSIGN_FROM_DLSYM(clReleaseEvent); + ASSIGN_FROM_DLSYM(clCreateContext); + ASSIGN_FROM_DLSYM(clCreateContextFromType); ASSIGN_FROM_DLSYM(clReleaseContext); ASSIGN_FROM_DLSYM(clRetainCommandQueue); ASSIGN_FROM_DLSYM(clEnqueueUnmapMemObject); @@ -235,12 +277,14 @@ bool OpenCLStub::Load(const std::string &path) { ASSIGN_FROM_DLSYM(clReleaseMemObject); ASSIGN_FROM_DLSYM(clGetDeviceInfo); ASSIGN_FROM_DLSYM(clGetDeviceIDs); + ASSIGN_FROM_DLSYM(clRetainDevice); + ASSIGN_FROM_DLSYM(clReleaseDevice); ASSIGN_FROM_DLSYM(clRetainEvent); #undef ASSIGN_FROM_DLSYM loaded_ = true; - dlclose(handle); + // TODO (heliangliang) Call dlclose if we are dlclosed return true; } @@ -248,6 +292,30 @@ bool OpenCLSupported() { return OpenCLStub::Get().loaded(); } } // namespace mace +cl_int clGetPlatformIDs(cl_uint num_entries, + cl_platform_id *platforms, + cl_uint *num_platforms) { + auto func = mace::OpenCLStub::Get().clGetPlatformIDs; + if (func != nullptr) { + return func(num_entries, platforms, num_platforms); + } else { + return CL_OUT_OF_RESOURCES; + } +} +cl_int clGetPlatformInfo(cl_platform_id platform, + cl_platform_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret) { + auto func = mace::OpenCLStub::Get().clGetPlatformInfo; + if (func != nullptr) { + return func(platform, param_name, param_value_size, param_value, + param_value_size_ret); + } else { + return CL_OUT_OF_RESOURCES; + } +} + cl_int clBuildProgram(cl_program program, cl_uint num_devices, const cl_device_id *device_list, @@ -336,6 +404,34 @@ cl_int clRetainCommandQueue(cl_command_queue command_queue) { return CL_OUT_OF_RESOURCES; } } +cl_context clCreateContext( + const cl_context_properties *properties, + cl_uint num_devices, + const cl_device_id *devices, + void(CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *), + void *user_data, + cl_int *errcode_ret) { + auto func = mace::OpenCLStub::Get().clCreateContext; + if (func != nullptr) { + return func(properties, num_devices, devices, pfn_notify, user_data, + errcode_ret); + } else { + return nullptr; + } +} +cl_context clCreateContextFromType( + const cl_context_properties *properties, + cl_device_type device_type, + void(CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *), + void *user_data, + cl_int *errcode_ret) { + auto func = mace::OpenCLStub::Get().clCreateContextFromType; + if (func != nullptr) { + return func(properties, device_type, pfn_notify, user_data, errcode_ret); + } else { + return nullptr; + } +} cl_int clReleaseContext(cl_context context) { auto func = mace::OpenCLStub::Get().clReleaseContext; @@ -345,6 +441,16 @@ cl_int clReleaseContext(cl_context context) { return CL_OUT_OF_RESOURCES; } } + +cl_int clWaitForEvents(cl_uint num_events, const cl_event *event_list) { + auto func = mace::OpenCLStub::Get().clWaitForEvents; + if (func != nullptr) { + return func(num_events, event_list); + } else { + return CL_OUT_OF_RESOURCES; + } +} + cl_int clReleaseEvent(cl_event event) { auto func = mace::OpenCLStub::Get().clReleaseEvent; if (func != nullptr) { @@ -435,6 +541,18 @@ void *clEnqueueMapBuffer(cl_command_queue command_queue, return nullptr; } } +cl_command_queue clCreateCommandQueueWithProperties( + cl_context context, + cl_device_id device, + const cl_queue_properties *properties, + cl_int *errcode_ret) { + auto func = mace::OpenCLStub::Get().clCreateCommandQueueWithProperties; + if (func != nullptr) { + return func(context, device, properties, errcode_ret); + } else { + return nullptr; + } +} cl_int clReleaseCommandQueue(cl_command_queue command_queue) { auto func = mace::OpenCLStub::Get().clReleaseCommandQueue; @@ -473,6 +591,20 @@ cl_int clRetainContext(cl_context context) { } } +cl_int clGetContextInfo(cl_context context, + cl_context_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret) { + auto func = mace::OpenCLStub::Get().clGetContextInfo; + if (func != nullptr) { + return func(context, param_name, param_value_size, param_value, + param_value_size_ret); + } else { + return CL_OUT_OF_RESOURCES; + } +} + cl_int clReleaseProgram(cl_program program) { auto func = mace::OpenCLStub::Get().clReleaseProgram; if (func != nullptr) { @@ -605,6 +737,24 @@ cl_int clGetDeviceInfo(cl_device_id device, } } +cl_int clRetainDevice(cl_device_id device) { + auto func = mace::OpenCLStub::Get().clRetainDevice; + if (func != nullptr) { + return func(device); + } else { + return CL_OUT_OF_RESOURCES; + } +} + +cl_int clReleaseDevice(cl_device_id device) { + auto func = mace::OpenCLStub::Get().clReleaseDevice; + if (func != nullptr) { + return func(device); + } else { + return CL_OUT_OF_RESOURCES; + } +} + cl_int clRetainEvent(cl_event event) { auto func = mace::OpenCLStub::Get().clRetainEvent; if (func != nullptr) { -- GitLab