提交 c0b8d045 编写于 作者: 李寅

Merge branch 'opencl' into 'master'

Add OpenCL runtime smoke tests

See merge request !69
......@@ -19,11 +19,22 @@ cc_library(
]),
copts = ["-std=c++11"],
deps = [
"@opencl_headers//:opencl12_headers",
"@opencl_headers//:opencl20_headers",
"core",
],
)
cc_binary(
name = "opencl_smoketest",
srcs = glob([
"platform/opencl/opencl_smoketest.cc",
]),
copts = ["-std=c++11"],
deps = [
"opencl_runtime",
],
)
cc_library(
name = "core",
srcs = glob([
......
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#define CL_HPP_MINIMUM_OPENCL_VERSION 200
#define CL_HPP_TARGET_OPENCL_VERSION 200
#include "mace/core/logging.h"
#include "mace/core/platform/opencl/cl2.hpp"
#include "mace/core/platform/opencl/opencl_wrapper.h"
int main() {
LOG(INFO) << "OpenCL support: " << mace::OpenCLSupported();
if (!mace::OpenCLSupported()) return 1;
LOG(INFO) << "Start OpenCL test";
// get all platforms (drivers)
std::vector<cl::Platform> all_platforms;
cl::Platform::get(&all_platforms);
if (all_platforms.size() == 0) {
LOG(INFO) << " No OpenCL platforms found";
return 1;
}
LOG(INFO) << "Platform sizes: " << all_platforms.size();
cl::Platform default_platform = all_platforms[0];
LOG(INFO) << "Using platform: "
<< default_platform.getInfo<CL_PLATFORM_NAME>() << ", "
<< default_platform.getInfo<CL_PLATFORM_PROFILE>() << ", "
<< default_platform.getInfo<CL_PLATFORM_VERSION>();
// get default device (CPUs, GPUs) of the default platform
std::vector<cl::Device> all_devices;
default_platform.getDevices(CL_DEVICE_TYPE_ALL, &all_devices);
if (all_devices.size() == 0) {
LOG(INFO) << "No OpenCL devices found";
return 1;
}
// Use the last device
cl::Device default_device = *all_devices.rbegin();
LOG(INFO) << "Using device: " << default_device.getInfo<CL_DEVICE_NAME>()
<< ", " << default_device.getInfo<CL_DEVICE_TYPE>();
// a context is like a "runtime link" to the device and platform;
// i.e. communication is possible
cl::Context context({default_device});
// create the program that we want to execute on the device
cl::Program::Sources sources;
// calculates for each element; C = A + B
std::string kernel_code =
" void kernel simple_add(global const int* A, global const int* B, "
"global int* C, "
" global const int* N) {"
" int ID, Nthreads, n, ratio, start, stop;"
""
" ID = get_global_id(0);"
" Nthreads = get_global_size(0);"
" n = N[0];"
""
" ratio = (n / Nthreads);" // number of elements for each thread
" start = ratio * ID;"
" stop = ratio * (ID + 1);"
""
" for (int i=start; i<stop; i++)"
" C[i] = A[i] + B[i];"
" }";
sources.push_back({kernel_code.c_str(), kernel_code.length()});
cl::Program program(context, sources);
if (program.build({default_device}) != CL_SUCCESS) {
LOG(INFO) << "Error building: "
<< program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(default_device);
return 1;
}
// apparently OpenCL only likes arrays ...
// N holds the number of elements in the vectors we want to add
int N[1] = {1000};
int n = N[0];
// create buffers on device (allocate space on GPU)
cl::Buffer buffer_A(context, CL_MEM_READ_WRITE, sizeof(int) * n);
cl::Buffer buffer_B(context, CL_MEM_READ_WRITE, sizeof(int) * n);
cl::Buffer buffer_C(context, CL_MEM_READ_WRITE, sizeof(int) * n);
cl::Buffer buffer_N(context, CL_MEM_READ_ONLY, sizeof(int));
// create things on here (CPU)
int A[n], B[n];
for (int i = 0; i < n; i++) {
A[i] = i;
B[i] = 2 * i;
}
// create a queue (a queue of commands that the GPU will execute)
cl::CommandQueue queue(context, default_device);
// push write commands to queue
queue.enqueueWriteBuffer(buffer_A, CL_TRUE, 0, sizeof(int) * n, A);
queue.enqueueWriteBuffer(buffer_B, CL_TRUE, 0, sizeof(int) * n, B);
queue.enqueueWriteBuffer(buffer_N, CL_TRUE, 0, sizeof(int), N);
auto simple_add =
cl::KernelFunctor<cl::Buffer, cl::Buffer, cl::Buffer, cl::Buffer>(
program, "simple_add");
cl_int error;
simple_add(cl::EnqueueArgs(queue, cl::NDRange(100), cl::NDRange(10)),
buffer_A, buffer_B, buffer_C, buffer_N, error);
if (error != 0) {
LOG(ERROR) << "Failed to execute kernel " << error;
}
int C[n];
// read result from GPU to here
queue.enqueueReadBuffer(buffer_C, CL_TRUE, 0, sizeof(int) * n, C);
bool correct = true;
for (int i = 0; i < n; i++) {
if (C[i] != A[i] + B[i]) correct = false;
}
LOG(INFO) << "OpenCL test result: " << (correct ? "correct" : "incorrect");
return 0;
}
......@@ -11,7 +11,7 @@
#include <mutex>
/**
* Wrapper of OpenCL 1.2
* Wrapper of OpenCL 2.0 (based on 1.2)
*/
namespace mace {
class OpenCLStub final {
......@@ -19,6 +19,9 @@ class OpenCLStub final {
static OpenCLStub &Get();
bool loaded() { return loaded_; }
using clGetPlatformIDsFunc = cl_int (*)(cl_uint, cl_platform_id *, cl_uint *);
using clGetPlatformInfoFunc =
cl_int (*)(cl_platform_id, cl_platform_info, size_t, void *, size_t *);
using clBuildProgramFunc = cl_int (*)(cl_program,
cl_uint,
const cl_device_id *,
......@@ -43,7 +46,21 @@ class OpenCLStub final {
using clEnqueueUnmapMemObjectFunc = cl_int (*)(
cl_command_queue, cl_mem, void *, cl_uint, const cl_event *, cl_event *);
using clRetainCommandQueueFunc = cl_int (*)(cl_command_queue command_queue);
using clCreateContextFunc = cl_context (*)(
const cl_context_properties *,
cl_uint,
const cl_device_id *,
void(CL_CALLBACK *)(const char *, const void *, size_t, void *),
void *,
cl_int *);
using clCreateContextFromTypeFunc = cl_context (*)(
const cl_context_properties *,
cl_device_type,
void(CL_CALLBACK *)(const char *, const void *, size_t, void *),
void *,
cl_int *);
using clReleaseContextFunc = cl_int (*)(cl_context);
using clWaitForEventsFunc = cl_int (*)(cl_uint, const cl_event *);
using clReleaseEventFunc = cl_int (*)(cl_event);
using clEnqueueWriteBufferFunc = cl_int (*)(cl_command_queue,
cl_mem,
......@@ -80,6 +97,11 @@ class OpenCLStub final {
const cl_event *,
cl_event *,
cl_int *);
using clCreateCommandQueueWithPropertiesFunc =
cl_command_queue (*)(cl_context /* context */,
cl_device_id /* device */,
const cl_queue_properties * /* properties */,
cl_int * /* errcode_ret */);
using clReleaseCommandQueueFunc = cl_int (*)(cl_command_queue);
using clCreateProgramWithBinaryFunc = cl_program (*)(cl_context,
cl_uint,
......@@ -89,6 +111,8 @@ class OpenCLStub final {
cl_int *,
cl_int *);
using clRetainContextFunc = cl_int (*)(cl_context context);
using clGetContextInfoFunc =
cl_int (*)(cl_context, cl_context_info, size_t, void *, size_t *);
using clReleaseProgramFunc = cl_int (*)(cl_program program);
using clFlushFunc = cl_int (*)(cl_command_queue command_queue);
using clFinishFunc = cl_int (*)(cl_command_queue command_queue);
......@@ -105,10 +129,14 @@ class OpenCLStub final {
cl_int (*)(cl_device_id, cl_device_info, size_t, void *, size_t *);
using clGetDeviceIDsFunc = cl_int (*)(
cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *);
using clRetainDeviceFunc = cl_int (*)(cl_device_id);
using clReleaseDeviceFunc = cl_int (*)(cl_device_id);
using clRetainEventFunc = cl_int (*)(cl_event);
#define DEFINE_FUNC_PTR(func) func##Func func = nullptr
DEFINE_FUNC_PTR(clGetPlatformIDs);
DEFINE_FUNC_PTR(clGetPlatformInfo);
DEFINE_FUNC_PTR(clBuildProgram);
DEFINE_FUNC_PTR(clEnqueueNDRangeKernel);
DEFINE_FUNC_PTR(clSetKernelArg);
......@@ -122,14 +150,19 @@ class OpenCLStub final {
DEFINE_FUNC_PTR(clFinish);
DEFINE_FUNC_PTR(clReleaseProgram);
DEFINE_FUNC_PTR(clRetainContext);
DEFINE_FUNC_PTR(clGetContextInfo);
DEFINE_FUNC_PTR(clCreateProgramWithBinary);
DEFINE_FUNC_PTR(clCreateCommandQueueWithProperties);
DEFINE_FUNC_PTR(clReleaseCommandQueue);
DEFINE_FUNC_PTR(clEnqueueMapBuffer);
DEFINE_FUNC_PTR(clRetainProgram);
DEFINE_FUNC_PTR(clGetProgramBuildInfo);
DEFINE_FUNC_PTR(clEnqueueReadBuffer);
DEFINE_FUNC_PTR(clEnqueueWriteBuffer);
DEFINE_FUNC_PTR(clWaitForEvents);
DEFINE_FUNC_PTR(clReleaseEvent);
DEFINE_FUNC_PTR(clCreateContext);
DEFINE_FUNC_PTR(clCreateContextFromType);
DEFINE_FUNC_PTR(clReleaseContext);
DEFINE_FUNC_PTR(clRetainCommandQueue);
DEFINE_FUNC_PTR(clEnqueueUnmapMemObject);
......@@ -137,6 +170,8 @@ class OpenCLStub final {
DEFINE_FUNC_PTR(clReleaseMemObject);
DEFINE_FUNC_PTR(clGetDeviceInfo);
DEFINE_FUNC_PTR(clGetDeviceIDs);
DEFINE_FUNC_PTR(clRetainDevice);
DEFINE_FUNC_PTR(clReleaseDevice);
DEFINE_FUNC_PTR(clRetainEvent);
#undef DEFINE_FUNC_PTR
......@@ -199,7 +234,7 @@ bool OpenCLStub::Load(const std::string &path) {
void *ptr = dlsym(handle, #func); \
if (ptr == nullptr) { \
LOG(ERROR) << "Failed to load " << #func << " from " << path; \
loaded_ = false; \
loaded_ = false; \
dlclose(handle); \
return false; \
} \
......@@ -207,6 +242,8 @@ bool OpenCLStub::Load(const std::string &path) {
VLOG(2) << "Loaded " << #func << " from " << path; \
} while (false)
ASSIGN_FROM_DLSYM(clGetPlatformIDs);
ASSIGN_FROM_DLSYM(clGetPlatformInfo);
ASSIGN_FROM_DLSYM(clBuildProgram);
ASSIGN_FROM_DLSYM(clEnqueueNDRangeKernel);
ASSIGN_FROM_DLSYM(clSetKernelArg);
......@@ -220,14 +257,19 @@ bool OpenCLStub::Load(const std::string &path) {
ASSIGN_FROM_DLSYM(clFinish);
ASSIGN_FROM_DLSYM(clReleaseProgram);
ASSIGN_FROM_DLSYM(clRetainContext);
ASSIGN_FROM_DLSYM(clGetContextInfo);
ASSIGN_FROM_DLSYM(clCreateProgramWithBinary);
ASSIGN_FROM_DLSYM(clCreateCommandQueueWithProperties);
ASSIGN_FROM_DLSYM(clReleaseCommandQueue);
ASSIGN_FROM_DLSYM(clEnqueueMapBuffer);
ASSIGN_FROM_DLSYM(clRetainProgram);
ASSIGN_FROM_DLSYM(clGetProgramBuildInfo);
ASSIGN_FROM_DLSYM(clEnqueueReadBuffer);
ASSIGN_FROM_DLSYM(clEnqueueWriteBuffer);
ASSIGN_FROM_DLSYM(clWaitForEvents);
ASSIGN_FROM_DLSYM(clReleaseEvent);
ASSIGN_FROM_DLSYM(clCreateContext);
ASSIGN_FROM_DLSYM(clCreateContextFromType);
ASSIGN_FROM_DLSYM(clReleaseContext);
ASSIGN_FROM_DLSYM(clRetainCommandQueue);
ASSIGN_FROM_DLSYM(clEnqueueUnmapMemObject);
......@@ -235,12 +277,14 @@ bool OpenCLStub::Load(const std::string &path) {
ASSIGN_FROM_DLSYM(clReleaseMemObject);
ASSIGN_FROM_DLSYM(clGetDeviceInfo);
ASSIGN_FROM_DLSYM(clGetDeviceIDs);
ASSIGN_FROM_DLSYM(clRetainDevice);
ASSIGN_FROM_DLSYM(clReleaseDevice);
ASSIGN_FROM_DLSYM(clRetainEvent);
#undef ASSIGN_FROM_DLSYM
loaded_ = true;
dlclose(handle);
// TODO (heliangliang) Call dlclose if we are dlclosed
return true;
}
......@@ -248,6 +292,30 @@ bool OpenCLSupported() { return OpenCLStub::Get().loaded(); }
} // namespace mace
cl_int clGetPlatformIDs(cl_uint num_entries,
cl_platform_id *platforms,
cl_uint *num_platforms) {
auto func = mace::OpenCLStub::Get().clGetPlatformIDs;
if (func != nullptr) {
return func(num_entries, platforms, num_platforms);
} else {
return CL_OUT_OF_RESOURCES;
}
}
cl_int clGetPlatformInfo(cl_platform_id platform,
cl_platform_info param_name,
size_t param_value_size,
void *param_value,
size_t *param_value_size_ret) {
auto func = mace::OpenCLStub::Get().clGetPlatformInfo;
if (func != nullptr) {
return func(platform, param_name, param_value_size, param_value,
param_value_size_ret);
} else {
return CL_OUT_OF_RESOURCES;
}
}
cl_int clBuildProgram(cl_program program,
cl_uint num_devices,
const cl_device_id *device_list,
......@@ -336,6 +404,34 @@ cl_int clRetainCommandQueue(cl_command_queue command_queue) {
return CL_OUT_OF_RESOURCES;
}
}
cl_context clCreateContext(
const cl_context_properties *properties,
cl_uint num_devices,
const cl_device_id *devices,
void(CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *),
void *user_data,
cl_int *errcode_ret) {
auto func = mace::OpenCLStub::Get().clCreateContext;
if (func != nullptr) {
return func(properties, num_devices, devices, pfn_notify, user_data,
errcode_ret);
} else {
return nullptr;
}
}
cl_context clCreateContextFromType(
const cl_context_properties *properties,
cl_device_type device_type,
void(CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *),
void *user_data,
cl_int *errcode_ret) {
auto func = mace::OpenCLStub::Get().clCreateContextFromType;
if (func != nullptr) {
return func(properties, device_type, pfn_notify, user_data, errcode_ret);
} else {
return nullptr;
}
}
cl_int clReleaseContext(cl_context context) {
auto func = mace::OpenCLStub::Get().clReleaseContext;
......@@ -345,6 +441,16 @@ cl_int clReleaseContext(cl_context context) {
return CL_OUT_OF_RESOURCES;
}
}
cl_int clWaitForEvents(cl_uint num_events, const cl_event *event_list) {
auto func = mace::OpenCLStub::Get().clWaitForEvents;
if (func != nullptr) {
return func(num_events, event_list);
} else {
return CL_OUT_OF_RESOURCES;
}
}
cl_int clReleaseEvent(cl_event event) {
auto func = mace::OpenCLStub::Get().clReleaseEvent;
if (func != nullptr) {
......@@ -435,6 +541,18 @@ void *clEnqueueMapBuffer(cl_command_queue command_queue,
return nullptr;
}
}
cl_command_queue clCreateCommandQueueWithProperties(
cl_context context,
cl_device_id device,
const cl_queue_properties *properties,
cl_int *errcode_ret) {
auto func = mace::OpenCLStub::Get().clCreateCommandQueueWithProperties;
if (func != nullptr) {
return func(context, device, properties, errcode_ret);
} else {
return nullptr;
}
}
cl_int clReleaseCommandQueue(cl_command_queue command_queue) {
auto func = mace::OpenCLStub::Get().clReleaseCommandQueue;
......@@ -473,6 +591,20 @@ cl_int clRetainContext(cl_context context) {
}
}
cl_int clGetContextInfo(cl_context context,
cl_context_info param_name,
size_t param_value_size,
void *param_value,
size_t *param_value_size_ret) {
auto func = mace::OpenCLStub::Get().clGetContextInfo;
if (func != nullptr) {
return func(context, param_name, param_value_size, param_value,
param_value_size_ret);
} else {
return CL_OUT_OF_RESOURCES;
}
}
cl_int clReleaseProgram(cl_program program) {
auto func = mace::OpenCLStub::Get().clReleaseProgram;
if (func != nullptr) {
......@@ -605,6 +737,24 @@ cl_int clGetDeviceInfo(cl_device_id device,
}
}
cl_int clRetainDevice(cl_device_id device) {
auto func = mace::OpenCLStub::Get().clRetainDevice;
if (func != nullptr) {
return func(device);
} else {
return CL_OUT_OF_RESOURCES;
}
}
cl_int clReleaseDevice(cl_device_id device) {
auto func = mace::OpenCLStub::Get().clReleaseDevice;
if (func != nullptr) {
return func(device);
} else {
return CL_OUT_OF_RESOURCES;
}
}
cl_int clRetainEvent(cl_event event) {
auto func = mace::OpenCLStub::Get().clRetainEvent;
if (func != nullptr) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册