diff --git a/mace/BUILD b/mace/BUILD index dbe38d6dad5658edc052ec77ec39be41ece8a7fc..b1bcd27021878e20e35be0ce9c1f4b2e58f095bd 100644 --- a/mace/BUILD +++ b/mace/BUILD @@ -25,7 +25,7 @@ config_setting( ) config_setting( - name = "is_profiling", + name = "profiling_enabled", define_values = { "profiling": "true", }, diff --git a/mace/core/BUILD b/mace/core/BUILD index 9f5ca2cb44e810da944bd72fe7db33dff7ab636b..36b2a121e2c3ab4c7ad0e16468502134328777e0 100644 --- a/mace/core/BUILD +++ b/mace/core/BUILD @@ -7,7 +7,7 @@ package( licenses(["notice"]) # Apache 2.0 -load("//mace:mace.bzl", "if_android", "if_profiling") +load("//mace:mace.bzl", "if_android", "if_profiling_enabled") cc_library( name = "opencl_runtime", @@ -15,76 +15,48 @@ cc_library( "runtime/opencl/*.cc", ]), hdrs = glob([ - "runtime/opencl/cl.hpp", "runtime/opencl/cl2.hpp", "runtime/opencl/*.h", ]), - copts = ["-std=c++11"] + if_profiling(["-D__ENABLE_PROFILING"]), + copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"] + + if_profiling_enabled(["-DMACE_OPENCL_PROFILING"]), + linkopts = ["-ldl"], deps = [ - ":logging", + ":core", + "//mace/utils:logging", + "//mace/utils:tuner", "@opencl_headers//:opencl20_headers", ], alwayslink = 1, ) -cc_library( - name = "logging", - srcs = [ - "logging.cc", - ], - hdrs = [ - "logging.h", - ], - copts = ["-std=c++11"], - linkopts = if_android([ - "-llog", - ]), -) - cc_library( name = "core", - srcs = glob( - ["*.cc",], - exclude=[ - "logging.cc", - ]), - hdrs = glob( - ["*.h"], - exclude=[ - "logging.h", - ]), - copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"] + if_android([ - "-D__USE_OPENCL", - ]), - linkopts = ["-ldl"] + if_android([ - "-pie", - ]), + srcs = glob(["*.cc"]), + hdrs = glob(["*.h"]), + copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"], + linkopts = if_android(["-pie"]), deps = [ - ":logging", - "//mace/proto:stats_proto", - "//mace/utils", - ":opencl_runtime", + "//mace/utils:utils_hdrs", + "//mace/utils:logging", ], ) -# Main program for tests 
cc_library( name = "test_benchmark_main", testonly = 1, - srcs = glob([ - "testing/*.cc", - ]), - hdrs = glob([ - "testing/*.h", - ]), - copts = [ - "-std=c++11", - "-D_GLIBCXX_USE_C99_MATH_TR1", + hdrs = [ + "testing/test_benchmark.h", + ], + srcs = [ + "testing/test_benchmark.cc", + "testing/test_benchmark_main.cc", ], - linkopts = ["-lm"], + copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"], + alwayslink = 1, deps = [ ":core", + "//mace/utils:utils_hdrs", ], - alwayslink = 1, ) diff --git a/mace/core/allocator.cc b/mace/core/allocator.cc index 84bdeb86fd87f66ef5caee92cc959f84bd19a197..d05c45b352e37e2e7c67226aee28441a15c665b8 100644 --- a/mace/core/allocator.cc +++ b/mace/core/allocator.cc @@ -3,9 +3,6 @@ // #include "mace/core/allocator.h" -#ifdef __USE_OPENCL -#include "mace/core/opencl_allocator.h" -#endif namespace mace { @@ -25,8 +22,5 @@ Allocator *GetDeviceAllocator(DeviceType type) { MACE_REGISTER_ALLOCATOR(DeviceType::CPU, new CPUAllocator()); MACE_REGISTER_ALLOCATOR(DeviceType::NEON, new CPUAllocator()); -#ifdef __USE_OPENCL -MACE_REGISTER_ALLOCATOR(DeviceType::OPENCL, new OpenCLAllocator()); -#endif } // namespace mace diff --git a/mace/core/common.h b/mace/core/common.h index e9b782211925ce979b6c93e0fb28805e827515ee..8eaf062f2cd5a4fb912444623056349620a240b0 100644 --- a/mace/core/common.h +++ b/mace/core/common.h @@ -12,7 +12,7 @@ #include #include -#include "mace/core/logging.h" +#include "mace/utils/logging.h" using std::set; using std::map; diff --git a/mace/core/future.h b/mace/core/future.h new file mode 100644 index 0000000000000000000000000000000000000000..41956f07985f4f6335a4645262ce7b1a737d7865 --- /dev/null +++ b/mace/core/future.h @@ -0,0 +1,38 @@ +// +// Copyright (c) 2017 XiaoMi All rights reserved. 
+// + +#ifndef MACE_CORE_FUTURE_H_ +#define MACE_CORE_FUTURE_H_ + +#include + +#include "mace/utils/logging.h" + +namespace mace { + +struct CallStats { + int64_t start_micros; + int64_t end_micros; +}; + +struct OperatorStats { + std::string operator_name; + std::string type; + CallStats stats; +}; + +struct RunMetadata { + std::vector op_stats; +}; + +// Wait the call to finish and get the stats if param is not nullptr +struct StatsFuture { + std::function wait_fn = [](CallStats *) { + LOG(FATAL) << "wait_fn must be properly set"; + }; +}; + +} // namespace mace + +#endif // MACE_CORE_FUTURE_H_ diff --git a/mace/core/mace.h b/mace/core/mace.h index 855860c221d0f4410586ffc2f5e2703a94927bb3..e1607af9df16f8f5eec3d24f780e23145807b64a 100644 --- a/mace/core/mace.h +++ b/mace/core/mace.h @@ -7,7 +7,7 @@ #include #include #include -#include "mace/core/logging.h" +#include "mace/utils/logging.h" namespace mace { diff --git a/mace/core/net.cc b/mace/core/net.cc index 55f3c5f6497f761aa04ff0fe6c638d977c626473..c24e22d5a3e8a4057535b8b861bbafbf33271450 100644 --- a/mace/core/net.cc +++ b/mace/core/net.cc @@ -4,9 +4,6 @@ #include "mace/core/net.h" #include "mace/utils/utils.h" -#ifdef __USE_OPENCL -#include "mace/core/runtime/opencl/opencl_runtime.h" -#endif namespace mace { @@ -33,65 +30,51 @@ SimpleNet::SimpleNet(const std::shared_ptr &net_def, } } } + bool SimpleNet::Run(RunMetadata *run_metadata) { VLOG(1) << "Running net " << name_; - for (auto &op : operators_) { + for (auto iter = operators_.begin(); iter != operators_.end(); ++iter) { + bool future_wait = (device_type_ == DeviceType::OPENCL && + (run_metadata != nullptr || + std::distance(iter, operators_.end()) == 1)); + auto &op = *iter; VLOG(1) << "Running operator " << op->debug_def().name() << "(" << op->debug_def().type() << ")."; - OperatorStats *op_stats = nullptr; - if (run_metadata ) { - if (device_type_ != DeviceType::OPENCL) { - op_stats = run_metadata->add_op_stats(); - 
op_stats->set_operator_name(op->debug_def().name()); - op_stats->set_type(op->debug_def().type()); - op_stats->set_all_start_micros(NowInMicroSec()); - op_stats->set_op_start_rel_micros(NowInMicroSec() - - op_stats->all_start_micros()); + + bool ret; + CallStats call_stats; + if (future_wait) { + StatsFuture future; + ret = op->Run(&future); + if (run_metadata != nullptr) { + future.wait_fn(&call_stats); + } else { + future.wait_fn(nullptr); } + } else if (run_metadata != nullptr) { + call_stats.start_micros = NowInMicroSec(); + ret = op->Run(nullptr); + call_stats.end_micros = NowInMicroSec(); + } else { + ret = op->Run(nullptr); } - if (!op->Run()) { + + if (run_metadata != nullptr) { + OperatorStats op_stats = { op->debug_def().name(), + op->debug_def().type(), + call_stats }; + run_metadata->op_stats.emplace_back(op_stats); + } + + if (!ret) { LOG(ERROR) << "Operator failed: " << op->debug_def().name(); return false; } - if (run_metadata) { - if (device_type_ == DeviceType::OPENCL) { -#ifndef __USE_OPENCL - LOG(FATAL) << "OpenCL is not supported"; -#else - OpenCLRuntime::Get()->command_queue().finish(); - op_stats = run_metadata->add_op_stats(); - op_stats->set_operator_name(op->debug_def().name()); - op_stats->set_type(op->debug_def().type()); - - op_stats->set_all_start_micros( - OpenCLRuntime::Get()->GetEventProfilingStartInfo() / 1000); - op_stats->set_op_start_rel_micros( - OpenCLRuntime::Get()->GetEventProfilingStartInfo() / 1000 - - op_stats->all_start_micros()); - - op_stats->set_op_end_rel_micros( - OpenCLRuntime::Get()->GetEventProfilingEndInfo() / 1000 - - op_stats->all_start_micros()); - op_stats->set_all_end_rel_micros( - OpenCLRuntime::Get()->GetEventProfilingEndInfo() / 1000 - - op_stats->all_start_micros()); -#endif - } else { - op_stats->set_op_end_rel_micros(NowInMicroSec() - - op_stats->all_start_micros()); - op_stats->set_all_end_rel_micros(NowInMicroSec() - - op_stats->all_start_micros()); - } - } VLOG(1) << "Op " << op->debug_def().name() 
<< " has shape: " << internal::MakeString(op->Output(0)->shape()); } -#ifdef __USE_OPENCL - if (device_type_ == DeviceType::OPENCL) { - OpenCLRuntime::Get()->command_queue().finish(); - } -#endif + return true; } diff --git a/mace/core/net.h b/mace/core/net.h index 109d2d66c340bddd42799d38c7ecb6fadd66e746..7c8b510786f494d60df98ba1395b983a05bc4c78 100644 --- a/mace/core/net.h +++ b/mace/core/net.h @@ -9,7 +9,6 @@ #include "mace/core/operator.h" #include "mace/core/workspace.h" #include "mace/core/mace.h" -#include "mace/proto/stats.pb.h" namespace mace { diff --git a/mace/core/operator.h b/mace/core/operator.h index ef0cd7bd560fba0a8de62b55d821bb1a812cce26..9a1c6e94ba7f8221d7320228db739e36afa63a8b 100644 --- a/mace/core/operator.h +++ b/mace/core/operator.h @@ -7,6 +7,7 @@ #include "mace/core/common.h" #include "mace/core/arg_helper.h" +#include "mace/core/future.h" #include "mace/core/registry.h" #include "mace/core/tensor.h" #include "mace/core/workspace.h" @@ -55,7 +56,8 @@ class OperatorBase { inline const vector &Inputs() const { return inputs_; } inline const vector &Outputs() { return outputs_; } - virtual bool Run() = 0; + // Run Op asynchronously (depends on device), return a future if not nullptr. + virtual bool Run(StatsFuture *future) = 0; inline const OperatorDef &debug_def() const { MACE_CHECK(has_debug_def(), "operator_def was null!"); @@ -100,7 +102,7 @@ class Operator : public OperatorBase { } } } - virtual bool Run() override = 0; + virtual bool Run(StatsFuture *future) override = 0; ~Operator() noexcept override {} }; diff --git a/mace/core/runtime/opencl/cl.hpp b/mace/core/runtime/opencl/cl.hpp deleted file mode 100644 index 38fac1962aaba67df827261cdc7227418aadcaea..0000000000000000000000000000000000000000 --- a/mace/core/runtime/opencl/cl.hpp +++ /dev/null @@ -1,12452 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2008-2013 The Khronos Group Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and/or associated documentation files (the - * "Materials"), to deal in the Materials without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Materials, and to - * permit persons to whom the Materials are furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Materials. - * - * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. - ******************************************************************************/ - -/*! \file - * - * \brief C++ bindings for OpenCL 1.0 (rev 48), OpenCL 1.1 (rev 33) and - * OpenCL 1.2 (rev 15) - * \author Benedict R. Gaster, Laurent Morichetti and Lee Howes - * - * Additions and fixes from: - * Brian Cole, March 3rd 2010 and April 2012 - * Matt Gruenke, April 2012. - * Bruce Merry, February 2013. - * Tom Deakin and Simon McIntosh-Smith, July 2013 - * - * \version 1.2.6 - * \date August 2013 - * - * Optional extension support - * - * cl - * cl_ext_device_fission - * #define USE_CL_DEVICE_FISSION - */ - -/*! \mainpage - * \section intro Introduction - * For many large applications C++ is the language of choice and so it seems - * reasonable to define C++ bindings for OpenCL. 
- * - * - * The interface is contained with a single C++ header file \em cl.hpp and all - * definitions are contained within the namespace \em cl. There is no additional - * requirement to include \em cl.h and to use either the C++ or original C - * bindings it is enough to simply include \em cl.hpp. - * - * The bindings themselves are lightweight and correspond closely to the - * underlying C API. Using the C++ bindings introduces no additional execution - * overhead. - * - * For detail documentation on the bindings see: - * - * The OpenCL C++ Wrapper API 1.2 (revision 09) - * http://www.khronos.org/registry/cl/specs/opencl-cplusplus-1.2.pdf - * - * \section example Example - * - * The following example shows a general use case for the C++ - * bindings, including support for the optional exception feature and - * also the supplied vector and string classes, see following sections for - * decriptions of these features. - * - * \code - * #define __CL_ENABLE_EXCEPTIONS - * - * #if defined(__APPLE__) || defined(__MACOSX) - * #include - * #else - * #include - * #endif - * #include - * #include - * #include - * - * const char * helloStr = "__kernel void " - * "hello(void) " - * "{ " - * " " - * "} "; - * - * int - * main(void) - * { - * cl_int err = CL_SUCCESS; - * try { - * - * std::vector platforms; - * cl::Platform::get(&platforms); - * if (platforms.size() == 0) { - * std::cout << "Platform size 0\n"; - * return -1; - * } - * - * cl_context_properties properties[] = - * { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0}; - * cl::Context context(CL_DEVICE_TYPE_CPU, properties); - * - * std::vector devices = context.getInfo(); - * - * cl::Program::Sources source(1, - * std::make_pair(helloStr,strlen(helloStr))); - * cl::Program program_ = cl::Program(context, source); - * program_.build(devices); - * - * cl::Kernel kernel(program_, "hello", &err); - * - * cl::Event event; - * cl::CommandQueue queue(context, devices[0], 0, &err); - * 
queue.enqueueNDRangeKernel( - * kernel, - * cl::NullRange, - * cl::NDRange(4,4), - * cl::NullRange, - * NULL, - * &event); - * - * event.wait(); - * } - * catch (cl::Error err) { - * std::cerr - * << "ERROR: " - * << err.what() - * << "(" - * << err.err() - * << ")" - * << std::endl; - * } - * - * return EXIT_SUCCESS; - * } - * - * \endcode - * - */ -#ifndef CL_HPP_ -#define CL_HPP_ - -#ifdef _WIN32 - -#include -#include -#include -#include - -#if defined(__CL_ENABLE_EXCEPTIONS) -#include -#endif // #if defined(__CL_ENABLE_EXCEPTIONS) - -#pragma push_macro("max") -#undef max -#if defined(USE_DX_INTEROP) -#include -#include -#endif -#endif // _WIN32 - -// -#if defined(USE_CL_DEVICE_FISSION) -#include -#endif - -#if defined(__APPLE__) || defined(__MACOSX) -#include -#include -#include -#else -#include -#include -#endif // !__APPLE__ - -// To avoid accidentally taking ownership of core OpenCL types -// such as cl_kernel constructors are made explicit -// under OpenCL 1.2 -#if defined(CL_VERSION_1_2) && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) -#define __CL_EXPLICIT_CONSTRUCTORS explicit -#else // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) -#define __CL_EXPLICIT_CONSTRUCTORS -#endif // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - -// Define deprecated prefixes and suffixes to ensure compilation -// in case they are not pre-defined -#if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) -#define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED -#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) -#if !defined(CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED) -#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED -#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) - -#if !defined(CL_CALLBACK) -#define CL_CALLBACK -#endif //CL_CALLBACK - -#include -#include - -#if !defined(__NO_STD_VECTOR) -#include -#endif - -#if !defined(__NO_STD_STRING) -#include -#endif - -#if defined(linux) || defined(__APPLE__) || defined(__MACOSX) -#include - -#include -#include -#endif // linux - 
-#include - - -/*! \namespace cl - * - * \brief The OpenCL C++ bindings are defined within this namespace. - * - */ -namespace cl { - -class Memory; - -/** - * Deprecated APIs for 1.2 - */ -#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) -#define __INIT_CL_EXT_FCN_PTR(name) \ - if(!pfn_##name) { \ - pfn_##name = (PFN_##name) \ - clGetExtensionFunctionAddress(#name); \ - if(!pfn_##name) { \ - } \ - } -#endif // #if defined(CL_VERSION_1_1) - -#if defined(CL_VERSION_1_2) -#define __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, name) \ - if(!pfn_##name) { \ - pfn_##name = (PFN_##name) \ - clGetExtensionFunctionAddressForPlatform(platform, #name); \ - if(!pfn_##name) { \ - } \ - } -#endif // #if defined(CL_VERSION_1_1) - -class Program; -class Device; -class Context; -class CommandQueue; -class Memory; -class Buffer; - -#if defined(__CL_ENABLE_EXCEPTIONS) -/*! \brief Exception class - * - * This may be thrown by API functions when __CL_ENABLE_EXCEPTIONS is defined. - */ -class Error : public std::exception -{ -private: - cl_int err_; - const char * errStr_; -public: - /*! \brief Create a new CL error exception for a given error code - * and corresponding message. - * - * \param err error code value. - * - * \param errStr a descriptive string that must remain in scope until - * handling of the exception has concluded. If set, it - * will be returned by what(). - */ - Error(cl_int err, const char * errStr = NULL) : err_(err), errStr_(errStr) - {} - - ~Error() throw() {} - - /*! \brief Get error string associated with exception - * - * \return A memory pointer to the error message string. - */ - virtual const char * what() const throw () - { - if (errStr_ == NULL) { - return "empty"; - } - else { - return errStr_; - } - } - - /*! \brief Get error code associated with exception - * - * \return The error code. 
- */ - cl_int err(void) const { return err_; } -}; - -#define __ERR_STR(x) #x -#else -#define __ERR_STR(x) NULL -#endif // __CL_ENABLE_EXCEPTIONS - - -namespace detail -{ -#if defined(__CL_ENABLE_EXCEPTIONS) -static inline cl_int errHandler ( - cl_int err, - const char * errStr = NULL) -{ - if (err != CL_SUCCESS) { - throw Error(err, errStr); - } - return err; -} -#else -static inline cl_int errHandler (cl_int err, const char * errStr = NULL) -{ - (void) errStr; // suppress unused variable warning - return err; -} -#endif // __CL_ENABLE_EXCEPTIONS -} - - - -//! \cond DOXYGEN_DETAIL -#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) -#define __GET_DEVICE_INFO_ERR __ERR_STR(clGetDeviceInfo) -#define __GET_PLATFORM_INFO_ERR __ERR_STR(clGetPlatformInfo) -#define __GET_DEVICE_IDS_ERR __ERR_STR(clGetDeviceIDs) -#define __GET_PLATFORM_IDS_ERR __ERR_STR(clGetPlatformIDs) -#define __GET_CONTEXT_INFO_ERR __ERR_STR(clGetContextInfo) -#define __GET_EVENT_INFO_ERR __ERR_STR(clGetEventInfo) -#define __GET_EVENT_PROFILE_INFO_ERR __ERR_STR(clGetEventProfileInfo) -#define __GET_MEM_OBJECT_INFO_ERR __ERR_STR(clGetMemObjectInfo) -#define __GET_IMAGE_INFO_ERR __ERR_STR(clGetImageInfo) -#define __GET_SAMPLER_INFO_ERR __ERR_STR(clGetSamplerInfo) -#define __GET_KERNEL_INFO_ERR __ERR_STR(clGetKernelInfo) -#if defined(CL_VERSION_1_2) -#define __GET_KERNEL_ARG_INFO_ERR __ERR_STR(clGetKernelArgInfo) -#endif // #if defined(CL_VERSION_1_2) -#define __GET_KERNEL_WORK_GROUP_INFO_ERR __ERR_STR(clGetKernelWorkGroupInfo) -#define __GET_PROGRAM_INFO_ERR __ERR_STR(clGetProgramInfo) -#define __GET_PROGRAM_BUILD_INFO_ERR __ERR_STR(clGetProgramBuildInfo) -#define __GET_COMMAND_QUEUE_INFO_ERR __ERR_STR(clGetCommandQueueInfo) - -#define __CREATE_CONTEXT_ERR __ERR_STR(clCreateContext) -#define __CREATE_CONTEXT_FROM_TYPE_ERR __ERR_STR(clCreateContextFromType) -#define __GET_SUPPORTED_IMAGE_FORMATS_ERR __ERR_STR(clGetSupportedImageFormats) - -#define __CREATE_BUFFER_ERR __ERR_STR(clCreateBuffer) -#define 
__COPY_ERR __ERR_STR(cl::copy) -#define __CREATE_SUBBUFFER_ERR __ERR_STR(clCreateSubBuffer) -#define __CREATE_GL_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer) -#define __CREATE_GL_RENDER_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer) -#define __GET_GL_OBJECT_INFO_ERR __ERR_STR(clGetGLObjectInfo) -#if defined(CL_VERSION_1_2) -#define __CREATE_IMAGE_ERR __ERR_STR(clCreateImage) -#define __CREATE_GL_TEXTURE_ERR __ERR_STR(clCreateFromGLTexture) -#define __IMAGE_DIMENSION_ERR __ERR_STR(Incorrect image dimensions) -#endif // #if defined(CL_VERSION_1_2) -#define __CREATE_SAMPLER_ERR __ERR_STR(clCreateSampler) -#define __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR __ERR_STR(clSetMemObjectDestructorCallback) - -#define __CREATE_USER_EVENT_ERR __ERR_STR(clCreateUserEvent) -#define __SET_USER_EVENT_STATUS_ERR __ERR_STR(clSetUserEventStatus) -#define __SET_EVENT_CALLBACK_ERR __ERR_STR(clSetEventCallback) -#define __WAIT_FOR_EVENTS_ERR __ERR_STR(clWaitForEvents) - -#define __CREATE_KERNEL_ERR __ERR_STR(clCreateKernel) -#define __SET_KERNEL_ARGS_ERR __ERR_STR(clSetKernelArg) -#define __CREATE_PROGRAM_WITH_SOURCE_ERR __ERR_STR(clCreateProgramWithSource) -#define __CREATE_PROGRAM_WITH_BINARY_ERR __ERR_STR(clCreateProgramWithBinary) -#if defined(CL_VERSION_1_2) -#define __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR __ERR_STR(clCreateProgramWithBuiltInKernels) -#endif // #if defined(CL_VERSION_1_2) -#define __BUILD_PROGRAM_ERR __ERR_STR(clBuildProgram) -#if defined(CL_VERSION_1_2) -#define __COMPILE_PROGRAM_ERR __ERR_STR(clCompileProgram) - -#endif // #if defined(CL_VERSION_1_2) -#define __CREATE_KERNELS_IN_PROGRAM_ERR __ERR_STR(clCreateKernelsInProgram) - -#define __CREATE_COMMAND_QUEUE_ERR __ERR_STR(clCreateCommandQueue) -#define __SET_COMMAND_QUEUE_PROPERTY_ERR __ERR_STR(clSetCommandQueueProperty) -#define __ENQUEUE_READ_BUFFER_ERR __ERR_STR(clEnqueueReadBuffer) -#define __ENQUEUE_READ_BUFFER_RECT_ERR __ERR_STR(clEnqueueReadBufferRect) -#define __ENQUEUE_WRITE_BUFFER_ERR 
__ERR_STR(clEnqueueWriteBuffer) -#define __ENQUEUE_WRITE_BUFFER_RECT_ERR __ERR_STR(clEnqueueWriteBufferRect) -#define __ENQEUE_COPY_BUFFER_ERR __ERR_STR(clEnqueueCopyBuffer) -#define __ENQEUE_COPY_BUFFER_RECT_ERR __ERR_STR(clEnqueueCopyBufferRect) -#define __ENQUEUE_FILL_BUFFER_ERR __ERR_STR(clEnqueueFillBuffer) -#define __ENQUEUE_READ_IMAGE_ERR __ERR_STR(clEnqueueReadImage) -#define __ENQUEUE_WRITE_IMAGE_ERR __ERR_STR(clEnqueueWriteImage) -#define __ENQUEUE_COPY_IMAGE_ERR __ERR_STR(clEnqueueCopyImage) -#define __ENQUEUE_FILL_IMAGE_ERR __ERR_STR(clEnqueueFillImage) -#define __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR __ERR_STR(clEnqueueCopyImageToBuffer) -#define __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR __ERR_STR(clEnqueueCopyBufferToImage) -#define __ENQUEUE_MAP_BUFFER_ERR __ERR_STR(clEnqueueMapBuffer) -#define __ENQUEUE_MAP_IMAGE_ERR __ERR_STR(clEnqueueMapImage) -#define __ENQUEUE_UNMAP_MEM_OBJECT_ERR __ERR_STR(clEnqueueUnMapMemObject) -#define __ENQUEUE_NDRANGE_KERNEL_ERR __ERR_STR(clEnqueueNDRangeKernel) -#define __ENQUEUE_TASK_ERR __ERR_STR(clEnqueueTask) -#define __ENQUEUE_NATIVE_KERNEL __ERR_STR(clEnqueueNativeKernel) -#if defined(CL_VERSION_1_2) -#define __ENQUEUE_MIGRATE_MEM_OBJECTS_ERR __ERR_STR(clEnqueueMigrateMemObjects) -#endif // #if defined(CL_VERSION_1_2) - -#define __ENQUEUE_ACQUIRE_GL_ERR __ERR_STR(clEnqueueAcquireGLObjects) -#define __ENQUEUE_RELEASE_GL_ERR __ERR_STR(clEnqueueReleaseGLObjects) - - -#define __RETAIN_ERR __ERR_STR(Retain Object) -#define __RELEASE_ERR __ERR_STR(Release Object) -#define __FLUSH_ERR __ERR_STR(clFlush) -#define __FINISH_ERR __ERR_STR(clFinish) -#define __VECTOR_CAPACITY_ERR __ERR_STR(Vector capacity error) - -/** - * CL 1.2 version that uses device fission. 
- */ -#if defined(CL_VERSION_1_2) -#define __CREATE_SUB_DEVICES __ERR_STR(clCreateSubDevices) -#else -#define __CREATE_SUB_DEVICES __ERR_STR(clCreateSubDevicesEXT) -#endif // #if defined(CL_VERSION_1_2) - -/** - * Deprecated APIs for 1.2 - */ -#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) -#define __ENQUEUE_MARKER_ERR __ERR_STR(clEnqueueMarker) -#define __ENQUEUE_WAIT_FOR_EVENTS_ERR __ERR_STR(clEnqueueWaitForEvents) -#define __ENQUEUE_BARRIER_ERR __ERR_STR(clEnqueueBarrier) -#define __UNLOAD_COMPILER_ERR __ERR_STR(clUnloadCompiler) -#define __CREATE_GL_TEXTURE_2D_ERR __ERR_STR(clCreateFromGLTexture2D) -#define __CREATE_GL_TEXTURE_3D_ERR __ERR_STR(clCreateFromGLTexture3D) -#define __CREATE_IMAGE2D_ERR __ERR_STR(clCreateImage2D) -#define __CREATE_IMAGE3D_ERR __ERR_STR(clCreateImage3D) -#endif // #if defined(CL_VERSION_1_1) - -#endif // __CL_USER_OVERRIDE_ERROR_STRINGS -//! \endcond - -/** - * CL 1.2 marker and barrier commands - */ -#if defined(CL_VERSION_1_2) -#define __ENQUEUE_MARKER_WAIT_LIST_ERR __ERR_STR(clEnqueueMarkerWithWaitList) -#define __ENQUEUE_BARRIER_WAIT_LIST_ERR __ERR_STR(clEnqueueBarrierWithWaitList) -#endif // #if defined(CL_VERSION_1_2) - -#if !defined(__USE_DEV_STRING) && !defined(__NO_STD_STRING) -typedef std::string STRING_CLASS; -#elif !defined(__USE_DEV_STRING) - -/*! \class string - * \brief Simple string class, that provides a limited subset of std::string - * functionality but avoids many of the issues that come with that class. - - * \note Deprecated. Please use std::string as default or - * re-define the string class to match the std::string - * interface by defining STRING_CLASS - */ -class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED string CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED -{ -private: - ::size_t size_; - char * str_; -public: - //! \brief Constructs an empty string, allocating no memory. - string(void) : size_(0), str_(NULL) - { - } - - /*! 
\brief Constructs a string populated from an arbitrary value of - * specified size. - * - * An extra '\0' is added, in case none was contained in str. - * - * \param str the initial value of the string instance. Note that '\0' - * characters receive no special treatment. If NULL, - * the string is left empty, with a size of 0. - * - * \param size the number of characters to copy from str. - */ - string(const char * str, ::size_t size) : - size_(size), - str_(NULL) - { - if( size > 0 ) { - str_ = new char[size_+1]; - if (str_ != NULL) { - memcpy(str_, str, size_ * sizeof(char)); - str_[size_] = '\0'; - } - else { - size_ = 0; - } - } - } - - /*! \brief Constructs a string populated from a null-terminated value. - * - * \param str the null-terminated initial value of the string instance. - * If NULL, the string is left empty, with a size of 0. - */ - string(const char * str) : - size_(0), - str_(NULL) - { - if( str ) { - size_= ::strlen(str); - } - if( size_ > 0 ) { - str_ = new char[size_ + 1]; - if (str_ != NULL) { - memcpy(str_, str, (size_ + 1) * sizeof(char)); - } - } - } - - void resize( ::size_t n ) - { - if( size_ == n ) { - return; - } - if (n == 0) { - if( str_ ) { - delete [] str_; - } - str_ = NULL; - size_ = 0; - } - else { - char *newString = new char[n + 1]; - int copySize = n; - if( size_ < n ) { - copySize = size_; - } - size_ = n; - - if(str_) { - memcpy(newString, str_, (copySize + 1) * sizeof(char)); - } - if( copySize < size_ ) { - memset(newString + copySize, 0, size_ - copySize); - } - newString[size_] = '\0'; - - delete [] str_; - str_ = newString; - } - } - - const char& operator[] ( ::size_t pos ) const - { - return str_[pos]; - } - - char& operator[] ( ::size_t pos ) - { - return str_[pos]; - } - - /*! \brief Copies the value of another string to this one. - * - * \param rhs the string to copy. - * - * \returns a reference to the modified instance. 
- */ - string& operator=(const string& rhs) - { - if (this == &rhs) { - return *this; - } - - if( str_ != NULL ) { - delete [] str_; - str_ = NULL; - size_ = 0; - } - - if (rhs.size_ == 0 || rhs.str_ == NULL) { - str_ = NULL; - size_ = 0; - } - else { - str_ = new char[rhs.size_ + 1]; - size_ = rhs.size_; - - if (str_ != NULL) { - memcpy(str_, rhs.str_, (size_ + 1) * sizeof(char)); - } - else { - size_ = 0; - } - } - - return *this; - } - - /*! \brief Constructs a string by copying the value of another instance. - * - * \param rhs the string to copy. - */ - string(const string& rhs) : - size_(0), - str_(NULL) - { - *this = rhs; - } - - //! \brief Destructor - frees memory used to hold the current value. - ~string() - { - delete[] str_; - str_ = NULL; - } - - //! \brief Queries the length of the string, excluding any added '\0's. - ::size_t size(void) const { return size_; } - - //! \brief Queries the length of the string, excluding any added '\0's. - ::size_t length(void) const { return size(); } - - /*! \brief Returns a pointer to the private copy held by this instance, - * or "" if empty/unset. - */ - const char * c_str(void) const { return (str_) ? str_ : "";} -}; -typedef cl::string STRING_CLASS; -#endif // #elif !defined(__USE_DEV_STRING) - -#if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR) -#define VECTOR_CLASS std::vector -#elif !defined(__USE_DEV_VECTOR) -#define VECTOR_CLASS cl::vector - -#if !defined(__MAX_DEFAULT_VECTOR_SIZE) -#define __MAX_DEFAULT_VECTOR_SIZE 10 -#endif - -/*! \class vector - * \brief Fixed sized vector implementation that mirroring - * - * \note Deprecated. Please use std::vector as default or - * re-define the vector class to match the std::vector - * interface by defining VECTOR_CLASS - - * \note Not recommended for use with custom objects as - * current implementation will construct N elements - * - * std::vector functionality. - * \brief Fixed sized vector compatible with std::vector. 
- * - * \note - * This differs from std::vector<> not just in memory allocation, - * but also in terms of when members are constructed, destroyed, - * and assigned instead of being copy constructed. - * - * \param T type of element contained in the vector. - * - * \param N maximum size of the vector. - */ -template -class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED vector CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED -{ -private: - T data_[N]; - unsigned int size_; - -public: - //! \brief Constructs an empty vector with no memory allocated. - vector() : - size_(static_cast(0)) - {} - - //! \brief Deallocates the vector's memory and destroys all of its elements. - ~vector() - { - clear(); - } - - //! \brief Returns the number of elements currently contained. - unsigned int size(void) const - { - return size_; - } - - /*! \brief Empties the vector of all elements. - * \note - * This does not deallocate memory but will invoke destructors - * on contained elements. - */ - void clear() - { - while(!empty()) { - pop_back(); - } - } - - /*! \brief Appends an element after the last valid element. - * Calling this on a vector that has reached capacity will throw an - * exception if exceptions are enabled. - */ - void push_back (const T& x) - { - if (size() < N) { - new (&data_[size_]) T(x); - size_++; - } else { - detail::errHandler(CL_MEM_OBJECT_ALLOCATION_FAILURE, __VECTOR_CAPACITY_ERR); - } - } - - /*! \brief Removes the last valid element from the vector. - * Calling this on an empty vector will throw an exception - * if exceptions are enabled. - */ - void pop_back(void) - { - if (size_ != 0) { - --size_; - data_[size_].~T(); - } else { - detail::errHandler(CL_MEM_OBJECT_ALLOCATION_FAILURE, __VECTOR_CAPACITY_ERR); - } - } - - /*! \brief Constructs with a value copied from another. - * - * \param vec the vector to copy. - */ - vector(const vector& vec) : - size_(vec.size_) - { - if (size_ != 0) { - assign(vec.begin(), vec.end()); - } - } - - /*! 
\brief Constructs with a specified number of initial elements. - * - * \param size number of initial elements. - * - * \param val value of initial elements. - */ - vector(unsigned int size, const T& val = T()) : - size_(0) - { - for (unsigned int i = 0; i < size; i++) { - push_back(val); - } - } - - /*! \brief Overwrites the current content with that copied from another - * instance. - * - * \param rhs vector to copy. - * - * \returns a reference to this. - */ - vector& operator=(const vector& rhs) - { - if (this == &rhs) { - return *this; - } - - if (rhs.size_ != 0) { - assign(rhs.begin(), rhs.end()); - } else { - clear(); - } - - return *this; - } - - /*! \brief Tests equality against another instance. - * - * \param vec the vector against which to compare. - */ - bool operator==(vector &vec) - { - if (size() != vec.size()) { - return false; - } - - for( unsigned int i = 0; i < size(); ++i ) { - if( operator[](i) != vec[i] ) { - return false; - } - } - return true; - } - - //! \brief Conversion operator to T*. - operator T* () { return data_; } - - //! \brief Conversion operator to const T*. - operator const T* () const { return data_; } - - //! \brief Tests whether this instance has any elements. - bool empty (void) const - { - return size_==0; - } - - //! \brief Returns the maximum number of elements this instance can hold. - unsigned int max_size (void) const - { - return N; - } - - //! \brief Returns the maximum number of elements this instance can hold. - unsigned int capacity () const - { - return N; - } - - /*! \brief Returns a reference to a given element. - * - * \param index which element to access. * - * \note - * The caller is responsible for ensuring index is >= 0 and < size(). - */ - T& operator[](int index) - { - return data_[index]; - } - - /*! \brief Returns a const reference to a given element. - * - * \param index which element to access. - * - * \note - * The caller is responsible for ensuring index is >= 0 and < size(). 
- */ - const T& operator[](int index) const - { - return data_[index]; - } - - /*! \brief Assigns elements of the vector based on a source iterator range. - * - * \param start Beginning iterator of source range - * \param end Enditerator of source range - * - * \note - * Will throw an exception if exceptions are enabled and size exceeded. - */ - template - void assign(I start, I end) - { - clear(); - while(start != end) { - push_back(*start); - start++; - } - } - - /*! \class iterator - * \brief Const iterator class for vectors - */ - class iterator - { - private: - const vector *vec_; - int index_; - - /** - * Internal iterator constructor to capture reference - * to the vector it iterates over rather than taking - * the vector by copy. - */ - iterator (const vector &vec, int index) : - vec_(&vec) - { - if( !vec.empty() ) { - index_ = index; - } else { - index_ = -1; - } - } - - public: - iterator(void) : - index_(-1), - vec_(NULL) - { - } - - iterator(const iterator& rhs) : - vec_(rhs.vec_), - index_(rhs.index_) - { - } - - ~iterator(void) {} - - static iterator begin(const cl::vector &vec) - { - iterator i(vec, 0); - - return i; - } - - static iterator end(const cl::vector &vec) - { - iterator i(vec, vec.size()); - - return i; - } - - bool operator==(iterator i) - { - return ((vec_ == i.vec_) && - (index_ == i.index_)); - } - - bool operator!=(iterator i) - { - return (!(*this==i)); - } - - iterator& operator++() - { - ++index_; - return *this; - } - - iterator operator++(int) - { - iterator retVal(*this); - ++index_; - return retVal; - } - - iterator& operator--() - { - --index_; - return *this; - } - - iterator operator--(int) - { - iterator retVal(*this); - --index_; - return retVal; - } - - const T& operator *() const - { - return (*vec_)[index_]; - } - }; - - iterator begin(void) - { - return iterator::begin(*this); - } - - iterator begin(void) const - { - return iterator::begin(*this); - } - - iterator end(void) - { - return iterator::end(*this); - } - - 
iterator end(void) const - { - return iterator::end(*this); - } - - T& front(void) - { - return data_[0]; - } - - T& back(void) - { - return data_[size_]; - } - - const T& front(void) const - { - return data_[0]; - } - - const T& back(void) const - { - return data_[size_-1]; - } -}; -#endif // #if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR) - - - - - -namespace detail { -#define __DEFAULT_NOT_INITIALIZED 1 -#define __DEFAULT_BEING_INITIALIZED 2 -#define __DEFAULT_INITIALIZED 4 - - /* - * Compare and exchange primitives are needed for handling of defaults - */ - inline int compare_exchange(volatile int * dest, int exchange, int comparand) - { -#ifdef _WIN32 - return (int)(InterlockedCompareExchange( - (volatile long*)dest, - (long)exchange, - (long)comparand)); -#elif defined(__APPLE__) || defined(__MACOSX) - return OSAtomicOr32Orig((uint32_t)exchange, (volatile uint32_t*)dest); -#else // !_WIN32 || defined(__APPLE__) || defined(__MACOSX) - return (__sync_val_compare_and_swap( - dest, - comparand, - exchange)); -#endif // !_WIN32 - } - - inline void fence() { _mm_mfence(); } -}; // namespace detail - - -/*! \brief class used to interface between C++ and - * OpenCL C calls that require arrays of size_t values, whose - * size is known statically. - */ -template -class size_t -{ -private: - ::size_t data_[N]; - -public: - //! \brief Initialize size_t to all 0s - size_t() - { - for( int i = 0; i < N; ++i ) { - data_[i] = 0; - } - } - - ::size_t& operator[](int index) - { - return data_[index]; - } - - const ::size_t& operator[](int index) const - { - return data_[index]; - } - - //! \brief Conversion operator to T*. - operator ::size_t* () { return data_; } - - //! \brief Conversion operator to const T*. - operator const ::size_t* () const { return data_; } -}; - -namespace detail { - -// Generic getInfoHelper. 
The final parameter is used to guide overload -// resolution: the actual parameter passed is an int, which makes this -// a worse conversion sequence than a specialization that declares the -// parameter as an int. -template -inline cl_int getInfoHelper(Functor f, cl_uint name, T* param, long) -{ - return f(name, sizeof(T), param, NULL); -} - -// Specialized getInfoHelper for VECTOR_CLASS params -template -inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS* param, long) -{ - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - T* value = (T*) alloca(required); - err = f(name, required, value, NULL); - if (err != CL_SUCCESS) { - return err; - } - - param->assign(&value[0], &value[required/sizeof(T)]); - return CL_SUCCESS; -} - -/* Specialization for reference-counted types. This depends on the - * existence of Wrapper::cl_type, and none of the other types having the - * cl_type member. Note that simplify specifying the parameter as Wrapper - * does not work, because when using a derived type (e.g. Context) the generic - * template will provide a better match. 
- */ -template -inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS* param, int, typename T::cl_type = 0) -{ - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - typename T::cl_type * value = (typename T::cl_type *) alloca(required); - err = f(name, required, value, NULL); - if (err != CL_SUCCESS) { - return err; - } - - ::size_t elements = required / sizeof(typename T::cl_type); - param->assign(&value[0], &value[elements]); - for (::size_t i = 0; i < elements; i++) - { - if (value[i] != NULL) - { - err = (*param)[i].retain(); - if (err != CL_SUCCESS) { - return err; - } - } - } - return CL_SUCCESS; -} - -// Specialized for getInfo -template -inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS* param, int) -{ - cl_int err = f(name, param->size() * sizeof(char *), &(*param)[0], NULL); - - if (err != CL_SUCCESS) { - return err; - } - - return CL_SUCCESS; -} - -// Specialized GetInfoHelper for STRING_CLASS params -template -inline cl_int getInfoHelper(Func f, cl_uint name, STRING_CLASS* param, long) -{ - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - char* value = (char*) alloca(required); - err = f(name, required, value, NULL); - if (err != CL_SUCCESS) { - return err; - } - - *param = value; - return CL_SUCCESS; -} - -// Specialized GetInfoHelper for cl::size_t params -template -inline cl_int getInfoHelper(Func f, cl_uint name, size_t* param, long) -{ - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - ::size_t* value = (::size_t*) alloca(required); - err = f(name, required, value, NULL); - if (err != CL_SUCCESS) { - return err; - } - - for(int i = 0; i < N; ++i) { - (*param)[i] = value[i]; - } - - return CL_SUCCESS; -} - -template struct ReferenceHandler; - -/* Specialization for reference-counted types. 
This depends on the - * existence of Wrapper::cl_type, and none of the other types having the - * cl_type member. Note that simplify specifying the parameter as Wrapper - * does not work, because when using a derived type (e.g. Context) the generic - * template will provide a better match. - */ -template -inline cl_int getInfoHelper(Func f, cl_uint name, T* param, int, typename T::cl_type = 0) -{ - typename T::cl_type value; - cl_int err = f(name, sizeof(value), &value, NULL); - if (err != CL_SUCCESS) { - return err; - } - *param = value; - if (value != NULL) - { - err = param->retain(); - if (err != CL_SUCCESS) { - return err; - } - } - return CL_SUCCESS; -} - -#define __PARAM_NAME_INFO_1_0(F) \ - F(cl_platform_info, CL_PLATFORM_PROFILE, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_VERSION, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_NAME, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_VENDOR, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_EXTENSIONS, STRING_CLASS) \ - \ - F(cl_device_info, CL_DEVICE_TYPE, cl_device_type) \ - F(cl_device_info, CL_DEVICE_VENDOR_ID, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_COMPUTE_UNITS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_GROUP_SIZE, ::size_t) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_SIZES, VECTOR_CLASS< ::size_t>) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint) \ - F(cl_device_info, CL_DEVICE_ADDRESS_BITS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_READ_IMAGE_ARGS, cl_uint) \ - F(cl_device_info, 
CL_DEVICE_MAX_WRITE_IMAGE_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_MEM_ALLOC_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_WIDTH, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_HEIGHT, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_WIDTH, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_HEIGHT, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_DEPTH, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE_SUPPORT, cl_bool) \ - F(cl_device_info, CL_DEVICE_MAX_PARAMETER_SIZE, ::size_t) \ - F(cl_device_info, CL_DEVICE_MAX_SAMPLERS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MEM_BASE_ADDR_ALIGN, cl_uint) \ - F(cl_device_info, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, cl_uint) \ - F(cl_device_info, CL_DEVICE_SINGLE_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, cl_device_mem_cache_type) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cl_uint)\ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_MAX_CONSTANT_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_LOCAL_MEM_TYPE, cl_device_local_mem_type) \ - F(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_ERROR_CORRECTION_SUPPORT, cl_bool) \ - F(cl_device_info, CL_DEVICE_PROFILING_TIMER_RESOLUTION, ::size_t) \ - F(cl_device_info, CL_DEVICE_ENDIAN_LITTLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_AVAILABLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_COMPILER_AVAILABLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_EXECUTION_CAPABILITIES, cl_device_exec_capabilities) \ - F(cl_device_info, CL_DEVICE_QUEUE_PROPERTIES, cl_command_queue_properties) \ - F(cl_device_info, CL_DEVICE_PLATFORM, cl_platform_id) \ - F(cl_device_info, CL_DEVICE_NAME, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_VENDOR, STRING_CLASS) \ - F(cl_device_info, CL_DRIVER_VERSION, 
STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_PROFILE, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_VERSION, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_EXTENSIONS, STRING_CLASS) \ - \ - F(cl_context_info, CL_CONTEXT_REFERENCE_COUNT, cl_uint) \ - F(cl_context_info, CL_CONTEXT_DEVICES, VECTOR_CLASS) \ - F(cl_context_info, CL_CONTEXT_PROPERTIES, VECTOR_CLASS) \ - \ - F(cl_event_info, CL_EVENT_COMMAND_QUEUE, cl::CommandQueue) \ - F(cl_event_info, CL_EVENT_COMMAND_TYPE, cl_command_type) \ - F(cl_event_info, CL_EVENT_REFERENCE_COUNT, cl_uint) \ - F(cl_event_info, CL_EVENT_COMMAND_EXECUTION_STATUS, cl_uint) \ - \ - F(cl_profiling_info, CL_PROFILING_COMMAND_QUEUED, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_SUBMIT, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_START, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_END, cl_ulong) \ - \ - F(cl_mem_info, CL_MEM_TYPE, cl_mem_object_type) \ - F(cl_mem_info, CL_MEM_FLAGS, cl_mem_flags) \ - F(cl_mem_info, CL_MEM_SIZE, ::size_t) \ - F(cl_mem_info, CL_MEM_HOST_PTR, void*) \ - F(cl_mem_info, CL_MEM_MAP_COUNT, cl_uint) \ - F(cl_mem_info, CL_MEM_REFERENCE_COUNT, cl_uint) \ - F(cl_mem_info, CL_MEM_CONTEXT, cl::Context) \ - \ - F(cl_image_info, CL_IMAGE_FORMAT, cl_image_format) \ - F(cl_image_info, CL_IMAGE_ELEMENT_SIZE, ::size_t) \ - F(cl_image_info, CL_IMAGE_ROW_PITCH, ::size_t) \ - F(cl_image_info, CL_IMAGE_SLICE_PITCH, ::size_t) \ - F(cl_image_info, CL_IMAGE_WIDTH, ::size_t) \ - F(cl_image_info, CL_IMAGE_HEIGHT, ::size_t) \ - F(cl_image_info, CL_IMAGE_DEPTH, ::size_t) \ - \ - F(cl_sampler_info, CL_SAMPLER_REFERENCE_COUNT, cl_uint) \ - F(cl_sampler_info, CL_SAMPLER_CONTEXT, cl::Context) \ - F(cl_sampler_info, CL_SAMPLER_NORMALIZED_COORDS, cl_addressing_mode) \ - F(cl_sampler_info, CL_SAMPLER_ADDRESSING_MODE, cl_filter_mode) \ - F(cl_sampler_info, CL_SAMPLER_FILTER_MODE, cl_bool) \ - \ - F(cl_program_info, CL_PROGRAM_REFERENCE_COUNT, cl_uint) \ - F(cl_program_info, CL_PROGRAM_CONTEXT, cl::Context) \ 
- F(cl_program_info, CL_PROGRAM_NUM_DEVICES, cl_uint) \ - F(cl_program_info, CL_PROGRAM_DEVICES, VECTOR_CLASS) \ - F(cl_program_info, CL_PROGRAM_SOURCE, STRING_CLASS) \ - F(cl_program_info, CL_PROGRAM_BINARY_SIZES, VECTOR_CLASS< ::size_t>) \ - F(cl_program_info, CL_PROGRAM_BINARIES, VECTOR_CLASS) \ - \ - F(cl_program_build_info, CL_PROGRAM_BUILD_STATUS, cl_build_status) \ - F(cl_program_build_info, CL_PROGRAM_BUILD_OPTIONS, STRING_CLASS) \ - F(cl_program_build_info, CL_PROGRAM_BUILD_LOG, STRING_CLASS) \ - \ - F(cl_kernel_info, CL_KERNEL_FUNCTION_NAME, STRING_CLASS) \ - F(cl_kernel_info, CL_KERNEL_NUM_ARGS, cl_uint) \ - F(cl_kernel_info, CL_KERNEL_REFERENCE_COUNT, cl_uint) \ - F(cl_kernel_info, CL_KERNEL_CONTEXT, cl::Context) \ - F(cl_kernel_info, CL_KERNEL_PROGRAM, cl::Program) \ - \ - F(cl_kernel_work_group_info, CL_KERNEL_WORK_GROUP_SIZE, ::size_t) \ - F(cl_kernel_work_group_info, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, cl::size_t<3>) \ - F(cl_kernel_work_group_info, CL_KERNEL_LOCAL_MEM_SIZE, cl_ulong) \ - \ - F(cl_command_queue_info, CL_QUEUE_CONTEXT, cl::Context) \ - F(cl_command_queue_info, CL_QUEUE_DEVICE, cl::Device) \ - F(cl_command_queue_info, CL_QUEUE_REFERENCE_COUNT, cl_uint) \ - F(cl_command_queue_info, CL_QUEUE_PROPERTIES, cl_command_queue_properties) - -#if defined(CL_VERSION_1_1) -#define __PARAM_NAME_INFO_1_1(F) \ - F(cl_context_info, CL_CONTEXT_NUM_DEVICES, cl_uint)\ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, cl_uint) \ - F(cl_device_info, CL_DEVICE_DOUBLE_FP_CONFIG, 
cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_HOST_UNIFIED_MEMORY, cl_bool) \ - F(cl_device_info, CL_DEVICE_OPENCL_C_VERSION, STRING_CLASS) \ - \ - F(cl_mem_info, CL_MEM_ASSOCIATED_MEMOBJECT, cl::Memory) \ - F(cl_mem_info, CL_MEM_OFFSET, ::size_t) \ - \ - F(cl_kernel_work_group_info, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, ::size_t) \ - F(cl_kernel_work_group_info, CL_KERNEL_PRIVATE_MEM_SIZE, cl_ulong) \ - \ - F(cl_event_info, CL_EVENT_CONTEXT, cl::Context) -#endif // CL_VERSION_1_1 - - -#if defined(CL_VERSION_1_2) -#define __PARAM_NAME_INFO_1_2(F) \ - F(cl_image_info, CL_IMAGE_BUFFER, cl::Buffer) \ - \ - F(cl_program_info, CL_PROGRAM_NUM_KERNELS, ::size_t) \ - F(cl_program_info, CL_PROGRAM_KERNEL_NAMES, STRING_CLASS) \ - \ - F(cl_program_build_info, CL_PROGRAM_BINARY_TYPE, cl_program_binary_type) \ - \ - F(cl_kernel_info, CL_KERNEL_ATTRIBUTES, STRING_CLASS) \ - \ - F(cl_kernel_arg_info, CL_KERNEL_ARG_ADDRESS_QUALIFIER, cl_kernel_arg_address_qualifier) \ - F(cl_kernel_arg_info, CL_KERNEL_ARG_ACCESS_QUALIFIER, cl_kernel_arg_access_qualifier) \ - F(cl_kernel_arg_info, CL_KERNEL_ARG_TYPE_NAME, STRING_CLASS) \ - F(cl_kernel_arg_info, CL_KERNEL_ARG_NAME, STRING_CLASS) \ - \ - F(cl_device_info, CL_DEVICE_PARENT_DEVICE, cl_device_id) \ - F(cl_device_info, CL_DEVICE_PARTITION_PROPERTIES, VECTOR_CLASS) \ - F(cl_device_info, CL_DEVICE_PARTITION_TYPE, VECTOR_CLASS) \ - F(cl_device_info, CL_DEVICE_REFERENCE_COUNT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, ::size_t) \ - F(cl_device_info, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, cl_device_affinity_domain) \ - F(cl_device_info, CL_DEVICE_BUILT_IN_KERNELS, STRING_CLASS) -#endif // #if defined(CL_VERSION_1_2) - -#if defined(USE_CL_DEVICE_FISSION) -#define __PARAM_NAME_DEVICE_FISSION(F) \ - F(cl_device_info, CL_DEVICE_PARENT_DEVICE_EXT, cl_device_id) \ - F(cl_device_info, CL_DEVICE_PARTITION_TYPES_EXT, VECTOR_CLASS) \ - 
F(cl_device_info, CL_DEVICE_AFFINITY_DOMAINS_EXT, VECTOR_CLASS) \ - F(cl_device_info, CL_DEVICE_REFERENCE_COUNT_EXT , cl_uint) \ - F(cl_device_info, CL_DEVICE_PARTITION_STYLE_EXT, VECTOR_CLASS) -#endif // USE_CL_DEVICE_FISSION - -template -struct param_traits {}; - -#define __CL_DECLARE_PARAM_TRAITS(token, param_name, T) \ -struct token; \ -template<> \ -struct param_traits \ -{ \ - enum { value = param_name }; \ - typedef T param_type; \ -}; - -__PARAM_NAME_INFO_1_0(__CL_DECLARE_PARAM_TRAITS) -#if defined(CL_VERSION_1_1) -__PARAM_NAME_INFO_1_1(__CL_DECLARE_PARAM_TRAITS) -#endif // CL_VERSION_1_1 -#if defined(CL_VERSION_1_2) -__PARAM_NAME_INFO_1_2(__CL_DECLARE_PARAM_TRAITS) -#endif // CL_VERSION_1_1 - -#if defined(USE_CL_DEVICE_FISSION) -__PARAM_NAME_DEVICE_FISSION(__CL_DECLARE_PARAM_TRAITS); -#endif // USE_CL_DEVICE_FISSION - -#ifdef CL_PLATFORM_ICD_SUFFIX_KHR -__CL_DECLARE_PARAM_TRAITS(cl_platform_info, CL_PLATFORM_ICD_SUFFIX_KHR, STRING_CLASS) -#endif - -#ifdef CL_DEVICE_PROFILING_TIMER_OFFSET_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, cl_ulong) -#endif - -#ifdef CL_DEVICE_GLOBAL_FREE_MEMORY_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_FREE_MEMORY_AMD, VECTOR_CLASS< ::size_t>) -#endif -#ifdef CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_SIMD_WIDTH_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_WIDTH_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_WAVEFRONT_WIDTH_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_WAVEFRONT_WIDTH_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD 
-__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_LOCAL_MEM_BANKS_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_LOCAL_MEM_BANKS_AMD, cl_uint) -#endif - -#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, cl_uint) -#endif -#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, cl_uint) -#endif -#ifdef CL_DEVICE_REGISTERS_PER_BLOCK_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_REGISTERS_PER_BLOCK_NV, cl_uint) -#endif -#ifdef CL_DEVICE_WARP_SIZE_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_WARP_SIZE_NV, cl_uint) -#endif -#ifdef CL_DEVICE_GPU_OVERLAP_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GPU_OVERLAP_NV, cl_bool) -#endif -#ifdef CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, cl_bool) -#endif -#ifdef CL_DEVICE_INTEGRATED_MEMORY_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_INTEGRATED_MEMORY_NV, cl_bool) -#endif - -// Convenience functions - -template -inline cl_int -getInfo(Func f, cl_uint name, T* param) -{ - return getInfoHelper(f, name, param, 0); -} - -template -struct GetInfoFunctor0 -{ - Func f_; const Arg0& arg0_; - cl_int operator ()( - cl_uint param, ::size_t size, void* value, ::size_t* size_ret) - { return f_(arg0_, param, size, value, size_ret); } -}; - -template -struct GetInfoFunctor1 -{ - Func f_; const Arg0& arg0_; const Arg1& arg1_; - cl_int operator ()( - cl_uint param, ::size_t size, void* value, ::size_t* 
size_ret) - { return f_(arg0_, arg1_, param, size, value, size_ret); } -}; - -template -inline cl_int -getInfo(Func f, const Arg0& arg0, cl_uint name, T* param) -{ - GetInfoFunctor0 f0 = { f, arg0 }; - return getInfoHelper(f0, name, param, 0); -} - -template -inline cl_int -getInfo(Func f, const Arg0& arg0, const Arg1& arg1, cl_uint name, T* param) -{ - GetInfoFunctor1 f0 = { f, arg0, arg1 }; - return getInfoHelper(f0, name, param, 0); -} - -template -struct ReferenceHandler -{ }; - -#if defined(CL_VERSION_1_2) -/** - * OpenCL 1.2 devices do have retain/release. - */ -template <> -struct ReferenceHandler -{ - /** - * Retain the device. - * \param device A valid device created using createSubDevices - * \return - * CL_SUCCESS if the function executed successfully. - * CL_INVALID_DEVICE if device was not a valid subdevice - * CL_OUT_OF_RESOURCES - * CL_OUT_OF_HOST_MEMORY - */ - static cl_int retain(cl_device_id device) - { return ::clRetainDevice(device); } - /** - * Retain the device. - * \param device A valid device created using createSubDevices - * \return - * CL_SUCCESS if the function executed successfully. - * CL_INVALID_DEVICE if device was not a valid subdevice - * CL_OUT_OF_RESOURCES - * CL_OUT_OF_HOST_MEMORY - */ - static cl_int release(cl_device_id device) - { return ::clReleaseDevice(device); } -}; -#else // #if defined(CL_VERSION_1_2) -/** - * OpenCL 1.1 devices do not have retain/release. - */ -template <> -struct ReferenceHandler -{ - // cl_device_id does not have retain(). - static cl_int retain(cl_device_id) - { return CL_SUCCESS; } - // cl_device_id does not have release(). - static cl_int release(cl_device_id) - { return CL_SUCCESS; } -}; -#endif // #if defined(CL_VERSION_1_2) - -template <> -struct ReferenceHandler -{ - // cl_platform_id does not have retain(). - static cl_int retain(cl_platform_id) - { return CL_SUCCESS; } - // cl_platform_id does not have release(). 
- static cl_int release(cl_platform_id) - { return CL_SUCCESS; } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_context context) - { return ::clRetainContext(context); } - static cl_int release(cl_context context) - { return ::clReleaseContext(context); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_command_queue queue) - { return ::clRetainCommandQueue(queue); } - static cl_int release(cl_command_queue queue) - { return ::clReleaseCommandQueue(queue); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_mem memory) - { return ::clRetainMemObject(memory); } - static cl_int release(cl_mem memory) - { return ::clReleaseMemObject(memory); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_sampler sampler) - { return ::clRetainSampler(sampler); } - static cl_int release(cl_sampler sampler) - { return ::clReleaseSampler(sampler); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_program program) - { return ::clRetainProgram(program); } - static cl_int release(cl_program program) - { return ::clReleaseProgram(program); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_kernel kernel) - { return ::clRetainKernel(kernel); } - static cl_int release(cl_kernel kernel) - { return ::clReleaseKernel(kernel); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_event event) - { return ::clRetainEvent(event); } - static cl_int release(cl_event event) - { return ::clReleaseEvent(event); } -}; - - -// Extracts version number with major in the upper 16 bits, minor in the lower 16 -static cl_uint getVersion(const char *versionInfo) -{ - int highVersion = 0; - int lowVersion = 0; - int index = 7; - while(versionInfo[index] != '.' 
) { - highVersion *= 10; - highVersion += versionInfo[index]-'0'; - ++index; - } - ++index; - while(versionInfo[index] != ' ' ) { - lowVersion *= 10; - lowVersion += versionInfo[index]-'0'; - ++index; - } - return (highVersion << 16) | lowVersion; -} - -static cl_uint getPlatformVersion(cl_platform_id platform) -{ - ::size_t size = 0; - clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 0, NULL, &size); - char *versionInfo = (char *) alloca(size); - clGetPlatformInfo(platform, CL_PLATFORM_VERSION, size, &versionInfo[0], &size); - return getVersion(versionInfo); -} - -static cl_uint getDevicePlatformVersion(cl_device_id device) -{ - cl_platform_id platform; - clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), &platform, NULL); - return getPlatformVersion(platform); -} - -#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) -static cl_uint getContextPlatformVersion(cl_context context) -{ - // The platform cannot be queried directly, so we first have to grab a - // device and obtain its context - ::size_t size = 0; - clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size); - if (size == 0) - return 0; - cl_device_id *devices = (cl_device_id *) alloca(size); - clGetContextInfo(context, CL_CONTEXT_DEVICES, size, devices, NULL); - return getDevicePlatformVersion(devices[0]); -} -#endif // #if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - -template -class Wrapper -{ -public: - typedef T cl_type; - -protected: - cl_type object_; - -public: - Wrapper() : object_(NULL) { } - - Wrapper(const cl_type &obj) : object_(obj) { } - - ~Wrapper() - { - if (object_ != NULL) { release(); } - } - - Wrapper(const Wrapper& rhs) - { - object_ = rhs.object_; - if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } - } - - Wrapper& operator = (const Wrapper& rhs) - { - if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } - object_ = rhs.object_; - if (object_ != NULL) { detail::errHandler(retain(), 
__RETAIN_ERR); } - return *this; - } - - Wrapper& operator = (const cl_type &rhs) - { - if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } - object_ = rhs; - return *this; - } - - cl_type operator ()() const { return object_; } - - cl_type& operator ()() { return object_; } - -protected: - template - friend inline cl_int getInfoHelper(Func, cl_uint, U*, int, typename U::cl_type); - - cl_int retain() const - { - return ReferenceHandler::retain(object_); - } - - cl_int release() const - { - return ReferenceHandler::release(object_); - } -}; - -template <> -class Wrapper -{ -public: - typedef cl_device_id cl_type; - -protected: - cl_type object_; - bool referenceCountable_; - - static bool isReferenceCountable(cl_device_id device) - { - bool retVal = false; - if (device != NULL) { - int version = getDevicePlatformVersion(device); - if(version > ((1 << 16) + 1)) { - retVal = true; - } - } - return retVal; - } - -public: - Wrapper() : object_(NULL), referenceCountable_(false) - { - } - - Wrapper(const cl_type &obj) : object_(obj), referenceCountable_(false) - { - referenceCountable_ = isReferenceCountable(obj); - } - - ~Wrapper() - { - if (object_ != NULL) { release(); } - } - - Wrapper(const Wrapper& rhs) - { - object_ = rhs.object_; - referenceCountable_ = isReferenceCountable(object_); - if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } - } - - Wrapper& operator = (const Wrapper& rhs) - { - if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } - object_ = rhs.object_; - referenceCountable_ = rhs.referenceCountable_; - if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } - return *this; - } - - Wrapper& operator = (const cl_type &rhs) - { - if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } - object_ = rhs; - referenceCountable_ = isReferenceCountable(object_); - return *this; - } - - cl_type operator ()() const { return object_; } - - cl_type& operator ()() { return object_; } 
- -protected: - template - friend inline cl_int getInfoHelper(Func, cl_uint, U*, int, typename U::cl_type); - - template - friend inline cl_int getInfoHelper(Func, cl_uint, VECTOR_CLASS*, int, typename U::cl_type); - - cl_int retain() const - { - if( referenceCountable_ ) { - return ReferenceHandler::retain(object_); - } - else { - return CL_SUCCESS; - } - } - - cl_int release() const - { - if( referenceCountable_ ) { - return ReferenceHandler::release(object_); - } - else { - return CL_SUCCESS; - } - } -}; - -} // namespace detail -//! \endcond - -/*! \stuct ImageFormat - * \brief Adds constructors and member functions for cl_image_format. - * - * \see cl_image_format - */ -struct ImageFormat : public cl_image_format -{ - //! \brief Default constructor - performs no initialization. - ImageFormat(){} - - //! \brief Initializing constructor. - ImageFormat(cl_channel_order order, cl_channel_type type) - { - image_channel_order = order; - image_channel_data_type = type; - } - - //! \brief Assignment operator. - ImageFormat& operator = (const ImageFormat& rhs) - { - if (this != &rhs) { - this->image_channel_data_type = rhs.image_channel_data_type; - this->image_channel_order = rhs.image_channel_order; - } - return *this; - } -}; - -/*! \brief Class interface for cl_device_id. - * - * \note Copies of these objects are inexpensive, since they don't 'own' - * any underlying resources or data structures. - * - * \see cl_device_id - */ -class Device : public detail::Wrapper -{ -public: - //! \brief Default constructor - initializes to NULL. - Device() : detail::Wrapper() { } - - /*! \brief Copy constructor. - * - * This simply copies the device ID value, which is an inexpensive operation. - */ - Device(const Device& device) : detail::Wrapper(device) { } - - /*! \brief Constructor from cl_device_id. - * - * This simply copies the device ID value, which is an inexpensive operation. - */ - Device(const cl_device_id &device) : detail::Wrapper(device) { } - - /*! 
\brief Returns the first device on the default context. - * - * \see Context::getDefault() - */ - static Device getDefault(cl_int * err = NULL); - - /*! \brief Assignment operator from Device. - * - * This simply copies the device ID value, which is an inexpensive operation. - */ - Device& operator = (const Device& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - /*! \brief Assignment operator from cl_device_id. - * - * This simply copies the device ID value, which is an inexpensive operation. - */ - Device& operator = (const cl_device_id& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - //! \brief Wrapper for clGetDeviceInfo(). - template - cl_int getInfo(cl_device_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetDeviceInfo, object_, name, param), - __GET_DEVICE_INFO_ERR); - } - - //! \brief Wrapper for clGetDeviceInfo() that returns by value. - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_device_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - /** - * CL 1.2 version - */ -#if defined(CL_VERSION_1_2) - //! \brief Wrapper for clCreateSubDevicesEXT(). 
- cl_int createSubDevices( - const cl_device_partition_property * properties, - VECTOR_CLASS* devices) - { - cl_uint n = 0; - cl_int err = clCreateSubDevices(object_, properties, 0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_SUB_DEVICES); - } - - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); - err = clCreateSubDevices(object_, properties, n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_SUB_DEVICES); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } -#endif // #if defined(CL_VERSION_1_2) - -/** - * CL 1.1 version that uses device fission. - */ -#if defined(CL_VERSION_1_1) -#if defined(USE_CL_DEVICE_FISSION) - cl_int createSubDevices( - const cl_device_partition_property_ext * properties, - VECTOR_CLASS* devices) - { - typedef CL_API_ENTRY cl_int - ( CL_API_CALL * PFN_clCreateSubDevicesEXT)( - cl_device_id /*in_device*/, - const cl_device_partition_property_ext * /* properties */, - cl_uint /*num_entries*/, - cl_device_id * /*out_devices*/, - cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; - - static PFN_clCreateSubDevicesEXT pfn_clCreateSubDevicesEXT = NULL; - __INIT_CL_EXT_FCN_PTR(clCreateSubDevicesEXT); - - cl_uint n = 0; - cl_int err = pfn_clCreateSubDevicesEXT(object_, properties, 0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_SUB_DEVICES); - } - - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); - err = pfn_clCreateSubDevicesEXT(object_, properties, n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_SUB_DEVICES); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } -#endif // #if defined(USE_CL_DEVICE_FISSION) -#endif // #if defined(CL_VERSION_1_1) -}; - -/*! \brief Class interface for cl_platform_id. - * - * \note Copies of these objects are inexpensive, since they don't 'own' - * any underlying resources or data structures. 
- * - * \see cl_platform_id - */ -class Platform : public detail::Wrapper -{ -public: - //! \brief Default constructor - initializes to NULL. - Platform() : detail::Wrapper() { } - - /*! \brief Copy constructor. - * - * This simply copies the platform ID value, which is an inexpensive operation. - */ - Platform(const Platform& platform) : detail::Wrapper(platform) { } - - /*! \brief Constructor from cl_platform_id. - * - * This simply copies the platform ID value, which is an inexpensive operation. - */ - Platform(const cl_platform_id &platform) : detail::Wrapper(platform) { } - - /*! \brief Assignment operator from Platform. - * - * This simply copies the platform ID value, which is an inexpensive operation. - */ - Platform& operator = (const Platform& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - /*! \brief Assignment operator from cl_platform_id. - * - * This simply copies the platform ID value, which is an inexpensive operation. - */ - Platform& operator = (const cl_platform_id& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - //! \brief Wrapper for clGetPlatformInfo(). - cl_int getInfo(cl_platform_info name, STRING_CLASS* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetPlatformInfo, object_, name, param), - __GET_PLATFORM_INFO_ERR); - } - - //! \brief Wrapper for clGetPlatformInfo() that returns by value. - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_platform_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - /*! \brief Gets a list of devices for this platform. - * - * Wraps clGetDeviceIDs(). 
- */ - cl_int getDevices( - cl_device_type type, - VECTOR_CLASS* devices) const - { - cl_uint n = 0; - if( devices == NULL ) { - return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_DEVICE_IDS_ERR); - } - cl_int err = ::clGetDeviceIDs(object_, type, 0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); - err = ::clGetDeviceIDs(object_, type, n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } - -#if defined(USE_DX_INTEROP) - /*! \brief Get the list of available D3D10 devices. - * - * \param d3d_device_source. - * - * \param d3d_object. - * - * \param d3d_device_set. - * - * \param devices returns a vector of OpenCL D3D10 devices found. The cl::Device - * values returned in devices can be used to identify a specific OpenCL - * device. If \a devices argument is NULL, this argument is ignored. - * - * \return One of the following values: - * - CL_SUCCESS if the function is executed successfully. - * - * The application can query specific capabilities of the OpenCL device(s) - * returned by cl::getDevices. This can be used by the application to - * determine which device(s) to use. - * - * \note In the case that exceptions are enabled and a return value - * other than CL_SUCCESS is generated, then cl::Error exception is - * generated. 
- */ - cl_int getDevices( - cl_d3d10_device_source_khr d3d_device_source, - void * d3d_object, - cl_d3d10_device_set_khr d3d_device_set, - VECTOR_CLASS* devices) const - { - typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clGetDeviceIDsFromD3D10KHR)( - cl_platform_id platform, - cl_d3d10_device_source_khr d3d_device_source, - void * d3d_object, - cl_d3d10_device_set_khr d3d_device_set, - cl_uint num_entries, - cl_device_id * devices, - cl_uint* num_devices); - - if( devices == NULL ) { - return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_DEVICE_IDS_ERR); - } - - static PFN_clGetDeviceIDsFromD3D10KHR pfn_clGetDeviceIDsFromD3D10KHR = NULL; - __INIT_CL_EXT_FCN_PTR_PLATFORM(object_, clGetDeviceIDsFromD3D10KHR); - - cl_uint n = 0; - cl_int err = pfn_clGetDeviceIDsFromD3D10KHR( - object_, - d3d_device_source, - d3d_object, - d3d_device_set, - 0, - NULL, - &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); - err = pfn_clGetDeviceIDsFromD3D10KHR( - object_, - d3d_device_source, - d3d_object, - d3d_device_set, - n, - ids, - NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } -#endif - - /*! \brief Gets a list of available platforms. - * - * Wraps clGetPlatformIDs(). 
- */ - static cl_int get( - VECTOR_CLASS* platforms) - { - cl_uint n = 0; - - if( platforms == NULL ) { - return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_PLATFORM_IDS_ERR); - } - - cl_int err = ::clGetPlatformIDs(0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - cl_platform_id* ids = (cl_platform_id*) alloca( - n * sizeof(cl_platform_id)); - err = ::clGetPlatformIDs(n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - platforms->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } - - /*! \brief Gets the first available platform. - * - * Wraps clGetPlatformIDs(), returning the first result. - */ - static cl_int get( - Platform * platform) - { - cl_uint n = 0; - - if( platform == NULL ) { - return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_PLATFORM_IDS_ERR); - } - - cl_int err = ::clGetPlatformIDs(0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - cl_platform_id* ids = (cl_platform_id*) alloca( - n * sizeof(cl_platform_id)); - err = ::clGetPlatformIDs(n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - *platform = ids[0]; - return CL_SUCCESS; - } - - /*! \brief Gets the first available platform, returning it by value. - * - * Wraps clGetPlatformIDs(), returning the first result. 
- */ - static Platform get( - cl_int * errResult = NULL) - { - Platform platform; - cl_uint n = 0; - cl_int err = ::clGetPlatformIDs(0, NULL, &n); - if (err != CL_SUCCESS) { - detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - if (errResult != NULL) { - *errResult = err; - } - } - - cl_platform_id* ids = (cl_platform_id*) alloca( - n * sizeof(cl_platform_id)); - err = ::clGetPlatformIDs(n, ids, NULL); - - if (err != CL_SUCCESS) { - detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - if (errResult != NULL) { - *errResult = err; - } - - return ids[0]; - } - - static Platform getDefault( - cl_int *errResult = NULL ) - { - return get(errResult); - } - - -#if defined(CL_VERSION_1_2) - //! \brief Wrapper for clUnloadCompiler(). - cl_int - unloadCompiler() - { - return ::clUnloadPlatformCompiler(object_); - } -#endif // #if defined(CL_VERSION_1_2) -}; // class Platform - -/** - * Deprecated APIs for 1.2 - */ -#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) -/** - * Unload the OpenCL compiler. - * \note Deprecated for OpenCL 1.2. Use Platform::unloadCompiler instead. - */ -inline CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int -UnloadCompiler() CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; -inline cl_int -UnloadCompiler() -{ - return ::clUnloadCompiler(); -} -#endif // #if defined(CL_VERSION_1_1) - -/*! \brief Class interface for cl_context. - * - * \note Copies of these objects are shallow, meaning that the copy will refer - * to the same underlying cl_context as the original. For details, see - * clRetainContext() and clReleaseContext(). - * - * \see cl_context - */ -class Context - : public detail::Wrapper -{ -private: - static volatile int default_initialized_; - static Context default_; - static volatile cl_int default_error_; -public: - /*! \brief Destructor. - * - * This calls clReleaseContext() on the value held by this instance. - */ - ~Context() { } - - /*! 
\brief Constructs a context including a list of specified devices. - * - * Wraps clCreateContext(). - */ - Context( - const VECTOR_CLASS& devices, - cl_context_properties* properties = NULL, - void (CL_CALLBACK * notifyFptr)( - const char *, - const void *, - ::size_t, - void *) = NULL, - void* data = NULL, - cl_int* err = NULL) - { - cl_int error; - - ::size_t numDevices = devices.size(); - cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); - for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { - deviceIDs[deviceIndex] = (devices[deviceIndex])(); - } - - object_ = ::clCreateContext( - properties, (cl_uint) numDevices, - deviceIDs, - notifyFptr, data, &error); - - detail::errHandler(error, __CREATE_CONTEXT_ERR); - if (err != NULL) { - *err = error; - } - } - - Context( - const Device& device, - cl_context_properties* properties = NULL, - void (CL_CALLBACK * notifyFptr)( - const char *, - const void *, - ::size_t, - void *) = NULL, - void* data = NULL, - cl_int* err = NULL) - { - cl_int error; - - cl_device_id deviceID = device(); - - object_ = ::clCreateContext( - properties, 1, - &deviceID, - notifyFptr, data, &error); - - detail::errHandler(error, __CREATE_CONTEXT_ERR); - if (err != NULL) { - *err = error; - } - } - - /*! \brief Constructs a context including all or a subset of devices of a specified type. - * - * Wraps clCreateContextFromType(). 
- */ - Context( - cl_device_type type, - cl_context_properties* properties = NULL, - void (CL_CALLBACK * notifyFptr)( - const char *, - const void *, - ::size_t, - void *) = NULL, - void* data = NULL, - cl_int* err = NULL) - { - cl_int error; - -#if !defined(__APPLE__) || !defined(__MACOS) - cl_context_properties prop[4] = {CL_CONTEXT_PLATFORM, 0, 0, 0 }; - - if (properties == NULL) { - // Get a valid platform ID as we cannot send in a blank one - VECTOR_CLASS platforms; - error = Platform::get(&platforms); - if (error != CL_SUCCESS) { - detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != NULL) { - *err = error; - } - return; - } - - // Check the platforms we found for a device of our specified type - cl_context_properties platform_id = 0; - for (unsigned int i = 0; i < platforms.size(); i++) { - - VECTOR_CLASS devices; - -#if defined(__CL_ENABLE_EXCEPTIONS) - try { -#endif - - error = platforms[i].getDevices(type, &devices); - -#if defined(__CL_ENABLE_EXCEPTIONS) - } catch (Error) {} - // Catch if exceptions are enabled as we don't want to exit if first platform has no devices of type - // We do error checking next anyway, and can throw there if needed -#endif - - // Only squash CL_SUCCESS and CL_DEVICE_NOT_FOUND - if (error != CL_SUCCESS && error != CL_DEVICE_NOT_FOUND) { - detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != NULL) { - *err = error; - } - } - - if (devices.size() > 0) { - platform_id = (cl_context_properties)platforms[i](); - break; - } - } - - if (platform_id == 0) { - detail::errHandler(CL_DEVICE_NOT_FOUND, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != NULL) { - *err = CL_DEVICE_NOT_FOUND; - } - return; - } - - prop[1] = platform_id; - properties = &prop[0]; - } -#endif - object_ = ::clCreateContextFromType( - properties, type, notifyFptr, data, &error); - - detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != NULL) { - *err = error; - } - } - - /*! 
\brief Returns a singleton context including all devices of CL_DEVICE_TYPE_DEFAULT. - * - * \note All calls to this function return the same cl_context as the first. - */ - static Context getDefault(cl_int * err = NULL) - { - int state = detail::compare_exchange( - &default_initialized_, - __DEFAULT_BEING_INITIALIZED, __DEFAULT_NOT_INITIALIZED); - - if (state & __DEFAULT_INITIALIZED) { - if (err != NULL) { - *err = default_error_; - } - return default_; - } - - if (state & __DEFAULT_BEING_INITIALIZED) { - // Assume writes will propagate eventually... - while(default_initialized_ != __DEFAULT_INITIALIZED) { - detail::fence(); - } - - if (err != NULL) { - *err = default_error_; - } - return default_; - } - - cl_int error; - default_ = Context( - CL_DEVICE_TYPE_DEFAULT, - NULL, - NULL, - NULL, - &error); - - detail::fence(); - - default_error_ = error; - // Assume writes will propagate eventually... - default_initialized_ = __DEFAULT_INITIALIZED; - - detail::fence(); - - if (err != NULL) { - *err = default_error_; - } - return default_; - - } - - //! \brief Default constructor - initializes to NULL. - Context() : detail::Wrapper() { } - - /*! \brief Copy constructor. - * - * This calls clRetainContext() on the parameter's cl_context. - */ - Context(const Context& context) : detail::Wrapper(context) { } - - /*! \brief Constructor from cl_context - takes ownership. - * - * This effectively transfers ownership of a refcount on the cl_context - * into the new Context object. - */ - __CL_EXPLICIT_CONSTRUCTORS Context(const cl_context& context) : detail::Wrapper(context) { } - - /*! \brief Assignment operator from Context. - * - * This calls clRetainContext() on the parameter and clReleaseContext() on - * the previous value held by this instance. - */ - Context& operator = (const Context& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - /*! \brief Assignment operator from cl_context - takes ownership. 
- * - * This effectively transfers ownership of a refcount on the rhs and calls - * clReleaseContext() on the value previously held by this instance. - */ - Context& operator = (const cl_context& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - //! \brief Wrapper for clGetContextInfo(). - template - cl_int getInfo(cl_context_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetContextInfo, object_, name, param), - __GET_CONTEXT_INFO_ERR); - } - - //! \brief Wrapper for clGetContextInfo() that returns by value. - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_context_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - /*! \brief Gets a list of supported image formats. - * - * Wraps clGetSupportedImageFormats(). - */ - cl_int getSupportedImageFormats( - cl_mem_flags flags, - cl_mem_object_type type, - VECTOR_CLASS* formats) const - { - cl_uint numEntries; - cl_int err = ::clGetSupportedImageFormats( - object_, - flags, - type, - 0, - NULL, - &numEntries); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); - } - - ImageFormat* value = (ImageFormat*) - alloca(numEntries * sizeof(ImageFormat)); - err = ::clGetSupportedImageFormats( - object_, - flags, - type, - numEntries, - (cl_image_format*) value, - NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); - } - - formats->assign(&value[0], &value[numEntries]); - return CL_SUCCESS; - } -}; - -inline Device Device::getDefault(cl_int * err) -{ - cl_int error; - Device device; - - Context context = Context::getDefault(&error); - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - - if (error != CL_SUCCESS) { - if (err != NULL) { - *err = error; - } - } - else { - device = context.getInfo()[0]; - if (err 
!= NULL) { - *err = CL_SUCCESS; - } - } - - return device; -} - - -#ifdef _WIN32 -__declspec(selectany) volatile int Context::default_initialized_ = __DEFAULT_NOT_INITIALIZED; -__declspec(selectany) Context Context::default_; -__declspec(selectany) volatile cl_int Context::default_error_ = CL_SUCCESS; -#else -__attribute__((weak)) volatile int Context::default_initialized_ = __DEFAULT_NOT_INITIALIZED; -__attribute__((weak)) Context Context::default_; -__attribute__((weak)) volatile cl_int Context::default_error_ = CL_SUCCESS; -#endif - -/*! \brief Class interface for cl_event. - * - * \note Copies of these objects are shallow, meaning that the copy will refer - * to the same underlying cl_event as the original. For details, see - * clRetainEvent() and clReleaseEvent(). - * - * \see cl_event - */ -class Event : public detail::Wrapper -{ -public: - /*! \brief Destructor. - * - * This calls clReleaseEvent() on the value held by this instance. - */ - ~Event() { } - - //! \brief Default constructor - initializes to NULL. - Event() : detail::Wrapper() { } - - /*! \brief Copy constructor. - * - * This calls clRetainEvent() on the parameter's cl_event. - */ - Event(const Event& event) : detail::Wrapper(event) { } - - /*! \brief Constructor from cl_event - takes ownership. - * - * This effectively transfers ownership of a refcount on the cl_event - * into the new Event object. - */ - Event(const cl_event& event) : detail::Wrapper(event) { } - - /*! \brief Assignment operator from cl_event - takes ownership. - * - * This effectively transfers ownership of a refcount on the rhs and calls - * clReleaseEvent() on the value previously held by this instance. - */ - Event& operator = (const Event& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - /*! \brief Assignment operator from cl_event. - * - * This calls clRetainEvent() on the parameter and clReleaseEvent() on - * the previous value held by this instance. 
- */ - Event& operator = (const cl_event& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - //! \brief Wrapper for clGetEventInfo(). - template - cl_int getInfo(cl_event_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetEventInfo, object_, name, param), - __GET_EVENT_INFO_ERR); - } - - //! \brief Wrapper for clGetEventInfo() that returns by value. - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_event_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - //! \brief Wrapper for clGetEventProfilingInfo(). - template - cl_int getProfilingInfo(cl_profiling_info name, T* param) const - { - return detail::errHandler(detail::getInfo( - &::clGetEventProfilingInfo, object_, name, param), - __GET_EVENT_PROFILE_INFO_ERR); - } - - //! \brief Wrapper for clGetEventProfilingInfo() that returns by value. - template typename - detail::param_traits::param_type - getProfilingInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_profiling_info, name>::param_type param; - cl_int result = getProfilingInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - /*! \brief Blocks the calling thread until this event completes. - * - * Wraps clWaitForEvents(). - */ - cl_int wait() const - { - return detail::errHandler( - ::clWaitForEvents(1, &object_), - __WAIT_FOR_EVENTS_ERR); - } - -#if defined(CL_VERSION_1_1) - /*! \brief Registers a user callback function for a specific command execution status. - * - * Wraps clSetEventCallback(). - */ - cl_int setCallback( - cl_int type, - void (CL_CALLBACK * pfn_notify)(cl_event, cl_int, void *), - void * user_data = NULL) - { - return detail::errHandler( - ::clSetEventCallback( - object_, - type, - pfn_notify, - user_data), - __SET_EVENT_CALLBACK_ERR); - } -#endif - - /*! 
\brief Blocks the calling thread until every event specified is complete. - * - * Wraps clWaitForEvents(). - */ - static cl_int - waitForEvents(const VECTOR_CLASS& events) - { - return detail::errHandler( - ::clWaitForEvents( - (cl_uint) events.size(), (cl_event*)&events.front()), - __WAIT_FOR_EVENTS_ERR); - } -}; - -#if defined(CL_VERSION_1_1) -/*! \brief Class interface for user events (a subset of cl_event's). - * - * See Event for details about copy semantics, etc. - */ -class UserEvent : public Event -{ -public: - /*! \brief Constructs a user event on a given context. - * - * Wraps clCreateUserEvent(). - */ - UserEvent( - const Context& context, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateUserEvent( - context(), - &error); - - detail::errHandler(error, __CREATE_USER_EVENT_ERR); - if (err != NULL) { - *err = error; - } - } - - //! \brief Default constructor - initializes to NULL. - UserEvent() : Event() { } - - //! \brief Copy constructor - performs shallow copy. - UserEvent(const UserEvent& event) : Event(event) { } - - //! \brief Assignment Operator - performs shallow copy. - UserEvent& operator = (const UserEvent& rhs) - { - if (this != &rhs) { - Event::operator=(rhs); - } - return *this; - } - - /*! \brief Sets the execution status of a user event object. - * - * Wraps clSetUserEventStatus(). - */ - cl_int setStatus(cl_int status) - { - return detail::errHandler( - ::clSetUserEventStatus(object_,status), - __SET_USER_EVENT_STATUS_ERR); - } -}; -#endif - -/*! \brief Blocks the calling thread until every event specified is complete. - * - * Wraps clWaitForEvents(). - */ -inline static cl_int -WaitForEvents(const VECTOR_CLASS& events) -{ - return detail::errHandler( - ::clWaitForEvents( - (cl_uint) events.size(), (cl_event*)&events.front()), - __WAIT_FOR_EVENTS_ERR); -} - -/*! \brief Class interface for cl_mem. 
- * - * \note Copies of these objects are shallow, meaning that the copy will refer - * to the same underlying cl_mem as the original. For details, see - * clRetainMemObject() and clReleaseMemObject(). - * - * \see cl_mem - */ -class Memory : public detail::Wrapper -{ -public: - - /*! \brief Destructor. - * - * This calls clReleaseMemObject() on the value held by this instance. - */ - ~Memory() {} - - //! \brief Default constructor - initializes to NULL. - Memory() : detail::Wrapper() { } - - /*! \brief Copy constructor - performs shallow copy. - * - * This calls clRetainMemObject() on the parameter's cl_mem. - */ - Memory(const Memory& memory) : detail::Wrapper(memory) { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * This effectively transfers ownership of a refcount on the cl_mem - * into the new Memory object. - */ - __CL_EXPLICIT_CONSTRUCTORS Memory(const cl_mem& memory) : detail::Wrapper(memory) { } - - /*! \brief Assignment operator from Memory. - * - * This calls clRetainMemObject() on the parameter and clReleaseMemObject() - * on the previous value held by this instance. - */ - Memory& operator = (const Memory& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - /*! \brief Assignment operator from cl_mem - takes ownership. - * - * This effectively transfers ownership of a refcount on the rhs and calls - * clReleaseMemObject() on the value previously held by this instance. - */ - Memory& operator = (const cl_mem& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - //! \brief Wrapper for clGetMemObjectInfo(). - template - cl_int getInfo(cl_mem_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetMemObjectInfo, object_, name, param), - __GET_MEM_OBJECT_INFO_ERR); - } - - //! \brief Wrapper for clGetMemObjectInfo() that returns by value. 
- template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_mem_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - -#if defined(CL_VERSION_1_1) - /*! \brief Registers a callback function to be called when the memory object - * is no longer needed. - * - * Wraps clSetMemObjectDestructorCallback(). - * - * Repeated calls to this function, for a given cl_mem value, will append - * to the list of functions called (in reverse order) when memory object's - * resources are freed and the memory object is deleted. - * - * \note - * The registered callbacks are associated with the underlying cl_mem - * value - not the Memory class instance. - */ - cl_int setDestructorCallback( - void (CL_CALLBACK * pfn_notify)(cl_mem, void *), - void * user_data = NULL) - { - return detail::errHandler( - ::clSetMemObjectDestructorCallback( - object_, - pfn_notify, - user_data), - __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR); - } -#endif - -}; - -// Pre-declare copy functions -class Buffer; -template< typename IteratorType > -cl_int copy( IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ); -template< typename IteratorType > -cl_int copy( const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ); -template< typename IteratorType > -cl_int copy( const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ); -template< typename IteratorType > -cl_int copy( const CommandQueue &queue, const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ); - - -/*! \brief Class interface for Buffer Memory Objects. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Buffer : public Memory -{ -public: - - /*! \brief Constructs a Buffer in a specified context. - * - * Wraps clCreateBuffer(). 
- * - * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was - * specified. Note alignment & exclusivity requirements. - */ - Buffer( - const Context& context, - cl_mem_flags flags, - ::size_t size, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - /*! \brief Constructs a Buffer in the default context. - * - * Wraps clCreateBuffer(). - * - * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was - * specified. Note alignment & exclusivity requirements. - * - * \see Context::getDefault() - */ - Buffer( - cl_mem_flags flags, - ::size_t size, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - - Context context = Context::getDefault(err); - - object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - /*! - * \brief Construct a Buffer from a host container via iterators. - * IteratorType must be random access. - * If useHostPtr is specified iterators must represent contiguous data. 
- */ - template< typename IteratorType > - Buffer( - IteratorType startIterator, - IteratorType endIterator, - bool readOnly, - bool useHostPtr = false, - cl_int* err = NULL) - { - typedef typename std::iterator_traits::value_type DataType; - cl_int error; - - cl_mem_flags flags = 0; - if( readOnly ) { - flags |= CL_MEM_READ_ONLY; - } - else { - flags |= CL_MEM_READ_WRITE; - } - if( useHostPtr ) { - flags |= CL_MEM_USE_HOST_PTR; - } - - ::size_t size = sizeof(DataType)*(endIterator - startIterator); - - Context context = Context::getDefault(err); - - if( useHostPtr ) { - object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); - } else { - object_ = ::clCreateBuffer(context(), flags, size, 0, &error); - } - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - - if( !useHostPtr ) { - error = cl::copy(startIterator, endIterator, *this); - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - } - - /*! - * \brief Construct a Buffer from a host container via iterators using a specified context. - * IteratorType must be random access. - * If useHostPtr is specified iterators must represent contiguous data. - */ - template< typename IteratorType > - Buffer(const Context &context, IteratorType startIterator, IteratorType endIterator, - bool readOnly, bool useHostPtr = false, cl_int* err = NULL); - - //! \brief Default constructor - initializes to NULL. - Buffer() : Memory() { } - - /*! \brief Copy constructor - performs shallow copy. - * - * See Memory for further details. - */ - Buffer(const Buffer& buffer) : Memory(buffer) { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS Buffer(const cl_mem& buffer) : Memory(buffer) { } - - /*! \brief Assignment from Buffer - performs shallow copy. - * - * See Memory for further details. 
- */ - Buffer& operator = (const Buffer& rhs) - { - if (this != &rhs) { - Memory::operator=(rhs); - } - return *this; - } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Buffer& operator = (const cl_mem& rhs) - { - Memory::operator=(rhs); - return *this; - } - -#if defined(CL_VERSION_1_1) - /*! \brief Creates a new buffer object from this. - * - * Wraps clCreateSubBuffer(). - */ - Buffer createSubBuffer( - cl_mem_flags flags, - cl_buffer_create_type buffer_create_type, - const void * buffer_create_info, - cl_int * err = NULL) - { - Buffer result; - cl_int error; - result.object_ = ::clCreateSubBuffer( - object_, - flags, - buffer_create_type, - buffer_create_info, - &error); - - detail::errHandler(error, __CREATE_SUBBUFFER_ERR); - if (err != NULL) { - *err = error; - } - - return result; - } -#endif -}; - -#if defined (USE_DX_INTEROP) -/*! \brief Class interface for creating OpenCL buffers from ID3D10Buffer's. - * - * This is provided to facilitate interoperability with Direct3D. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class BufferD3D10 : public Buffer -{ -public: - typedef CL_API_ENTRY cl_mem (CL_API_CALL *PFN_clCreateFromD3D10BufferKHR)( - cl_context context, cl_mem_flags flags, ID3D10Buffer* buffer, - cl_int* errcode_ret); - - /*! \brief Constructs a BufferD3D10, in a specified context, from a - * given ID3D10Buffer. - * - * Wraps clCreateFromD3D10BufferKHR(). 
- */ - BufferD3D10( - const Context& context, - cl_mem_flags flags, - ID3D10Buffer* bufobj, - cl_int * err = NULL) - { - static PFN_clCreateFromD3D10BufferKHR pfn_clCreateFromD3D10BufferKHR = NULL; - -#if defined(CL_VERSION_1_2) - vector props = context.getInfo(); - cl_platform platform = -1; - for( int i = 0; i < props.size(); ++i ) { - if( props[i] == CL_CONTEXT_PLATFORM ) { - platform = props[i+1]; - } - } - __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clCreateFromD3D10BufferKHR); -#endif -#if defined(CL_VERSION_1_1) - __INIT_CL_EXT_FCN_PTR(clCreateFromD3D10BufferKHR); -#endif - - cl_int error; - object_ = pfn_clCreateFromD3D10BufferKHR( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - //! \brief Default constructor - initializes to NULL. - BufferD3D10() : Buffer() { } - - /*! \brief Copy constructor - performs shallow copy. - * - * See Memory for further details. - */ - BufferD3D10(const BufferD3D10& buffer) : Buffer(buffer) { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS BufferD3D10(const cl_mem& buffer) : Buffer(buffer) { } - - /*! \brief Assignment from BufferD3D10 - performs shallow copy. - * - * See Memory for further details. - */ - BufferD3D10& operator = (const BufferD3D10& rhs) - { - if (this != &rhs) { - Buffer::operator=(rhs); - } - return *this; - } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - BufferD3D10& operator = (const cl_mem& rhs) - { - Buffer::operator=(rhs); - return *this; - } -}; -#endif - -/*! \brief Class interface for GL Buffer Memory Objects. - * - * This is provided to facilitate interoperability with OpenGL. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class BufferGL : public Buffer -{ -public: - /*! 
\brief Constructs a BufferGL in a specified context, from a given - * GL buffer. - * - * Wraps clCreateFromGLBuffer(). - */ - BufferGL( - const Context& context, - cl_mem_flags flags, - GLuint bufobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLBuffer( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - //! \brief Default constructor - initializes to NULL. - BufferGL() : Buffer() { } - - /*! \brief Copy constructor - performs shallow copy. - * - * See Memory for further details. - */ - BufferGL(const BufferGL& buffer) : Buffer(buffer) { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS BufferGL(const cl_mem& buffer) : Buffer(buffer) { } - - /*! \brief Assignment from BufferGL - performs shallow copy. - * - * See Memory for further details. - */ - BufferGL& operator = (const BufferGL& rhs) - { - if (this != &rhs) { - Buffer::operator=(rhs); - } - return *this; - } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - BufferGL& operator = (const cl_mem& rhs) - { - Buffer::operator=(rhs); - return *this; - } - - //! \brief Wrapper for clGetGLObjectInfo(). - cl_int getObjectInfo( - cl_gl_object_type *type, - GLuint * gl_object_name) - { - return detail::errHandler( - ::clGetGLObjectInfo(object_,type,gl_object_name), - __GET_GL_OBJECT_INFO_ERR); - } -}; - -/*! \brief Class interface for GL Render Buffer Memory Objects. - * - * This is provided to facilitate interoperability with OpenGL. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class BufferRenderGL : public Buffer -{ -public: - /*! \brief Constructs a BufferRenderGL in a specified context, from a given - * GL Renderbuffer. - * - * Wraps clCreateFromGLRenderbuffer(). 
- */ - BufferRenderGL( - const Context& context, - cl_mem_flags flags, - GLuint bufobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLRenderbuffer( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_RENDER_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - //! \brief Default constructor - initializes to NULL. - BufferRenderGL() : Buffer() { } - - /*! \brief Copy constructor - performs shallow copy. - * - * See Memory for further details. - */ - BufferRenderGL(const BufferGL& buffer) : Buffer(buffer) { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS BufferRenderGL(const cl_mem& buffer) : Buffer(buffer) { } - - /*! \brief Assignment from BufferGL - performs shallow copy. - * - * See Memory for further details. - */ - BufferRenderGL& operator = (const BufferRenderGL& rhs) - { - if (this != &rhs) { - Buffer::operator=(rhs); - } - return *this; - } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - BufferRenderGL& operator = (const cl_mem& rhs) - { - Buffer::operator=(rhs); - return *this; - } - - //! \brief Wrapper for clGetGLObjectInfo(). - cl_int getObjectInfo( - cl_gl_object_type *type, - GLuint * gl_object_name) - { - return detail::errHandler( - ::clGetGLObjectInfo(object_,type,gl_object_name), - __GET_GL_OBJECT_INFO_ERR); - } -}; - -/*! \brief C++ base class for Image Memory objects. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Image : public Memory -{ -protected: - //! \brief Default constructor - initializes to NULL. - Image() : Memory() { } - - /*! \brief Copy constructor - performs shallow copy. - * - * See Memory for further details. - */ - Image(const Image& image) : Memory(image) { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. 
- */ - __CL_EXPLICIT_CONSTRUCTORS Image(const cl_mem& image) : Memory(image) { } - - /*! \brief Assignment from Image - performs shallow copy. - * - * See Memory for further details. - */ - Image& operator = (const Image& rhs) - { - if (this != &rhs) { - Memory::operator=(rhs); - } - return *this; - } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Image& operator = (const cl_mem& rhs) - { - Memory::operator=(rhs); - return *this; - } - -public: - //! \brief Wrapper for clGetImageInfo(). - template - cl_int getImageInfo(cl_image_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetImageInfo, object_, name, param), - __GET_IMAGE_INFO_ERR); - } - - //! \brief Wrapper for clGetImageInfo() that returns by value. - template typename - detail::param_traits::param_type - getImageInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_image_info, name>::param_type param; - cl_int result = getImageInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } -}; - -#if defined(CL_VERSION_1_2) -/*! \brief Class interface for 1D Image Memory objects. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Image1D : public Image -{ -public: - /*! \brief Constructs a 1D Image in a specified context. - * - * Wraps clCreateImage(). - */ - Image1D( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - ::size_t width, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE1D, - width, - 0, 0, 0, 0, 0, 0, 0, 0 - }; - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - host_ptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - } - - //! \brief Default constructor - initializes to NULL. - Image1D() { } - - /*! \brief Copy constructor - performs shallow copy. 
- * - * See Memory for further details. - */ - Image1D(const Image1D& image1D) : Image(image1D) { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS Image1D(const cl_mem& image1D) : Image(image1D) { } - - /*! \brief Assignment from Image1D - performs shallow copy. - * - * See Memory for further details. - */ - Image1D& operator = (const Image1D& rhs) - { - if (this != &rhs) { - Image::operator=(rhs); - } - return *this; - } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Image1D& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } -}; - -/*! \class Image1DBuffer - * \brief Image interface for 1D buffer images. - */ -class Image1DBuffer : public Image -{ -public: - Image1DBuffer( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - ::size_t width, - const Buffer &buffer, - cl_int* err = NULL) - { - cl_int error; - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE1D_BUFFER, - width, - 0, 0, 0, 0, 0, 0, 0, - buffer() - }; - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - NULL, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - } - - Image1DBuffer() { } - - Image1DBuffer(const Image1DBuffer& image1D) : Image(image1D) { } - - __CL_EXPLICIT_CONSTRUCTORS Image1DBuffer(const cl_mem& image1D) : Image(image1D) { } - - Image1DBuffer& operator = (const Image1DBuffer& rhs) - { - if (this != &rhs) { - Image::operator=(rhs); - } - return *this; - } - - Image1DBuffer& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } -}; - -/*! \class Image1DArray - * \brief Image interface for arrays of 1D images. 
- */ -class Image1DArray : public Image -{ -public: - Image1DArray( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - ::size_t arraySize, - ::size_t width, - ::size_t rowPitch, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE1D_ARRAY, - width, - 0, 0, // height, depth (unused) - arraySize, - rowPitch, - 0, 0, 0, 0 - }; - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - host_ptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - } - - Image1DArray() { } - - Image1DArray(const Image1DArray& imageArray) : Image(imageArray) { } - - __CL_EXPLICIT_CONSTRUCTORS Image1DArray(const cl_mem& imageArray) : Image(imageArray) { } - - Image1DArray& operator = (const Image1DArray& rhs) - { - if (this != &rhs) { - Image::operator=(rhs); - } - return *this; - } - - Image1DArray& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } -}; -#endif // #if defined(CL_VERSION_1_2) - - -/*! \brief Class interface for 2D Image Memory objects. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Image2D : public Image -{ -public: - /*! \brief Constructs a 1D Image in a specified context. - * - * Wraps clCreateImage(). 
- */ - Image2D( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - ::size_t width, - ::size_t height, - ::size_t row_pitch = 0, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - bool useCreateImage; - -#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - // Run-time decision based on the actual platform - { - cl_uint version = detail::getContextPlatformVersion(context()); - useCreateImage = (version >= 0x10002); // OpenCL 1.2 or above - } -#elif defined(CL_VERSION_1_2) - useCreateImage = true; -#else - useCreateImage = false; -#endif - -#if defined(CL_VERSION_1_2) - if (useCreateImage) - { - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE2D, - width, - height, - 0, 0, // depth, array size (unused) - row_pitch, - 0, 0, 0, 0 - }; - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - host_ptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - } -#endif // #if defined(CL_VERSION_1_2) -#if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - if (!useCreateImage) - { - object_ = ::clCreateImage2D( - context(), flags,&format, width, height, row_pitch, host_ptr, &error); - - detail::errHandler(error, __CREATE_IMAGE2D_ERR); - if (err != NULL) { - *err = error; - } - } -#endif // #if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - } - - //! \brief Default constructor - initializes to NULL. - Image2D() { } - - /*! \brief Copy constructor - performs shallow copy. - * - * See Memory for further details. - */ - Image2D(const Image2D& image2D) : Image(image2D) { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS Image2D(const cl_mem& image2D) : Image(image2D) { } - - /*! \brief Assignment from Image2D - performs shallow copy. - * - * See Memory for further details. 
- */ - Image2D& operator = (const Image2D& rhs) - { - if (this != &rhs) { - Image::operator=(rhs); - } - return *this; - } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Image2D& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } -}; - - -#if !defined(CL_VERSION_1_2) -/*! \brief Class interface for GL 2D Image Memory objects. - * - * This is provided to facilitate interoperability with OpenGL. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - * \note Deprecated for OpenCL 1.2. Please use ImageGL instead. - */ -class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED Image2DGL CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED : public Image2D -{ -public: - /*! \brief Constructs an Image2DGL in a specified context, from a given - * GL Texture. - * - * Wraps clCreateFromGLTexture2D(). - */ - Image2DGL( - const Context& context, - cl_mem_flags flags, - GLenum target, - GLint miplevel, - GLuint texobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLTexture2D( - context(), - flags, - target, - miplevel, - texobj, - &error); - - detail::errHandler(error, __CREATE_GL_TEXTURE_2D_ERR); - if (err != NULL) { - *err = error; - } - - } - - //! \brief Default constructor - initializes to NULL. - Image2DGL() : Image2D() { } - - /*! \brief Copy constructor - performs shallow copy. - * - * See Memory for further details. - */ - Image2DGL(const Image2DGL& image) : Image2D(image) { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS Image2DGL(const cl_mem& image) : Image2D(image) { } - - /*! \brief Assignment from Image2DGL - performs shallow copy. - * - * See Memory for further details. - */ - Image2DGL& operator = (const Image2DGL& rhs) - { - if (this != &rhs) { - Image2D::operator=(rhs); - } - return *this; - } - - /*! \brief Assignment from cl_mem - performs shallow copy. 
- * - * See Memory for further details. - */ - Image2DGL& operator = (const cl_mem& rhs) - { - Image2D::operator=(rhs); - return *this; - } -}; -#endif // #if !defined(CL_VERSION_1_2) - -#if defined(CL_VERSION_1_2) -/*! \class Image2DArray - * \brief Image interface for arrays of 2D images. - */ -class Image2DArray : public Image -{ -public: - Image2DArray( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - ::size_t arraySize, - ::size_t width, - ::size_t height, - ::size_t rowPitch, - ::size_t slicePitch, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE2D_ARRAY, - width, - height, - 0, // depth (unused) - arraySize, - rowPitch, - slicePitch, - 0, 0, 0 - }; - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - host_ptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - } - - Image2DArray() { } - - Image2DArray(const Image2DArray& imageArray) : Image(imageArray) { } - - __CL_EXPLICIT_CONSTRUCTORS Image2DArray(const cl_mem& imageArray) : Image(imageArray) { } - - Image2DArray& operator = (const Image2DArray& rhs) - { - if (this != &rhs) { - Image::operator=(rhs); - } - return *this; - } - - Image2DArray& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } -}; -#endif // #if defined(CL_VERSION_1_2) - -/*! \brief Class interface for 3D Image Memory objects. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Image3D : public Image -{ -public: - /*! \brief Constructs a 3D Image in a specified context. - * - * Wraps clCreateImage(). 
- */ - Image3D( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - ::size_t width, - ::size_t height, - ::size_t depth, - ::size_t row_pitch = 0, - ::size_t slice_pitch = 0, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - bool useCreateImage; - -#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - // Run-time decision based on the actual platform - { - cl_uint version = detail::getContextPlatformVersion(context()); - useCreateImage = (version >= 0x10002); // OpenCL 1.2 or above - } -#elif defined(CL_VERSION_1_2) - useCreateImage = true; -#else - useCreateImage = false; -#endif - -#if defined(CL_VERSION_1_2) - if (useCreateImage) - { - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE3D, - width, - height, - depth, - 0, // array size (unused) - row_pitch, - slice_pitch, - 0, 0, 0 - }; - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - host_ptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - } -#endif // #if defined(CL_VERSION_1_2) -#if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - if (!useCreateImage) - { - object_ = ::clCreateImage3D( - context(), flags, &format, width, height, depth, row_pitch, - slice_pitch, host_ptr, &error); - - detail::errHandler(error, __CREATE_IMAGE3D_ERR); - if (err != NULL) { - *err = error; - } - } -#endif // #if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - } - - //! \brief Default constructor - initializes to NULL. - Image3D() { } - - /*! \brief Copy constructor - performs shallow copy. - * - * See Memory for further details. - */ - Image3D(const Image3D& image3D) : Image(image3D) { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS Image3D(const cl_mem& image3D) : Image(image3D) { } - - /*! \brief Assignment from Image3D - performs shallow copy. 
- * - * See Memory for further details. - */ - Image3D& operator = (const Image3D& rhs) - { - if (this != &rhs) { - Image::operator=(rhs); - } - return *this; - } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Image3D& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } -}; - -#if !defined(CL_VERSION_1_2) -/*! \brief Class interface for GL 3D Image Memory objects. - * - * This is provided to facilitate interoperability with OpenGL. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Image3DGL : public Image3D -{ -public: - /*! \brief Constructs an Image3DGL in a specified context, from a given - * GL Texture. - * - * Wraps clCreateFromGLTexture3D(). - */ - Image3DGL( - const Context& context, - cl_mem_flags flags, - GLenum target, - GLint miplevel, - GLuint texobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLTexture3D( - context(), - flags, - target, - miplevel, - texobj, - &error); - - detail::errHandler(error, __CREATE_GL_TEXTURE_3D_ERR); - if (err != NULL) { - *err = error; - } - } - - //! \brief Default constructor - initializes to NULL. - Image3DGL() : Image3D() { } - - /*! \brief Copy constructor - performs shallow copy. - * - * See Memory for further details. - */ - Image3DGL(const Image3DGL& image) : Image3D(image) { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS Image3DGL(const cl_mem& image) : Image3D(image) { } - - /*! \brief Assignment from Image3DGL - performs shallow copy. - * - * See Memory for further details. - */ - Image3DGL& operator = (const Image3DGL& rhs) - { - if (this != &rhs) { - Image3D::operator=(rhs); - } - return *this; - } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. 
- */ - Image3DGL& operator = (const cl_mem& rhs) - { - Image3D::operator=(rhs); - return *this; - } -}; -#endif // #if !defined(CL_VERSION_1_2) - -#if defined(CL_VERSION_1_2) -/*! \class ImageGL - * \brief general image interface for GL interop. - * We abstract the 2D and 3D GL images into a single instance here - * that wraps all GL sourced images on the grounds that setup information - * was performed by OpenCL anyway. - */ -class ImageGL : public Image -{ -public: - ImageGL( - const Context& context, - cl_mem_flags flags, - GLenum target, - GLint miplevel, - GLuint texobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLTexture( - context(), - flags, - target, - miplevel, - texobj, - &error); - - detail::errHandler(error, __CREATE_GL_TEXTURE_ERR); - if (err != NULL) { - *err = error; - } - } - - ImageGL() : Image() { } - - ImageGL(const ImageGL& image) : Image(image) { } - - __CL_EXPLICIT_CONSTRUCTORS ImageGL(const cl_mem& image) : Image(image) { } - - ImageGL& operator = (const ImageGL& rhs) - { - if (this != &rhs) { - Image::operator=(rhs); - } - return *this; - } - - ImageGL& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } -}; -#endif // #if defined(CL_VERSION_1_2) - -/*! \brief Class interface for cl_sampler. - * - * \note Copies of these objects are shallow, meaning that the copy will refer - * to the same underlying cl_sampler as the original. For details, see - * clRetainSampler() and clReleaseSampler(). - * - * \see cl_sampler - */ -class Sampler : public detail::Wrapper -{ -public: - /*! \brief Destructor. - * - * This calls clReleaseSampler() on the value held by this instance. - */ - ~Sampler() { } - - //! \brief Default constructor - initializes to NULL. - Sampler() { } - - /*! \brief Constructs a Sampler in a specified context. - * - * Wraps clCreateSampler(). 
- */ - Sampler( - const Context& context, - cl_bool normalized_coords, - cl_addressing_mode addressing_mode, - cl_filter_mode filter_mode, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateSampler( - context(), - normalized_coords, - addressing_mode, - filter_mode, - &error); - - detail::errHandler(error, __CREATE_SAMPLER_ERR); - if (err != NULL) { - *err = error; - } - } - - /*! \brief Copy constructor - performs shallow copy. - * - * This calls clRetainSampler() on the parameter's cl_sampler. - */ - Sampler(const Sampler& sampler) : detail::Wrapper(sampler) { } - - /*! \brief Constructor from cl_sampler - takes ownership. - * - * This effectively transfers ownership of a refcount on the cl_sampler - * into the new Sampler object. - */ - Sampler(const cl_sampler& sampler) : detail::Wrapper(sampler) { } - - /*! \brief Assignment operator from Sampler. - * - * This calls clRetainSampler() on the parameter and clReleaseSampler() - * on the previous value held by this instance. - */ - Sampler& operator = (const Sampler& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - /*! \brief Assignment operator from cl_sampler - takes ownership. - * - * This effectively transfers ownership of a refcount on the rhs and calls - * clReleaseSampler() on the value previously held by this instance. - */ - Sampler& operator = (const cl_sampler& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - //! \brief Wrapper for clGetSamplerInfo(). - template - cl_int getInfo(cl_sampler_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetSamplerInfo, object_, name, param), - __GET_SAMPLER_INFO_ERR); - } - - //! \brief Wrapper for clGetSamplerInfo() that returns by value. 
- template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_sampler_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } -}; - -class Program; -class CommandQueue; -class Kernel; - -//! \brief Class interface for specifying NDRange values. -class NDRange -{ -private: - size_t<3> sizes_; - cl_uint dimensions_; - -public: - //! \brief Default constructor - resulting range has zero dimensions. - NDRange() - : dimensions_(0) - { } - - //! \brief Constructs one-dimensional range. - NDRange(::size_t size0) - : dimensions_(1) - { - sizes_[0] = size0; - } - - //! \brief Constructs two-dimensional range. - NDRange(::size_t size0, ::size_t size1) - : dimensions_(2) - { - sizes_[0] = size0; - sizes_[1] = size1; - } - - //! \brief Constructs three-dimensional range. - NDRange(::size_t size0, ::size_t size1, ::size_t size2) - : dimensions_(3) - { - sizes_[0] = size0; - sizes_[1] = size1; - sizes_[2] = size2; - } - - /*! \brief Conversion operator to const ::size_t *. - * - * \returns a pointer to the size of the first dimension. - */ - operator const ::size_t*() const { - return (const ::size_t*) sizes_; - } - - //! \brief Queries the number of dimensions in the range. - ::size_t dimensions() const { return dimensions_; } -}; - -//! \brief A zero-dimensional range. -static const NDRange NullRange; - -//! \brief Local address wrapper for use with Kernel::setArg -struct LocalSpaceArg -{ - ::size_t size_; -}; - -namespace detail { - -template -struct KernelArgumentHandler -{ - static ::size_t size(const T&) { return sizeof(T); } - static T* ptr(T& value) { return &value; } -}; - -template <> -struct KernelArgumentHandler -{ - static ::size_t size(const LocalSpaceArg& value) { return value.size_; } - static void* ptr(LocalSpaceArg&) { return NULL; } -}; - -} -//! \endcond - -/*! 
__local - * \brief Helper function for generating LocalSpaceArg objects. - * Deprecated. Replaced with Local. - */ -inline CL_EXT_PREFIX__VERSION_1_1_DEPRECATED LocalSpaceArg -__local(::size_t size) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; -inline LocalSpaceArg -__local(::size_t size) -{ - LocalSpaceArg ret = { size }; - return ret; -} - -/*! Local - * \brief Helper function for generating LocalSpaceArg objects. - */ -inline LocalSpaceArg -Local(::size_t size) -{ - LocalSpaceArg ret = { size }; - return ret; -} - -//class KernelFunctor; - -/*! \brief Class interface for cl_kernel. - * - * \note Copies of these objects are shallow, meaning that the copy will refer - * to the same underlying cl_kernel as the original. For details, see - * clRetainKernel() and clReleaseKernel(). - * - * \see cl_kernel - */ -class Kernel : public detail::Wrapper -{ -public: - inline Kernel(const Program& program, const char* name, cl_int* err = NULL); - - /*! \brief Destructor. - * - * This calls clReleaseKernel() on the value held by this instance. - */ - ~Kernel() { } - - //! \brief Default constructor - initializes to NULL. - Kernel() { } - - /*! \brief Copy constructor - performs shallow copy. - * - * This calls clRetainKernel() on the parameter's cl_kernel. - */ - Kernel(const Kernel& kernel) : detail::Wrapper(kernel) { } - - /*! \brief Constructor from cl_kernel - takes ownership. - * - * This effectively transfers ownership of a refcount on the cl_kernel - * into the new Kernel object. - */ - __CL_EXPLICIT_CONSTRUCTORS Kernel(const cl_kernel& kernel) : detail::Wrapper(kernel) { } - - /*! \brief Assignment operator from Kernel. - * - * This calls clRetainKernel() on the parameter and clReleaseKernel() - * on the previous value held by this instance. - */ - Kernel& operator = (const Kernel& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - /*! \brief Assignment operator from cl_kernel - takes ownership. 
- * - * This effectively transfers ownership of a refcount on the rhs and calls - * clReleaseKernel() on the value previously held by this instance. - */ - Kernel& operator = (const cl_kernel& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - template - cl_int getInfo(cl_kernel_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetKernelInfo, object_, name, param), - __GET_KERNEL_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_kernel_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - -#if defined(CL_VERSION_1_2) - template - cl_int getArgInfo(cl_uint argIndex, cl_kernel_arg_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetKernelArgInfo, object_, argIndex, name, param), - __GET_KERNEL_ARG_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getArgInfo(cl_uint argIndex, cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_kernel_arg_info, name>::param_type param; - cl_int result = getArgInfo(argIndex, name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } -#endif // #if defined(CL_VERSION_1_2) - - template - cl_int getWorkGroupInfo( - const Device& device, cl_kernel_work_group_info name, T* param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetKernelWorkGroupInfo, object_, device(), name, param), - __GET_KERNEL_WORK_GROUP_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getWorkGroupInfo(const Device& device, cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_kernel_work_group_info, name>::param_type param; - cl_int result = getWorkGroupInfo(device, name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - template - cl_int setArg(cl_uint index, 
T value) - { - return detail::errHandler( - ::clSetKernelArg( - object_, - index, - detail::KernelArgumentHandler::size(value), - detail::KernelArgumentHandler::ptr(value)), - __SET_KERNEL_ARGS_ERR); - } - - cl_int setArg(cl_uint index, ::size_t size, void* argPtr) - { - return detail::errHandler( - ::clSetKernelArg(object_, index, size, argPtr), - __SET_KERNEL_ARGS_ERR); - } -}; - -/*! \class Program - * \brief Program interface that implements cl_program. - */ -class Program : public detail::Wrapper -{ -public: - typedef VECTOR_CLASS > Binaries; - typedef VECTOR_CLASS > Sources; - - Program( - const STRING_CLASS& source, - bool build = false, - cl_int* err = NULL) - { - cl_int error; - - const char * strings = source.c_str(); - const ::size_t length = source.size(); - - Context context = Context::getDefault(err); - - object_ = ::clCreateProgramWithSource( - context(), (cl_uint)1, &strings, &length, &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); - - if (error == CL_SUCCESS && build) { - - error = ::clBuildProgram( - object_, - 0, - NULL, - "", - NULL, - NULL); - - detail::errHandler(error, __BUILD_PROGRAM_ERR); - } - - if (err != NULL) { - *err = error; - } - } - - Program( - const Context& context, - const STRING_CLASS& source, - bool build = false, - cl_int* err = NULL) - { - cl_int error; - - const char * strings = source.c_str(); - const ::size_t length = source.size(); - - object_ = ::clCreateProgramWithSource( - context(), (cl_uint)1, &strings, &length, &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); - - if (error == CL_SUCCESS && build) { - - error = ::clBuildProgram( - object_, - 0, - NULL, - "", - NULL, - NULL); - - detail::errHandler(error, __BUILD_PROGRAM_ERR); - } - - if (err != NULL) { - *err = error; - } - } - - Program( - const Context& context, - const Sources& sources, - cl_int* err = NULL) - { - cl_int error; - - const ::size_t n = (::size_t)sources.size(); - ::size_t* lengths = (::size_t*) 
alloca(n * sizeof(::size_t)); - const char** strings = (const char**) alloca(n * sizeof(const char*)); - - for (::size_t i = 0; i < n; ++i) { - strings[i] = sources[(int)i].first; - lengths[i] = sources[(int)i].second; - } - - object_ = ::clCreateProgramWithSource( - context(), (cl_uint)n, strings, lengths, &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); - if (err != NULL) { - *err = error; - } - } - - /** - * Construct a program object from a list of devices and a per-device list of binaries. - * \param context A valid OpenCL context in which to construct the program. - * \param devices A vector of OpenCL device objects for which the program will be created. - * \param binaries A vector of pairs of a pointer to a binary object and its length. - * \param binaryStatus An optional vector that on completion will be resized to - * match the size of binaries and filled with values to specify if each binary - * was successfully loaded. - * Set to CL_SUCCESS if the binary was successfully loaded. - * Set to CL_INVALID_VALUE if the length is 0 or the binary pointer is NULL. - * Set to CL_INVALID_BINARY if the binary provided is not valid for the matching device. - * \param err if non-NULL will be set to CL_SUCCESS on successful operation or one of the following errors: - * CL_INVALID_CONTEXT if context is not a valid context. - * CL_INVALID_VALUE if the length of devices is zero; or if the length of binaries does not match the length of devices; - * or if any entry in binaries is NULL or has length 0. - * CL_INVALID_DEVICE if OpenCL devices listed in devices are not in the list of devices associated with context. - * CL_INVALID_BINARY if an invalid program binary was encountered for any device. binaryStatus will return specific status for each device. - * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the OpenCL implementation on the host. 
- */ - Program( - const Context& context, - const VECTOR_CLASS& devices, - const Binaries& binaries, - VECTOR_CLASS* binaryStatus = NULL, - cl_int* err = NULL) - { - cl_int error; - - const ::size_t numDevices = devices.size(); - - // Catch size mismatch early and return - if(binaries.size() != numDevices) { - error = CL_INVALID_VALUE; - detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); - if (err != NULL) { - *err = error; - } - return; - } - - ::size_t* lengths = (::size_t*) alloca(numDevices * sizeof(::size_t)); - const unsigned char** images = (const unsigned char**) alloca(numDevices * sizeof(const unsigned char**)); - - for (::size_t i = 0; i < numDevices; ++i) { - images[i] = (const unsigned char*)binaries[i].first; - lengths[i] = binaries[(int)i].second; - } - - cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); - for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { - deviceIDs[deviceIndex] = (devices[deviceIndex])(); - } - - if(binaryStatus) { - binaryStatus->resize(numDevices); - } - - object_ = ::clCreateProgramWithBinary( - context(), (cl_uint) devices.size(), - deviceIDs, - lengths, images, binaryStatus != NULL - ? &binaryStatus->front() - : NULL, &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); - if (err != NULL) { - *err = error; - } - } - - -#if defined(CL_VERSION_1_2) - /** - * Create program using builtin kernels. 
- * \param kernelNames Semi-colon separated list of builtin kernel names - */ - Program( - const Context& context, - const VECTOR_CLASS& devices, - const STRING_CLASS& kernelNames, - cl_int* err = NULL) - { - cl_int error; - - - ::size_t numDevices = devices.size(); - cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); - for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { - deviceIDs[deviceIndex] = (devices[deviceIndex])(); - } - - object_ = ::clCreateProgramWithBuiltInKernels( - context(), - (cl_uint) devices.size(), - deviceIDs, - kernelNames.c_str(), - &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR); - if (err != NULL) { - *err = error; - } - } -#endif // #if defined(CL_VERSION_1_2) - - Program() { } - - Program(const Program& program) : detail::Wrapper(program) { } - - __CL_EXPLICIT_CONSTRUCTORS Program(const cl_program& program) : detail::Wrapper(program) { } - - Program& operator = (const Program& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - Program& operator = (const cl_program& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - cl_int build( - const VECTOR_CLASS& devices, - const char* options = NULL, - void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, - void* data = NULL) const - { - ::size_t numDevices = devices.size(); - cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); - for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { - deviceIDs[deviceIndex] = (devices[deviceIndex])(); - } - - return detail::errHandler( - ::clBuildProgram( - object_, - (cl_uint) - devices.size(), - deviceIDs, - options, - notifyFptr, - data), - __BUILD_PROGRAM_ERR); - } - - cl_int build( - const char* options = NULL, - void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, - void* data = NULL) const - { - return detail::errHandler( - ::clBuildProgram( - 
object_, - 0, - NULL, - options, - notifyFptr, - data), - __BUILD_PROGRAM_ERR); - } - -#if defined(CL_VERSION_1_2) - cl_int compile( - const char* options = NULL, - void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, - void* data = NULL) const - { - return detail::errHandler( - ::clCompileProgram( - object_, - 0, - NULL, - options, - 0, - NULL, - NULL, - notifyFptr, - data), - __COMPILE_PROGRAM_ERR); - } -#endif - - template - cl_int getInfo(cl_program_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetProgramInfo, object_, name, param), - __GET_PROGRAM_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_program_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - template - cl_int getBuildInfo( - const Device& device, cl_program_build_info name, T* param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetProgramBuildInfo, object_, device(), name, param), - __GET_PROGRAM_BUILD_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getBuildInfo(const Device& device, cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_program_build_info, name>::param_type param; - cl_int result = getBuildInfo(device, name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int createKernels(VECTOR_CLASS* kernels) - { - cl_uint numKernels; - cl_int err = ::clCreateKernelsInProgram(object_, 0, NULL, &numKernels); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); - } - - Kernel* value = (Kernel*) alloca(numKernels * sizeof(Kernel)); - err = ::clCreateKernelsInProgram( - object_, numKernels, (cl_kernel*) value, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); - } - - 
kernels->assign(&value[0], &value[numKernels]); - return CL_SUCCESS; - } -}; - -#if defined(CL_VERSION_1_2) -inline Program linkProgram( - Program input1, - Program input2, - const char* options = NULL, - void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, - void* data = NULL, - cl_int* err = NULL) -{ - cl_int err_local = CL_SUCCESS; - - cl_program programs[2] = { input1(), input2() }; - - Context ctx = input1.getInfo(); - - cl_program prog = ::clLinkProgram( - ctx(), - 0, - NULL, - options, - 2, - programs, - notifyFptr, - data, - &err_local); - - detail::errHandler(err_local,__COMPILE_PROGRAM_ERR); - if (err != NULL) { - *err = err_local; - } - - return Program(prog); -} - -inline Program linkProgram( - VECTOR_CLASS inputPrograms, - const char* options = NULL, - void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, - void* data = NULL, - cl_int* err = NULL) -{ - cl_int err_local = CL_SUCCESS; - - cl_program * programs = (cl_program*) alloca(inputPrograms.size() * sizeof(cl_program)); - - if (programs != NULL) { - for (unsigned int i = 0; i < inputPrograms.size(); i++) { - programs[i] = inputPrograms[i](); - } - } - - cl_program prog = ::clLinkProgram( - Context::getDefault()(), - 0, - NULL, - options, - (cl_uint)inputPrograms.size(), - programs, - notifyFptr, - data, - &err_local); - - detail::errHandler(err_local,__COMPILE_PROGRAM_ERR); - if (err != NULL) { - *err = err_local; - } - - return Program(prog); -} -#endif - -template<> -inline VECTOR_CLASS cl::Program::getInfo(cl_int* err) const -{ - VECTOR_CLASS< ::size_t> sizes = getInfo(); - VECTOR_CLASS binaries; - for (VECTOR_CLASS< ::size_t>::iterator s = sizes.begin(); s != sizes.end(); ++s) - { - char *ptr = NULL; - if (*s != 0) - ptr = new char[*s]; - binaries.push_back(ptr); - } - - cl_int result = getInfo(CL_PROGRAM_BINARIES, &binaries); - if (err != NULL) { - *err = result; - } - return binaries; -} - -inline Kernel::Kernel(const Program& program, const char* name, cl_int* err) -{ - cl_int 
error; - - object_ = ::clCreateKernel(program(), name, &error); - detail::errHandler(error, __CREATE_KERNEL_ERR); - - if (err != NULL) { - *err = error; - } - -} - -/*! \class CommandQueue - * \brief CommandQueue interface for cl_command_queue. - */ -class CommandQueue : public detail::Wrapper -{ -private: - static volatile int default_initialized_; - static CommandQueue default_; - static volatile cl_int default_error_; -public: - CommandQueue( - cl_command_queue_properties properties, - cl_int* err = NULL) - { - cl_int error; - - Context context = Context::getDefault(&error); - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - - if (error != CL_SUCCESS) { - if (err != NULL) { - *err = error; - } - } - else { - Device device = context.getInfo()[0]; - - object_ = ::clCreateCommandQueue( - context(), device(), properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - if (err != NULL) { - *err = error; - } - } - } - /*! - * \brief Constructs a CommandQueue for an implementation defined device in the given context - */ - explicit CommandQueue( - const Context& context, - cl_command_queue_properties properties = 0, - cl_int* err = NULL) - { - cl_int error; - VECTOR_CLASS devices; - error = context.getInfo(CL_CONTEXT_DEVICES, &devices); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - - if (error != CL_SUCCESS) - { - if (err != NULL) { - *err = error; - } - return; - } - - object_ = ::clCreateCommandQueue(context(), devices[0](), properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - - if (err != NULL) { - *err = error; - } - - } - - CommandQueue( - const Context& context, - const Device& device, - cl_command_queue_properties properties = 0, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateCommandQueue( - context(), device(), properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - if (err != NULL) { - *err = error; - } - } - - static CommandQueue 
getDefault(cl_int * err = NULL) - { - int state = detail::compare_exchange( - &default_initialized_, - __DEFAULT_BEING_INITIALIZED, __DEFAULT_NOT_INITIALIZED); - - if (state & __DEFAULT_INITIALIZED) { - if (err != NULL) { - *err = default_error_; - } - return default_; - } - - if (state & __DEFAULT_BEING_INITIALIZED) { - // Assume writes will propagate eventually... - while(default_initialized_ != __DEFAULT_INITIALIZED) { - detail::fence(); - } - - if (err != NULL) { - *err = default_error_; - } - return default_; - } - - cl_int error; - - Context context = Context::getDefault(&error); - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - - if (error != CL_SUCCESS) { - if (err != NULL) { - *err = error; - } - } - else { - Device device = context.getInfo()[0]; - - default_ = CommandQueue(context, device, 0, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - if (err != NULL) { - *err = error; - } - } - - detail::fence(); - - default_error_ = error; - // Assume writes will propagate eventually... 
- default_initialized_ = __DEFAULT_INITIALIZED; - - detail::fence(); - - if (err != NULL) { - *err = default_error_; - } - return default_; - - } - - CommandQueue() { } - - CommandQueue(const CommandQueue& commandQueue) : detail::Wrapper(commandQueue) { } - - CommandQueue(const cl_command_queue& commandQueue) : detail::Wrapper(commandQueue) { } - - CommandQueue& operator = (const CommandQueue& rhs) - { - if (this != &rhs) { - detail::Wrapper::operator=(rhs); - } - return *this; - } - - CommandQueue& operator = (const cl_command_queue& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - template - cl_int getInfo(cl_command_queue_info name, T* param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetCommandQueueInfo, object_, name, param), - __GET_COMMAND_QUEUE_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_command_queue_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int enqueueReadBuffer( - const Buffer& buffer, - cl_bool blocking, - ::size_t offset, - ::size_t size, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueReadBuffer( - object_, buffer(), blocking, offset, size, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? 
&tmp : NULL), - __ENQUEUE_READ_BUFFER_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueWriteBuffer( - const Buffer& buffer, - cl_bool blocking, - ::size_t offset, - ::size_t size, - const void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueWriteBuffer( - object_, buffer(), blocking, offset, size, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_WRITE_BUFFER_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueCopyBuffer( - const Buffer& src, - const Buffer& dst, - ::size_t src_offset, - ::size_t dst_offset, - ::size_t size, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueCopyBuffer( - object_, src(), dst(), src_offset, dst_offset, size, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? 
&tmp : NULL), - __ENQEUE_COPY_BUFFER_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueReadBufferRect( - const Buffer& buffer, - cl_bool blocking, - const size_t<3>& buffer_offset, - const size_t<3>& host_offset, - const size_t<3>& region, - ::size_t buffer_row_pitch, - ::size_t buffer_slice_pitch, - ::size_t host_row_pitch, - ::size_t host_slice_pitch, - void *ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueReadBufferRect( - object_, - buffer(), - blocking, - (const ::size_t *)buffer_offset, - (const ::size_t *)host_offset, - (const ::size_t *)region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_READ_BUFFER_RECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueWriteBufferRect( - const Buffer& buffer, - cl_bool blocking, - const size_t<3>& buffer_offset, - const size_t<3>& host_offset, - const size_t<3>& region, - ::size_t buffer_row_pitch, - ::size_t buffer_slice_pitch, - ::size_t host_row_pitch, - ::size_t host_slice_pitch, - void *ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueWriteBufferRect( - object_, - buffer(), - blocking, - (const ::size_t *)buffer_offset, - (const ::size_t *)host_offset, - (const ::size_t *)region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? 
&tmp : NULL), - __ENQUEUE_WRITE_BUFFER_RECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueCopyBufferRect( - const Buffer& src, - const Buffer& dst, - const size_t<3>& src_origin, - const size_t<3>& dst_origin, - const size_t<3>& region, - ::size_t src_row_pitch, - ::size_t src_slice_pitch, - ::size_t dst_row_pitch, - ::size_t dst_slice_pitch, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueCopyBufferRect( - object_, - src(), - dst(), - (const ::size_t *)src_origin, - (const ::size_t *)dst_origin, - (const ::size_t *)region, - src_row_pitch, - src_slice_pitch, - dst_row_pitch, - dst_slice_pitch, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQEUE_COPY_BUFFER_RECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - -#if defined(CL_VERSION_1_2) - /** - * Enqueue a command to fill a buffer object with a pattern - * of a given size. The pattern is specified a as vector. - * \tparam PatternType The datatype of the pattern field. - * The pattern type must be an accepted OpenCL data type. - */ - template - cl_int enqueueFillBuffer( - const Buffer& buffer, - PatternType pattern, - ::size_t offset, - ::size_t size, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueFillBuffer( - object_, - buffer(), - static_cast(&pattern), - sizeof(PatternType), - offset, - size, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? 
&tmp : NULL), - __ENQUEUE_FILL_BUFFER_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } -#endif // #if defined(CL_VERSION_1_2) - - cl_int enqueueReadImage( - const Image& image, - cl_bool blocking, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t row_pitch, - ::size_t slice_pitch, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueReadImage( - object_, image(), blocking, (const ::size_t *) origin, - (const ::size_t *) region, row_pitch, slice_pitch, ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_READ_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueWriteImage( - const Image& image, - cl_bool blocking, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t row_pitch, - ::size_t slice_pitch, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueWriteImage( - object_, image(), blocking, (const ::size_t *) origin, - (const ::size_t *) region, row_pitch, slice_pitch, ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? 
&tmp : NULL), - __ENQUEUE_WRITE_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueCopyImage( - const Image& src, - const Image& dst, - const size_t<3>& src_origin, - const size_t<3>& dst_origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueCopyImage( - object_, src(), dst(), (const ::size_t *) src_origin, - (const ::size_t *)dst_origin, (const ::size_t *) region, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_COPY_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - -#if defined(CL_VERSION_1_2) - /** - * Enqueue a command to fill an image object with a specified color. - * \param fillColor is the color to use to fill the image. - * This is a four component RGBA floating-point color value if - * the image channel data type is not an unnormalized signed or - * unsigned data type. - */ - cl_int enqueueFillImage( - const Image& image, - cl_float4 fillColor, - const size_t<3>& origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueFillImage( - object_, - image(), - static_cast(&fillColor), - (const ::size_t *) origin, - (const ::size_t *) region, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_FILL_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * Enqueue a command to fill an image object with a specified color. - * \param fillColor is the color to use to fill the image. 
- * This is a four component RGBA signed integer color value if - * the image channel data type is an unnormalized signed integer - * type. - */ - cl_int enqueueFillImage( - const Image& image, - cl_int4 fillColor, - const size_t<3>& origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueFillImage( - object_, - image(), - static_cast(&fillColor), - (const ::size_t *) origin, - (const ::size_t *) region, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_FILL_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * Enqueue a command to fill an image object with a specified color. - * \param fillColor is the color to use to fill the image. - * This is a four component RGBA unsigned integer color value if - * the image channel data type is an unnormalized unsigned integer - * type. - */ - cl_int enqueueFillImage( - const Image& image, - cl_uint4 fillColor, - const size_t<3>& origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueFillImage( - object_, - image(), - static_cast(&fillColor), - (const ::size_t *) origin, - (const ::size_t *) region, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? 
&tmp : NULL), - __ENQUEUE_FILL_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } -#endif // #if defined(CL_VERSION_1_2) - - cl_int enqueueCopyImageToBuffer( - const Image& src, - const Buffer& dst, - const size_t<3>& src_origin, - const size_t<3>& region, - ::size_t dst_offset, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueCopyImageToBuffer( - object_, src(), dst(), (const ::size_t *) src_origin, - (const ::size_t *) region, dst_offset, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueCopyBufferToImage( - const Buffer& src, - const Image& dst, - ::size_t src_offset, - const size_t<3>& dst_origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueCopyBufferToImage( - object_, src(), dst(), src_offset, - (const ::size_t *) dst_origin, (const ::size_t *) region, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - void* enqueueMapBuffer( - const Buffer& buffer, - cl_bool blocking, - cl_map_flags flags, - ::size_t offset, - ::size_t size, - const VECTOR_CLASS* events = NULL, - Event* event = NULL, - cl_int* err = NULL) const - { - cl_int error; - void * result = ::clEnqueueMapBuffer( - object_, buffer(), blocking, flags, offset, size, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? 
(cl_event*) &events->front() : NULL, - (cl_event*) event, - &error); - - detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - return result; - } - - void* enqueueMapImage( - const Image& buffer, - cl_bool blocking, - cl_map_flags flags, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t * row_pitch, - ::size_t * slice_pitch, - const VECTOR_CLASS* events = NULL, - Event* event = NULL, - cl_int* err = NULL) const - { - cl_int error; - void * result = ::clEnqueueMapImage( - object_, buffer(), blocking, flags, - (const ::size_t *) origin, (const ::size_t *) region, - row_pitch, slice_pitch, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event, - &error); - - detail::errHandler(error, __ENQUEUE_MAP_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - return result; - } - - cl_int enqueueUnmapMemObject( - const Memory& memory, - void* mapped_ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueUnmapMemObject( - object_, memory(), mapped_ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_UNMAP_MEM_OBJECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - -#if defined(CL_VERSION_1_2) - /** - * Enqueues a marker command which waits for either a list of events to complete, - * or all previously enqueued commands to complete. - * - * Enqueues a marker command which waits for either a list of events to complete, - * or if the list is empty it waits for all commands previously enqueued in command_queue - * to complete before it completes. This command returns an event which can be waited on, - * i.e. 
this event can be waited on to insure that all events either in the event_wait_list - * or all previously enqueued commands, queued before this command to command_queue, - * have completed. - */ - cl_int enqueueMarkerWithWaitList( - const VECTOR_CLASS *events = 0, - Event *event = 0) - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueMarkerWithWaitList( - object_, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_MARKER_WAIT_LIST_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * A synchronization point that enqueues a barrier operation. - * - * Enqueues a barrier command which waits for either a list of events to complete, - * or if the list is empty it waits for all commands previously enqueued in command_queue - * to complete before it completes. This command blocks command execution, that is, any - * following commands enqueued after it do not execute until it completes. This command - * returns an event which can be waited on, i.e. this event can be waited on to insure that - * all events either in the event_wait_list or all previously enqueued commands, queued - * before this command to command_queue, have completed. - */ - cl_int enqueueBarrierWithWaitList( - const VECTOR_CLASS *events = 0, - Event *event = 0) - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueBarrierWithWaitList( - object_, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_BARRIER_WAIT_LIST_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * Enqueues a command to indicate with which device a set of memory objects - * should be associated. 
- */ - cl_int enqueueMigrateMemObjects( - const VECTOR_CLASS &memObjects, - cl_mem_migration_flags flags, - const VECTOR_CLASS* events = NULL, - Event* event = NULL - ) - { - cl_event tmp; - - cl_mem* localMemObjects = static_cast(alloca(memObjects.size() * sizeof(cl_mem))); - for( int i = 0; i < (int)memObjects.size(); ++i ) { - localMemObjects[i] = memObjects[i](); - } - - - cl_int err = detail::errHandler( - ::clEnqueueMigrateMemObjects( - object_, - (cl_uint)memObjects.size(), - static_cast(localMemObjects), - flags, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_UNMAP_MEM_OBJECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } -#endif // #if defined(CL_VERSION_1_2) - - cl_int enqueueNDRangeKernel( - const Kernel& kernel, - const NDRange& offset, - const NDRange& global, - const NDRange& local = NullRange, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueNDRangeKernel( - object_, kernel(), (cl_uint) global.dimensions(), - offset.dimensions() != 0 ? (const ::size_t*) offset : NULL, - (const ::size_t*) global, - local.dimensions() != 0 ? (const ::size_t*) local : NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_NDRANGE_KERNEL_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueTask( - const Kernel& kernel, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueTask( - object_, kernel(), - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? 
&tmp : NULL), - __ENQUEUE_TASK_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueNativeKernel( - void (CL_CALLBACK *userFptr)(void *), - std::pair args, - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* mem_locs = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_mem * mems = (mem_objects != NULL && mem_objects->size() > 0) - ? (cl_mem*) alloca(mem_objects->size() * sizeof(cl_mem)) - : NULL; - - if (mems != NULL) { - for (unsigned int i = 0; i < mem_objects->size(); i++) { - mems[i] = ((*mem_objects)[i])(); - } - } - - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueNativeKernel( - object_, userFptr, args.first, args.second, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - mems, - (mem_locs != NULL) ? (const void **) &mem_locs->front() : NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? 
&tmp : NULL), - __ENQUEUE_NATIVE_KERNEL); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - -/** - * Deprecated APIs for 1.2 - */ -#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) - CL_EXT_PREFIX__VERSION_1_1_DEPRECATED - cl_int enqueueMarker(Event* event = NULL) const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED - { - return detail::errHandler( - ::clEnqueueMarker(object_, (cl_event*) event), - __ENQUEUE_MARKER_ERR); - } - - CL_EXT_PREFIX__VERSION_1_1_DEPRECATED - cl_int enqueueWaitForEvents(const VECTOR_CLASS& events) const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED - { - return detail::errHandler( - ::clEnqueueWaitForEvents( - object_, - (cl_uint) events.size(), - (const cl_event*) &events.front()), - __ENQUEUE_WAIT_FOR_EVENTS_ERR); - } -#endif // #if defined(CL_VERSION_1_1) - - cl_int enqueueAcquireGLObjects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueAcquireGLObjects( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_ACQUIRE_GL_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueReleaseGLObjects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueReleaseGLObjects( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? 
(cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_RELEASE_GL_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - -#if defined (USE_DX_INTEROP) -typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueAcquireD3D10ObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem* mem_objects, cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, cl_event* event); -typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem* mem_objects, cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, cl_event* event); - - cl_int enqueueAcquireD3D10Objects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - static PFN_clEnqueueAcquireD3D10ObjectsKHR pfn_clEnqueueAcquireD3D10ObjectsKHR = NULL; -#if defined(CL_VERSION_1_2) - cl_context context = getInfo(); - cl::Device device(getInfo()); - cl_platform_id platform = device.getInfo(); - __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clEnqueueAcquireD3D10ObjectsKHR); -#endif -#if defined(CL_VERSION_1_1) - __INIT_CL_EXT_FCN_PTR(clEnqueueAcquireD3D10ObjectsKHR); -#endif - - cl_event tmp; - cl_int err = detail::errHandler( - pfn_clEnqueueAcquireD3D10ObjectsKHR( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? 
&tmp : NULL), - __ENQUEUE_ACQUIRE_GL_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueReleaseD3D10Objects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - static PFN_clEnqueueReleaseD3D10ObjectsKHR pfn_clEnqueueReleaseD3D10ObjectsKHR = NULL; -#if defined(CL_VERSION_1_2) - cl_context context = getInfo(); - cl::Device device(getInfo()); - cl_platform_id platform = device.getInfo(); - __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clEnqueueReleaseD3D10ObjectsKHR); -#endif // #if defined(CL_VERSION_1_2) -#if defined(CL_VERSION_1_1) - __INIT_CL_EXT_FCN_PTR(clEnqueueReleaseD3D10ObjectsKHR); -#endif // #if defined(CL_VERSION_1_1) - - cl_event tmp; - cl_int err = detail::errHandler( - pfn_clEnqueueReleaseD3D10ObjectsKHR( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? 
&tmp : NULL), - __ENQUEUE_RELEASE_GL_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } -#endif - -/** - * Deprecated APIs for 1.2 - */ -#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) - CL_EXT_PREFIX__VERSION_1_1_DEPRECATED - cl_int enqueueBarrier() const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED - { - return detail::errHandler( - ::clEnqueueBarrier(object_), - __ENQUEUE_BARRIER_ERR); - } -#endif // #if defined(CL_VERSION_1_1) - - cl_int flush() const - { - return detail::errHandler(::clFlush(object_), __FLUSH_ERR); - } - - cl_int finish() const - { - return detail::errHandler(::clFinish(object_), __FINISH_ERR); - } -}; - -#ifdef _WIN32 -__declspec(selectany) volatile int CommandQueue::default_initialized_ = __DEFAULT_NOT_INITIALIZED; -__declspec(selectany) CommandQueue CommandQueue::default_; -__declspec(selectany) volatile cl_int CommandQueue::default_error_ = CL_SUCCESS; -#else -__attribute__((weak)) volatile int CommandQueue::default_initialized_ = __DEFAULT_NOT_INITIALIZED; -__attribute__((weak)) CommandQueue CommandQueue::default_; -__attribute__((weak)) volatile cl_int CommandQueue::default_error_ = CL_SUCCESS; -#endif - -template< typename IteratorType > -Buffer::Buffer( - const Context &context, - IteratorType startIterator, - IteratorType endIterator, - bool readOnly, - bool useHostPtr, - cl_int* err) -{ - typedef typename std::iterator_traits::value_type DataType; - cl_int error; - - cl_mem_flags flags = 0; - if( readOnly ) { - flags |= CL_MEM_READ_ONLY; - } - else { - flags |= CL_MEM_READ_WRITE; - } - if( useHostPtr ) { - flags |= CL_MEM_USE_HOST_PTR; - } - - ::size_t size = sizeof(DataType)*(endIterator - startIterator); - - if( useHostPtr ) { - object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); - } else { - object_ = ::clCreateBuffer(context(), flags, size, 0, &error); - } - - detail::errHandler(error, __CREATE_BUFFER_ERR); - 
if (err != NULL) { - *err = error; - } - - if( !useHostPtr ) { - CommandQueue queue(context, 0, &error); - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - - error = cl::copy(queue, startIterator, endIterator, *this); - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } -} - -inline cl_int enqueueReadBuffer( - const Buffer& buffer, - cl_bool blocking, - ::size_t offset, - ::size_t size, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueReadBuffer(buffer, blocking, offset, size, ptr, events, event); -} - -inline cl_int enqueueWriteBuffer( - const Buffer& buffer, - cl_bool blocking, - ::size_t offset, - ::size_t size, - const void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueWriteBuffer(buffer, blocking, offset, size, ptr, events, event); -} - -inline void* enqueueMapBuffer( - const Buffer& buffer, - cl_bool blocking, - cl_map_flags flags, - ::size_t offset, - ::size_t size, - const VECTOR_CLASS* events = NULL, - Event* event = NULL, - cl_int* err = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - - void * result = ::clEnqueueMapBuffer( - queue(), buffer(), blocking, flags, offset, size, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? 
(cl_event*) &events->front() : NULL, - (cl_event*) event, - &error); - - detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - return result; -} - -inline cl_int enqueueUnmapMemObject( - const Memory& memory, - void* mapped_ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - if (error != CL_SUCCESS) { - return error; - } - - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueUnmapMemObject( - queue(), memory(), mapped_ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_UNMAP_MEM_OBJECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; -} - -inline cl_int enqueueCopyBuffer( - const Buffer& src, - const Buffer& dst, - ::size_t src_offset, - ::size_t dst_offset, - ::size_t size, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueCopyBuffer(src, dst, src_offset, dst_offset, size, events, event); -} - -/** - * Blocking copy operation between iterators and a buffer. - * Host to Device. - * Uses default command queue. - */ -template< typename IteratorType > -inline cl_int copy( IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - if (error != CL_SUCCESS) - return error; - - return cl::copy(queue, startIterator, endIterator, buffer); -} - -/** - * Blocking copy operation between iterators and a buffer. - * Device to Host. - * Uses default command queue. 
- */ -template< typename IteratorType > -inline cl_int copy( const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - if (error != CL_SUCCESS) - return error; - - return cl::copy(queue, buffer, startIterator, endIterator); -} - -/** - * Blocking copy operation between iterators and a buffer. - * Host to Device. - * Uses specified queue. - */ -template< typename IteratorType > -inline cl_int copy( const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ) -{ - typedef typename std::iterator_traits::value_type DataType; - cl_int error; - - ::size_t length = endIterator-startIterator; - ::size_t byteLength = length*sizeof(DataType); - - DataType *pointer = - static_cast(queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_WRITE, 0, byteLength, 0, 0, &error)); - // if exceptions enabled, enqueueMapBuffer will throw - if( error != CL_SUCCESS ) { - return error; - } -#if defined(_MSC_VER) - std::copy( - startIterator, - endIterator, - stdext::checked_array_iterator( - pointer, length)); -#else - std::copy(startIterator, endIterator, pointer); -#endif - Event endEvent; - error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent); - // if exceptions enabled, enqueueUnmapMemObject will throw - if( error != CL_SUCCESS ) { - return error; - } - endEvent.wait(); - return CL_SUCCESS; -} - -/** - * Blocking copy operation between iterators and a buffer. - * Device to Host. - * Uses specified queue. 
- */ -template< typename IteratorType > -inline cl_int copy( const CommandQueue &queue, const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ) -{ - typedef typename std::iterator_traits::value_type DataType; - cl_int error; - - ::size_t length = endIterator-startIterator; - ::size_t byteLength = length*sizeof(DataType); - - DataType *pointer = - static_cast(queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_READ, 0, byteLength, 0, 0, &error)); - // if exceptions enabled, enqueueMapBuffer will throw - if( error != CL_SUCCESS ) { - return error; - } - std::copy(pointer, pointer + length, startIterator); - Event endEvent; - error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent); - // if exceptions enabled, enqueueUnmapMemObject will throw - if( error != CL_SUCCESS ) { - return error; - } - endEvent.wait(); - return CL_SUCCESS; -} - -#if defined(CL_VERSION_1_1) -inline cl_int enqueueReadBufferRect( - const Buffer& buffer, - cl_bool blocking, - const size_t<3>& buffer_offset, - const size_t<3>& host_offset, - const size_t<3>& region, - ::size_t buffer_row_pitch, - ::size_t buffer_slice_pitch, - ::size_t host_row_pitch, - ::size_t host_slice_pitch, - void *ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueReadBufferRect( - buffer, - blocking, - buffer_offset, - host_offset, - region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - events, - event); -} - -inline cl_int enqueueWriteBufferRect( - const Buffer& buffer, - cl_bool blocking, - const size_t<3>& buffer_offset, - const size_t<3>& host_offset, - const size_t<3>& region, - ::size_t buffer_row_pitch, - ::size_t buffer_slice_pitch, - ::size_t host_row_pitch, - ::size_t host_slice_pitch, - void *ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - 
CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueWriteBufferRect( - buffer, - blocking, - buffer_offset, - host_offset, - region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - events, - event); -} - -inline cl_int enqueueCopyBufferRect( - const Buffer& src, - const Buffer& dst, - const size_t<3>& src_origin, - const size_t<3>& dst_origin, - const size_t<3>& region, - ::size_t src_row_pitch, - ::size_t src_slice_pitch, - ::size_t dst_row_pitch, - ::size_t dst_slice_pitch, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueCopyBufferRect( - src, - dst, - src_origin, - dst_origin, - region, - src_row_pitch, - src_slice_pitch, - dst_row_pitch, - dst_slice_pitch, - events, - event); -} -#endif - -inline cl_int enqueueReadImage( - const Image& image, - cl_bool blocking, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t row_pitch, - ::size_t slice_pitch, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueReadImage( - image, - blocking, - origin, - region, - row_pitch, - slice_pitch, - ptr, - events, - event); -} - -inline cl_int enqueueWriteImage( - const Image& image, - cl_bool blocking, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t row_pitch, - ::size_t slice_pitch, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueWriteImage( - image, - blocking, - origin, - region, - row_pitch, - slice_pitch, - ptr, - events, - 
event); -} - -inline cl_int enqueueCopyImage( - const Image& src, - const Image& dst, - const size_t<3>& src_origin, - const size_t<3>& dst_origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueCopyImage( - src, - dst, - src_origin, - dst_origin, - region, - events, - event); -} - -inline cl_int enqueueCopyImageToBuffer( - const Image& src, - const Buffer& dst, - const size_t<3>& src_origin, - const size_t<3>& region, - ::size_t dst_offset, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueCopyImageToBuffer( - src, - dst, - src_origin, - region, - dst_offset, - events, - event); -} - -inline cl_int enqueueCopyBufferToImage( - const Buffer& src, - const Image& dst, - ::size_t src_offset, - const size_t<3>& dst_origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueCopyBufferToImage( - src, - dst, - src_offset, - dst_origin, - region, - events, - event); -} - - -inline cl_int flush(void) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.flush(); -} - -inline cl_int finish(void) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - - return queue.finish(); -} - -// Kernel Functor support -// New interface as of September 2011 -// Requires the C++11 std::tr1::function (note do not support TR1) -// Visual Studio 2010 and GCC 4.2 - -struct EnqueueArgs -{ - CommandQueue 
queue_; - const NDRange offset_; - const NDRange global_; - const NDRange local_; - VECTOR_CLASS events_; - - EnqueueArgs(NDRange global) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(NullRange) - { - - } - - EnqueueArgs(NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(local) - { - - } - - EnqueueArgs(NDRange offset, NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(offset), - global_(global), - local_(local) - { - - } - - EnqueueArgs(Event e, NDRange global) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(NullRange) - { - events_.push_back(e); - } - - EnqueueArgs(Event e, NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(local) - { - events_.push_back(e); - } - - EnqueueArgs(Event e, NDRange offset, NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(offset), - global_(global), - local_(local) - { - events_.push_back(e); - } - - EnqueueArgs(const VECTOR_CLASS &events, NDRange global) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(NullRange), - events_(events) - { - - } - - EnqueueArgs(const VECTOR_CLASS &events, NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(local), - events_(events) - { - - } - - EnqueueArgs(const VECTOR_CLASS &events, NDRange offset, NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(offset), - global_(global), - local_(local), - events_(events) - { - - } - - EnqueueArgs(CommandQueue &queue, NDRange global) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(NullRange) - { - - } - - EnqueueArgs(CommandQueue &queue, NDRange global, NDRange local) : - queue_(queue), - offset_(NullRange), - 
global_(global), - local_(local) - { - - } - - EnqueueArgs(CommandQueue &queue, NDRange offset, NDRange global, NDRange local) : - queue_(queue), - offset_(offset), - global_(global), - local_(local) - { - - } - - EnqueueArgs(CommandQueue &queue, Event e, NDRange global) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(NullRange) - { - events_.push_back(e); - } - - EnqueueArgs(CommandQueue &queue, Event e, NDRange global, NDRange local) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(local) - { - events_.push_back(e); - } - - EnqueueArgs(CommandQueue &queue, Event e, NDRange offset, NDRange global, NDRange local) : - queue_(queue), - offset_(offset), - global_(global), - local_(local) - { - events_.push_back(e); - } - - EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS &events, NDRange global) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(NullRange), - events_(events) - { - - } - - EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS &events, NDRange global, NDRange local) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(local), - events_(events) - { - - } - - EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS &events, NDRange offset, NDRange global, NDRange local) : - queue_(queue), - offset_(offset), - global_(global), - local_(local), - events_(events) - { - - } -}; - -namespace detail { - -class NullType {}; - -template -struct SetArg -{ - static void set (Kernel kernel, T0 arg) - { - kernel.setArg(index, arg); - } -}; - -template -struct SetArg -{ - static void set (Kernel, NullType) - { - } -}; - -template < - typename T0, typename T1, typename T2, typename T3, - typename T4, typename T5, typename T6, typename T7, - typename T8, typename T9, typename T10, typename T11, - typename T12, typename T13, typename T14, typename T15, - typename T16, typename T17, typename T18, typename T19, - typename T20, typename T21, typename T22, typename T23, - typename T24, typename T25, 
typename T26, typename T27, - typename T28, typename T29, typename T30, typename T31 -> -class KernelFunctorGlobal -{ -private: - Kernel kernel_; - -public: - KernelFunctorGlobal( - Kernel kernel) : - kernel_(kernel) - {} - - KernelFunctorGlobal( - const Program& program, - const STRING_CLASS name, - cl_int * err = NULL) : - kernel_(program, name.c_str(), err) - {} - - Event operator() ( - const EnqueueArgs& args, - T0 t0, - T1 t1 = NullType(), - T2 t2 = NullType(), - T3 t3 = NullType(), - T4 t4 = NullType(), - T5 t5 = NullType(), - T6 t6 = NullType(), - T7 t7 = NullType(), - T8 t8 = NullType(), - T9 t9 = NullType(), - T10 t10 = NullType(), - T11 t11 = NullType(), - T12 t12 = NullType(), - T13 t13 = NullType(), - T14 t14 = NullType(), - T15 t15 = NullType(), - T16 t16 = NullType(), - T17 t17 = NullType(), - T18 t18 = NullType(), - T19 t19 = NullType(), - T20 t20 = NullType(), - T21 t21 = NullType(), - T22 t22 = NullType(), - T23 t23 = NullType(), - T24 t24 = NullType(), - T25 t25 = NullType(), - T26 t26 = NullType(), - T27 t27 = NullType(), - T28 t28 = NullType(), - T29 t29 = NullType(), - T30 t30 = NullType(), - T31 t31 = NullType() - ) - { - Event event; - SetArg<0, T0>::set(kernel_, t0); - SetArg<1, T1>::set(kernel_, t1); - SetArg<2, T2>::set(kernel_, t2); - SetArg<3, T3>::set(kernel_, t3); - SetArg<4, T4>::set(kernel_, t4); - SetArg<5, T5>::set(kernel_, t5); - SetArg<6, T6>::set(kernel_, t6); - SetArg<7, T7>::set(kernel_, t7); - SetArg<8, T8>::set(kernel_, t8); - SetArg<9, T9>::set(kernel_, t9); - SetArg<10, T10>::set(kernel_, t10); - SetArg<11, T11>::set(kernel_, t11); - SetArg<12, T12>::set(kernel_, t12); - SetArg<13, T13>::set(kernel_, t13); - SetArg<14, T14>::set(kernel_, t14); - SetArg<15, T15>::set(kernel_, t15); - SetArg<16, T16>::set(kernel_, t16); - SetArg<17, T17>::set(kernel_, t17); - SetArg<18, T18>::set(kernel_, t18); - SetArg<19, T19>::set(kernel_, t19); - SetArg<20, T20>::set(kernel_, t20); - SetArg<21, T21>::set(kernel_, t21); - SetArg<22, 
T22>::set(kernel_, t22); - SetArg<23, T23>::set(kernel_, t23); - SetArg<24, T24>::set(kernel_, t24); - SetArg<25, T25>::set(kernel_, t25); - SetArg<26, T26>::set(kernel_, t26); - SetArg<27, T27>::set(kernel_, t27); - SetArg<28, T28>::set(kernel_, t28); - SetArg<29, T29>::set(kernel_, t29); - SetArg<30, T30>::set(kernel_, t30); - SetArg<31, T31>::set(kernel_, t31); - - args.queue_.enqueueNDRangeKernel( - kernel_, - args.offset_, - args.global_, - args.local_, - &args.events_, - &event); - - return event; - } - -}; - -//------------------------------------------------------------------------------------------------------ - - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25, - typename T26, - typename T27, - typename T28, - typename T29, - typename T30, - typename T31> -struct functionImplementation_ -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - T30, - T31> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 32)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! 
\brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - T30, - T31); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25, - T26 arg26, - T27 arg27, - T28 arg28, - T29 arg29, - T30 arg30, - T31 arg31) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25, - arg26, - arg27, - arg28, - arg29, - arg30, - arg31); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25, - typename T26, - typename T27, - typename T28, - typename T29, - typename T30> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - T30, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, 
- T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - T30, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 31)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - T30); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25, - T26 arg26, - T27 arg27, - T28 arg28, - T29 arg29, - T30 arg30) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25, - arg26, - arg27, - arg28, - arg29, - arg30); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - 
typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25, - typename T26, - typename T27, - typename T28, - typename T29> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 30)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. 
- typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25, - T26 arg26, - T27 arg27, - T28 arg28, - T29 arg29) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25, - arg26, - arg27, - arg28, - arg29); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25, - typename T26, - typename T27, - typename T28> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - NullType, - NullType, - NullType> FunctorType; - - 
FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 29)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25, - T26 arg26, - T27 arg27, - T28 arg28) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25, - arg26, - arg27, - arg28); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25, - typename T26, - typename T27> 
-struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 28)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. 
- typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25, - T26 arg26, - T27 arg27) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25, - arg26, - arg27); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25, - typename T26> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType 
&functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 27)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25, - T26 arg26) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25, - arg26); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - 
T19, - T20, - T21, - T22, - T23, - T24, - T25, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 26)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. 
- typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && 
defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 25)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef 
detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 24)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - 
typename T18, - typename T19, - typename T20, - typename T21, - typename T22> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 23)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. 
- typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 22)) - // Fail variadic expansion for dev11 - static_assert(0, 
"Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, 
- NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 21)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, 
- NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 20)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18> -struct functionImplementation_ -< T0, - T1, - 
T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 19)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. 
- typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 18)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. 
If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 17)) - 
// Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - 
functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 16)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - 
NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 15)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - 
NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 14)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - NullType, - NullType, - NullType, - NullType, - 
NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 13)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - NullType, - NullType, - NullType, - NullType, - NullType, - 
NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 12)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, 
- NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 11)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - 
NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 10)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - 
FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 9)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) 
&& defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 8)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 7)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count 
for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 6)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! 
\brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 5)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. 
- typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 4)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. 
- typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3); - } - - -}; - -template< - typename T0, - typename T1, - typename T2> -struct functionImplementation_ -< T0, - T1, - T2, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 3)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. 
- typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2); - } - - -}; - -template< - typename T0, - typename T1> -struct functionImplementation_ -< T0, - T1, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 2)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. 
- typedef Event type_( - const EnqueueArgs&, - T0, - T1); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1) - { - return functor_( - enqueueArgs, - arg0, - arg1); - } - - -}; - -template< - typename T0> -struct functionImplementation_ -< T0, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 1)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. 
- typedef Event type_( - const EnqueueArgs&, - T0); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0) - { - return functor_( - enqueueArgs, - arg0); - } - - -}; - - - - - -} // namespace detail - -//---------------------------------------------------------------------------------------------- - -template < - typename T0, typename T1 = detail::NullType, typename T2 = detail::NullType, - typename T3 = detail::NullType, typename T4 = detail::NullType, - typename T5 = detail::NullType, typename T6 = detail::NullType, - typename T7 = detail::NullType, typename T8 = detail::NullType, - typename T9 = detail::NullType, typename T10 = detail::NullType, - typename T11 = detail::NullType, typename T12 = detail::NullType, - typename T13 = detail::NullType, typename T14 = detail::NullType, - typename T15 = detail::NullType, typename T16 = detail::NullType, - typename T17 = detail::NullType, typename T18 = detail::NullType, - typename T19 = detail::NullType, typename T20 = detail::NullType, - typename T21 = detail::NullType, typename T22 = detail::NullType, - typename T23 = detail::NullType, typename T24 = detail::NullType, - typename T25 = detail::NullType, typename T26 = detail::NullType, - typename T27 = detail::NullType, typename T28 = detail::NullType, - typename T29 = detail::NullType, typename T30 = detail::NullType, - typename T31 = detail::NullType -> -struct make_kernel : - public detail::functionImplementation_< - T0, T1, T2, T3, - T4, T5, T6, T7, - T8, T9, T10, T11, - T12, T13, T14, T15, - T16, T17, T18, T19, - T20, T21, T22, T23, - T24, T25, T26, T27, - T28, T29, T30, T31 - > -{ -public: - typedef detail::KernelFunctorGlobal< - T0, T1, T2, T3, - T4, T5, T6, T7, - T8, T9, T10, T11, - T12, T13, T14, T15, - T16, T17, T18, T19, - T20, T21, T22, T23, - T24, T25, T26, T27, - T28, T29, T30, T31 - > FunctorType; - - make_kernel( - const Program& program, - const STRING_CLASS name, - cl_int * err = NULL) : - detail::functionImplementation_< - T0, T1, T2, T3, 
- T4, T5, T6, T7, - T8, T9, T10, T11, - T12, T13, T14, T15, - T16, T17, T18, T19, - T20, T21, T22, T23, - T24, T25, T26, T27, - T28, T29, T30, T31 - >( - FunctorType(program, name, err)) - {} - - make_kernel( - const Kernel kernel) : - detail::functionImplementation_< - T0, T1, T2, T3, - T4, T5, T6, T7, - T8, T9, T10, T11, - T12, T13, T14, T15, - T16, T17, T18, T19, - T20, T21, T22, T23, - T24, T25, T26, T27, - T28, T29, T30, T31 - >( - FunctorType(kernel)) - {} -}; - - -//---------------------------------------------------------------------------------------------------------------------- - -#undef __ERR_STR -#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) -#undef __GET_DEVICE_INFO_ERR -#undef __GET_PLATFORM_INFO_ERR -#undef __GET_DEVICE_IDS_ERR -#undef __GET_CONTEXT_INFO_ERR -#undef __GET_EVENT_INFO_ERR -#undef __GET_EVENT_PROFILE_INFO_ERR -#undef __GET_MEM_OBJECT_INFO_ERR -#undef __GET_IMAGE_INFO_ERR -#undef __GET_SAMPLER_INFO_ERR -#undef __GET_KERNEL_INFO_ERR -#undef __GET_KERNEL_ARG_INFO_ERR -#undef __GET_KERNEL_WORK_GROUP_INFO_ERR -#undef __GET_PROGRAM_INFO_ERR -#undef __GET_PROGRAM_BUILD_INFO_ERR -#undef __GET_COMMAND_QUEUE_INFO_ERR - -#undef __CREATE_CONTEXT_ERR -#undef __CREATE_CONTEXT_FROM_TYPE_ERR -#undef __GET_SUPPORTED_IMAGE_FORMATS_ERR - -#undef __CREATE_BUFFER_ERR -#undef __CREATE_SUBBUFFER_ERR -#undef __CREATE_IMAGE2D_ERR -#undef __CREATE_IMAGE3D_ERR -#undef __CREATE_SAMPLER_ERR -#undef __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR - -#undef __CREATE_USER_EVENT_ERR -#undef __SET_USER_EVENT_STATUS_ERR -#undef __SET_EVENT_CALLBACK_ERR -#undef __SET_PRINTF_CALLBACK_ERR - -#undef __WAIT_FOR_EVENTS_ERR - -#undef __CREATE_KERNEL_ERR -#undef __SET_KERNEL_ARGS_ERR -#undef __CREATE_PROGRAM_WITH_SOURCE_ERR -#undef __CREATE_PROGRAM_WITH_BINARY_ERR -#undef __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR -#undef __BUILD_PROGRAM_ERR -#undef __CREATE_KERNELS_IN_PROGRAM_ERR - -#undef __CREATE_COMMAND_QUEUE_ERR -#undef __SET_COMMAND_QUEUE_PROPERTY_ERR -#undef 
__ENQUEUE_READ_BUFFER_ERR -#undef __ENQUEUE_WRITE_BUFFER_ERR -#undef __ENQUEUE_READ_BUFFER_RECT_ERR -#undef __ENQUEUE_WRITE_BUFFER_RECT_ERR -#undef __ENQEUE_COPY_BUFFER_ERR -#undef __ENQEUE_COPY_BUFFER_RECT_ERR -#undef __ENQUEUE_READ_IMAGE_ERR -#undef __ENQUEUE_WRITE_IMAGE_ERR -#undef __ENQUEUE_COPY_IMAGE_ERR -#undef __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR -#undef __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR -#undef __ENQUEUE_MAP_BUFFER_ERR -#undef __ENQUEUE_MAP_IMAGE_ERR -#undef __ENQUEUE_UNMAP_MEM_OBJECT_ERR -#undef __ENQUEUE_NDRANGE_KERNEL_ERR -#undef __ENQUEUE_TASK_ERR -#undef __ENQUEUE_NATIVE_KERNEL - -#undef __CL_EXPLICIT_CONSTRUCTORS - -#undef __UNLOAD_COMPILER_ERR -#endif //__CL_USER_OVERRIDE_ERROR_STRINGS - -#undef __CL_FUNCTION_TYPE - -// Extensions -/** - * Deprecated APIs for 1.2 - */ -#if defined(CL_VERSION_1_1) -#undef __INIT_CL_EXT_FCN_PTR -#endif // #if defined(CL_VERSION_1_1) -#undef __CREATE_SUB_DEVICES - -#if defined(USE_CL_DEVICE_FISSION) -#undef __PARAM_NAME_DEVICE_FISSION -#endif // USE_CL_DEVICE_FISSION - -#undef __DEFAULT_NOT_INITIALIZED -#undef __DEFAULT_BEING_INITIALIZED -#undef __DEFAULT_INITIALIZED - -} // namespace cl - -#ifdef _WIN32 -#pragma pop_macro("max") -#endif // _WIN32 - -#endif // CL_HPP_ diff --git a/mace/core/runtime/opencl/cl2_header.h b/mace/core/runtime/opencl/cl2_header.h index f7c4af4b1cfee051e8afd869d945700eefa1cf20..476fe5ebbefdeed960bdb1c6b9b932de88bef6df 100644 --- a/mace/core/runtime/opencl/cl2_header.h +++ b/mace/core/runtime/opencl/cl2_header.h @@ -5,6 +5,8 @@ #ifndef MACE_CORE_RUNTIME_OPENCL_CL2_HEADER_H_ #define MACE_CORE_RUNTIME_OPENCL_CL2_HEADER_H_ +// Do not include cl2.hpp directly, include this header instead. 
+ #define CL_HPP_TARGET_OPENCL_VERSION 200 #include "mace/core/runtime/opencl/cl2.hpp" diff --git a/mace/core/opencl_allocator.cc b/mace/core/runtime/opencl/opencl_allocator.cc similarity index 89% rename from mace/core/opencl_allocator.cc rename to mace/core/runtime/opencl/opencl_allocator.cc index 0c4cf8f0f87069d20650622c578308983d61560b..9c8b5ceee552a0a2d8df5bf62eb6608fdd44c47d 100644 --- a/mace/core/opencl_allocator.cc +++ b/mace/core/runtime/opencl/opencl_allocator.cc @@ -3,7 +3,7 @@ // #include "mace/core/runtime/opencl/cl2_header.h" -#include "mace/core/opencl_allocator.h" +#include "mace/core/runtime/opencl/opencl_allocator.h" #include "mace/core/runtime/opencl/opencl_runtime.h" namespace mace { @@ -37,7 +37,7 @@ OpenCLAllocator::OpenCLAllocator() {} OpenCLAllocator::~OpenCLAllocator() {} void *OpenCLAllocator::New(size_t nbytes) { cl_int error; - cl::Buffer *buffer = new cl::Buffer(OpenCLRuntime::Get()->context(), + cl::Buffer *buffer = new cl::Buffer(OpenCLRuntime::Global()->context(), CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, nbytes, nullptr, &error); MACE_CHECK(error == CL_SUCCESS); @@ -53,7 +53,7 @@ void *OpenCLAllocator::NewImage(const std::vector &image_shape, cl_int error; cl::Image2D *cl_image = - new cl::Image2D(OpenCLRuntime::Get()->context(), + new cl::Image2D(OpenCLRuntime::Global()->context(), CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, img_format, image_shape[0], image_shape[1], @@ -79,7 +79,7 @@ void OpenCLAllocator::DeleteImage(void *buffer) { void *OpenCLAllocator::Map(void *buffer, size_t nbytes) { auto cl_buffer = static_cast(buffer); - auto queue = OpenCLRuntime::Get()->command_queue(); + auto queue = OpenCLRuntime::Global()->command_queue(); // TODO(heliangliang) Non-blocking call cl_int error; void *mapped_ptr = @@ -101,7 +101,7 @@ void *OpenCLAllocator::MapImage(void *buffer, mapped_image_pitch.resize(2); cl_int error; void *mapped_ptr = - OpenCLRuntime::Get()->command_queue().enqueueMapImage(*cl_image, + 
OpenCLRuntime::Global()->command_queue().enqueueMapImage(*cl_image, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, origin, region, &mapped_image_pitch[0], @@ -114,12 +114,13 @@ void *OpenCLAllocator::MapImage(void *buffer, void OpenCLAllocator::Unmap(void *buffer, void *mapped_ptr) { auto cl_buffer = static_cast(buffer); - auto queue = OpenCLRuntime::Get()->command_queue(); + auto queue = OpenCLRuntime::Global()->command_queue(); MACE_CHECK(queue.enqueueUnmapMemObject(*cl_buffer, mapped_ptr, nullptr, nullptr) == CL_SUCCESS); } bool OpenCLAllocator::OnHost() { return false; } +MACE_REGISTER_ALLOCATOR(DeviceType::OPENCL, new OpenCLAllocator()); } // namespace mace diff --git a/mace/core/opencl_allocator.h b/mace/core/runtime/opencl/opencl_allocator.h similarity index 100% rename from mace/core/opencl_allocator.h rename to mace/core/runtime/opencl/opencl_allocator.h diff --git a/mace/core/runtime/opencl/opencl_runtime.cc b/mace/core/runtime/opencl/opencl_runtime.cc index 93cb44545393d3816bc9de871b10d4e946ebefa8..1cc57079410bc014f2f9be131d491cde4a504d11 100644 --- a/mace/core/runtime/opencl/opencl_runtime.cc +++ b/mace/core/runtime/opencl/opencl_runtime.cc @@ -7,15 +7,17 @@ #include #include -#include "mace/core/logging.h" #include "mace/core/runtime/opencl/opencl_runtime.h" +#include "mace/utils/logging.h" +#include "mace/utils/tuner.h" #include namespace mace { namespace { -bool ReadFile(const std::string &filename, bool binary, +bool ReadFile(const std::string &filename, + bool binary, std::vector *content_ptr) { MACE_CHECK_NOTNULL(content_ptr); @@ -55,7 +57,8 @@ bool ReadFile(const std::string &filename, bool binary, return true; } -bool WriteFile(const std::string &filename, bool binary, +bool WriteFile(const std::string &filename, + bool binary, const std::vector &content) { std::ios_base::openmode mode = std::ios::out; if (binary) { @@ -76,124 +79,92 @@ bool WriteFile(const std::string &filename, bool binary, } // namespace -bool OpenCLRuntime::enable_profiling_ = false; 
-std::unique_ptr OpenCLRuntime::profiling_ev_ = nullptr; +void OpenCLProfilingTimer::StartTiming() {} -OpenCLRuntime *OpenCLRuntime::Get() { - static std::once_flag init_once; - static OpenCLRuntime *instance = nullptr; - std::call_once(init_once, []() { - if (!mace::OpenCLLibrary::Supported()) { - LOG(ERROR) << "OpenCL not supported"; - return; - } +void OpenCLProfilingTimer::StopTiming() { + OpenCLRuntime::Global()->command_queue().finish(); + start_nanos_ = event_->getProfilingInfo(); + stop_nanos_ = event_->getProfilingInfo(); +} - std::vector all_platforms; - cl::Platform::get(&all_platforms); - if (all_platforms.size() == 0) { - LOG(ERROR) << "No OpenCL platforms found"; - return; - } - cl::Platform default_platform = all_platforms[0]; - VLOG(1) << "Using platform: " - << default_platform.getInfo() << ", " - << default_platform.getInfo() << ", " - << default_platform.getInfo(); - - // get default device (CPUs, GPUs) of the default platform - std::vector all_devices; - default_platform.getDevices(CL_DEVICE_TYPE_ALL, &all_devices); - if (all_devices.size() == 0) { - LOG(ERROR) << "No OpenCL devices found"; - return; - } +double OpenCLProfilingTimer::ElapsedMicros() { + return (stop_nanos_ - start_nanos_) / 1000.0; +} - bool gpu_detected = false; - cl::Device gpu_device; - for (auto device : all_devices) { - if (device.getInfo() == CL_DEVICE_TYPE_GPU) { - gpu_device = device; - gpu_detected = true; - VLOG(1) << "Using device: " << device.getInfo(); - break; - } - } - if (!gpu_detected) { - LOG(ERROR) << "No GPU device found"; - return; - } +OpenCLRuntime *OpenCLRuntime::Global() { + static OpenCLRuntime instance; + return &instance; +} - cl_command_queue_properties properties = 0; -#ifdef __ENABLE_PROFILING - enable_profiling_ = true; - profiling_ev_.reset(new cl::Event()); - properties = CL_QUEUE_PROFILING_ENABLE; -#endif +OpenCLRuntime::OpenCLRuntime() { + LoadOpenCLLibrary(); - // a context is like a "runtime link" to the device and platform; - // i.e. 
communication is possible - cl::Context context({gpu_device}); - cl::CommandQueue command_queue(context, gpu_device, properties); - instance = new OpenCLRuntime(context, gpu_device, command_queue); + std::vector all_platforms; + cl::Platform::get(&all_platforms); + if (all_platforms.size() == 0) { + LOG(FATAL) << "No OpenCL platforms found"; + } + cl::Platform default_platform = all_platforms[0]; + VLOG(1) << "Using platform: " << default_platform.getInfo() + << ", " << default_platform.getInfo() << ", " + << default_platform.getInfo(); + + // get default device (CPUs, GPUs) of the default platform + std::vector all_devices; + default_platform.getDevices(CL_DEVICE_TYPE_ALL, &all_devices); + if (all_devices.size() == 0) { + LOG(FATAL) << "No OpenCL devices found"; + } - }); + bool gpu_detected = false; + cl::Device gpu_device; + for (auto device : all_devices) { + if (device.getInfo() == CL_DEVICE_TYPE_GPU) { + gpu_device = device; + gpu_detected = true; + VLOG(1) << "Using device: " << device.getInfo(); + break; + } + } + if (!gpu_detected) { + LOG(FATAL) << "No GPU device found"; + } - return instance; -} + cl_command_queue_properties properties = 0; -void OpenCLRuntime::EnableProfiling() { enable_profiling_ = true; } +#ifdef MACE_OPENCL_PROFILING + properties |= CL_QUEUE_PROFILING_ENABLE; +#endif -cl::Event *OpenCLRuntime::GetDefaultEvent() { return profiling_ev_.get(); } + // a context is like a "runtime link" to the device and platform; + // i.e. communication is possible + cl::Context context({gpu_device}); + cl::CommandQueue command_queue(context, gpu_device, properties); -cl_ulong OpenCLRuntime::GetEventProfilingStartInfo() { - MACE_CHECK(profiling_ev_, "is NULL, should enable profiling first."); - return profiling_ev_->getProfilingInfo(); -} + const char *kernel_path = getenv("MACE_KERNEL_PATH"); + this->kernel_path_ = std::string(kernel_path == nullptr ? 
"" : kernel_path) + "/"; -cl_ulong OpenCLRuntime::GetEventProfilingEndInfo() { - MACE_CHECK(profiling_ev_, "is NULL, should enable profiling first."); - return profiling_ev_->getProfilingInfo(); + this->device_ = new cl::Device(gpu_device); + this->context_ = new cl::Context(context); + this->command_queue_ = new cl::CommandQueue(command_queue); } -OpenCLRuntime::OpenCLRuntime(cl::Context context, cl::Device device, - cl::CommandQueue command_queue) - : context_(context), device_(device), command_queue_(command_queue) { - const char *kernel_path = getenv("MACE_KERNEL_PATH"); - kernel_path_ = std::string(kernel_path == nullptr ? "" : kernel_path) + "/"; +OpenCLRuntime::~OpenCLRuntime() { + built_program_map_.clear(); + delete command_queue_; + delete context_; + delete device_; + UnloadOpenCLLibrary(); } -OpenCLRuntime::~OpenCLRuntime() {} - -cl::Context &OpenCLRuntime::context() { return context_; } +cl::Context &OpenCLRuntime::context() { return *context_; } -cl::Device &OpenCLRuntime::device() { return device_; } +cl::Device &OpenCLRuntime::device() { return *device_; } -cl::CommandQueue &OpenCLRuntime::command_queue() { return command_queue_; } +cl::CommandQueue &OpenCLRuntime::command_queue() { return *command_queue_; } -cl::Program &OpenCLRuntime::program() { - // TODO(liuqi) : useless, leave it for old code. 
- return program_; -} - -// TODO(heliangliang) Support binary format -const std::map OpenCLRuntime::program_map_ = { - {"addn", "addn.cl"}, - {"batch_norm", "batch_norm.cl"}, - {"bias_add", "bias_add.cl"}, - {"buffer_to_image", "buffer_to_image.cl"}, - {"conv_2d", "conv_2d.cl"}, - {"conv_2d_1x1", "conv_2d_1x1.cl"}, - {"conv_2d_3x3", "conv_2d_3x3.cl"}, - {"depthwise_conv_3x3", "depthwise_conv_3x3.cl"}, - {"pooling", "pooling.cl"}, - {"relu", "relu.cl"}, - {"concat", "concat.cl"}, - {"resize_bilinear", "resize_bilinear.cl"}, - {"space_to_batch", "space_to_batch.cl"}, -}; - -std::string -OpenCLRuntime::GenerateCLBinaryFilenamePrefix(const std::string &filename_msg) { +std::string OpenCLRuntime::GenerateCLBinaryFilenamePrefix( + const std::string &filename_msg) { std::string filename_prefix = filename_msg; for (auto it = filename_prefix.begin(); it != filename_prefix.end(); ++it) { if (*it == ' ' || *it == '-' || *it == '=') { @@ -262,7 +233,7 @@ void OpenCLRuntime::BuildProgram(const std::string &program_file_name, program_binary_sizes.get(), nullptr); MACE_CHECK(err == CL_SUCCESS) << "Error code: " << err; std::unique_ptr[]> program_binaries( - new std::unique_ptr[ device_list_size ]); + new std::unique_ptr[device_list_size]); for (cl_uint i = 0; i < device_list_size; ++i) { program_binaries[i] = std::unique_ptr( new unsigned char[program_binary_sizes[i]]); @@ -281,16 +252,11 @@ void OpenCLRuntime::BuildProgram(const std::string &program_file_name, } } -cl::Kernel -OpenCLRuntime::BuildKernel(const std::string &program_name, - const std::string &kernel_name, - const std::set &build_options) { - auto kernel_program_it = program_map_.find(program_name); - if (kernel_program_it == program_map_.end()) { - MACE_CHECK(false, program_name, " opencl kernel doesn't exist."); - } - - std::string program_file_name = kernel_program_it->second; +cl::Kernel OpenCLRuntime::BuildKernel( + const std::string &program_name, + const std::string &kernel_name, + const std::set 
&build_options) { + std::string program_file_name = program_name + ".cl"; std::string build_options_str; for (auto &option : build_options) { build_options_str += " " + option; @@ -312,15 +278,24 @@ OpenCLRuntime::BuildKernel(const std::string &program_name, return cl::Kernel(program, kernel_name.c_str()); } +void OpenCLRuntime::GetCallStats(const cl::Event &event, CallStats *stats) { + if (stats != nullptr) { + stats->start_micros = + event.getProfilingInfo() / 1000; + stats->end_micros = + event.getProfilingInfo() / 1000; + } +} + uint32_t OpenCLRuntime::GetDeviceMaxWorkGroupSize() { unsigned long long size = 0; - device_.getInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE, &size); + device_->getInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE, &size); return static_cast(size); } uint32_t OpenCLRuntime::GetKernelMaxWorkGroupSize(const cl::Kernel &kernel) { unsigned long long size = 0; - kernel.getWorkGroupInfo(device_, CL_KERNEL_WORK_GROUP_SIZE, &size); + kernel.getWorkGroupInfo(*device_, CL_KERNEL_WORK_GROUP_SIZE, &size); return static_cast(size); } diff --git a/mace/core/runtime/opencl/opencl_runtime.h b/mace/core/runtime/opencl/opencl_runtime.h index cd9852511f018b49e1fe930ee049cb61056c55b1..7245b926997459da7c52992524f635bc041d0c92 100644 --- a/mace/core/runtime/opencl/opencl_runtime.h +++ b/mace/core/runtime/opencl/opencl_runtime.h @@ -10,36 +10,42 @@ #include #include +#include "mace/core/future.h" #include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/runtime/opencl/opencl_wrapper.h" +#include "mace/utils/timer.h" namespace mace { -class OpenCLRuntime { - public: - static OpenCLRuntime *Get(); +class OpenCLProfilingTimer : public Timer { + public: + explicit OpenCLProfilingTimer(const cl::Event *event) : event_(event) {}; + void StartTiming() override; + void StopTiming() override; + double ElapsedMicros() override; - static void EnableProfiling(); - cl::Event *GetDefaultEvent(); - - cl_ulong GetEventProfilingStartInfo(); - cl_ulong GetEventProfilingEndInfo(); + private: 
+ const cl::Event *event_; + double start_nanos_; + double stop_nanos_; +}; +class OpenCLRuntime { + public: + static OpenCLRuntime *Global(); cl::Context &context(); cl::Device &device(); cl::CommandQueue &command_queue(); - cl::Program &program(); + void GetCallStats(const cl::Event &event, CallStats *stats); uint32_t GetDeviceMaxWorkGroupSize(); uint32_t GetKernelMaxWorkGroupSize(const cl::Kernel& kernel); cl::Kernel BuildKernel(const std::string &program_name, const std::string &kernel_name, const std::set &build_options); private: - OpenCLRuntime(cl::Context context, - cl::Device device, - cl::CommandQueue command_queue); + OpenCLRuntime(); ~OpenCLRuntime(); OpenCLRuntime(const OpenCLRuntime&) = delete; OpenCLRuntime &operator=(const OpenCLRuntime&) = delete; @@ -51,19 +57,14 @@ class OpenCLRuntime { std::string GenerateCLBinaryFilenamePrefix(const std::string &filename_msg); private: - static bool enable_profiling_; - static std::unique_ptr profiling_ev_; - - cl::Context context_; - cl::Device device_; - cl::CommandQueue command_queue_; - cl::Program program_; + // All OpenCL object must be a pointer and manually deleted before unloading + // OpenCL library. 
+ cl::Context *context_; + cl::Device *device_; + cl::CommandQueue *command_queue_; + std::map built_program_map_; std::mutex program_build_mutex_; std::string kernel_path_; - static const std::map program_map_; - mutable std::map built_program_map_; }; } // namespace mace diff --git a/mace/core/runtime/opencl/opencl_wrapper.cc b/mace/core/runtime/opencl/opencl_wrapper.cc index afd2b1737a103f20bc8a86b1ed2e091e4fb62c35..34d8da3156934b48d481fbe2b67a4cb8b4764fbc 100644 --- a/mace/core/runtime/opencl/opencl_wrapper.cc +++ b/mace/core/runtime/opencl/opencl_wrapper.cc @@ -4,11 +4,10 @@ #include "CL/opencl.h" -#include "mace/core/logging.h" +#include "mace/utils/logging.h" #include "mace/core/runtime/opencl/opencl_wrapper.h" #include -#include /** * Wrapper of OpenCL 2.0 (based on 1.2) @@ -18,10 +17,8 @@ namespace mace { namespace { class OpenCLLibraryImpl final { public: - static OpenCLLibraryImpl &Get(); bool Load(); void Unload(); - bool loaded() { return handle_ != nullptr; } using clGetPlatformIDsFunc = cl_int (*)(cl_uint, cl_platform_id *, cl_uint *); using clGetPlatformInfoFunc = @@ -113,11 +110,8 @@ class OpenCLLibraryImpl final { const cl_event *, cl_event *, cl_int *); - using clCreateCommandQueueWithPropertiesFunc = - cl_command_queue (*)(cl_context /* context */, - cl_device_id /* device */, - const cl_queue_properties * /* properties */, - cl_int * /* errcode_ret */); + using clCreateCommandQueueWithPropertiesFunc = cl_command_queue (*)( + cl_context, cl_device_id, const cl_queue_properties *, cl_int *); using clReleaseCommandQueueFunc = cl_int (*)(cl_command_queue); using clCreateProgramWithBinaryFunc = cl_program (*)(cl_context, cl_uint, @@ -161,82 +155,70 @@ class OpenCLLibraryImpl final { void *, size_t *); using clGetEventProfilingInfoFunc = cl_int (*)(cl_event event, - cl_profiling_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret); - using clGetImageInfoFunc = cl_int (*)(cl_mem, - cl_image_info, - size_t, - 
void *, - size_t *); - -#define DEFINE_FUNC_PTR(func) func##Func func = nullptr - - DEFINE_FUNC_PTR(clGetPlatformIDs); - DEFINE_FUNC_PTR(clGetPlatformInfo); - DEFINE_FUNC_PTR(clBuildProgram); - DEFINE_FUNC_PTR(clEnqueueNDRangeKernel); - DEFINE_FUNC_PTR(clSetKernelArg); - DEFINE_FUNC_PTR(clReleaseKernel); - DEFINE_FUNC_PTR(clCreateProgramWithSource); - DEFINE_FUNC_PTR(clCreateBuffer); - DEFINE_FUNC_PTR(clCreateImage); - DEFINE_FUNC_PTR(clRetainKernel); - DEFINE_FUNC_PTR(clCreateKernel); - DEFINE_FUNC_PTR(clGetProgramInfo); - DEFINE_FUNC_PTR(clFlush); - DEFINE_FUNC_PTR(clFinish); - DEFINE_FUNC_PTR(clReleaseProgram); - DEFINE_FUNC_PTR(clRetainContext); - DEFINE_FUNC_PTR(clGetContextInfo); - DEFINE_FUNC_PTR(clCreateProgramWithBinary); - DEFINE_FUNC_PTR(clCreateCommandQueueWithProperties); - DEFINE_FUNC_PTR(clReleaseCommandQueue); - DEFINE_FUNC_PTR(clEnqueueMapBuffer); - DEFINE_FUNC_PTR(clEnqueueMapImage); - DEFINE_FUNC_PTR(clRetainProgram); - DEFINE_FUNC_PTR(clGetProgramBuildInfo); - DEFINE_FUNC_PTR(clEnqueueReadBuffer); - DEFINE_FUNC_PTR(clEnqueueWriteBuffer); - DEFINE_FUNC_PTR(clWaitForEvents); - DEFINE_FUNC_PTR(clReleaseEvent); - DEFINE_FUNC_PTR(clCreateContext); - DEFINE_FUNC_PTR(clCreateContextFromType); - DEFINE_FUNC_PTR(clReleaseContext); - DEFINE_FUNC_PTR(clRetainCommandQueue); - DEFINE_FUNC_PTR(clEnqueueUnmapMemObject); - DEFINE_FUNC_PTR(clRetainMemObject); - DEFINE_FUNC_PTR(clReleaseMemObject); - DEFINE_FUNC_PTR(clGetDeviceInfo); - DEFINE_FUNC_PTR(clGetDeviceIDs); - DEFINE_FUNC_PTR(clRetainDevice); - DEFINE_FUNC_PTR(clReleaseDevice); - DEFINE_FUNC_PTR(clRetainEvent); - DEFINE_FUNC_PTR(clGetKernelWorkGroupInfo); - DEFINE_FUNC_PTR(clGetEventProfilingInfo); - DEFINE_FUNC_PTR(clGetImageInfo); - -#undef DEFINE_FUNC_PTR + cl_profiling_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret); + using clGetImageInfoFunc = + cl_int (*)(cl_mem, cl_image_info, size_t, void *, size_t *); + +#define MACE_CL_DEFINE_FUNC_PTR(func) 
func##Func func = nullptr + + MACE_CL_DEFINE_FUNC_PTR(clGetPlatformIDs); + MACE_CL_DEFINE_FUNC_PTR(clGetPlatformInfo); + MACE_CL_DEFINE_FUNC_PTR(clBuildProgram); + MACE_CL_DEFINE_FUNC_PTR(clEnqueueNDRangeKernel); + MACE_CL_DEFINE_FUNC_PTR(clSetKernelArg); + MACE_CL_DEFINE_FUNC_PTR(clReleaseKernel); + MACE_CL_DEFINE_FUNC_PTR(clCreateProgramWithSource); + MACE_CL_DEFINE_FUNC_PTR(clCreateBuffer); + MACE_CL_DEFINE_FUNC_PTR(clCreateImage); + MACE_CL_DEFINE_FUNC_PTR(clRetainKernel); + MACE_CL_DEFINE_FUNC_PTR(clCreateKernel); + MACE_CL_DEFINE_FUNC_PTR(clGetProgramInfo); + MACE_CL_DEFINE_FUNC_PTR(clFlush); + MACE_CL_DEFINE_FUNC_PTR(clFinish); + MACE_CL_DEFINE_FUNC_PTR(clReleaseProgram); + MACE_CL_DEFINE_FUNC_PTR(clRetainContext); + MACE_CL_DEFINE_FUNC_PTR(clGetContextInfo); + MACE_CL_DEFINE_FUNC_PTR(clCreateProgramWithBinary); + MACE_CL_DEFINE_FUNC_PTR(clCreateCommandQueueWithProperties); + MACE_CL_DEFINE_FUNC_PTR(clReleaseCommandQueue); + MACE_CL_DEFINE_FUNC_PTR(clEnqueueMapBuffer); + MACE_CL_DEFINE_FUNC_PTR(clEnqueueMapImage); + MACE_CL_DEFINE_FUNC_PTR(clRetainProgram); + MACE_CL_DEFINE_FUNC_PTR(clGetProgramBuildInfo); + MACE_CL_DEFINE_FUNC_PTR(clEnqueueReadBuffer); + MACE_CL_DEFINE_FUNC_PTR(clEnqueueWriteBuffer); + MACE_CL_DEFINE_FUNC_PTR(clWaitForEvents); + MACE_CL_DEFINE_FUNC_PTR(clReleaseEvent); + MACE_CL_DEFINE_FUNC_PTR(clCreateContext); + MACE_CL_DEFINE_FUNC_PTR(clCreateContextFromType); + MACE_CL_DEFINE_FUNC_PTR(clReleaseContext); + MACE_CL_DEFINE_FUNC_PTR(clRetainCommandQueue); + MACE_CL_DEFINE_FUNC_PTR(clEnqueueUnmapMemObject); + MACE_CL_DEFINE_FUNC_PTR(clRetainMemObject); + MACE_CL_DEFINE_FUNC_PTR(clReleaseMemObject); + MACE_CL_DEFINE_FUNC_PTR(clGetDeviceInfo); + MACE_CL_DEFINE_FUNC_PTR(clGetDeviceIDs); + MACE_CL_DEFINE_FUNC_PTR(clRetainDevice); + MACE_CL_DEFINE_FUNC_PTR(clReleaseDevice); + MACE_CL_DEFINE_FUNC_PTR(clRetainEvent); + MACE_CL_DEFINE_FUNC_PTR(clGetKernelWorkGroupInfo); + MACE_CL_DEFINE_FUNC_PTR(clGetEventProfilingInfo); + 
MACE_CL_DEFINE_FUNC_PTR(clGetImageInfo); + +#undef MACE_CL_DEFINE_FUNC_PTR private: void *LoadFromPath(const std::string &path); void *handle_ = nullptr; }; -OpenCLLibraryImpl &OpenCLLibraryImpl::Get() { - static std::once_flag load_once; - static OpenCLLibraryImpl instance; - std::call_once(load_once, []() { instance.Load(); }); - return instance; -} - bool OpenCLLibraryImpl::Load() { - if (loaded()) return true; + if (handle_ != nullptr) { return true; } - // TODO(heliangliang) Make this configurable - // TODO(heliangliang) Benchmark 64 bit overhead - static const std::vector paths = { + const std::vector paths = { "libOpenCL.so", #if defined(__aarch64__) // Qualcomm Adreno @@ -260,12 +242,16 @@ bool OpenCLLibraryImpl::Load() { void *handle = LoadFromPath(path); if (handle != nullptr) { handle_ = handle; - return true; + break; } } - LOG(ERROR) << "Failed to load OpenCL library"; - return false; + if (handle_ == nullptr) { + LOG(ERROR) << "Failed to load OpenCL library"; + return false; + } + + return true; } void OpenCLLibraryImpl::Unload() { @@ -286,7 +272,7 @@ void *OpenCLLibraryImpl::LoadFromPath(const std::string &path) { return nullptr; } -#define ASSIGN_FROM_DLSYM(func) \ +#define MACE_CL_ASSIGN_FROM_DLSYM(func) \ do { \ void *ptr = dlsym(handle, #func); \ if (ptr == nullptr) { \ @@ -298,86 +284,91 @@ void *OpenCLLibraryImpl::LoadFromPath(const std::string &path) { VLOG(2) << "Loaded " << #func << " from " << path; \ } while (false) - ASSIGN_FROM_DLSYM(clGetPlatformIDs); - ASSIGN_FROM_DLSYM(clGetPlatformInfo); - ASSIGN_FROM_DLSYM(clBuildProgram); - ASSIGN_FROM_DLSYM(clEnqueueNDRangeKernel); - ASSIGN_FROM_DLSYM(clSetKernelArg); - ASSIGN_FROM_DLSYM(clReleaseKernel); - ASSIGN_FROM_DLSYM(clCreateProgramWithSource); - ASSIGN_FROM_DLSYM(clCreateBuffer); - ASSIGN_FROM_DLSYM(clCreateImage); - ASSIGN_FROM_DLSYM(clRetainKernel); - ASSIGN_FROM_DLSYM(clCreateKernel); - ASSIGN_FROM_DLSYM(clGetProgramInfo); - ASSIGN_FROM_DLSYM(clFlush); - ASSIGN_FROM_DLSYM(clFinish); - 
ASSIGN_FROM_DLSYM(clReleaseProgram); - ASSIGN_FROM_DLSYM(clRetainContext); - ASSIGN_FROM_DLSYM(clGetContextInfo); - ASSIGN_FROM_DLSYM(clCreateProgramWithBinary); - ASSIGN_FROM_DLSYM(clCreateCommandQueueWithProperties); - ASSIGN_FROM_DLSYM(clReleaseCommandQueue); - ASSIGN_FROM_DLSYM(clEnqueueMapBuffer); - ASSIGN_FROM_DLSYM(clEnqueueMapImage); - ASSIGN_FROM_DLSYM(clRetainProgram); - ASSIGN_FROM_DLSYM(clGetProgramBuildInfo); - ASSIGN_FROM_DLSYM(clEnqueueReadBuffer); - ASSIGN_FROM_DLSYM(clEnqueueWriteBuffer); - ASSIGN_FROM_DLSYM(clWaitForEvents); - ASSIGN_FROM_DLSYM(clReleaseEvent); - ASSIGN_FROM_DLSYM(clCreateContext); - ASSIGN_FROM_DLSYM(clCreateContextFromType); - ASSIGN_FROM_DLSYM(clReleaseContext); - ASSIGN_FROM_DLSYM(clRetainCommandQueue); - ASSIGN_FROM_DLSYM(clEnqueueUnmapMemObject); - ASSIGN_FROM_DLSYM(clRetainMemObject); - ASSIGN_FROM_DLSYM(clReleaseMemObject); - ASSIGN_FROM_DLSYM(clGetDeviceInfo); - ASSIGN_FROM_DLSYM(clGetDeviceIDs); - ASSIGN_FROM_DLSYM(clRetainDevice); - ASSIGN_FROM_DLSYM(clReleaseDevice); - ASSIGN_FROM_DLSYM(clRetainEvent); - ASSIGN_FROM_DLSYM(clGetKernelWorkGroupInfo); - ASSIGN_FROM_DLSYM(clGetEventProfilingInfo); - ASSIGN_FROM_DLSYM(clGetImageInfo); - -#undef ASSIGN_FROM_DLSYM + MACE_CL_ASSIGN_FROM_DLSYM(clGetPlatformIDs); + MACE_CL_ASSIGN_FROM_DLSYM(clGetPlatformInfo); + MACE_CL_ASSIGN_FROM_DLSYM(clBuildProgram); + MACE_CL_ASSIGN_FROM_DLSYM(clEnqueueNDRangeKernel); + MACE_CL_ASSIGN_FROM_DLSYM(clSetKernelArg); + MACE_CL_ASSIGN_FROM_DLSYM(clReleaseKernel); + MACE_CL_ASSIGN_FROM_DLSYM(clCreateProgramWithSource); + MACE_CL_ASSIGN_FROM_DLSYM(clCreateBuffer); + MACE_CL_ASSIGN_FROM_DLSYM(clCreateImage); + MACE_CL_ASSIGN_FROM_DLSYM(clRetainKernel); + MACE_CL_ASSIGN_FROM_DLSYM(clCreateKernel); + MACE_CL_ASSIGN_FROM_DLSYM(clGetProgramInfo); + MACE_CL_ASSIGN_FROM_DLSYM(clFlush); + MACE_CL_ASSIGN_FROM_DLSYM(clFinish); + MACE_CL_ASSIGN_FROM_DLSYM(clReleaseProgram); + MACE_CL_ASSIGN_FROM_DLSYM(clRetainContext); + 
MACE_CL_ASSIGN_FROM_DLSYM(clGetContextInfo); + MACE_CL_ASSIGN_FROM_DLSYM(clCreateProgramWithBinary); + MACE_CL_ASSIGN_FROM_DLSYM(clCreateCommandQueueWithProperties); + MACE_CL_ASSIGN_FROM_DLSYM(clReleaseCommandQueue); + MACE_CL_ASSIGN_FROM_DLSYM(clEnqueueMapBuffer); + MACE_CL_ASSIGN_FROM_DLSYM(clEnqueueMapImage); + MACE_CL_ASSIGN_FROM_DLSYM(clRetainProgram); + MACE_CL_ASSIGN_FROM_DLSYM(clGetProgramBuildInfo); + MACE_CL_ASSIGN_FROM_DLSYM(clEnqueueReadBuffer); + MACE_CL_ASSIGN_FROM_DLSYM(clEnqueueWriteBuffer); + MACE_CL_ASSIGN_FROM_DLSYM(clWaitForEvents); + MACE_CL_ASSIGN_FROM_DLSYM(clReleaseEvent); + MACE_CL_ASSIGN_FROM_DLSYM(clCreateContext); + MACE_CL_ASSIGN_FROM_DLSYM(clCreateContextFromType); + MACE_CL_ASSIGN_FROM_DLSYM(clReleaseContext); + MACE_CL_ASSIGN_FROM_DLSYM(clRetainCommandQueue); + MACE_CL_ASSIGN_FROM_DLSYM(clEnqueueUnmapMemObject); + MACE_CL_ASSIGN_FROM_DLSYM(clRetainMemObject); + MACE_CL_ASSIGN_FROM_DLSYM(clReleaseMemObject); + MACE_CL_ASSIGN_FROM_DLSYM(clGetDeviceInfo); + MACE_CL_ASSIGN_FROM_DLSYM(clGetDeviceIDs); + MACE_CL_ASSIGN_FROM_DLSYM(clRetainDevice); + MACE_CL_ASSIGN_FROM_DLSYM(clReleaseDevice); + MACE_CL_ASSIGN_FROM_DLSYM(clRetainEvent); + MACE_CL_ASSIGN_FROM_DLSYM(clGetKernelWorkGroupInfo); + MACE_CL_ASSIGN_FROM_DLSYM(clGetEventProfilingInfo); + MACE_CL_ASSIGN_FROM_DLSYM(clGetImageInfo); + +#undef MACE_CL_ASSIGN_FROM_DLSYM return handle; } -} // namespace -bool OpenCLLibrary::Supported() { return OpenCLLibraryImpl::Get().loaded(); } +OpenCLLibraryImpl *openclLibraryImpl = nullptr; +} // namespace -void OpenCLLibrary::Load() { OpenCLLibraryImpl::Get().Load(); } +void LoadOpenCLLibrary() { + if (openclLibraryImpl == nullptr) { + openclLibraryImpl = new OpenCLLibraryImpl(); + MACE_CHECK(openclLibraryImpl->Load()); + } +} -void OpenCLLibrary::Unload() { OpenCLLibraryImpl::Get().Unload(); } +void UnloadOpenCLLibrary() { + openclLibraryImpl->Unload(); + delete openclLibraryImpl; + openclLibraryImpl = nullptr; +} } // namespace mace cl_int 
clGetPlatformIDs(cl_uint num_entries, cl_platform_id *platforms, cl_uint *num_platforms) { - auto func = mace::OpenCLLibraryImpl::Get().clGetPlatformIDs; - if (func != nullptr) { - return func(num_entries, platforms, num_platforms); - } else { - return CL_OUT_OF_RESOURCES; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clGetPlatformIDs; + MACE_CHECK_NOTNULL(func); + return func(num_entries, platforms, num_platforms); } cl_int clGetPlatformInfo(cl_platform_id platform, cl_platform_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { - auto func = mace::OpenCLLibraryImpl::Get().clGetPlatformInfo; - if (func != nullptr) { - return func(platform, param_name, param_value_size, param_value, - param_value_size_ret); - } else { - return CL_OUT_OF_RESOURCES; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clGetPlatformInfo; + MACE_CHECK_NOTNULL(func); + return func(platform, param_name, param_value_size, param_value, + param_value_size_ret); } cl_int clBuildProgram(cl_program program, @@ -387,13 +378,11 @@ cl_int clBuildProgram(cl_program program, void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), void *user_data) { - auto func = mace::OpenCLLibraryImpl::Get().clBuildProgram; - if (func != nullptr) { - return func(program, num_devices, device_list, options, pfn_notify, - user_data); - } else { - return CL_OUT_OF_RESOURCES; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clBuildProgram; + MACE_CHECK_NOTNULL(func); + return func(program, num_devices, device_list, options, pfn_notify, + user_data); } cl_int clEnqueueNDRangeKernel(cl_command_queue command_queue, @@ -405,44 +394,36 @@ cl_int clEnqueueNDRangeKernel(cl_command_queue command_queue, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) { - auto func = mace::OpenCLLibraryImpl::Get().clEnqueueNDRangeKernel; - 
if (func != nullptr) { - return func(command_queue, kernel, work_dim, global_work_offset, - global_work_size, local_work_size, num_events_in_wait_list, - event_wait_list, event); - } else { - return CL_OUT_OF_RESOURCES; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clEnqueueNDRangeKernel; + MACE_CHECK_NOTNULL(func); + return func(command_queue, kernel, work_dim, global_work_offset, + global_work_size, local_work_size, num_events_in_wait_list, + event_wait_list, event); } cl_int clSetKernelArg(cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void *arg_value) { - auto func = mace::OpenCLLibraryImpl::Get().clSetKernelArg; - if (func != nullptr) { - return func(kernel, arg_index, arg_size, arg_value); - } else { - return CL_OUT_OF_RESOURCES; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clSetKernelArg; + MACE_CHECK_NOTNULL(func); + return func(kernel, arg_index, arg_size, arg_value); } cl_int clRetainMemObject(cl_mem memobj) { - auto func = mace::OpenCLLibraryImpl::Get().clRetainMemObject; - if (func != nullptr) { - return func(memobj); - } else { - return CL_OUT_OF_RESOURCES; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clRetainMemObject; + MACE_CHECK_NOTNULL(func); + return func(memobj); } cl_int clReleaseMemObject(cl_mem memobj) { - auto func = mace::OpenCLLibraryImpl::Get().clReleaseMemObject; - if (func != nullptr) { - return func(memobj); - } else { - return CL_OUT_OF_RESOURCES; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clReleaseMemObject; + MACE_CHECK_NOTNULL(func); + return func(memobj); } cl_int clEnqueueUnmapMemObject(cl_command_queue command_queue, @@ -451,23 +432,20 @@ cl_int clEnqueueUnmapMemObject(cl_command_queue command_queue, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) { - auto func = 
mace::OpenCLLibraryImpl::Get().clEnqueueUnmapMemObject; - if (func != nullptr) { - return func(command_queue, memobj, mapped_ptr, num_events_in_wait_list, - event_wait_list, event); - } else { - return CL_OUT_OF_RESOURCES; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clEnqueueUnmapMemObject; + MACE_CHECK_NOTNULL(func); + return func(command_queue, memobj, mapped_ptr, num_events_in_wait_list, + event_wait_list, event); } cl_int clRetainCommandQueue(cl_command_queue command_queue) { - auto func = mace::OpenCLLibraryImpl::Get().clRetainCommandQueue; - if (func != nullptr) { - return func(command_queue); - } else { - return CL_OUT_OF_RESOURCES; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clRetainCommandQueue; + MACE_CHECK_NOTNULL(func); + return func(command_queue); } + cl_context clCreateContext( const cl_context_properties *properties, cl_uint num_devices, @@ -475,53 +453,44 @@ cl_context clCreateContext( void(CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *), void *user_data, cl_int *errcode_ret) { - auto func = mace::OpenCLLibraryImpl::Get().clCreateContext; - if (func != nullptr) { - return func(properties, num_devices, devices, pfn_notify, user_data, - errcode_ret); - } else { - return nullptr; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clCreateContext; + MACE_CHECK_NOTNULL(func); + return func(properties, num_devices, devices, pfn_notify, user_data, + errcode_ret); } + cl_context clCreateContextFromType( const cl_context_properties *properties, cl_device_type device_type, void(CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *), void *user_data, cl_int *errcode_ret) { - auto func = mace::OpenCLLibraryImpl::Get().clCreateContextFromType; - if (func != nullptr) { - return func(properties, device_type, pfn_notify, user_data, errcode_ret); - } else { - return nullptr; - } + 
MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clCreateContextFromType; + MACE_CHECK_NOTNULL(func); + return func(properties, device_type, pfn_notify, user_data, errcode_ret); } cl_int clReleaseContext(cl_context context) { - auto func = mace::OpenCLLibraryImpl::Get().clReleaseContext; - if (func != nullptr) { - return func(context); - } else { - return CL_OUT_OF_RESOURCES; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clReleaseContext; + MACE_CHECK_NOTNULL(func); + return func(context); } cl_int clWaitForEvents(cl_uint num_events, const cl_event *event_list) { - auto func = mace::OpenCLLibraryImpl::Get().clWaitForEvents; - if (func != nullptr) { - return func(num_events, event_list); - } else { - return CL_OUT_OF_RESOURCES; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clWaitForEvents; + MACE_CHECK_NOTNULL(func); + return func(num_events, event_list); } cl_int clReleaseEvent(cl_event event) { - auto func = mace::OpenCLLibraryImpl::Get().clReleaseEvent; - if (func != nullptr) { - return func(event); - } else { - return CL_OUT_OF_RESOURCES; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clReleaseEvent; + MACE_CHECK_NOTNULL(func); + return func(event); } cl_int clEnqueueWriteBuffer(cl_command_queue command_queue, @@ -533,13 +502,11 @@ cl_int clEnqueueWriteBuffer(cl_command_queue command_queue, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) { - auto func = mace::OpenCLLibraryImpl::Get().clEnqueueWriteBuffer; - if (func != nullptr) { - return func(command_queue, buffer, blocking_write, offset, size, ptr, - num_events_in_wait_list, event_wait_list, event); - } else { - return CL_OUT_OF_RESOURCES; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clEnqueueWriteBuffer; + MACE_CHECK_NOTNULL(func); + return func(command_queue, buffer, 
blocking_write, offset, size, ptr, + num_events_in_wait_list, event_wait_list, event); } cl_int clEnqueueReadBuffer(cl_command_queue command_queue, @@ -551,13 +518,11 @@ cl_int clEnqueueReadBuffer(cl_command_queue command_queue, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) { - auto func = mace::OpenCLLibraryImpl::Get().clEnqueueReadBuffer; - if (func != nullptr) { - return func(command_queue, buffer, blocking_read, offset, size, ptr, - num_events_in_wait_list, event_wait_list, event); - } else { - return CL_OUT_OF_RESOURCES; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clEnqueueReadBuffer; + MACE_CHECK_NOTNULL(func); + return func(command_queue, buffer, blocking_read, offset, size, ptr, + num_events_in_wait_list, event_wait_list, event); } cl_int clGetProgramBuildInfo(cl_program program, @@ -566,22 +531,18 @@ cl_int clGetProgramBuildInfo(cl_program program, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { - auto func = mace::OpenCLLibraryImpl::Get().clGetProgramBuildInfo; - if (func != nullptr) { - return func(program, device, param_name, param_value_size, param_value, - param_value_size_ret); - } else { - return CL_OUT_OF_RESOURCES; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clGetProgramBuildInfo; + MACE_CHECK_NOTNULL(func); + return func(program, device, param_name, param_value_size, param_value, + param_value_size_ret); } cl_int clRetainProgram(cl_program program) { - auto func = mace::OpenCLLibraryImpl::Get().clRetainProgram; - if (func != nullptr) { - return func(program); - } else { - return CL_OUT_OF_RESOURCES; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clRetainProgram; + MACE_CHECK_NOTNULL(func); + return func(program); } void *clEnqueueMapBuffer(cl_command_queue command_queue, @@ -594,16 +555,11 @@ void *clEnqueueMapBuffer(cl_command_queue command_queue, 
const cl_event *event_wait_list, cl_event *event, cl_int *errcode_ret) { - auto func = mace::OpenCLLibraryImpl::Get().clEnqueueMapBuffer; - if (func != nullptr) { - return func(command_queue, buffer, blocking_map, map_flags, offset, size, - num_events_in_wait_list, event_wait_list, event, errcode_ret); - } else { - if (errcode_ret != nullptr) { - *errcode_ret = CL_OUT_OF_RESOURCES; - } - return nullptr; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clEnqueueMapBuffer; + MACE_CHECK_NOTNULL(func); + return func(command_queue, buffer, blocking_map, map_flags, offset, size, + num_events_in_wait_list, event_wait_list, event, errcode_ret); } void *clEnqueueMapImage(cl_command_queue command_queue, @@ -618,38 +574,30 @@ void *clEnqueueMapImage(cl_command_queue command_queue, const cl_event *event_wait_list, cl_event *event, cl_int *errcode_ret) { - auto func = mace::OpenCLLibraryImpl::Get().clEnqueueMapImage; - if (func != nullptr) { - return func(command_queue, image, blocking_map, map_flags, origin, region, - image_row_pitch, image_slice_pitch, - num_events_in_wait_list, event_wait_list, event, errcode_ret); - } else { - if (errcode_ret != nullptr) { - *errcode_ret = CL_OUT_OF_RESOURCES; - } - return nullptr; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clEnqueueMapImage; + MACE_CHECK_NOTNULL(func); + return func(command_queue, image, blocking_map, map_flags, origin, region, + image_row_pitch, image_slice_pitch, num_events_in_wait_list, + event_wait_list, event, errcode_ret); } + cl_command_queue clCreateCommandQueueWithProperties( cl_context context, cl_device_id device, const cl_queue_properties *properties, cl_int *errcode_ret) { - auto func = mace::OpenCLLibraryImpl::Get().clCreateCommandQueueWithProperties; - if (func != nullptr) { - return func(context, device, properties, errcode_ret); - } else { - return nullptr; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func 
= mace::openclLibraryImpl->clCreateCommandQueueWithProperties; + MACE_CHECK_NOTNULL(func); + return func(context, device, properties, errcode_ret); } cl_int clReleaseCommandQueue(cl_command_queue command_queue) { - auto func = mace::OpenCLLibraryImpl::Get().clReleaseCommandQueue; - if (func != nullptr) { - return func(command_queue); - } else { - return CL_OUT_OF_RESOURCES; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clReleaseCommandQueue; + MACE_CHECK_NOTNULL(func); + return func(command_queue); } cl_program clCreateProgramWithBinary(cl_context context, @@ -659,25 +607,18 @@ cl_program clCreateProgramWithBinary(cl_context context, const unsigned char **binaries, cl_int *binary_status, cl_int *errcode_ret) { - auto func = mace::OpenCLLibraryImpl::Get().clCreateProgramWithBinary; - if (func != nullptr) { - return func(context, num_devices, device_list, lengths, binaries, - binary_status, errcode_ret); - } else { - if (errcode_ret != nullptr) { - *errcode_ret = CL_OUT_OF_RESOURCES; - } - return nullptr; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clCreateProgramWithBinary; + MACE_CHECK_NOTNULL(func); + return func(context, num_devices, device_list, lengths, binaries, + binary_status, errcode_ret); } cl_int clRetainContext(cl_context context) { - auto func = mace::OpenCLLibraryImpl::Get().clRetainContext; - if (func != nullptr) { - return func(context); - } else { - return CL_OUT_OF_RESOURCES; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clRetainContext; + MACE_CHECK_NOTNULL(func); + return func(context); } cl_int clGetContextInfo(cl_context context, @@ -685,40 +626,32 @@ cl_int clGetContextInfo(cl_context context, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { - auto func = mace::OpenCLLibraryImpl::Get().clGetContextInfo; - if (func != nullptr) { - return func(context, param_name, param_value_size, 
param_value, - param_value_size_ret); - } else { - return CL_OUT_OF_RESOURCES; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clGetContextInfo; + MACE_CHECK_NOTNULL(func); + return func(context, param_name, param_value_size, param_value, + param_value_size_ret); } cl_int clReleaseProgram(cl_program program) { - auto func = mace::OpenCLLibraryImpl::Get().clReleaseProgram; - if (func != nullptr) { - return func(program); - } else { - return CL_OUT_OF_RESOURCES; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clReleaseProgram; + MACE_CHECK_NOTNULL(func); + return func(program); } cl_int clFlush(cl_command_queue command_queue) { - auto func = mace::OpenCLLibraryImpl::Get().clFlush; - if (func != nullptr) { - return func(command_queue); - } else { - return CL_OUT_OF_RESOURCES; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clFlush; + MACE_CHECK_NOTNULL(func); + return func(command_queue); } cl_int clFinish(cl_command_queue command_queue) { - auto func = mace::OpenCLLibraryImpl::Get().clFinish; - if (func != nullptr) { - return func(command_queue); - } else { - return CL_OUT_OF_RESOURCES; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clFinish; + MACE_CHECK_NOTNULL(func); + return func(command_queue); } cl_int clGetProgramInfo(cl_program program, @@ -726,36 +659,27 @@ cl_int clGetProgramInfo(cl_program program, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { - auto func = mace::OpenCLLibraryImpl::Get().clGetProgramInfo; - if (func != nullptr) { - return func(program, param_name, param_value_size, param_value, - param_value_size_ret); - } else { - return CL_OUT_OF_RESOURCES; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clGetProgramInfo; + MACE_CHECK_NOTNULL(func); + return func(program, param_name, param_value_size, param_value, + 
param_value_size_ret); } cl_kernel clCreateKernel(cl_program program, const char *kernel_name, cl_int *errcode_ret) { - auto func = mace::OpenCLLibraryImpl::Get().clCreateKernel; - if (func != nullptr) { - return func(program, kernel_name, errcode_ret); - } else { - if (errcode_ret != nullptr) { - *errcode_ret = CL_OUT_OF_RESOURCES; - } - return nullptr; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clCreateKernel; + MACE_CHECK_NOTNULL(func); + return func(program, kernel_name, errcode_ret); } cl_int clRetainKernel(cl_kernel kernel) { - auto func = mace::OpenCLLibraryImpl::Get().clRetainKernel; - if (func != nullptr) { - return func(kernel); - } else { - return CL_OUT_OF_RESOURCES; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clRetainKernel; + MACE_CHECK_NOTNULL(func); + return func(kernel); } cl_mem clCreateBuffer(cl_context context, @@ -763,15 +687,10 @@ cl_mem clCreateBuffer(cl_context context, size_t size, void *host_ptr, cl_int *errcode_ret) { - auto func = mace::OpenCLLibraryImpl::Get().clCreateBuffer; - if (func != nullptr) { - return func(context, flags, size, host_ptr, errcode_ret); - } else { - if (errcode_ret != nullptr) { - *errcode_ret = CL_OUT_OF_RESOURCES; - } - return nullptr; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clCreateBuffer; + MACE_CHECK_NOTNULL(func); + return func(context, flags, size, host_ptr, errcode_ret); } cl_mem clCreateImage(cl_context context, @@ -780,16 +699,10 @@ cl_mem clCreateImage(cl_context context, const cl_image_desc *image_desc, void *host_ptr, cl_int *errcode_ret) { - auto func = mace::OpenCLLibraryImpl::Get().clCreateImage; - if (func != nullptr) { - return func(context, flags, image_format, image_desc, host_ptr, - errcode_ret); - } else { - if (errcode_ret != nullptr) { - *errcode_ret = CL_OUT_OF_RESOURCES; - } - return nullptr; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + 
auto func = mace::openclLibraryImpl->clCreateImage; + MACE_CHECK_NOTNULL(func); + return func(context, flags, image_format, image_desc, host_ptr, errcode_ret); } cl_program clCreateProgramWithSource(cl_context context, @@ -797,24 +710,17 @@ cl_program clCreateProgramWithSource(cl_context context, const char **strings, const size_t *lengths, cl_int *errcode_ret) { - auto func = mace::OpenCLLibraryImpl::Get().clCreateProgramWithSource; - if (func != nullptr) { - return func(context, count, strings, lengths, errcode_ret); - } else { - if (errcode_ret != nullptr) { - *errcode_ret = CL_OUT_OF_RESOURCES; - } - return nullptr; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clCreateProgramWithSource; + MACE_CHECK_NOTNULL(func); + return func(context, count, strings, lengths, errcode_ret); } cl_int clReleaseKernel(cl_kernel kernel) { - auto func = mace::OpenCLLibraryImpl::Get().clReleaseKernel; - if (func != nullptr) { - return func(kernel); - } else { - return CL_OUT_OF_RESOURCES; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clReleaseKernel; + MACE_CHECK_NOTNULL(func); + return func(kernel); } cl_int clGetDeviceIDs(cl_platform_id platform, @@ -822,12 +728,10 @@ cl_int clGetDeviceIDs(cl_platform_id platform, cl_uint num_entries, cl_device_id *devices, cl_uint *num_devices) { - auto func = mace::OpenCLLibraryImpl::Get().clGetDeviceIDs; - if (func != nullptr) { - return func(platform, device_type, num_entries, devices, num_devices); - } else { - return CL_OUT_OF_RESOURCES; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clGetDeviceIDs; + MACE_CHECK_NOTNULL(func); + return func(platform, device_type, num_entries, devices, num_devices); } cl_int clGetDeviceInfo(cl_device_id device, @@ -835,40 +739,32 @@ cl_int clGetDeviceInfo(cl_device_id device, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { - auto func = 
mace::OpenCLLibraryImpl::Get().clGetDeviceInfo; - if (func != nullptr) { - return func(device, param_name, param_value_size, param_value, - param_value_size_ret); - } else { - return CL_OUT_OF_RESOURCES; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clGetDeviceInfo; + MACE_CHECK_NOTNULL(func); + return func(device, param_name, param_value_size, param_value, + param_value_size_ret); } cl_int clRetainDevice(cl_device_id device) { - auto func = mace::OpenCLLibraryImpl::Get().clRetainDevice; - if (func != nullptr) { - return func(device); - } else { - return CL_OUT_OF_RESOURCES; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clRetainDevice; + MACE_CHECK_NOTNULL(func); + return func(device); } cl_int clReleaseDevice(cl_device_id device) { - auto func = mace::OpenCLLibraryImpl::Get().clReleaseDevice; - if (func != nullptr) { - return func(device); - } else { - return CL_OUT_OF_RESOURCES; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clReleaseDevice; + MACE_CHECK_NOTNULL(func); + return func(device); } cl_int clRetainEvent(cl_event event) { - auto func = mace::OpenCLLibraryImpl::Get().clRetainEvent; - if (func != nullptr) { - return func(event); - } else { - return CL_OUT_OF_RESOURCES; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clRetainEvent; + MACE_CHECK_NOTNULL(func); + return func(event); } cl_int clGetKernelWorkGroupInfo(cl_kernel kernel, @@ -877,13 +773,11 @@ cl_int clGetKernelWorkGroupInfo(cl_kernel kernel, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { - auto func = mace::OpenCLLibraryImpl::Get().clGetKernelWorkGroupInfo; - if (func != nullptr) { - return func(kernel, device, param_name, param_value_size, param_value, - param_value_size_ret); - } else { - return CL_OUT_OF_RESOURCES; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = 
mace::openclLibraryImpl->clGetKernelWorkGroupInfo; + MACE_CHECK_NOTNULL(func); + return func(kernel, device, param_name, param_value_size, param_value, + param_value_size_ret); } cl_int clGetEventProfilingInfo(cl_event event, @@ -891,13 +785,11 @@ cl_int clGetEventProfilingInfo(cl_event event, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { - auto func = mace::OpenCLLibraryImpl::Get().clGetEventProfilingInfo; - if (func != nullptr) { - return func(event, param_name, param_value_size, param_value, - param_value_size_ret); - } else { - return CL_OUT_OF_RESOURCES; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clGetEventProfilingInfo; + MACE_CHECK_NOTNULL(func); + return func(event, param_name, param_value_size, param_value, + param_value_size_ret); } cl_int clGetImageInfo(cl_mem image, @@ -905,12 +797,9 @@ cl_int clGetImageInfo(cl_mem image, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { - auto func = mace::OpenCLLibraryImpl::Get().clGetImageInfo; - if (func != nullptr) { - return func(image, param_name, param_value_size, param_value, - param_value_size_ret); - } else { - return CL_OUT_OF_RESOURCES; - } + MACE_CHECK_NOTNULL(mace::openclLibraryImpl); + auto func = mace::openclLibraryImpl->clGetImageInfo; + MACE_CHECK_NOTNULL(func); + return func(image, param_name, param_value_size, param_value, + param_value_size_ret); } - diff --git a/mace/core/runtime/opencl/opencl_wrapper.h b/mace/core/runtime/opencl/opencl_wrapper.h index 71ecd4ab86e4c2aa3fc709774af0afb826003bfb..c0e881864789658c457e2791833e34559f19481d 100644 --- a/mace/core/runtime/opencl/opencl_wrapper.h +++ b/mace/core/runtime/opencl/opencl_wrapper.h @@ -7,13 +7,10 @@ namespace mace { -class OpenCLLibrary { - public: - static bool Supported(); - static void Load(); - static void Unload(); -}; - + // These functions are not thread-safe. 
+ void LoadOpenCLLibrary(); + void UnloadOpenCLLibrary(); + } // namespace mace #endif // MACE_CORE_RUNTIME_OPENCL_OPENCL_WRAPPER_H_ diff --git a/mace/core/tensor.h b/mace/core/tensor.h index d2d634e66e11e498d6f7c549f8dafb651de81ba0..2d97d833b7f62a57cfcc6d5a14073f65a8e15e2d 100644 --- a/mace/core/tensor.h +++ b/mace/core/tensor.h @@ -7,7 +7,7 @@ #include "mace/core/allocator.h" #include "mace/core/common.h" -#include "mace/core/logging.h" +#include "mace/utils/logging.h" #include "mace/core/types.h" #include "mace/core/mace.h" diff --git a/mace/core/testing/test_benchmark.cc b/mace/core/testing/test_benchmark.cc index cf32aa6e90ba042bff6002f1fa3e7f18477e19d6..513ec349b831f7bf7feea1d66cea4c421d9cfd9d 100644 --- a/mace/core/testing/test_benchmark.cc +++ b/mace/core/testing/test_benchmark.cc @@ -9,8 +9,8 @@ #include #include -#include "mace/core/logging.h" -#include "mace/core/testing/env_time.h" +#include "mace/utils/env_time.h" +#include "mace/utils/logging.h" #include "mace/core/testing/test_benchmark.h" namespace mace { @@ -82,7 +82,7 @@ void Benchmark::Run(const char *pattern) { } printf("%-*s %10s %10s\n", width, "Benchmark", "Time(ns)", "Iterations"); - printf("%s\n", string(width + 22, '-').c_str()); + printf("%s\n", std::string(width + 22, '-').c_str()); for (auto b : *all_benchmarks) { if (!std::regex_match(b->name_, match, regex)) continue; for (auto arg : b->args_) { @@ -128,7 +128,7 @@ void Benchmark::Run(int arg1, int arg2, int *run_count, double *run_seconds) { int64_t iters = kMinIters; while (true) { accum_time = 0; - start_time = NowMicros(); + start_time = utils::NowMicros(); bytes_processed = -1; items_processed = -1; label.clear(); @@ -160,11 +160,11 @@ void Benchmark::Run(int arg1, int arg2, int *run_count, double *run_seconds) { void BytesProcessed(int64_t n) { bytes_processed = n; } void ItemsProcessed(int64_t n) { items_processed = n; } void StartTiming() { - if (start_time == 0) start_time = NowMicros(); + if (start_time == 0) start_time = 
utils::NowMicros(); } void StopTiming() { if (start_time != 0) { - accum_time += (NowMicros() - start_time); + accum_time += (utils::NowMicros() - start_time); start_time = 0; } } diff --git a/mace/core/testing/test_benchmark.h b/mace/core/testing/test_benchmark.h index 01236b15a45d064fdfa5760aa921fef227993470..6d40ff75ae79f61f63e506ac51a23e5392eb9ab8 100644 --- a/mace/core/testing/test_benchmark.h +++ b/mace/core/testing/test_benchmark.h @@ -3,13 +3,12 @@ // // Simple benchmarking facility. -#ifndef MACE_TEST_BENCHMARK_H_ -#define MACE_TEST_BENCHMARK_H_ +#ifndef MACE_CORE_TESTING_TEST_BENCHMARK_H_ +#define MACE_CORE_TESTING_TEST_BENCHMARK_H_ #include #include - -#include "mace/core/types.h" +#include #define MACE_BENCHMARK_CONCAT(a, b, c) a##b##c #define BENCHMARK(n) \ @@ -31,7 +30,7 @@ class Benchmark { static void Run(const char *pattern); private: - string name_; + std::string name_; int num_args_; std::vector> args_; void (*fn0_)(int) = nullptr; @@ -51,4 +50,4 @@ void StopTiming(); } // namespace testing } // namespace mace -#endif // MACE_TEST_BENCHMARK_H_ +#endif // MACE_CORE_TESTING_TEST_BENCHMARK_H_ diff --git a/mace/dsp/hexagon_control_wrapper.cc b/mace/dsp/hexagon_control_wrapper.cc index 3f25a5d78d208d3d10abbd18ae6234ae2033d2ed..c89d2726e17048f479d637e97f0c74e820e26bd5 100644 --- a/mace/dsp/hexagon_control_wrapper.cc +++ b/mace/dsp/hexagon_control_wrapper.cc @@ -335,4 +335,4 @@ bool HexagonControlWrapper::ExecuteGraphPreQuantize(const Tensor &input_tensor, return true; } -} // namespace mace \ No newline at end of file +} // namespace mace diff --git a/mace/dsp/hexagon_control_wrapper_test.cc b/mace/dsp/hexagon_control_wrapper_test.cc index b34e028c16b80fdfe9c280a3edf353fa9e040ec6..e7268db3d32401c2a53c0714258610b43b926a6f 100644 --- a/mace/dsp/hexagon_control_wrapper_test.cc +++ b/mace/dsp/hexagon_control_wrapper_test.cc @@ -3,7 +3,8 @@ // #include "mace/dsp/hexagon_control_wrapper.h" -#include "mace/core/logging.h" +#include "mace/utils/logging.h" 
+#include "mace/utils/env_time.h" #include "gtest/gtest.h" using namespace mace; @@ -27,17 +28,14 @@ TEST(HexagonControlerWrapper, InputFloat) { } wrapper.ResetPerfInfo(); - timeval tv1, tv2; - gettimeofday(&tv1, NULL); + int64_t start_micros = utils::NowMicros(); int round = 10; for (int i = 0; i < round; ++i) { VLOG(0) << wrapper.ExecuteGraph(input_tensor, &output_tensor); } - gettimeofday(&tv2, NULL); - VLOG(0) << "avg duration: " - << ((tv2.tv_sec - tv1.tv_sec) * 1000 + - (tv2.tv_usec - tv1.tv_usec) / 1000) / - round; + int64_t end_micros = utils::NowMicros(); + VLOG(0) << "avg duration: " << (end_micros - start_micros) / (double)round + << " ms"; wrapper.GetPerfInfo(); wrapper.PrintLog(); @@ -95,4 +93,4 @@ TEST(HexagonControlerWrapper, PreQuantize) { VLOG(0) << wrapper.TeardownGraph(); wrapper.Finalize(); -} \ No newline at end of file +} diff --git a/mace/dsp/hexagon_nn_ops.h b/mace/dsp/hexagon_nn_ops.h index e66548d4d9b9e1f1a3f31e05d05e758e932a9c58..9ec9e197086e4c63523ebe8e4b404ec1ba1e895b 100644 --- a/mace/dsp/hexagon_nn_ops.h +++ b/mace/dsp/hexagon_nn_ops.h @@ -5,7 +5,7 @@ #ifndef MACE_HEXAGON_NN_OPS_H_ #define MACE_HEXAGON_NN_OPS_H_ -#include "mace/core/logging.h" +#include "mace/utils/logging.h" #include namespace mace { diff --git a/mace/dsp/util/BUILD b/mace/dsp/util/BUILD index e5730b285116454ca7c15d5dd08110d3da7c3f42..df699bb8eda504f256d0367dd50088ffd2f7d7d1 100644 --- a/mace/dsp/util/BUILD +++ b/mace/dsp/util/BUILD @@ -30,7 +30,7 @@ cc_test( name = "util_test", testonly = 1, srcs = glob(["*_test.cc"]), - copts = ["-std=c++11"], + copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"], linkopts = if_android([ "-ldl", "-lm", diff --git a/mace/examples/BUILD b/mace/examples/BUILD index b1611be473ba34fd2d96e4148c4b0e8fa10dca1a..7ce5af055a470d8a42c0039341e1c8f86ac4b4a4 100644 --- a/mace/examples/BUILD +++ b/mace/examples/BUILD @@ -6,11 +6,12 @@ cc_binary( srcs = [ "helloworld.cc", ], - copts = ["-std=c++11"], - linkopts = ["-fopenmp"] + 
if_android(["-ldl"]), + copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"], + linkopts = ["-fopenmp"], deps = [ "//mace/core", "//mace/ops", + "//mace/core:opencl_runtime", ], ) @@ -18,8 +19,8 @@ cc_test( name = "benchmark_example", testonly = 1, srcs = ["benchmark_example.cc"], - copts = ["-std=c++11"], - linkopts = ["-fopenmp"] + if_android(["-ldl"]), + copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"], + linkopts = ["-fopenmp"], linkstatic = 1, deps = [ "//mace/core", @@ -30,8 +31,8 @@ cc_test( cc_binary( name = "mace_run", srcs = glob(["models/*/*.cc"] + ["mace_run.cc"]), - copts = ["-std=c++11"], - linkopts = ["-fopenmp"] + if_android(["-ldl"]), + copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1", "-v", "-ftime-report"], + linkopts = ["-fopenmp"], linkstatic = 1, deps = [ "//mace/core", diff --git a/mace/examples/helloworld.cc b/mace/examples/helloworld.cc index 25a2e2eac2c04a18247729029e91d9ea8c3235dc..a89045090e8f56e6b4a9050e7ec53609563c74b5 100644 --- a/mace/examples/helloworld.cc +++ b/mace/examples/helloworld.cc @@ -3,6 +3,7 @@ // #include "mace/core/net.h" +#include "mace/core/runtime/opencl/opencl_runtime.h" using namespace mace; @@ -42,16 +43,10 @@ int main() { net_def.add_op()->CopyFrom(op_def_1); net_def.add_op()->CopyFrom(op_def_2); - auto input = net_def.add_tensors(); - input->set_name("Input"); - input->set_data_type(DataType::DT_FLOAT); - input->add_dims(2); - input->add_dims(3); - for (int i = 0; i < 6; ++i) { - input->add_float_data(i - 3); - } - - VLOG(0) << net_def.DebugString(); + alignas(4) unsigned char tensor_data[] = "012345678901234567890123"; + const std::vector dims = {1, 2, 3, 1}; + TensorProto input("Input", tensor_data, dims, DataType::DT_FLOAT); + net_def.mutable_tensors().push_back(input); // Create workspace and input tensor Workspace ws; diff --git a/mace/kernels/BUILD b/mace/kernels/BUILD index b4b383ae29243c186cfd3315c2d82c3013aade9b..d40faf9163f767e6fdaf90e9bac9e36c2f3a6f32 100644 --- a/mace/kernels/BUILD +++ 
b/mace/kernels/BUILD @@ -25,8 +25,8 @@ cc_library( linkopts = if_android(["-lm"]), deps = [ "//mace/core", - "//mace/utils", - "//mace/utils:tuner", + "//mace/core:opencl_runtime", + "//mace/utils:utils_hdrs", ], ) diff --git a/mace/kernels/addn.h b/mace/kernels/addn.h index 6195f324da7731cf2a7374ded017e734ce92faf8..59ebfd8f6e6f54906d27c1587df2911b36fb0845 100644 --- a/mace/kernels/addn.h +++ b/mace/kernels/addn.h @@ -5,6 +5,7 @@ #ifndef MACE_KERNELS_ADDN_H_ #define MACE_KERNELS_ADDN_H_ +#include "mace/core/future.h" #include "mace/core/tensor.h" namespace mace { @@ -15,7 +16,7 @@ struct AddNFunctorBase {}; template struct AddNFunctor : AddNFunctorBase { void operator()(const std::vector &input_tensors, - Tensor *output_tensor) { + Tensor *output_tensor, StatsFuture *future) { output_tensor->ResizeLike(input_tensors[0]); Tensor::MappingGuard output_map(output_tensor); index_t size = input_tensors[0]->size(); @@ -38,12 +39,14 @@ struct AddNFunctor : AddNFunctorBase { template <> void AddNFunctor::operator()( - const std::vector &input_tensors, Tensor *output_tensor); + const std::vector &input_tensors, + Tensor *output_tensor, + StatsFuture *future); template struct AddNFunctor : AddNFunctorBase { void operator()(const std::vector &input_tensors, - Tensor *output_tensor); + Tensor *output_tensor, StatsFuture *future); }; } // namespace kernels diff --git a/mace/kernels/batch_norm.h b/mace/kernels/batch_norm.h index 9c009b86f59a2bc9807bfc6696b23f491898947a..46469b07049519a6bc69b255b31bd846de11012a 100644 --- a/mace/kernels/batch_norm.h +++ b/mace/kernels/batch_norm.h @@ -5,6 +5,7 @@ #ifndef MACE_KERNELS_BATCH_NORM_H_ #define MACE_KERNELS_BATCH_NORM_H_ +#include "mace/core/future.h" #include "mace/core/tensor.h" #include "mace/core/mace.h" @@ -20,7 +21,8 @@ struct BatchNormFunctor { const Tensor *offset, const Tensor *mean, const Tensor *var, - Tensor *output) { + Tensor *output, + StatsFuture *future) { // Batch normalization in the paper 
https://arxiv.org/abs/1502.03167 . // The calculation formula for inference is // Y = \frac{ \scale } { \sqrt{var+\variance_epsilon} } * X + @@ -80,7 +82,8 @@ void BatchNormFunctor::operator()( const Tensor *offset, const Tensor *mean, const Tensor *var, - Tensor *output); + Tensor *output, + StatsFuture *future); template struct BatchNormFunctor { @@ -91,7 +94,8 @@ struct BatchNormFunctor { const Tensor *offset, const Tensor *mean, const Tensor *var, - Tensor *output); + Tensor *output, + StatsFuture *future); }; } // namepsace kernels diff --git a/mace/kernels/bias_add.h b/mace/kernels/bias_add.h index c738502a0811524154586ca2f3669e0f967d39ad..7ba199d2d36a1f67ee91b7f73e0fbc64bd4e3108 100644 --- a/mace/kernels/bias_add.h +++ b/mace/kernels/bias_add.h @@ -5,6 +5,7 @@ #ifndef MACE_KERNELS_BIAS_ADD_H_ #define MACE_KERNELS_BIAS_ADD_H_ +#include "mace/core/future.h" #include "mace/core/tensor.h" #include "mace/core/mace.h" @@ -15,7 +16,8 @@ template struct BiasAddFunctor { void operator()(const Tensor *input, const Tensor *bias, - Tensor *output) { + Tensor *output, + StatsFuture *future) { const index_t batch = input->dim(0); const index_t height = input->dim(1); const index_t width = input->dim(2); @@ -51,14 +53,16 @@ template <> void BiasAddFunctor::operator()( const Tensor *input, const Tensor *bias, - Tensor *output); + Tensor *output, + StatsFuture *future); */ template struct BiasAddFunctor { void operator()(const Tensor *input, const Tensor *bias, - Tensor *output); + Tensor *output, + StatsFuture *future); }; } // namepsace kernels diff --git a/mace/kernels/buffer_to_image.h b/mace/kernels/buffer_to_image.h index 42043365f6a5b0227fc559bd52499f5be16fb316..1c2bdea6747184d180914c35f0a6bdc69d472d28 100644 --- a/mace/kernels/buffer_to_image.h +++ b/mace/kernels/buffer_to_image.h @@ -5,6 +5,7 @@ #ifndef MACE_KERNELS_BUFFER_TO_IMAGE_H_ #define MACE_KERNELS_BUFFER_TO_IMAGE_H_ +#include "mace/core/future.h" #include "mace/core/tensor.h" #include 
"mace/kernels/opencl/helper.h" @@ -22,7 +23,8 @@ struct BufferToImageFunctor : BufferToImageFunctorBase{ BufferToImageFunctorBase(i2b) {} void operator()(Tensor *input, const BufferType type, - Tensor *output) { + Tensor *output, + StatsFuture *future) { MACE_NOT_IMPLEMENTED; } bool i2b_; @@ -34,7 +36,8 @@ struct BufferToImageFunctor : BufferToImageFunctorBase{ BufferToImageFunctorBase(i2b) {} void operator()(Tensor *input, const BufferType type, - Tensor *output); + Tensor *output, + StatsFuture *future); }; } // namepsace kernels diff --git a/mace/kernels/channel_shuffle.h b/mace/kernels/channel_shuffle.h index b4829a4e63b13e465ef388fd845d17ed43b6703d..83c8bb498aae735257e3b97dc5cb0b183db83e16 100644 --- a/mace/kernels/channel_shuffle.h +++ b/mace/kernels/channel_shuffle.h @@ -5,6 +5,7 @@ #ifndef MACE_KERNELS_CHANNEL_SHUFFLE_H_ #define MACE_KERNELS_CHANNEL_SHUFFLE_H_ +#include "mace/core/future.h" #include "mace/core/tensor.h" namespace mace { @@ -15,7 +16,8 @@ class ChannelShuffleFunctor { public: ChannelShuffleFunctor(const int group) : group_(group) {} - void operator()(const T *input, const index_t *input_shape, T *output) { + void operator()(const T *input, const index_t *input_shape, + T *output, StatsFuture *future) { index_t batch = input_shape[0]; index_t channels = input_shape[1]; index_t height = input_shape[2]; @@ -44,4 +46,4 @@ class ChannelShuffleFunctor { } // namespace kernels } // namespace mace -#endif // MACE_KERNELS_CHANNEL_SHUFFLE_H_ \ No newline at end of file +#endif // MACE_KERNELS_CHANNEL_SHUFFLE_H_ diff --git a/mace/kernels/concat.h b/mace/kernels/concat.h index 5c3b22ab1a97d4d8e1f1b28a67186bba89d8f5dc..3988dbab2b3a59a483554b6e085bf2d9cc0db73f 100644 --- a/mace/kernels/concat.h +++ b/mace/kernels/concat.h @@ -6,6 +6,7 @@ #define MACE_KERNELS_CONCAT_H_ #include "mace/core/common.h" +#include "mace/core/future.h" #include "mace/core/types.h" #include "mace/core/mace.h" #include "mace/core/tensor.h" @@ -24,7 +25,8 @@ struct ConcatFunctor : 
ConcatFunctorBase { ConcatFunctor(const int32_t axis): ConcatFunctorBase(axis){} void operator()(const std::vector &input_list, - Tensor *output) { + Tensor *output, + StatsFuture *future) { const Tensor *input0 = input_list.front(); const int inputs_count = input_list.size(); @@ -78,7 +80,7 @@ struct ConcatFunctor : ConcatFunctorBase{ ConcatFunctor(const int32_t axis): ConcatFunctorBase(axis){} void operator()(const std::vector &input_list, - Tensor *output); + Tensor *output, StatsFuture *future); }; diff --git a/mace/kernels/conv_2d.h b/mace/kernels/conv_2d.h index e9a41cfcafef011da308a4df81b3dbc79874bfb2..37ca87f405df211ed3752a28048714fd5618317e 100644 --- a/mace/kernels/conv_2d.h +++ b/mace/kernels/conv_2d.h @@ -5,6 +5,7 @@ #ifndef MACE_KERNELS_CONV_2D_H_ #define MACE_KERNELS_CONV_2D_H_ +#include "mace/core/future.h" #include "mace/core/tensor.h" #include "mace/kernels/conv_pool_2d_util.h" @@ -32,7 +33,8 @@ struct Conv2dFunctor : Conv2dFunctorBase { void operator()(const Tensor *input, const Tensor *filter, const Tensor *bias, - Tensor *output) { + Tensor *output, + StatsFuture *future) { MACE_CHECK_NOTNULL(input); MACE_CHECK_NOTNULL(filter); MACE_CHECK_NOTNULL(output); @@ -130,7 +132,8 @@ template<> void Conv2dFunctor::operator()(const Tensor *input, const Tensor *filter, const Tensor *bias, - Tensor *output); + Tensor *output, + StatsFuture *future); template struct Conv2dFunctor : Conv2dFunctorBase { @@ -142,7 +145,8 @@ struct Conv2dFunctor : Conv2dFunctorBase { void operator()(const Tensor *input, const Tensor *filter, const Tensor *bias, - Tensor *output); + Tensor *output, + StatsFuture *future); }; } // namespace kernels diff --git a/mace/kernels/depthwise_conv2d.h b/mace/kernels/depthwise_conv2d.h index c1f1d076ed05f0490ee3724339b6637af84d3a95..9d762fb5fb3e194aad9636b23ff20434fb854ce1 100644 --- a/mace/kernels/depthwise_conv2d.h +++ b/mace/kernels/depthwise_conv2d.h @@ -5,6 +5,7 @@ #ifndef MACE_KERNELS_DEPTHWISE_CONV_H_ #define 
MACE_KERNELS_DEPTHWISE_CONV_H_ +#include "mace/core/future.h" #include "mace/core/common.h" #include "mace/kernels/conv_pool_2d_util.h" #include "mace/core/mace.h" @@ -23,7 +24,8 @@ struct DepthwiseConv2dFunctor { void operator()(const Tensor *input, // NCHW const Tensor *filter, // c_out, c_in, kernel_h, kernel_w const Tensor *bias, // c_out - Tensor *output) { + Tensor *output, + StatsFuture *future) { MACE_CHECK_NOTNULL(input); MACE_CHECK_NOTNULL(filter); MACE_CHECK_NOTNULL(bias); @@ -115,14 +117,16 @@ void DepthwiseConv2dFunctor::operator()( const Tensor *input, const Tensor *filter, const Tensor *bias, - Tensor *output); + Tensor *output, + StatsFuture *future); template <> void DepthwiseConv2dFunctor::operator()( const Tensor *input, const Tensor *filter, const Tensor *bias, - Tensor *output); + Tensor *output, + StatsFuture *future); } // namespace kernels } // namespace mace diff --git a/mace/kernels/fused_conv_2d.h b/mace/kernels/fused_conv_2d.h index 4daf28e63599497ea5af99ae7ef1a452dd838465..53a7dbb10a58fbfdf02220e6df217081bec3ee45 100644 --- a/mace/kernels/fused_conv_2d.h +++ b/mace/kernels/fused_conv_2d.h @@ -33,8 +33,10 @@ struct FusedConv2dFunctor : FusedConv2dFunctorBase { void operator()(const Tensor *input, const Tensor *filter, const Tensor *bias, - Tensor *output) { - Conv2dFunctor(strides_, paddings_, dilations_)(input, filter, bias, output); + Tensor *output, + StatsFuture *future) { + Conv2dFunctor(strides_, paddings_, dilations_)(input, filter, bias, + output, future); T *output_data = output->mutable_data(); T zero_value; @@ -62,7 +64,8 @@ struct FusedConv2dFunctor : FusedConv2dFunctorBase { void operator()(const Tensor *input, const Tensor *filter, const Tensor *bias, - Tensor *output); + Tensor *output, + StatsFuture *future); }; } // namespace kernels diff --git a/mace/kernels/global_avg_pooling.h b/mace/kernels/global_avg_pooling.h index f321bcbf3312f5407f66ae966d9b68286c7f677e..372e6242c1298e87fee95e535b813c70d8a75f06 100644 --- 
a/mace/kernels/global_avg_pooling.h +++ b/mace/kernels/global_avg_pooling.h @@ -5,6 +5,7 @@ #ifndef MACE_KERNELS_GLOBAL_AVG_POOLING_H_ #define MACE_KERNELS_GLOBAL_AVG_POOLING_H_ +#include "mace/core/future.h" #include "mace/core/tensor.h" namespace mace { @@ -12,7 +13,10 @@ namespace kernels { template struct GlobalAvgPoolingFunctor { - void operator()(const T *input, const index_t *input_shape, T *output) { + void operator()(const T *input, + const index_t *input_shape, + T *output, + StatsFuture *future) { index_t batch = input_shape[0]; index_t channels = input_shape[1]; index_t height = input_shape[2]; @@ -35,9 +39,10 @@ struct GlobalAvgPoolingFunctor { template <> void GlobalAvgPoolingFunctor::operator()( - const float *input, const index_t *input_shape, float *output); + const float *input, const index_t *input_shape, + float *output, StatsFuture *future); } // namespace kernels } // namespace mace -#endif // MACE_KERNELS_GLOBAL_AVG_POOLING_H_ \ No newline at end of file +#endif // MACE_KERNELS_GLOBAL_AVG_POOLING_H_ diff --git a/mace/kernels/neon/addn_neon.cc b/mace/kernels/neon/addn_neon.cc index 33a2bec5bdfecb985dec1f20d3a0b01f2a245fd2..f3f2a3ac426820d302f445c03c8b8bffc3d3b685 100644 --- a/mace/kernels/neon/addn_neon.cc +++ b/mace/kernels/neon/addn_neon.cc @@ -10,7 +10,8 @@ namespace kernels { template <> void AddNFunctor::operator()( - const std::vector &input_tensors, Tensor *output_tensor) { + const std::vector &input_tensors, Tensor *output_tensor, + StatsFuture *future) { // TODO: neon mem copy index_t size = output_tensor->size(); float *output_ptr = output_tensor->mutable_data(); diff --git a/mace/kernels/neon/batch_norm_neon.cc b/mace/kernels/neon/batch_norm_neon.cc index 7f67616442f84eff0a98a7bf7e022c224e9ceab9..b681f6e8679d174288fdaca68eb182c416a3036b 100644 --- a/mace/kernels/neon/batch_norm_neon.cc +++ b/mace/kernels/neon/batch_norm_neon.cc @@ -15,7 +15,8 @@ void BatchNormFunctor::operator()( const Tensor *offset, const Tensor *mean, const 
Tensor *var, - Tensor *output) { + Tensor *output, + StatsFuture *future) { // Batch normalization in the paper https://arxiv.org/abs/1502.03167 . // The calculation formula for inference is // Y = \frac{ \scale } { \sqrt{var+\epsilon} } * X + diff --git a/mace/kernels/neon/conv_2d_neon.cc b/mace/kernels/neon/conv_2d_neon.cc index f7391b6f18b28829d088e46f1d50bd7b07ec4a86..229ac96f6b49596d70d2dc403ea97988ac60e684 100644 --- a/mace/kernels/neon/conv_2d_neon.cc +++ b/mace/kernels/neon/conv_2d_neon.cc @@ -44,7 +44,8 @@ template <> void Conv2dFunctor::operator()(const Tensor *input, const Tensor *filter, const Tensor *bias, - Tensor *output) { + Tensor *output, + StatsFuture *future) { MACE_CHECK_NOTNULL(input); MACE_CHECK_NOTNULL(filter); MACE_CHECK_NOTNULL(output); @@ -79,7 +80,7 @@ void Conv2dFunctor::operator()(const Tensor *input, << " stride " << strides_[0] << "x" << strides_[1] << " is not implemented yet, using slow version"; Conv2dFunctor(strides_, paddings_, dilations_)( - input, filter, bias, output); + input, filter, bias, output, future); return; } diff --git a/mace/kernels/neon/depthwise_conv_neon.cc b/mace/kernels/neon/depthwise_conv_neon.cc index 42b7fa354d339459088fad483ae5d82ea8c7df42..c396ee21c0f651dffd1419fba7dc41b102c3a9c4 100644 --- a/mace/kernels/neon/depthwise_conv_neon.cc +++ b/mace/kernels/neon/depthwise_conv_neon.cc @@ -29,7 +29,8 @@ void DepthwiseConv2dFunctor::operator()( const Tensor *input, const Tensor *filter, const Tensor *bias, - Tensor *output) { + Tensor *output, + StatsFuture *future) { typedef void (*Conv2dNeonFunction)( const float *input, const index_t *input_shape, const float *filter, const index_t *filter_shape, const float *bias, float *output, @@ -53,7 +54,7 @@ void DepthwiseConv2dFunctor::operator()( << " is not implemented yet, using slow version"; DepthwiseConv2dFunctor(strides_, paddings_, dilations_)( - input, filter, bias, output); + input, filter, bias, output, future); return; } @@ -77,4 +78,4 @@ void 
DepthwiseConv2dFunctor::operator()( } } // namespace kernels -} // namespace mace \ No newline at end of file +} // namespace mace diff --git a/mace/kernels/neon/global_avg_pooling_neon.cc b/mace/kernels/neon/global_avg_pooling_neon.cc index 88c54fdce63ec896f3787be1477afca1865b6e5d..cf639559bdfd5cd5b243aec636c98590cd05855a 100644 --- a/mace/kernels/neon/global_avg_pooling_neon.cc +++ b/mace/kernels/neon/global_avg_pooling_neon.cc @@ -10,7 +10,8 @@ namespace kernels { template <> void GlobalAvgPoolingFunctor::operator()( - const float *input, const index_t *input_shape, float *output) { + const float *input, const index_t *input_shape, + float *output, StatsFuture *future) { index_t batch = input_shape[0]; index_t channels = input_shape[1]; index_t height = input_shape[2]; @@ -52,4 +53,4 @@ void GlobalAvgPoolingFunctor::operator()( }; } // namespace kernels -} // namespace mace \ No newline at end of file +} // namespace mace diff --git a/mace/kernels/neon/pooling_neon.cc b/mace/kernels/neon/pooling_neon.cc index 76868335d12500623cc08fff5d0cfae70761cff9..cf280c38e7efb7b891768158850906b5d6695061 100644 --- a/mace/kernels/neon/pooling_neon.cc +++ b/mace/kernels/neon/pooling_neon.cc @@ -56,7 +56,8 @@ extern void PoolingAvgNeonK3x3S2x2Padded(const float *input, template <> void PoolingFunctor::operator()( const Tensor *input_tensor, - Tensor *output_tensor) { + Tensor *output_tensor, + StatsFuture *future) { std::vector output_shape(4); std::vector paddings(2); @@ -122,9 +123,9 @@ void PoolingFunctor::operator()( } else { // not implement yet PoolingFunctor(pooling_type_, kernels_, strides_, padding_, dilations_)( - input_tensor, output_tensor); + input_tensor, output_tensor, future); } } } // namespace kernels -} // namespace mace \ No newline at end of file +} // namespace mace diff --git a/mace/kernels/neon/relu_neon.cc b/mace/kernels/neon/relu_neon.cc index e2d983dd4dd61aa9ef2baa291388f0832fd076ba..356e14ece1cb7e3bdb816ba02200ca63626f8fc9 100644 --- 
a/mace/kernels/neon/relu_neon.cc +++ b/mace/kernels/neon/relu_neon.cc @@ -10,7 +10,8 @@ namespace kernels { template <> void ReluFunctor::operator()(const Tensor *input_tensor, - Tensor *output_tensor) { + Tensor *output_tensor, + StatsFuture *future) { const float *input = input_tensor->data(); float *output = output_tensor->mutable_data(); index_t size = input_tensor->size(); @@ -66,4 +67,4 @@ void ReluFunctor::operator()(const Tensor *input_tensor }; } // namespace kernels -} // namespace mace \ No newline at end of file +} // namespace mace diff --git a/mace/kernels/opencl/addn.cc b/mace/kernels/opencl/addn.cc index 83e6b65b3d882bcc857f08fb0cffd87a9b84a65f..837f6cd8d1b5a8ef16ff80020cb2ac2a51c7e7b2 100644 --- a/mace/kernels/opencl/addn.cc +++ b/mace/kernels/opencl/addn.cc @@ -13,7 +13,7 @@ namespace kernels { template static void AddN(const std::vector &input_tensors, - Tensor *output) { + Tensor *output, StatsFuture *future) { if (input_tensors.size() > 4) { MACE_NOT_IMPLEMENTED; } @@ -26,7 +26,7 @@ static void AddN(const std::vector &input_tensors, const index_t width_pixels = channel_blocks * width; const index_t batch_height_pixels = batch * height; - auto runtime = OpenCLRuntime::Get(); + auto runtime = OpenCLRuntime::Global(); std::set built_options; auto dt = DataTypeToEnum::value; built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt)); @@ -61,12 +61,13 @@ static void AddN(const std::vector &input_tensors, {1, kwg_size} }; }; + cl::Event event; auto func = [&](const std::vector ¶ms) -> cl_int { cl_int error = runtime->command_queue().enqueueNDRangeKernel( addn_kernel, cl::NullRange, cl::NDRange(gws[0], gws[1]), cl::NDRange(params[0], params[1]), - NULL, OpenCLRuntime::Get()->GetDefaultEvent()); + nullptr, &event); MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; return error; @@ -77,16 +78,25 @@ static void AddN(const std::vector &input_tensors, << output->dim(1) << "_" << output->dim(2) << "_" << output->dim(3); + OpenCLProfilingTimer 
timer(&event); Tuner::Get()->template TuneOrRun(ss.str(), lws, params_generator, - func); - + func, + &timer); + future->wait_fn = [runtime, event](CallStats *stats) { + event.wait(); + if (stats != nullptr) { + runtime->GetCallStats(event, stats); + } + }; } template void AddNFunctor::operator()( - const std::vector &input_tensors, Tensor *output_tensor) { + const std::vector &input_tensors, + Tensor *output_tensor, + StatsFuture *future) { size_t size = input_tensors.size(); MACE_CHECK(size >= 2 && input_tensors[0] != nullptr); @@ -108,7 +118,7 @@ void AddNFunctor::operator()( CalImage2DShape(output_shape, BufferType::IN_OUT, output_image_shape); output_tensor->ResizeImage(output_shape, output_image_shape); - AddN(input_tensors, output_tensor); + AddN(input_tensors, output_tensor, future); }; template diff --git a/mace/kernels/opencl/batch_norm_opencl.cc b/mace/kernels/opencl/batch_norm_opencl.cc index de6571ea03fba7498af4adb6cb203e5789177069..b53ef848556f01279eaf8f62b8e1f1fa24c7ee84 100644 --- a/mace/kernels/opencl/batch_norm_opencl.cc +++ b/mace/kernels/opencl/batch_norm_opencl.cc @@ -6,6 +6,7 @@ #include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/utils/tuner.h" +#include "mace/utils/utils.h" #include "mace/kernels/opencl/helper.h" namespace mace { @@ -18,8 +19,8 @@ void BatchNormFunctor::operator()( const Tensor *offset, const Tensor *mean, const Tensor *var, - Tensor *output) { - + Tensor *output, + StatsFuture *future) { const index_t batch = input->dim(0); const index_t height = input->dim(1); const index_t width = input->dim(2); @@ -27,7 +28,7 @@ void BatchNormFunctor::operator()( const index_t channel_blocks = RoundUpDiv4(channels); - auto runtime = OpenCLRuntime::Get(); + auto runtime = OpenCLRuntime::Global(); std::set built_options; auto dt = DataTypeToEnum::value; built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt)); @@ -72,12 +73,13 @@ void BatchNormFunctor::operator()( {15, 7, 
9}, {1, kwg_size, 1}}; }; + cl::Event event; auto func = [&](const std::vector ¶ms) -> cl_int { cl_int error = runtime->command_queue().enqueueNDRangeKernel( bm_kernel, cl::NullRange, cl::NDRange(gws[0], gws[1], gws[2]), cl::NDRange(params[0], params[1], params[2]), - NULL, OpenCLRuntime::Get()->GetDefaultEvent()); + nullptr, &event); MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; return error; @@ -88,10 +90,18 @@ void BatchNormFunctor::operator()( << output->dim(1) << "_" << output->dim(2) << "_" << output->dim(3); + OpenCLProfilingTimer timer(&event); Tuner::Get()->template TuneOrRun(ss.str(), lws, params_generator, - func); + func, + &timer); + future->wait_fn = [runtime, event](CallStats *stats) { + event.wait(); + if (stats != nullptr) { + runtime->GetCallStats(event, stats); + } + }; } template diff --git a/mace/kernels/opencl/bias_add_opencl.cc b/mace/kernels/opencl/bias_add_opencl.cc index b03bbc5e65eecb8a627931e8e5965e1dac2f4e64..e843e5f3774ca67febc5df1b143f199d7d071139 100644 --- a/mace/kernels/opencl/bias_add_opencl.cc +++ b/mace/kernels/opencl/bias_add_opencl.cc @@ -15,8 +15,8 @@ template void BiasAddFunctor::operator()( const Tensor *input, const Tensor *bias, - Tensor *output) { - + Tensor *output, + StatsFuture *future) { const index_t batch = input->dim(0); const index_t height = input->dim(1); const index_t width = input->dim(2); @@ -28,7 +28,7 @@ void BiasAddFunctor::operator()( static_cast(width), static_cast(height * batch)}; - auto runtime = OpenCLRuntime::Get(); + auto runtime = OpenCLRuntime::Global(); std::set built_options; auto dt = DataTypeToEnum::value; built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt)); @@ -43,12 +43,19 @@ void BiasAddFunctor::operator()( bias_kernel.setArg(idx++, *(static_cast(bias->buffer()))); bias_kernel.setArg(idx++, *(static_cast(output->buffer()))); + cl::Event event; cl_int error = runtime->command_queue().enqueueNDRangeKernel( bias_kernel, cl::NullRange, cl::NDRange(gws[0], gws[1], 
gws[2]), cl::NDRange(lws[0], lws[1], lws[2]), - NULL, OpenCLRuntime::Get()->GetDefaultEvent()); + nullptr, &event); MACE_CHECK(error == CL_SUCCESS); + future->wait_fn = [runtime, event](CallStats *stats) { + event.wait(); + if (stats != nullptr) { + runtime->GetCallStats(event, stats); + } + }; } template diff --git a/mace/kernels/opencl/buffer_to_image.cc b/mace/kernels/opencl/buffer_to_image.cc index f3af3d22622bd5e893347d958da76dbec71a450a..c0dea3a2dc0e80c76c45b7bd75e02ebd7639791b 100644 --- a/mace/kernels/opencl/buffer_to_image.cc +++ b/mace/kernels/opencl/buffer_to_image.cc @@ -12,7 +12,8 @@ namespace kernels { template void BufferToImageFunctor::operator()(Tensor *buffer, const BufferType type, - Tensor *image) { + Tensor *image, + StatsFuture *future) { MACE_CHECK(!buffer->is_image()) << "buffer must be buffer-type"; std::vector image_shape; if (!i2b_) { @@ -31,7 +32,7 @@ void BufferToImageFunctor::operator()(Tensor *buffer, built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(DataTypeToEnum::value)); built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(DataTypeToEnum::value)); } - auto runtime = OpenCLRuntime::Get(); + auto runtime = OpenCLRuntime::Global(); string kernel_name; switch (type) { case FILTER: @@ -64,12 +65,20 @@ void BufferToImageFunctor::operator()(Tensor *buffer, 1}; const uint32_t kwg_size = runtime->GetKernelMaxWorkGroupSize(b2f_kernel); const std::vector lws = {kwg_size, 1, 1}; + cl::Event event; cl_int error = runtime->command_queue().enqueueNDRangeKernel( b2f_kernel, cl::NullRange, cl::NDRange(gws[0], gws[1], gws[2]), - cl::NDRange(lws[0], lws[1], lws[2])); - + cl::NDRange(lws[0], lws[1], lws[2]), + nullptr, &event); MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; + + future->wait_fn = [runtime, event](CallStats *stats) { + event.wait(); + if (stats != nullptr) { + runtime->GetCallStats(event, stats); + } + }; } template struct BufferToImageFunctor; diff --git a/mace/kernels/opencl/concat.cc 
b/mace/kernels/opencl/concat.cc index 706ed8f1c8e8c7257fb8284439887ec62e39759d..514d590db94707f4313771ef4421174610d30eeb 100644 --- a/mace/kernels/opencl/concat.cc +++ b/mace/kernels/opencl/concat.cc @@ -14,7 +14,8 @@ namespace kernels { static void Concat2(const Tensor *input0, const Tensor *input1, const DataType dt, - Tensor *output) { + Tensor *output, + StatsFuture *future) { const index_t batch = output->dim(0); const index_t height = output->dim(1); const index_t width = output->dim(2); @@ -22,7 +23,7 @@ static void Concat2(const Tensor *input0, const int channel_blk = RoundUpDiv4(channel); - auto runtime = OpenCLRuntime::Get(); + auto runtime = OpenCLRuntime::Global(); std::set built_options; if (input0->dtype() == output->dtype()) { built_options.emplace("-DDATA_TYPE=" + DtToCLDt(dt)); @@ -73,12 +74,13 @@ static void Concat2(const Tensor *input0, {15, 7, 9}, {1, kwg_size, 1}}; }; + cl::Event event; auto func = [&](const std::vector ¶ms) -> cl_int { cl_int error = runtime->command_queue().enqueueNDRangeKernel( concat_kernel, cl::NullRange, cl::NDRange(gws[0], gws[1], gws[2]), cl::NDRange(params[0], params[1], params[2]), - NULL, OpenCLRuntime::Get()->GetDefaultEvent()); + nullptr, &event); MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; return error; @@ -89,15 +91,24 @@ static void Concat2(const Tensor *input0, << output->dim(1) << "_" << output->dim(2) << "_" << output->dim(3); + OpenCLProfilingTimer timer(&event); Tuner::Get()->template TuneOrRun(ss.str(), lws, params_generator, - func); + func, + &timer); + future->wait_fn = [runtime, event](CallStats *stats) { + event.wait(); + if (stats != nullptr) { + runtime->GetCallStats(event, stats); + } + }; } template void ConcatFunctor::operator()(const std::vector &input_list, - Tensor *output) { + Tensor *output, + StatsFuture *future) { const int inputs_count = input_list.size(); MACE_CHECK(inputs_count == 2 && axis_ == 3) << "Concat opencl kernel only support two elements with axis == 3"; @@ 
-124,7 +135,8 @@ void ConcatFunctor::operator()(const std::vector::value, output); + Concat2(input_list[0], input_list[1], DataTypeToEnum::value, + output, future); break; default:MACE_NOT_IMPLEMENTED; } diff --git a/mace/kernels/opencl/conv_2d_opencl.cc b/mace/kernels/opencl/conv_2d_opencl.cc index c40481543796215c80f4367e8e5f01a59b32c3be..947f781811356b10a93d61ccdf51b0956ac036e0 100644 --- a/mace/kernels/opencl/conv_2d_opencl.cc +++ b/mace/kernels/opencl/conv_2d_opencl.cc @@ -11,37 +11,40 @@ namespace kernels { extern void Conv2dOpenclK1x1S1(const Tensor *input, const Tensor *filter, const Tensor *bias, const bool fused_relu, const int *padding, const DataType dt, - Tensor *output); + Tensor *output, StatsFuture *future); extern void Conv2dOpenclK1x1S2(const Tensor *input, const Tensor *filter, const Tensor *bias, const bool fused_relu, const int *padding, const DataType dt, - Tensor *output); + Tensor *output, StatsFuture *future); extern void Conv2dOpenclK3x3S1(const Tensor *input, const Tensor *filter, const Tensor *bias, const bool fused_relu, const int *padding, const DataType dt, - Tensor *output); + Tensor *output, StatsFuture *future); extern void Conv2dOpenclK3x3S2(const Tensor *input, const Tensor *filter, const Tensor *bias, const bool fused_relu, const int *padding, const DataType dt, - Tensor *output); + Tensor *output, StatsFuture *future); extern void Conv2dOpencl(const Tensor *input, const Tensor *filter, const Tensor *bias, const bool fused_relu, const uint32_t stride, const int *padding, - const DataType dt, Tensor *output); + const DataType dt, Tensor *output, + StatsFuture *future); template void Conv2dFunctor::operator()(const Tensor *input, const Tensor *filter, const Tensor *bias, - Tensor *output) { + Tensor *output, + StatsFuture *future) { typedef void (*Conv2dOpenclFunction)(const Tensor *input, const Tensor *filter, const Tensor *bias, const bool fused_relu, const int *padding, const DataType dt, - Tensor *output); + Tensor *output, + 
StatsFuture *future); // Selection matrix: kernel_size x stride_size static const Conv2dOpenclFunction selector[5][2] = { {Conv2dOpenclK1x1S1, Conv2dOpenclK1x1S2}, @@ -74,9 +77,12 @@ void Conv2dFunctor::operator()(const Tensor *input, if (kernel_h == kernel_w && kernel_h <= 5 && selector[kernel_h - 1][strides_[0] - 1] != nullptr) { auto conv2d_func = selector[kernel_h - 1][strides_[0] - 1]; - conv2d_func(input, filter, bias, false, paddings.data(), DataTypeToEnum::value, output); + conv2d_func(input, filter, bias, false, paddings.data(), + DataTypeToEnum::value, output, future); } else { - Conv2dOpencl(input, filter, bias, false, strides_[0], paddings.data(), DataTypeToEnum::value, output); + Conv2dOpencl(input, filter, bias, false, strides_[0], + paddings.data(), DataTypeToEnum::value, + output, future); } } diff --git a/mace/kernels/opencl/conv_2d_opencl_1x1.cc b/mace/kernels/opencl/conv_2d_opencl_1x1.cc index 9eaaa3b1e053cee2b9bc1a72746f357a6adeae67..59b933763f1febd23e1f63933816add009b3313e 100644 --- a/mace/kernels/opencl/conv_2d_opencl_1x1.cc +++ b/mace/kernels/opencl/conv_2d_opencl_1x1.cc @@ -18,7 +18,8 @@ void Conv1x1(const Tensor *input, const bool fused_relu, const int stride, const DataType dt, - Tensor *output) { + Tensor *output, + StatsFuture *future) { const index_t batch = output->dim(0); const index_t height = output->dim(1); const index_t width = output->dim(2); @@ -45,9 +46,7 @@ void Conv1x1(const Tensor *input, built_options.emplace("-DFUSED_RELU"); } - auto runtime = OpenCLRuntime::Get(); - auto program = runtime->program(); - + auto runtime = OpenCLRuntime::Global(); auto conv_2d_kernel = runtime->BuildKernel("conv_2d_1x1", "conv_2d_1x1", built_options); uint32_t idx = 0; @@ -92,12 +91,13 @@ void Conv1x1(const Tensor *input, {15, 7, 9}, {1, kwg_size, 1}}; }; + cl::Event event; auto func = [&](const std::vector& params)->cl_int { cl_int error = runtime->command_queue().enqueueNDRangeKernel( conv_2d_kernel, cl::NullRange, cl::NDRange(gws[0], 
gws[1], gws[2]), cl::NDRange(params[0], params[1], params[2]), - NULL, OpenCLRuntime::Get()->GetDefaultEvent()); + nullptr, &event); MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; return error; @@ -108,11 +108,18 @@ void Conv1x1(const Tensor *input, << output->dim(1) << "_" << output->dim(2) << "_" << output->dim(3); + OpenCLProfilingTimer timer(&event); Tuner::Get()->template TuneOrRun(ss.str(), lws, params_generator, - func); - + func, + &timer); + future->wait_fn = [runtime, event](CallStats *stats) { + event.wait(); + if (stats != nullptr) { + runtime->GetCallStats(event, stats); + } + }; } extern void Conv2dOpenclK1x1S1(const Tensor *input, @@ -121,8 +128,9 @@ extern void Conv2dOpenclK1x1S1(const Tensor *input, const bool fused_relu, const int *padding, const DataType dt, - Tensor *output) { - Conv1x1(input, filter, bias, fused_relu, 1, dt, output); + Tensor *output, + StatsFuture *future) { + Conv1x1(input, filter, bias, fused_relu, 1, dt, output, future); }; extern void Conv2dOpenclK1x1S2(const Tensor *input, @@ -131,8 +139,9 @@ extern void Conv2dOpenclK1x1S2(const Tensor *input, const bool fused_relu, const int *padding, const DataType dt, - Tensor *output) { - Conv1x1(input, filter, bias, fused_relu, 2, dt, output); + Tensor *output, + StatsFuture *future) { + Conv1x1(input, filter, bias, fused_relu, 2, dt, output, future); }; } // namespace kernels diff --git a/mace/kernels/opencl/conv_2d_opencl_3x3.cc b/mace/kernels/opencl/conv_2d_opencl_3x3.cc index 0b77b6c26dfc2c93c4ccdaf9232a97e06e80a046..59b66e4839bd8209e5f8073b0400e2d64da33fb2 100644 --- a/mace/kernels/opencl/conv_2d_opencl_3x3.cc +++ b/mace/kernels/opencl/conv_2d_opencl_3x3.cc @@ -15,7 +15,8 @@ namespace kernels { static void Conv2d3x3S12(const Tensor *input, const Tensor *filter, const Tensor *bias, const bool fused_relu, const uint32_t stride, const int *padding, - const DataType dt, Tensor *output) { + const DataType dt, Tensor *output, + StatsFuture *future) { const index_t batch = 
output->dim(0); const index_t height = output->dim(1); const index_t width = output->dim(2); @@ -35,9 +36,7 @@ static void Conv2d3x3S12(const Tensor *input, const Tensor *filter, built_options.emplace("-DFUSED_RELU"); } - auto runtime = OpenCLRuntime::Get(); - auto program = runtime->program(); - + auto runtime = OpenCLRuntime::Global(); auto conv_2d_kernel = runtime->BuildKernel("conv_2d_3x3", "conv_2d_3x3", built_options); uint32_t idx = 0; @@ -84,12 +83,13 @@ static void Conv2d3x3S12(const Tensor *input, const Tensor *filter, {15, 7, 9}, {1, kwg_size, 1}}; }; + cl::Event event; auto func = [&](const std::vector ¶ms) -> cl_int { cl_int error = runtime->command_queue().enqueueNDRangeKernel( conv_2d_kernel, cl::NullRange, cl::NDRange(gws[0], gws[1], gws[2]), cl::NDRange(params[0], params[1], params[2]), - NULL, OpenCLRuntime::Get()->GetDefaultEvent()); + nullptr, &event); MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; return error; @@ -100,11 +100,19 @@ static void Conv2d3x3S12(const Tensor *input, const Tensor *filter, << output->dim(1) << "_" << output->dim(2) << "_" << output->dim(3); + OpenCLProfilingTimer timer(&event); Tuner::Get()->template TuneOrRun(ss.str(), lws, params_generator, - func); + func, + &timer); + future->wait_fn = [runtime, event](CallStats *stats) { + event.wait(); + if (stats != nullptr) { + runtime->GetCallStats(event, stats); + } + }; } void Conv2dOpenclK3x3S1(const Tensor *input, const Tensor *filter, @@ -112,8 +120,9 @@ void Conv2dOpenclK3x3S1(const Tensor *input, const bool fused_relu, const int *padding, const DataType dt, - Tensor *output) { - Conv2d3x3S12(input, filter, bias, fused_relu, 1, padding, dt, output); + Tensor *output, + StatsFuture *future) { + Conv2d3x3S12(input, filter, bias, fused_relu, 1, padding, dt, output, future); }; void Conv2dOpenclK3x3S2(const Tensor *input, @@ -122,8 +131,9 @@ void Conv2dOpenclK3x3S2(const Tensor *input, const bool fused_relu, const int *padding, const DataType dt, - Tensor 
*output) { - Conv2d3x3S12(input, filter, bias, fused_relu, 2, padding, dt, output); + Tensor *output, + StatsFuture *future) { + Conv2d3x3S12(input, filter, bias, fused_relu, 2, padding, dt, output, future); }; } // namespace kernels diff --git a/mace/kernels/opencl/conv_2d_opencl_general.cc b/mace/kernels/opencl/conv_2d_opencl_general.cc index dcfbdec818c5ae00a09eb5b56dce46d8cbde4cba..d3cee60087049340d8119c13ab074348a6dcda8d 100644 --- a/mace/kernels/opencl/conv_2d_opencl_general.cc +++ b/mace/kernels/opencl/conv_2d_opencl_general.cc @@ -15,7 +15,8 @@ namespace kernels { void Conv2dOpencl(const Tensor *input, const Tensor *filter, const Tensor *bias, const bool fused_relu, const uint32_t stride, const int *padding, - const DataType dt, Tensor *output) { + const DataType dt, Tensor *output, + StatsFuture *future) { const index_t batch = output->dim(0); const index_t height = output->dim(1); const index_t width = output->dim(2); @@ -35,9 +36,7 @@ void Conv2dOpencl(const Tensor *input, const Tensor *filter, built_options.emplace("-DFUSED_RELU"); } - auto runtime = OpenCLRuntime::Get(); - auto program = runtime->program(); - + auto runtime = OpenCLRuntime::Global(); auto conv_2d_kernel = runtime->BuildKernel("conv_2d", "conv_2d", built_options); uint32_t idx = 0; @@ -86,12 +85,13 @@ void Conv2dOpencl(const Tensor *input, const Tensor *filter, {15, 7, 9}, {1, kwg_size, 1}}; }; + cl::Event event; auto func = [&](const std::vector ¶ms) -> cl_int { cl_int error = runtime->command_queue().enqueueNDRangeKernel( conv_2d_kernel, cl::NullRange, cl::NDRange(gws[0], gws[1], gws[2]), cl::NDRange(params[0], params[1], params[2]), - NULL, OpenCLRuntime::Get()->GetDefaultEvent()); + nullptr, &event); MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; return error; @@ -102,11 +102,19 @@ void Conv2dOpencl(const Tensor *input, const Tensor *filter, << output->dim(1) << "_" << output->dim(2) << "_" << output->dim(3); + OpenCLProfilingTimer timer(&event); Tuner::Get()->template 
TuneOrRun(ss.str(), lws, params_generator, - func); + func, + &timer); + future->wait_fn = [runtime, event](CallStats *stats) { + event.wait(); + if (stats != nullptr) { + runtime->GetCallStats(event, stats); + } + }; } } // namespace kernels diff --git a/mace/kernels/opencl/depthwise_conv_opencl.cc b/mace/kernels/opencl/depthwise_conv_opencl.cc index 7e75fc0091f8fcae51759062355683c6569ed9cb..67e15501c7c8d826ed982e4ec374cd7569fc5312 100644 --- a/mace/kernels/opencl/depthwise_conv_opencl.cc +++ b/mace/kernels/opencl/depthwise_conv_opencl.cc @@ -8,17 +8,21 @@ namespace mace { namespace kernels { extern void DepthwiseConvOpenclK3x3S1(const Tensor *input, const Tensor *filter, - const Tensor *bias, Tensor *output); + const Tensor *bias, Tensor *output, + StatsFuture *future); extern void DepthwiseConvOpenclK3x3S2(const Tensor *input, const Tensor *filter, - const Tensor *bias, Tensor *output); + const Tensor *bias, Tensor *output, + StatsFuture *future); template <> void DepthwiseConv2dFunctor::operator()(const Tensor *input, const Tensor *filter, const Tensor *bias, - Tensor *output) { + Tensor *output, + StatsFuture *future) { typedef void (*Conv2dOpenclFunction)(const Tensor *input, const Tensor *filter, - const Tensor *bias, Tensor *output); + const Tensor *bias, Tensor *output, + StatsFuture *future); // Selection matrix: kernel_size x stride_size static const Conv2dOpenclFunction selector[5][2] = { {nullptr, nullptr}, @@ -38,7 +42,7 @@ void DepthwiseConv2dFunctor::operator()(const Tensor << " is not implemented yet, using slow version"; // TODO(heliangliang) The CPU/NEON kernel should map the buffer DepthwiseConv2dFunctor(strides_, paddings_, dilations_)( - input, filter, bias, output); + input, filter, bias, output, future); return; } @@ -46,9 +50,9 @@ void DepthwiseConv2dFunctor::operator()(const Tensor if (paddings_[0] > 0 || paddings_[1] > 0) { Tensor padded_input(GetDeviceAllocator(DeviceType::OPENCL), DataTypeToEnum::v()); ConstructInputWithPadding(input, 
paddings_.data(), &padded_input); - conv2d_func(&padded_input, filter, bias, output); + conv2d_func(&padded_input, filter, bias, output, future); }else { - conv2d_func(input, filter, bias, output); + conv2d_func(input, filter, bias, output, future); } } diff --git a/mace/kernels/opencl/depthwise_conv_opencl_3x3.cc b/mace/kernels/opencl/depthwise_conv_opencl_3x3.cc index 1402131df164cb0d1ba348617b3988e78f71c574..c67382c479199ca45063252448e081e33f26d0a1 100644 --- a/mace/kernels/opencl/depthwise_conv_opencl_3x3.cc +++ b/mace/kernels/opencl/depthwise_conv_opencl_3x3.cc @@ -14,7 +14,8 @@ static void InnerDepthwiseConvOpenclK3x3S12(const Tensor *input, const Tensor *filter, const Tensor *bias, const uint32_t stride, - Tensor *output) { + Tensor *output, + StatsFuture *future) { const index_t batch = output->dim(0); const index_t channels = output->dim(1); const index_t height = output->dim(2); @@ -30,7 +31,7 @@ static void InnerDepthwiseConvOpenclK3x3S12(const Tensor *input, const index_t channel_blocks = (channels + 3) / 4; const index_t pixel_blocks = (width + 3) / 4 * height; - auto runtime = OpenCLRuntime::Get(); + auto runtime = OpenCLRuntime::Global(); std::set built_options; built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(input->dtype())); built_options.emplace(stride == 1 ? 
"-DSTRIDE_1" : ""); @@ -57,26 +58,36 @@ static void InnerDepthwiseConvOpenclK3x3S12(const Tensor *input, const uint32_t lws[3] = {static_cast(1), static_cast(1), static_cast(256)}; + cl::Event event; cl_int error = runtime->command_queue().enqueueNDRangeKernel( conv_kernel, cl::NullRange, cl::NDRange(gws[0], gws[1], gws[2]), cl::NDRange(lws[0], lws[1], lws[2]), - NULL, OpenCLRuntime::Get()->GetDefaultEvent()); + nullptr, &event); MACE_CHECK(error == CL_SUCCESS); + + future->wait_fn = [runtime, event](CallStats *stats) { + event.wait(); + if (stats != nullptr) { + runtime->GetCallStats(event, stats); + } + }; } extern void DepthwiseConvOpenclK3x3S1(const Tensor *input, const Tensor *filter, const Tensor *bias, - Tensor *output) { - InnerDepthwiseConvOpenclK3x3S12(input, filter, bias, 1, output); + Tensor *output, + StatsFuture *future) { + InnerDepthwiseConvOpenclK3x3S12(input, filter, bias, 1, output, future); }; extern void DepthwiseConvOpenclK3x3S2(const Tensor *input, const Tensor *filter, const Tensor *bias, - Tensor *output) { - InnerDepthwiseConvOpenclK3x3S12(input, filter, bias, 2, output); + Tensor *output, + StatsFuture *future) { + InnerDepthwiseConvOpenclK3x3S12(input, filter, bias, 2, output, future); }; } // namespace kernels diff --git a/mace/kernels/opencl/fused_conv_2d_opencl.cc b/mace/kernels/opencl/fused_conv_2d_opencl.cc index 86aa0424f003c4c5815766bbab9dac2e6f5ee191..f2bd514967d1359397762f4775a4d498af3b1ea7 100644 --- a/mace/kernels/opencl/fused_conv_2d_opencl.cc +++ b/mace/kernels/opencl/fused_conv_2d_opencl.cc @@ -11,37 +11,43 @@ namespace kernels { extern void Conv2dOpenclK1x1S1(const Tensor *input, const Tensor *filter, const Tensor *bias, const bool fused_relu, const int *padding, const DataType dt, - Tensor *output); + Tensor *output, + StatsFuture *future); extern void Conv2dOpenclK1x1S2(const Tensor *input, const Tensor *filter, const Tensor *bias, const bool fused_relu, const int *padding, const DataType dt, - Tensor *output); + Tensor 
*output, + StatsFuture *future); extern void Conv2dOpenclK3x3S1(const Tensor *input, const Tensor *filter, const Tensor *bias, const bool fused_relu, const int *padding, const DataType dt, - Tensor *output); + Tensor *output, + StatsFuture *future); extern void Conv2dOpenclK3x3S2(const Tensor *input, const Tensor *filter, const Tensor *bias, const bool fused_relu, const int *padding, const DataType dt, - Tensor *output); + Tensor *output, + StatsFuture *future); extern void Conv2dOpencl(const Tensor *input, const Tensor *filter, const Tensor *bias, const bool fused_relu, const uint32_t stride, const int *padding, - const DataType dt, Tensor *output); + const DataType dt, Tensor *output, + StatsFuture *future); template void FusedConv2dFunctor::operator()(const Tensor *input, const Tensor *filter, const Tensor *bias, - Tensor *output) { + Tensor *output, + StatsFuture *future) { typedef void (*Conv2dOpenclFunction)(const Tensor *input, const Tensor *filter, const Tensor *bias, const bool fused_relu, const int *padding, const DataType dt, - Tensor *output); + Tensor *output, StatsFuture *future); // Selection matrix: kernel_size x stride_size static const Conv2dOpenclFunction selector[5][2] = { {Conv2dOpenclK1x1S1, Conv2dOpenclK1x1S2}, @@ -73,9 +79,11 @@ void FusedConv2dFunctor::operator()(const Tensor *input, if (kernel_h == kernel_w && kernel_h <= 5 && selector[kernel_h - 1][strides_[0] - 1] != nullptr) { auto conv2d_func = selector[kernel_h - 1][strides_[0] - 1]; - conv2d_func(input, filter, bias, true, paddings.data(), DataTypeToEnum::value, output); + conv2d_func(input, filter, bias, true, paddings.data(), + DataTypeToEnum::value, output, future); } else { - Conv2dOpencl(input, filter, bias, true, strides_[0], paddings.data(), DataTypeToEnum::value, output); + Conv2dOpencl(input, filter, bias, true, strides_[0], paddings.data(), + DataTypeToEnum::value, output, future); } } diff --git a/mace/kernels/opencl/pooling_opencl.cc 
b/mace/kernels/opencl/pooling_opencl.cc index 5a0fbadf3db02894394ab5e5c83cd2eb6a864104..814159668ec87b5f5afec33667abfc9b531d42a3 100644 --- a/mace/kernels/opencl/pooling_opencl.cc +++ b/mace/kernels/opencl/pooling_opencl.cc @@ -17,7 +17,8 @@ static void Pooling(const Tensor *input, const int pooling_size, const PoolingType type, const DataType dt, - Tensor *output) { + Tensor *output, + StatsFuture *future) { index_t batch = output->dim(0); index_t out_height = output->dim(1); index_t out_width = output->dim(2); @@ -25,7 +26,7 @@ static void Pooling(const Tensor *input, index_t channel_blocks = (channels + 3) / 4; - auto runtime = OpenCLRuntime::Get(); + auto runtime = OpenCLRuntime::Global(); std::set built_options; if (type == MAX && input->dtype() == output->dtype()) { built_options.emplace("-DDATA_TYPE=" + DtToCLDt(dt)); @@ -85,12 +86,13 @@ static void Pooling(const Tensor *input, {15, 7, 9}, {1, kwg_size, 1}}; }; + cl::Event event; auto func = [&](const std::vector ¶ms) -> cl_int { cl_int error = runtime->command_queue().enqueueNDRangeKernel( pooling_kernel, cl::NullRange, cl::NDRange(gws[0], gws[1], gws[2]), cl::NDRange(params[0], params[1], params[2]), - NULL, OpenCLRuntime::Get()->GetDefaultEvent()); + nullptr, &event); MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; return error; @@ -101,16 +103,27 @@ static void Pooling(const Tensor *input, << output->dim(1) << "_" << output->dim(2) << "_" << output->dim(3); + OpenCLProfilingTimer timer(&event); Tuner::Get()->template TuneOrRun(ss.str(), lws, params_generator, - func); + func, + &timer); + + future->wait_fn = [runtime, event](CallStats *stats) { + event.wait(); + if (stats != nullptr) { + runtime->GetCallStats(event, stats); + } + }; } template void PoolingFunctor::operator()(const Tensor *input, - Tensor *output) { - MACE_CHECK(dilations_[0] == 1 && dilations_[1] == 1) << "Pooling opencl kernel not support dilation yet"; + Tensor *output, + StatsFuture *future) { + MACE_CHECK(dilations_[0] == 
1 && dilations_[1] == 1) + << "Pooling opencl kernel not support dilation yet"; std::vector output_shape(4); std::vector paddings(2); std::vector filter_shape = { @@ -128,7 +141,7 @@ void PoolingFunctor::operator()(const Tensor *input, output->ResizeImage(output_shape, output_image_shape); Pooling(input, strides_, paddings.data(), kernels_[0], pooling_type_, - DataTypeToEnum::value, output); + DataTypeToEnum::value, output, future); } diff --git a/mace/kernels/opencl/relu_opencl.cc b/mace/kernels/opencl/relu_opencl.cc index 483ec8d492df6a755ae8d665245912873960de2d..d74337ecf907828e0fb5905c3f97170cb3b38e84 100644 --- a/mace/kernels/opencl/relu_opencl.cc +++ b/mace/kernels/opencl/relu_opencl.cc @@ -14,7 +14,8 @@ namespace kernels { template void ReluFunctor::operator()(const Tensor *input, - Tensor *output) { + Tensor *output, + StatsFuture *future) { const index_t batch = input->dim(0); const index_t height = input->dim(1); @@ -23,8 +24,7 @@ void ReluFunctor::operator()(const Tensor *input, const index_t channel_blocks = RoundUpDiv4(channels); - auto runtime = OpenCLRuntime::Get(); - auto program = runtime->program(); + auto runtime = OpenCLRuntime::Global(); std::set built_options; auto dt = DataTypeToEnum::value; @@ -74,12 +74,13 @@ void ReluFunctor::operator()(const Tensor *input, {15, 7, 9}, {1, kwg_size, 1}}; }; + cl::Event event; auto func = [&](const std::vector ¶ms) -> cl_int { cl_int error = runtime->command_queue().enqueueNDRangeKernel( relu_kernel, cl::NullRange, cl::NDRange(gws[0], gws[1], gws[2]), cl::NDRange(params[0], params[1], params[2]), - NULL, OpenCLRuntime::Get()->GetDefaultEvent()); + nullptr, &event); MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; return error; @@ -90,10 +91,18 @@ void ReluFunctor::operator()(const Tensor *input, << output->dim(1) << "_" << output->dim(2) << "_" << output->dim(3); + OpenCLProfilingTimer timer(&event); Tuner::Get()->template TuneOrRun(ss.str(), lws, params_generator, - func); + func, + &timer); + 
future->wait_fn = [runtime, event](CallStats *stats) { + event.wait(); + if (stats != nullptr) { + runtime->GetCallStats(event, stats); + } + }; } template diff --git a/mace/kernels/opencl/resize_bilinear_opencl.cc b/mace/kernels/opencl/resize_bilinear_opencl.cc index a3686e479f29cb76e55f08652fe385ca940e5d10..f8c7aa71e91d70bd385fdcf82fc2a13d7a31d2d7 100644 --- a/mace/kernels/opencl/resize_bilinear_opencl.cc +++ b/mace/kernels/opencl/resize_bilinear_opencl.cc @@ -14,7 +14,7 @@ namespace kernels { template void ResizeBilinearFunctor::operator()( - const Tensor *input, Tensor *output) { + const Tensor *input, Tensor *output, StatsFuture *future) { const index_t batch = input->dim(0); const index_t in_height = input->dim(1); const index_t in_width = input->dim(2); @@ -38,7 +38,7 @@ void ResizeBilinearFunctor::operator()( CalculateResizeScale(in_height, out_height, align_corners_); float width_scale = CalculateResizeScale(in_width, out_width, align_corners_); - auto runtime = OpenCLRuntime::Get(); + auto runtime = OpenCLRuntime::Global(); std::set built_options; auto dt = DataTypeToEnum::value; built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt)); @@ -79,12 +79,13 @@ void ResizeBilinearFunctor::operator()( {1, kwg_size / 128, 128}, {1, kwg_size, 1}}; }; + cl::Event event; auto func = [&](const std::vector ¶ms) -> cl_int { cl_int error = runtime->command_queue().enqueueNDRangeKernel( rb_kernel, cl::NullRange, cl::NDRange(gws[0], gws[1], gws[2]), cl::NDRange(params[0], params[1], params[2]), - NULL, OpenCLRuntime::Get()->GetDefaultEvent()); + nullptr, &event); MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; return error; @@ -95,11 +96,18 @@ void ResizeBilinearFunctor::operator()( << output->dim(1) << "_" << output->dim(2) << "_" << output->dim(3); + OpenCLProfilingTimer timer(&event); Tuner::Get()->template TuneOrRun(ss.str(), lws, params_generator, - func); - + func, + &timer); + future->wait_fn = [runtime, event](CallStats *stats) { + event.wait(); + 
if (stats != nullptr) { + runtime->GetCallStats(event, stats); + } + }; } template struct ResizeBilinearFunctor; diff --git a/mace/kernels/opencl/space_to_batch_opecl.cc b/mace/kernels/opencl/space_to_batch_opecl.cc index 72590be5e87ca1c5b721972855b8869e397df82c..395b9235697d9fc63237ac91e63ce77a9a642a2b 100644 --- a/mace/kernels/opencl/space_to_batch_opecl.cc +++ b/mace/kernels/opencl/space_to_batch_opecl.cc @@ -17,8 +17,9 @@ template <> void SpaceToBatchFunctor::operator()(Tensor *space_tensor, const Tensor *block_shape_tensor, const Tensor *paddings_tensor, - Tensor *batch_tensor) { - auto runtime = OpenCLRuntime::Get(); + Tensor *batch_tensor, + StatsFuture *future) { + auto runtime = OpenCLRuntime::Global(); std::set built_options; built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(space_tensor->dtype())); auto s2b_kernel = runtime->BuildKernel("space_to_batch", "space_to_batch", built_options); @@ -42,12 +43,19 @@ void SpaceToBatchFunctor::operator()(Tensor *space_te const uint32_t lws[3] = {static_cast(1), static_cast(8), static_cast(128)}; + cl::Event event; cl_int error = runtime->command_queue().enqueueNDRangeKernel( s2b_kernel, cl::NullRange, cl::NDRange(gws[0], gws[1], gws[2]), cl::NDRange(lws[0], lws[1], lws[2]), - NULL, OpenCLRuntime::Get()->GetDefaultEvent()); + nullptr, &event); MACE_CHECK(error == CL_SUCCESS); + future->wait_fn = [runtime, event](CallStats *stats) { + event.wait(); + if (stats != nullptr) { + runtime->GetCallStats(event, stats); + } + }; } } // namespace kernels diff --git a/mace/kernels/pooling.h b/mace/kernels/pooling.h index 0a1960a4e7e891d6c71d1841cf672b3a48a83fdb..1c1d90b2fd035eec8c644404c7f721414d1952a1 100644 --- a/mace/kernels/pooling.h +++ b/mace/kernels/pooling.h @@ -6,6 +6,7 @@ #define MACE_KERNELS_POOLING_H #include +#include "mace/core/future.h" #include "mace/core/tensor.h" #include "mace/kernels/conv_pool_2d_util.h" @@ -49,7 +50,8 @@ struct PoolingFunctor : PoolingFunctorBase { dilations) {} void operator()(const 
Tensor *input_tensor, - Tensor *output_tensor) { + Tensor *output_tensor, + StatsFuture *future) { std::vector output_shape(4); std::vector paddings(2); @@ -153,7 +155,8 @@ struct PoolingFunctor : PoolingFunctorBase { template<> void PoolingFunctor::operator()( const Tensor *input_tensor, - Tensor *output_tensor); + Tensor *output_tensor, + StatsFuture *future); template struct PoolingFunctor : PoolingFunctorBase { @@ -166,7 +169,8 @@ struct PoolingFunctor : PoolingFunctorBase { strides, padding, dilations) {} void operator()(const Tensor *input_tensor, - Tensor *output_tensor); + Tensor *output_tensor, + StatsFuture *future); }; } // namespace kernels diff --git a/mace/kernels/relu.h b/mace/kernels/relu.h index ea94c79a96ad523653ec0defd53902af3ac698f6..19135f5e83b1d8021505429f9cf03879ff0728e4 100644 --- a/mace/kernels/relu.h +++ b/mace/kernels/relu.h @@ -5,6 +5,7 @@ #ifndef MACE_KERNELS_RELU_H_ #define MACE_KERNELS_RELU_H_ +#include "mace/core/future.h" #include "mace/core/tensor.h" namespace mace { @@ -14,7 +15,7 @@ template struct ReluFunctor { T max_limit_; - void operator()(const Tensor *input, Tensor *output) { + void operator()(const Tensor *input, Tensor *output, StatsFuture *future) { const T *input_ptr = input->data(); T *output_ptr = output->mutable_data(); index_t size = input->size(); @@ -32,13 +33,14 @@ struct ReluFunctor { template <> void ReluFunctor::operator()(const Tensor *input, - Tensor *output); + Tensor *output, + StatsFuture *future); template struct ReluFunctor { T max_limit_; - void operator()(const Tensor *input, Tensor *output); + void operator()(const Tensor *input, Tensor *output, StatsFuture *future); }; } // namespace kernels diff --git a/mace/kernels/resize_bilinear.h b/mace/kernels/resize_bilinear.h index c365243ad1b2b659082d22d348e81045d1318716..8d3d7fe3a4baa73ada8aff5e59bbc7d7c3a46c71 100644 --- a/mace/kernels/resize_bilinear.h +++ b/mace/kernels/resize_bilinear.h @@ -4,6 +4,7 @@ #ifndef MACE_KERNELS_RESIZE_BILINEAR_H_ #define 
MACE_KERNELS_RESIZE_BILINEAR_H_ +#include "mace/core/future.h" #include "mace/core/tensor.h" namespace mace { @@ -122,7 +123,7 @@ struct ResizeBilinearFunctor : ResizeBilinearFunctorBase { ResizeBilinearFunctor(const std::vector &size, bool align_corners) : ResizeBilinearFunctorBase(size, align_corners) {} - void operator()(const Tensor *input, Tensor *output) { + void operator()(const Tensor *input, Tensor *output, StatsFuture *future) { const index_t batch = input->dim(0); const index_t in_height = input->dim(1); const index_t in_width = input->dim(2); @@ -167,7 +168,7 @@ struct ResizeBilinearFunctor : ResizeBilinearFunctorBase ResizeBilinearFunctor(const std::vector &size, bool align_corners) : ResizeBilinearFunctorBase(size, align_corners) {} - void operator()(const Tensor *input, Tensor *output); + void operator()(const Tensor *input, Tensor *output, StatsFuture *future); }; } // namespace kernels diff --git a/mace/kernels/space_to_batch.h b/mace/kernels/space_to_batch.h index 4f7bd1afe644e52423ca8688ee04289ca014f64d..e29c565b1e4ecf7615da02b1153604f2659eafa6 100644 --- a/mace/kernels/space_to_batch.h +++ b/mace/kernels/space_to_batch.h @@ -5,6 +5,7 @@ #ifndef MACE_KERNELS_CONV_2D_H_ #define MACE_KERNELS_CONV_2D_H_ +#include "mace/core/future.h" #include "mace/core/tensor.h" #include "mace/core/mace.h" @@ -18,7 +19,8 @@ struct SpaceToBatchFunctor { void operator()(Tensor *input_tensor, const Tensor *block_shape_tensor, const Tensor *paddings_tensor, - Tensor *output_tensor) { + Tensor *output_tensor, + StatsFuture *future) { MACE_NOT_IMPLEMENTED; } @@ -29,7 +31,8 @@ template <> void SpaceToBatchFunctor::operator()(Tensor *input_tensor, const Tensor *block_shape_tensor, const Tensor *paddings_tensor, - Tensor *output); + Tensor *output, + StatsFuture *future); } // namespace kernels } // namespace mace diff --git a/mace/mace.bzl b/mace/mace.bzl index 757334a8b8c0d5b104afd19bd9654ddec24b3eeb..af6fe583544d4d80dcc3c794b25480b7fb82da88 100644 --- a/mace/mace.bzl +++ 
b/mace/mace.bzl @@ -24,8 +24,8 @@ def if_android_arm64(a): "//conditions:default": [], }) -def if_profiling(a): +def if_profiling_enabled(a): return select({ - "//mace:is_profiling": a, + "//mace:profiling_enabled": a, "//conditions:default": [], - }) +}) diff --git a/mace/ops/BUILD b/mace/ops/BUILD index a1ba1eebe3a06559dccab4d3dcedd2b32f1600fa..1da2972539c157e286ca8f32460b575e247046c2 100644 --- a/mace/ops/BUILD +++ b/mace/ops/BUILD @@ -34,10 +34,7 @@ cc_library( ["*.h"], exclude = ["ops_test_util.h"], ), - copts = [ - "-std=c++11", - "-D_GLIBCXX_USE_C99_MATH_TR1", - ], + copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"], deps = [ "//mace/kernels", ], @@ -50,7 +47,7 @@ cc_test( srcs = glob( ["*_test.cc"], ), - copts = ["-std=c++11"], + copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"], linkopts = ["-fopenmp"], linkstatic = 1, deps = [ @@ -64,12 +61,8 @@ cc_test( name = "ops_benchmark", testonly = 1, srcs = glob(["*_benchmark.cc"]), - copts = [ - "-std=c++11", - "-fopenmp", - "-D_GLIBCXX_USE_C99_MATH_TR1", - ], - linkopts = ["-fopenmp"] + if_android(["-ldl"]), + copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"], + linkopts = ["-fopenmp"], linkstatic = 1, deps = [ ":ops", diff --git a/mace/ops/addn.h b/mace/ops/addn.h index 155c6830b6aa14e072e3ba67f68ee6421aa427c1..fc984c3bd3efe27afe96418b84817682c1b6f61f 100644 --- a/mace/ops/addn.h +++ b/mace/ops/addn.h @@ -16,7 +16,7 @@ class AddNOp : public Operator { AddNOp(const OperatorDef &operator_def, Workspace *ws) : Operator(operator_def, ws) {} - bool Run() override { + bool Run(StatsFuture *future) override { Tensor *output_tensor = this->outputs_[0]; int n = this->inputs_.size(); vector inputs(n, nullptr); @@ -24,7 +24,7 @@ class AddNOp : public Operator { inputs[i] = this->inputs_[i]; } - functor_(inputs, output_tensor); + functor_(inputs, output_tensor, future); return true; } diff --git a/mace/ops/batch_norm.h b/mace/ops/batch_norm.h index 
3253dcfc4df41bd7321f058ce061f4cb4938a20b..96c4a1fc1c42bc9596a37434b1660277ec11ea7d 100644 --- a/mace/ops/batch_norm.h +++ b/mace/ops/batch_norm.h @@ -19,7 +19,7 @@ class BatchNormOp : public Operator { OperatorBase::GetSingleArgument("epsilon", static_cast(1e-4)); } - bool Run() override { + bool Run(StatsFuture *future) override { const Tensor *input = this->Input(INPUT); const Tensor *scale = this->Input(SCALE); const Tensor *offset = this->Input(OFFSET); @@ -40,7 +40,7 @@ class BatchNormOp : public Operator { Tensor *output = this->Output(OUTPUT); output->ResizeLike(input); - functor_(input, scale, offset, mean, var, output); + functor_(input, scale, offset, mean, var, output, future); return true; } diff --git a/mace/ops/batch_norm_test.cc b/mace/ops/batch_norm_test.cc index 1093cbb9a45bde6057c9429cc038016e56073865..6cc6eea342f26d4ac124bed7d7de3d89ad877fe2 100644 --- a/mace/ops/batch_norm_test.cc +++ b/mace/ops/batch_norm_test.cc @@ -88,7 +88,7 @@ TEST_F(BatchNormOpTest, SimpleRandomNeon) { index_t height = 64; index_t width = 64; // Construct graph - auto &net = test_net(); + OpsTestNet net; OpDefBuilder("BatchNorm", "BatchNormTest") .Input("Input") .Input("Scale") @@ -129,7 +129,7 @@ TEST_F(BatchNormOpTest, ComplexRandomNeon) { index_t height = 103; index_t width = 113; // Construct graph - auto &net = test_net(); + OpsTestNet net; OpDefBuilder("BatchNorm", "BatchNormTest") .Input("Input") .Input("Scale") @@ -172,7 +172,7 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) { index_t width = 64; // Construct graph - auto &net = test_net(); + OpsTestNet net; OpDefBuilder("BatchNorm", "BatchNormTest") .Input("Input") .Input("Scale") @@ -237,7 +237,7 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) { index_t width = 64; // Construct graph - auto &net = test_net(); + OpsTestNet net; OpDefBuilder("BatchNorm", "BatchNormTest") .Input("Input") .Input("Scale") @@ -303,7 +303,7 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) { index_t width = 113; // Construct graph - 
auto &net = test_net(); + OpsTestNet net; OpDefBuilder("BatchNorm", "BatchNormTest") .Input("Input") .Input("Scale") @@ -369,7 +369,7 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) { index_t width = 113; // Construct graph - auto &net = test_net(); + OpsTestNet net; OpDefBuilder("BatchNorm", "BatchNormTest") .Input("Input") .Input("Scale") diff --git a/mace/ops/batch_to_space.h b/mace/ops/batch_to_space.h index 14b6444553860935d4fe4add7e353727b0d74c96..286b67731cd7a00f338e2db62b3d81ece60990ec 100644 --- a/mace/ops/batch_to_space.h +++ b/mace/ops/batch_to_space.h @@ -53,14 +53,14 @@ class BatchToSpaceNDOp: public Operator { BatchToSpaceNDOp(const OperatorDef &op_def, Workspace *ws) : Operator(op_def, ws), functor_(true) {} - bool Run() override { + bool Run(StatsFuture *future) override { const Tensor *input_tensor = this->Input(INPUT); const Tensor *block_shape_tensor = this->Input(BLOCK_SHAPE); const Tensor *cropped_tensor = this->Input(CROPS); Tensor *output = this->Output(OUTPUT); BatchToSpaceHelper(input_tensor, block_shape_tensor, cropped_tensor, output); - functor_(output, block_shape_tensor, cropped_tensor, const_cast(input_tensor)); + functor_(output, block_shape_tensor, cropped_tensor, const_cast(input_tensor), future); return true; } diff --git a/mace/ops/bias_add.h b/mace/ops/bias_add.h index 54b426042181ebb6a87e2eb4b29cc86b20cc1dca..0088bfbd3bdbb3d3f51288fc3afb4fd18e5d8900 100644 --- a/mace/ops/bias_add.h +++ b/mace/ops/bias_add.h @@ -16,7 +16,7 @@ class BiasAddOp : public Operator { BiasAddOp(const OperatorDef &operator_def, Workspace *ws) : Operator(operator_def, ws), functor_() {} - bool Run() override { + bool Run(StatsFuture *future) override { const Tensor *input = this->Input(INPUT); const Tensor *bias = this->Input(BIAS); @@ -28,7 +28,7 @@ class BiasAddOp : public Operator { Tensor *output = this->Output(OUTPUT); output->ResizeLike(input); - functor_(input, bias, output); + functor_(input, bias, output, future); return true; } diff --git 
a/mace/ops/bias_add_test.cc b/mace/ops/bias_add_test.cc index 8cab655b1472f3056ce1bf65c015ee50525fb26a..b9e347388a8af57df065ad76befe6aea1a25eb2f 100644 --- a/mace/ops/bias_add_test.cc +++ b/mace/ops/bias_add_test.cc @@ -68,7 +68,7 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) { index_t width = 64 + rand() % 50; // Construct graph - auto &net = test_net(); + OpsTestNet net; OpDefBuilder("BiasAdd", "BiasAddTest") .Input("Input") .Input("Bias") @@ -114,7 +114,7 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) { index_t width = 113 + rand() % 100; // Construct graph - auto &net = test_net(); + OpsTestNet net; OpDefBuilder("BiasAdd", "BiasAddTest") .Input("Input") .Input("Bias") diff --git a/mace/ops/buffer_to_image.h b/mace/ops/buffer_to_image.h index cd946962024dea6bd36893b17cb9b9edeecd4cf6..40c412b928c788be3ebc6aeb3767cb4b8d664d09 100644 --- a/mace/ops/buffer_to_image.h +++ b/mace/ops/buffer_to_image.h @@ -16,14 +16,14 @@ class BufferToImageOp: public Operator { BufferToImageOp(const OperatorDef &op_def, Workspace *ws) : Operator(op_def, ws) {} - bool Run() override { + bool Run(StatsFuture *future) override { const Tensor *input_tensor = this->Input(INPUT); kernels::BufferType type = static_cast(OperatorBase::GetSingleArgument( "buffer_type", static_cast(kernels::FILTER))); Tensor *output = this->Output(OUTPUT); - functor_(const_cast(input_tensor), type, output); + functor_(const_cast(input_tensor), type, output, future); return true; } diff --git a/mace/ops/channel_shuffle.h b/mace/ops/channel_shuffle.h index 53cd8aee31355f4f58cbe7fe942d028f0413e999..9f6b19bebf92072f77d075bda35801888135d71c 100644 --- a/mace/ops/channel_shuffle.h +++ b/mace/ops/channel_shuffle.h @@ -20,7 +20,7 @@ class ChannelShuffleOp : public Operator { group_(OperatorBase::GetSingleArgument("group", 1)), functor_(this->group_) {} - bool Run() override { + bool Run(StatsFuture *future) override { const Tensor *input = this->Input(INPUT); Tensor *output = this->Output(OUTPUT); 
MACE_CHECK(input->shape()[1] % group_ == 0, @@ -29,7 +29,7 @@ class ChannelShuffleOp : public Operator { output->ResizeLike(input); functor_(input->data(), input->shape().data(), - output->mutable_data()); + output->mutable_data(), future); return true; } diff --git a/mace/ops/channel_shuffle_test.cc b/mace/ops/channel_shuffle_test.cc index ca30029d2053c41e702cefe39baccc3d72293dfb..f42e3b1b6caf492058a1d00359fd99bdf587343c 100644 --- a/mace/ops/channel_shuffle_test.cc +++ b/mace/ops/channel_shuffle_test.cc @@ -10,7 +10,7 @@ class ChannelShuffleOpTest : public OpsTestBase {}; TEST_F(ChannelShuffleOpTest, C8G4) { // Construct graph - auto &net = test_net(); + OpsTestNet net; OpDefBuilder("ChannelShuffle", "ChannelShuffleTest") .Input("Input") .Output("Output") diff --git a/mace/ops/concat.h b/mace/ops/concat.h index 0edf34551b1718e365873efb7758ea79d71fe797..27bc9012e8a00cdf5bdb7d76aa422b361a660d41 100644 --- a/mace/ops/concat.h +++ b/mace/ops/concat.h @@ -17,7 +17,7 @@ class ConcatOp : public Operator { : Operator(op_def, ws), functor_(OperatorBase::GetSingleArgument("axis", 3)){} - bool Run() override { + bool Run(StatsFuture *future) override { MACE_CHECK(this->InputSize() >= 2) << "There must be at least two inputs to concat"; const std::vector input_list = this->Inputs(); const int32_t concat_axis = OperatorBase::GetSingleArgument("axis", 3); @@ -30,7 +30,7 @@ class ConcatOp : public Operator { Tensor *output = this->Output(OUTPUT); - functor_(input_list, output); + functor_(input_list, output, future); return true; } diff --git a/mace/ops/concat_test.cc b/mace/ops/concat_test.cc index 8a42899e49183dd35fcfaf804679b4807219688b..ca1c06d6197c2a16d8efd4b2ea8cfba13bafccdb 100644 --- a/mace/ops/concat_test.cc +++ b/mace/ops/concat_test.cc @@ -12,7 +12,7 @@ class ConcatOpTest : public OpsTestBase {}; TEST_F(ConcatOpTest, CPUSimpleHorizon) { // Construct graph - auto &net = test_net(); + OpsTestNet net; OpDefBuilder("Concat", "ConcatTest") .Input("Input0") 
.Input("Input1") @@ -49,7 +49,7 @@ TEST_F(ConcatOpTest, CPUSimpleHorizon) { TEST_F(ConcatOpTest, CPUSimpleVertical) { // Construct graph - auto &net = test_net(); + OpsTestNet net; OpDefBuilder("Concat", "ConcatTest") .Input("Input0") .Input("Input1") @@ -92,7 +92,7 @@ TEST_F(ConcatOpTest, CPURandom) { int num_inputs = 2 + rand() % 10; int axis = rand() % dim; // Construct graph - auto &net = test_net(); + OpsTestNet net; auto builder = OpDefBuilder("Concat", "ConcatTest"); for (int i = 0; i < num_inputs; ++i) { builder = builder.Input(("Input" + ToString(i)).c_str()); diff --git a/mace/ops/conv_2d.h b/mace/ops/conv_2d.h index f02455d3840699c00bc98580617c7e07e39190b0..1df020118e3e7f2c290fd5cbb6424015b9559cec 100644 --- a/mace/ops/conv_2d.h +++ b/mace/ops/conv_2d.h @@ -22,13 +22,13 @@ class Conv2dOp : public ConvPool2dOpBase { this->dilations_.data()) { } - bool Run() override { + bool Run(StatsFuture *future) override { const Tensor *input = this->Input(INPUT); const Tensor *filter = this->Input(FILTER); const Tensor *bias = this->InputSize() >= 3 ? 
this->Input(BIAS) : nullptr; Tensor *output = this->Output(OUTPUT); - functor_(input, filter, bias, output); + functor_(input, filter, bias, output, future); return true; } diff --git a/mace/ops/conv_2d_test.cc b/mace/ops/conv_2d_test.cc index 711bf3891211451429fc3ad0e80e1f55611a4b70..e39c9b740049e96af68e04315872d6b5c30e98a3 100644 --- a/mace/ops/conv_2d_test.cc +++ b/mace/ops/conv_2d_test.cc @@ -78,11 +78,12 @@ void TestSimple3x3SAME() { ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.001); } - +#if __ARM_NEON TEST_F(Conv2dOpTest, NEONSimple) { TestSimple3x3VALID(); TestSimple3x3SAME(); } +#endif template void TestNHWCSimple3x3VALID() { @@ -233,9 +234,11 @@ void TestSimple3x3WithoutBias() { } +#ifdef __ARM_NEON TEST_F(Conv2dOpTest, NEONWithouBias) { TestSimple3x3WithoutBias(); } +#endif template void TestNHWCSimple3x3WithoutBias() { @@ -335,9 +338,11 @@ static void TestCombined3x3() { } +#ifdef __ARM_NEON TEST_F(Conv2dOpTest, NEONCombined) { TestCombined3x3(); } +#endif template static void TestNHWCCombined3x3() { diff --git a/mace/ops/depthwise_conv2d.h b/mace/ops/depthwise_conv2d.h index d4812def1fb1aaa534148e0951de5f06cec60564..9b36e566d0d8a4715f86a2d6cde93dc503364398 100644 --- a/mace/ops/depthwise_conv2d.h +++ b/mace/ops/depthwise_conv2d.h @@ -23,7 +23,7 @@ class DepthwiseConv2dOp : public ConvPool2dOpBase { functor_.dilations_ = this->dilations_.data(); } - bool Run() override { + bool Run(StatsFuture *future) override { const Tensor *input = this->Input(INPUT); const Tensor *filter = this->Input(FILTER); const Tensor *bias = nullptr; @@ -46,7 +46,7 @@ class DepthwiseConv2dOp : public ConvPool2dOpBase { output->Resize(output_shape); functor_.paddings_ = paddings; - functor_(input, filter, bias, output); + functor_(input, filter, bias, output, future); return true; } diff --git a/mace/ops/depthwise_conv2d_test.cc b/mace/ops/depthwise_conv2d_test.cc index 05cd5d7a204016c76b9f571e6c4b8581a8c32e21..b3fbdeb21bd12082437f4144d30c00298397842c 100644 --- 
a/mace/ops/depthwise_conv2d_test.cc +++ b/mace/ops/depthwise_conv2d_test.cc @@ -96,27 +96,33 @@ void TestNxNS12(const index_t height, const index_t width) { } +#if __ARM_NEON TEST_F(DepthwiseConv2dOpTest, NeonSimpleNxNS12) { TestNxNS12(4, 4); } +#endif TEST_F(DepthwiseConv2dOpTest, OpenCLSimpleNxNS12) { TestNxNS12(4, 4); } +#if __ARM_NEON TEST_F(DepthwiseConv2dOpTest, NeonAlignedNxNS12) { TestNxNS12(64, 64); TestNxNS12(128, 128); } +#endif TEST_F(DepthwiseConv2dOpTest, OpenCLAlignedNxNS12) { TestNxNS12(64, 64); TestNxNS12(128, 128); } +#if __ARM_NEON TEST_F(DepthwiseConv2dOpTest, NeonUnalignedNxNS12) { TestNxNS12(107, 113); } +#endif TEST_F(DepthwiseConv2dOpTest, OpenCLUnalignedNxNS12) { TestNxNS12(107, 113); diff --git a/mace/ops/fused_conv_2d.h b/mace/ops/fused_conv_2d.h index c6baafeaa27365141168511facafb68cc3573073..8ad36d12b019f7c1373725254853be043f86437c 100644 --- a/mace/ops/fused_conv_2d.h +++ b/mace/ops/fused_conv_2d.h @@ -22,13 +22,13 @@ class FusedConv2dOp : public ConvPool2dOpBase { this->dilations_.data()) { } - bool Run() override { + bool Run(StatsFuture *future) override { const Tensor *input = this->Input(INPUT); const Tensor *filter = this->Input(FILTER); const Tensor *bias = this->InputSize() > 2 ? 
this->Input(BIAS) : nullptr; Tensor *output = this->Output(OUTPUT); - functor_(input, filter, bias, output); + functor_(input, filter, bias, output, future); return true; } diff --git a/mace/ops/global_avg_pooling.h b/mace/ops/global_avg_pooling.h index 117857c13e39f4cc0d7b7b51691d7f6bc52ac0fe..c5ccf3457bc0b763ab1a7a92e54f9e1b8e2c5310 100644 --- a/mace/ops/global_avg_pooling.h +++ b/mace/ops/global_avg_pooling.h @@ -16,7 +16,7 @@ class GlobalAvgPoolingOp : public Operator { GlobalAvgPoolingOp(const OperatorDef &operator_def, Workspace *ws) : Operator(operator_def, ws) {} - bool Run() override { + bool Run(StatsFuture *future) override { const Tensor *input = this->Input(INPUT); Tensor *output = this->Output(OUTPUT); @@ -29,7 +29,7 @@ class GlobalAvgPoolingOp : public Operator { auto pooling_func = kernels::GlobalAvgPoolingFunctor(); pooling_func(input->data(), input->shape().data(), - output->mutable_data()); + output->mutable_data(), future); return true; } diff --git a/mace/ops/global_avg_pooling_test.cc b/mace/ops/global_avg_pooling_test.cc index da82e53435c043da7e2d6ad618201374bc9a5568..cb12c0d489d50ee7315a3aa7e6411b6a8c792aa0 100644 --- a/mace/ops/global_avg_pooling_test.cc +++ b/mace/ops/global_avg_pooling_test.cc @@ -10,7 +10,7 @@ class GlobalAvgPoolingOpTest : public OpsTestBase {}; TEST_F(GlobalAvgPoolingOpTest, 3x7x7_CPU) { // Construct graph - auto &net = test_net(); + OpsTestNet net; OpDefBuilder("GlobalAvgPooling", "GlobalAvgPoolingTest") .Input("Input") .Output("Output") @@ -32,9 +32,10 @@ TEST_F(GlobalAvgPoolingOpTest, 3x7x7_CPU) { ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.001); } +#if __ARM_NEON TEST_F(GlobalAvgPoolingOpTest, 3x7x7_NEON) { // Construct graph - auto &net = test_net(); + OpsTestNet net; OpDefBuilder("GlobalAvgPooling", "GlobalAvgPoolingTest") .Input("Input") .Output("Output") @@ -55,3 +56,4 @@ TEST_F(GlobalAvgPoolingOpTest, 3x7x7_NEON) { ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.001); } +#endif diff --git 
a/mace/ops/image_to_buffer.h b/mace/ops/image_to_buffer.h index 37465728e9a5b88fab07b1106dac5af1345a9548..9b07b82922428e4b2751e834813c21b4a30014cd 100644 --- a/mace/ops/image_to_buffer.h +++ b/mace/ops/image_to_buffer.h @@ -16,13 +16,13 @@ class ImageToBufferOp: public Operator { ImageToBufferOp(const OperatorDef &op_def, Workspace *ws) : Operator(op_def, ws), functor_(true) {} - bool Run() override { + bool Run(StatsFuture *future) override { const Tensor *input_tensor = this->Input(INPUT); Tensor *output = this->Output(OUTPUT); kernels::BufferType type = static_cast(OperatorBase::GetSingleArgument( "buffer_type", static_cast(kernels::FILTER))); - functor_(output, type, const_cast(input_tensor)); + functor_(output, type, const_cast(input_tensor), future); return true; } diff --git a/mace/ops/ops_test_util.h b/mace/ops/ops_test_util.h index 3d8c809be96c2cd92dd136a3ca14a24eeadad780..d03e43de91f2c79a4424a9df2efb2250b340f43b 100644 --- a/mace/ops/ops_test_util.h +++ b/mace/ops/ops_test_util.h @@ -176,7 +176,7 @@ class OpsTestNet { void Sync() { if (net_ && device_ == DeviceType::OPENCL) { - OpenCLRuntime::Get()->command_queue().finish(); + OpenCLRuntime::Global()->command_queue().finish(); } } @@ -188,20 +188,14 @@ class OpsTestNet { }; class OpsTestBase : public ::testing::Test { - public: - OpsTestNet &test_net() { return test_net_; }; - protected: - virtual void TearDown() { - auto ws = test_net_.ws(); - auto tensor_names = ws->Tensors(); - for (auto &name : tensor_names) { - ws->RemoveTensor(name); - } + virtual void SetUp() { + // OpenCLRuntime::CreateGlobal(); } - private: - OpsTestNet test_net_; + virtual void TearDown() { + // OpenCLRuntime::DestroyGlobal(); + } }; template diff --git a/mace/ops/pooling.h b/mace/ops/pooling.h index bbc653ab75d627a412d5fcdfaf5c67772658f24f..43250852f8d54de156f4408bd0f256a464b98fc7 100644 --- a/mace/ops/pooling.h +++ b/mace/ops/pooling.h @@ -23,11 +23,11 @@ class PoolingOp : public ConvPool2dOpBase { functor_(pooling_type_, 
kernels_.data(), this->strides_.data(), this->padding_, this->dilations_.data()){}; - bool Run() override { + bool Run(StatsFuture *future) override { const Tensor *input = this->Input(INPUT); Tensor *output = this->Output(OUTPUT); - functor_(input, output); + functor_(input, output, future); return true; }; diff --git a/mace/ops/pooling_test.cc b/mace/ops/pooling_test.cc index dcda06b75483e6e0e01cfe16594991d72171d2bf..1e1a992ab620cc6ea377191d3fe3a215d822b943 100644 --- a/mace/ops/pooling_test.cc +++ b/mace/ops/pooling_test.cc @@ -15,7 +15,7 @@ class PoolingOpTest : public OpsTestBase {}; TEST_F(PoolingOpTest, MAX_VALID) { // Construct graph - auto &net = test_net(); + OpsTestNet net; OpDefBuilder("Pooling", "PoolingTest") .Input("Input") .Output("Output") @@ -45,7 +45,7 @@ TEST_F(PoolingOpTest, MAX_VALID) { TEST_F(PoolingOpTest, MAX_SAME) { // Construct graph - auto &net = test_net(); + OpsTestNet net; OpDefBuilder("Pooling", "PoolingTest") .Input("Input") .Output("Output") @@ -71,7 +71,7 @@ TEST_F(PoolingOpTest, MAX_SAME) { TEST_F(PoolingOpTest, MAX_VALID_DILATION) { // Construct graph - auto &net = test_net(); + OpsTestNet net; OpDefBuilder("Pooling", "PoolingTest") .Input("Input") .Output("Output") @@ -98,7 +98,7 @@ TEST_F(PoolingOpTest, MAX_VALID_DILATION) { TEST_F(PoolingOpTest, MAX_k2x2s2x2) { // Construct graph - auto &net = test_net(); + OpsTestNet net; OpDefBuilder("Pooling", "PoolingTest") .Input("Input") .Output("Output") @@ -245,7 +245,7 @@ TEST_F(PoolingOpTest, OPENCLUnalignedMaxPooling3S2) { TEST_F(PoolingOpTest, AVG_VALID) { // Construct graph - auto &net = test_net(); + OpsTestNet net; OpDefBuilder("Pooling", "PoolingTest") .Input("Input") .Output("Output") diff --git a/mace/ops/relu.h b/mace/ops/relu.h index 613776a870b736b9a2166339301674bf484fe4fe..489e21d12f6f5ebb79db6581d0c902427c49ef4b 100644 --- a/mace/ops/relu.h +++ b/mace/ops/relu.h @@ -18,12 +18,12 @@ class ReluOp : public Operator { functor_.max_limit_ = 
OperatorBase::GetSingleArgument("max_limit", static_cast(-1)); } - bool Run() override { + bool Run(StatsFuture *future) override { const Tensor *input_tensor = this->inputs_[0]; Tensor *output_tensor = this->outputs_[0]; output_tensor->ResizeLike(input_tensor); - functor_(input_tensor, output_tensor); + functor_(input_tensor, output_tensor, future); return true; } diff --git a/mace/ops/relu_test.cc b/mace/ops/relu_test.cc index 879ecb3211f75ebeb791467ae152d40e653ccdc9..e74b927ef98a814c6bf0548909b345dae273acc2 100644 --- a/mace/ops/relu_test.cc +++ b/mace/ops/relu_test.cc @@ -53,9 +53,11 @@ TEST_F(ReluOpTest, CPUSimple) { TestSimple(); } +#if __ARM_NEON TEST_F(ReluOpTest, NEONSimple) { TestSimple(); } +#endif TEST_F(ReluOpTest, OPENCLSimple) { TestSimple(); @@ -103,9 +105,11 @@ TEST_F(ReluOpTest, CPUUnalignedSimple) { TestUnalignedSimple(); } +#if __ARM_NEON TEST_F(ReluOpTest, NEONUnalignedSimple) { TestUnalignedSimple(); } +#endif TEST_F(ReluOpTest, OPENCLUnalignedSimple) { TestUnalignedSimple(); @@ -157,9 +161,11 @@ TEST_F(ReluOpTest, CPUSimpleReluX) { TestSimpleReluX(); } +#if __ARM_NEON TEST_F(ReluOpTest, NEONSimpleReluX) { TestSimpleReluX(); } +#endif TEST_F(ReluOpTest, OPENCLSimpleReluX) { TestSimpleReluX(); @@ -209,9 +215,11 @@ TEST_F(ReluOpTest, CPUUnalignedSimpleReluX) { TestUnalignedSimpleReluX(); } +#if __ARM_NEON TEST_F(ReluOpTest, NEONUnalignedSimpleReluX) { TestUnalignedSimpleReluX(); } +#endif TEST_F(ReluOpTest, OPENCLUnalignedSimpleReluX) { TestUnalignedSimpleReluX(); diff --git a/mace/ops/resize_bilinear.h b/mace/ops/resize_bilinear.h index 0e814f53e48dbf709adc3dc7b577956e00788cb6..bea852b529bb068cd0cee0e731fed29770aed157 100644 --- a/mace/ops/resize_bilinear.h +++ b/mace/ops/resize_bilinear.h @@ -19,14 +19,14 @@ class ResizeBilinearOp : public Operator { OperatorBase::GetRepeatedArgument("size", {-1, -1}), OperatorBase::GetSingleArgument("align_corners", false)) {} - bool Run() override { + bool Run(StatsFuture *future) override { const Tensor 
*input = this->Input(0); Tensor *output = this->Output(0); MACE_CHECK(input->dim_size() == 4, "input must be 4-dimensional.", input->dim_size()); - functor_(input, output); + functor_(input, output, future); return true; } diff --git a/mace/ops/resize_bilinear_benchmark.cc b/mace/ops/resize_bilinear_benchmark.cc index d9453908c11bff15ad8ee3c996af03523d6fb7d1..9a51b03caefe335a3cf1f98735076313f86e6060 100644 --- a/mace/ops/resize_bilinear_benchmark.cc +++ b/mace/ops/resize_bilinear_benchmark.cc @@ -73,7 +73,6 @@ static void ResizeBilinearBenchmark(int iters, BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, TYPE, OPENCL); // SNPE 835 GPU: 6870us -BM_RESIZE_BILINEAR(1, 128, 120, 120, 480, 480, half); BM_RESIZE_BILINEAR(1, 128, 120, 120, 480, 480, float); BM_RESIZE_BILINEAR(1, 256, 7, 7, 15, 15, float); diff --git a/mace/ops/resize_bilinear_test.cc b/mace/ops/resize_bilinear_test.cc index fedb12e9f847b774919f6f0cad555e808009c206..8d7f2d5579e24c8cf097efd46ee2c1b493f1b147 100644 --- a/mace/ops/resize_bilinear_test.cc +++ b/mace/ops/resize_bilinear_test.cc @@ -13,7 +13,7 @@ class ResizeBilinearTest : public OpsTestBase {}; TEST_F(ResizeBilinearTest, CPUResizeBilinearWOAlignCorners) { testing::internal::LogToStderr(); // Construct graph - auto &net = test_net(); + OpsTestNet net; OpDefBuilder("ResizeBilinear", "ResizeBilinearTest") .Input("Input") .Output("Output") @@ -37,7 +37,7 @@ TEST_F(ResizeBilinearTest, CPUResizeBilinearWOAlignCorners) { TEST_F(ResizeBilinearTest, ResizeBilinearWAlignCorners) { testing::internal::LogToStderr(); // Construct graph - auto &net = test_net(); + OpsTestNet net; OpDefBuilder("ResizeBilinear", "ResizeBilinearTest") .Input("Input") .Output("Output") diff --git a/mace/ops/space_to_batch.h b/mace/ops/space_to_batch.h index 079697d495a163912ca4443823fdca15f4d1bda2..6abc7772e71a55baeb89592661c745e9598c09ed 100644 --- a/mace/ops/space_to_batch.h +++ b/mace/ops/space_to_batch.h @@ -52,14 +52,14 @@ class SpaceToBatchNDOp : public Operator { 
SpaceToBatchNDOp(const OperatorDef &op_def, Workspace *ws) : Operator(op_def, ws) {} - bool Run() override { + bool Run(StatsFuture *future) override { const Tensor *input_tensor = this->Input(INPUT); const Tensor *block_shape_tensor = this->Input(BLOCK_SHAPE); const Tensor *paddings_tensor = this->Input(PADDINGS); Tensor *output = this->Output(OUTPUT); SpaceToBatchHelper(input_tensor, block_shape_tensor, paddings_tensor, output); - functor_(const_cast(input_tensor), block_shape_tensor, paddings_tensor, output); + functor_(const_cast(input_tensor), block_shape_tensor, paddings_tensor, output, future); return true; } diff --git a/mace/proto/BUILD b/mace/proto/BUILD index e166ade2bf1b5d1628b815279bc627d3460beac6..d46aa81266f71c5847080cf2b59369b3305bb467 100644 --- a/mace/proto/BUILD +++ b/mace/proto/BUILD @@ -10,16 +10,6 @@ licenses(["notice"]) # Apache 2.0 load("@com_google_protobuf//:protobuf.bzl", "py_proto_library") -proto_library( - name = "stats", - srcs = ["stats.proto"], -) - -cc_proto_library( - name = "stats_proto", - deps = [":stats"], -) - py_proto_library( name = "mace_py", srcs = ["mace.proto"], diff --git a/mace/proto/stats.proto b/mace/proto/stats.proto deleted file mode 100644 index 6e06b4675e793879226fe2ac155bb38f468b3d0a..0000000000000000000000000000000000000000 --- a/mace/proto/stats.proto +++ /dev/null @@ -1,16 +0,0 @@ -syntax = "proto2"; - -package mace; - -message OperatorStats { - optional string operator_name = 1; - optional string type = 2; - optional int64 all_start_micros = 3; - optional int64 op_start_rel_micros = 4; - optional int64 op_end_rel_micros = 5; - optional int64 all_end_rel_micros = 6; -}; - -message RunMetadata { - repeated OperatorStats op_stats = 1; -} diff --git a/mace/python/tools/BUILD b/mace/python/tools/BUILD index fbe406d33df00839fdd53d0db72790f6eee3e424..f3a75c21f9b82789567a70bbb2cb137d349b0e63 100644 --- a/mace/python/tools/BUILD +++ b/mace/python/tools/BUILD @@ -43,12 +43,3 @@ py_binary( "//mace/proto:mace_py", ], ) 
- -py_binary( - name = "tf_ops_stats", - srcs = ["tf_ops_stats.py"], - srcs_version = "PY2AND3", - deps = [ - "@six_archive//:six", - ], -) diff --git a/mace/tools/benchmark/BUILD b/mace/tools/benchmark/BUILD index 87377d9934eeace47377514c8d38f9c81ab52b9f..321f451de2aaf9b141338e716440e507827465e5 100644 --- a/mace/tools/benchmark/BUILD +++ b/mace/tools/benchmark/BUILD @@ -7,15 +7,10 @@ cc_library( name = "stat_summarizer", srcs = ["stat_summarizer.cc"], hdrs = ["stat_summarizer.h"], - copts = ["-std=c++11"], - linkopts = ["-fopenmp"] + if_android([ - "-ldl", - "-lm", - ]), + copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"], linkstatic = 1, deps = [ "//mace/core", - "//mace/proto:stats_proto", ], ) @@ -24,8 +19,7 @@ cc_binary( srcs = [ "benchmark_model.cc", ], - copts = ["-std=c++11"], - linkopts = ["-fopenmp"] + if_android(["-ldl"]), + copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"], linkstatic = 1, deps = [ ":stat_summarizer", diff --git a/mace/tools/benchmark/stat_summarizer.cc b/mace/tools/benchmark/stat_summarizer.cc index fd0e820a4eac4f38cba8e81a5fd4a8214a966d9b..0b0d8e5a69289b60025ef41c60af20f6d3288a71 100644 --- a/mace/tools/benchmark/stat_summarizer.cc +++ b/mace/tools/benchmark/stat_summarizer.cc @@ -4,7 +4,7 @@ #include "mace/tools/benchmark/stat_summarizer.h" #include "mace/core/common.h" -#include "mace/proto/stats.pb.h" +#include "mace/core/operator.h" #include #include @@ -26,20 +26,21 @@ void StatSummarizer::ProcessMetadata(const RunMetadata &run_metadata) { int64_t curr_total_us = 0; int64_t mem_total = 0; - int64_t first_node_start_us = run_metadata.op_stats(0).all_start_micros(); + MACE_CHECK(!run_metadata.op_stats.empty()); + int64_t first_node_start_us = run_metadata.op_stats[0].stats.start_micros; int node_num = 0; - for (const auto &ops : run_metadata.op_stats()) { - std::string name = ops.operator_name(); - std::string op_type = ops.type(); + for (const auto &ops : run_metadata.op_stats) { + std::string name = ops.operator_name; + 
std::string op_type = ops.type; ++node_num; - const int64_t curr_time = ops.all_end_rel_micros(); + const int64_t curr_time = ops.stats.end_micros - ops.stats.start_micros; curr_total_us += curr_time; auto result = details_.emplace(name, Detail()); Detail *detail = &(result.first->second); - detail->start_us.UpdateStat(ops.all_start_micros() - first_node_start_us); + detail->start_us.UpdateStat(ops.stats.start_micros - first_node_start_us); detail->rel_end_us.UpdateStat(curr_time); // If this is the first pass, initialize some values. diff --git a/mace/utils/BUILD b/mace/utils/BUILD index cd5583dfc9f35b214c952c8647e20dfb0c091c3d..d98c167532e0c511fc4775eace32e24e3edbd99a 100644 --- a/mace/utils/BUILD +++ b/mace/utils/BUILD @@ -10,36 +10,43 @@ licenses(["notice"]) # Apache 2.0 load("//mace:mace.bzl", "if_android") cc_library( - name = "command_line_flags", + name = "logging", srcs = [ - "command_line_flags.cc", + "logging.cc", ], hdrs = [ - "command_line_flags.h", + "logging.h", ], copts = ["-std=c++11"], - deps = [ - "//mace/core", - ], + linkopts = if_android([ + "-llog", + ]), ) cc_library( - name = "utils", + name = "command_line_flags", + srcs = [ + "command_line_flags.cc", + ], hdrs = [ - "utils.h", + "command_line_flags.h", ], copts = ["-std=c++11"], + deps = [ + ":logging", + ], ) cc_library( name = "tuner", hdrs = [ "tuner.h", + "timer.h", ], copts = ["-std=c++11"], deps = [ - "//mace/core", - "//mace/core:opencl_runtime", + ":utils_hdrs", + ":logging", ], ) @@ -50,7 +57,7 @@ cc_test( "tuner_test.cc", ], copts = ["-std=c++11"], - linkopts = if_android(["-lm", "-ldl"]), + linkopts = if_android(["-pie", "-lm"]), linkstatic = 1, deps = [ ":tuner", @@ -58,3 +65,22 @@ cc_test( "@gtest//:gtest_main", ], ) + +cc_library( + name = "utils_hdrs", + hdrs = [ + "utils.h", + "env_time.h", + ], + copts = ["-std=c++11"], +) + +cc_library( + name = "utils", + deps = [ + ":utils_hdrs", + ":tuner", + ":command_line_flags", + ":logging", + ], +) diff --git 
a/mace/utils/command_line_flags.cc b/mace/utils/command_line_flags.cc index 146ead017b3cdd184a54d35e16e430263e3187a9..83f8be3cd544e52fa441273bf60d4a0ccd7f28fb 100644 --- a/mace/utils/command_line_flags.cc +++ b/mace/utils/command_line_flags.cc @@ -3,12 +3,16 @@ // #include "mace/utils/command_line_flags.h" +#include "mace/utils/logging.h" + #include #include namespace mace { namespace { +using namespace std; + bool StringConsume(string &arg, const string &x) { if ((arg.size() >= x.size()) && (memcmp(arg.data(), x.data(), x.size()) == 0)) { diff --git a/mace/utils/command_line_flags.h b/mace/utils/command_line_flags.h index 48eea0b486b5efbdd88b032f83506be455c0f031..ce65e944b3ac21340448f1951eb9d5df37a1a20f 100644 --- a/mace/utils/command_line_flags.h +++ b/mace/utils/command_line_flags.h @@ -5,31 +5,32 @@ #ifndef MACE_CORE_COMMAND_LINE_FLAGS_H #define MACE_CORE_COMMAND_LINE_FLAGS_H -#include "mace/core/common.h" +#include +#include namespace mace { class Flag { public: - Flag(const char *name, int *dst1, const string &usage_text); - Flag(const char *name, long long *dst1, const string &usage_text); - Flag(const char *name, bool *dst, const string &usage_text); - Flag(const char *name, string *dst, const string &usage_text); - Flag(const char *name, float *dst, const string &usage_text); + Flag(const char *name, int *dst1, const std::string &usage_text); + Flag(const char *name, long long *dst1, const std::string &usage_text); + Flag(const char *name, bool *dst, const std::string &usage_text); + Flag(const char *name, std::string *dst, const std::string &usage_text); + Flag(const char *name, float *dst, const std::string &usage_text); private: friend class Flags; - bool Parse(string arg, bool *value_parsing_ok) const; + bool Parse(std::string arg, bool *value_parsing_ok) const; - string name_; + std::string name_; enum { TYPE_INT, TYPE_INT64, TYPE_BOOL, TYPE_STRING, TYPE_FLOAT } type_; int *int_value_; long long *int64_value_; bool *bool_value_; - string 
*string_value_; + std::string *string_value_; float *float_value_; - string usage_text_; + std::string usage_text_; }; class Flags { @@ -43,7 +44,7 @@ class Flags { // Return a usage message with command line cmdline, and the // usage_text strings in flag_list[]. - static string Usage(const string &cmdline, + static std::string Usage(const std::string &cmdline, const std::vector &flag_list); }; diff --git a/mace/core/testing/env_time.h b/mace/utils/env_time.h similarity index 65% rename from mace/core/testing/env_time.h rename to mace/utils/env_time.h index f07783c1f66e4551886276e30796001ae1fc1a52..c1b86c074c9e6da99a30767470029dfb88753fff 100644 --- a/mace/core/testing/env_time.h +++ b/mace/utils/env_time.h @@ -2,19 +2,17 @@ // Copyright (c) 2017 XiaoMi All rights reserved. // -// Only support POSIX environment -#ifndef MACE_TESTING_TIME_H_ -#define MACE_TESTING_TIME_H_ +#ifndef MACE_UTILS_ENV_TIME_H +#define MACE_UTILS_ENV_TIME_H #include #include #include -#include "mace/core/types.h" namespace mace { -namespace testing { +namespace utils { inline int64_t NowMicros() { struct timeval tv; @@ -25,4 +23,4 @@ inline int64_t NowMicros() { -} // namespace testing +} // namespace utils } // namespace mace -#endif // MACE_TESTING_TIME_H_ +#endif // MACE_UTILS_ENV_TIME_H diff --git a/mace/core/logging.cc b/mace/utils/logging.cc similarity index 98% rename from mace/core/logging.cc rename to mace/utils/logging.cc index ffc359ab0951d7f504f2556e87c4c2547fb4b2f5..908a8f6712c9c64858bfc7ed263d27e7aad84098 100644 --- a/mace/core/logging.cc +++ b/mace/utils/logging.cc @@ -2,7 +2,7 @@ // Copyright (c) 2017 XiaoMi All rights reserved. 
// -#include "mace/core/logging.h" +#include "mace/utils/logging.h" #include #if defined(PLATFORM_POSIX_ANDROID) diff --git a/mace/core/logging.h b/mace/utils/logging.h similarity index 97% rename from mace/core/logging.h rename to mace/utils/logging.h index 448532798dfb9d7c4fc998393c287a924e4e03b2..662d87740c3558a9538a7291c3cbacf7f8a4ac28 100644 --- a/mace/core/logging.h +++ b/mace/utils/logging.h @@ -2,8 +2,8 @@ // Copyright (c) 2017 XiaoMi All rights reserved. // -#ifndef MACE_CORE_LOGGING_H_ -#define MACE_CORE_LOGGING_H_ +#ifndef MACE_UTILS_LOGGING_H_ +#define MACE_UTILS_LOGGING_H_ #include #include @@ -150,4 +150,4 @@ T &&CheckNotNull(const char *file, int line, const char *exprtext, T &&t) { } // namespace internal } // namespace mace -#endif // MACE_CORE_LOGGING_H_ +#endif // MACE_UTILS_LOGGING_H_ diff --git a/mace/utils/timer.h b/mace/utils/timer.h new file mode 100644 index 0000000000000000000000000000000000000000..cee4411e278abc3dce303c15f02ae8c37acfef1a --- /dev/null +++ b/mace/utils/timer.h @@ -0,0 +1,43 @@ +// +// Copyright (c) 2017 XiaoMi All rights reserved. 
+// + +#ifndef MACE_UTILS_TIMER_H_ +#define MACE_UTILS_TIMER_H_ + +#include "mace/utils/env_time.h" + +namespace mace { + +// Interface for timing the interval between StartTiming() and StopTiming(). +class Timer { + public: + virtual ~Timer() = default; + virtual void StartTiming() = 0; + virtual void StopTiming() = 0; + virtual double ElapsedMicros() = 0; +}; + +// Timer that samples mace::utils::NowMicros() on start and on stop. +class WallClockTimer : public Timer { + public: + void StartTiming() override { + start_micros_ = mace::utils::NowMicros(); + } + + void StopTiming() override { + stop_micros_ = mace::utils::NowMicros(); + } + + double ElapsedMicros() override { + return stop_micros_ - start_micros_; + } + + private: + double start_micros_ = 0; + double stop_micros_ = 0; +}; + +} // namespace mace + +#endif // MACE_UTILS_TIMER_H_ diff --git a/mace/utils/tuner.h b/mace/utils/tuner.h index 32c21e9b1dfef01fc45396242dd4f1a4937df972..7168980b27ed680bb3a13b90b26c5da271a0df95 100644 --- a/mace/utils/tuner.h +++ b/mace/utils/tuner.h @@ -5,44 +5,50 @@ #ifndef MACE_UTILS_TUNER_H_ #define MACE_UTILS_TUNER_H_ #include -#include +#include #include +#include #include #include -#include -#include +#include -#include "mace/core/logging.h" -#include "mace/utils/utils.h" -#include "mace/core/runtime/opencl/opencl_runtime.h" +#include "mace/utils/logging.h" +#include "mace/utils/timer.h" namespace mace { - -template +template class Tuner { public: - static Tuner* Get() { + static Tuner *Get() { static Tuner tuner; return &tuner; } - template - RetType TuneOrRun(const std::string param_key, - const std::vector &default_param, - const std::function>()> &param_generator, - const std::function &)> &func) { + inline bool IsTuning() { + const char *tuning = getenv("MACE_TUNING"); + return tuning != nullptr && strlen(tuning) == 1 && tuning[0] == '1'; + } + template + RetType TuneOrRun( + const std::string param_key, + const std::vector &default_param, + const std::function>()> + &param_generator, + const std::function &)> &func, + Timer *timer) { if (IsTuning() && param_generator != nullptr) { // tune std::vector opt_param = default_param; - RetType res = 
Tune(param_generator, func, opt_param); + RetType res = Tune(param_generator, func, timer, &opt_param); param_table_[param_key] = opt_param; return res; } else { // run if (param_table_.find(param_key) != param_table_.end()) { - VLOG(1) << param_key << ": " << internal::MakeString(param_table_[param_key]); + VLOG(1) << param_key << ": " + << internal::MakeString(param_table_[param_key]); return func(param_table_[param_key]); } else { return func(default_param); @@ -56,17 +62,10 @@ class Tuner { ReadRunParamters(); } - ~Tuner() { - WriteRunParameters(); - } - - Tuner(const Tuner&) = delete; - Tuner& operator=(const Tuner&) = delete; + ~Tuner() { WriteRunParameters(); } - inline bool IsTuning() { - const char *tuning = getenv("MACE_TUNING"); - return tuning != nullptr && strlen(tuning) == 1 && tuning[0] == '1'; - } + Tuner(const Tuner &) = delete; + Tuner &operator=(const Tuner &) = delete; inline void WriteRunParameters() { VLOG(1) << path_; @@ -83,7 +82,8 @@ class Tuner { auto &params = kp.second; int32_t params_size = params.size() * sizeof(param_type); - ofs.write(reinterpret_cast(&params_size), sizeof(params_size)); + ofs.write(reinterpret_cast(&params_size), + sizeof(params_size)); for (auto &param : params) { - ofs.write(reinterpret_cast(&param), sizeof(params_size)); + ofs.write(reinterpret_cast(&param), sizeof(param_type)); /* one element per write, the same size the ifs.read() side consumes */ VLOG(1) << param; @@ -114,7 +114,7 @@ class Tuner { params_count = params_size / sizeof(param_type); std::vector params(params_count); for (int i = 0; i < params_count; ++i) { - ifs.read(reinterpret_cast(&params[i]), sizeof(param_type)); + ifs.read(reinterpret_cast(&params[i]), sizeof(param_type)); } param_table_.emplace(key, params); } @@ -126,45 +126,47 @@ class Tuner { } template - inline RetType Run(const std::function &)> &func, - const std::vector &params, - int num_runs, - double &time_us) { + inline RetType Run( + const std::function &)> &func, + const std::vector &params, + Timer *timer, + int num_runs, + double *time_us) { RetType res; - int64_t total_time_us = 0; + double total_time_us = 0; /* ElapsedMicros() returns double; an integer accumulator would truncate every sample */ for (int i = 0; i < num_runs; ++i) { + timer->StartTiming(); 
res = func(params); - OpenCLRuntime::Get()->command_queue().finish(); - - double start_time = OpenCLRuntime::Get()->GetEventProfilingStartInfo() / 1000.0; - double end_time = OpenCLRuntime::Get()->GetEventProfilingEndInfo() / 1000.0; - total_time_us += end_time - start_time; + timer->StopTiming(); + total_time_us += timer->ElapsedMicros(); } - time_us = total_time_us * 1.0 / num_runs; + *time_us = total_time_us * 1.0 / num_runs; return res; } template - inline RetType Tune(const std::function>()> &param_generator, - const std::function &)> &func, - std::vector &opt_params) { - OpenCLRuntime::EnableProfiling(); + inline RetType Tune( + const std::function>()> + &param_generator, + const std::function &)> &func, + Timer *timer, + std::vector *opt_params) { RetType res; double opt_time = std::numeric_limits::max(); auto params = param_generator(); - for (const auto &param: params) { + for (const auto &param : params) { double tmp_time = 0.0; // warm up - Run(func, param, 2, tmp_time); + Run(func, param, timer, 2, &tmp_time); // run - RetType tmp_res = Run(func, param, 10, tmp_time); + RetType tmp_res = Run(func, param, timer, 10, &tmp_time); // Check the execution time if (tmp_time < opt_time) { opt_time = tmp_time; - opt_params = param; + *opt_params = param; res = tmp_res; } } @@ -172,9 +174,9 @@ class Tuner { } private: - const char* path_; + const char *path_; std::unordered_map> param_table_; }; -} // namespace mace -#endif // MACE_UTILS_TUNER_H_ +} // namespace mace +#endif // MACE_UTILS_TUNER_H_ diff --git a/mace/utils/tuner_test.cc b/mace/utils/tuner_test.cc index ea80dd4dd87716245ceb32ccb1c2c9d8a197df2a..fae12c91aa160bc8fa7ecdcc66070316cdf71ff5 100644 --- a/mace/utils/tuner_test.cc +++ b/mace/utils/tuner_test.cc @@ -28,13 +28,14 @@ TEST_F(TunerTest, SimpleRun) { } }; + WallClockTimer timer; std::vector default_params(1, 1); - int res = Tuner::Get()->template TuneOrRun("SimpleRun", 
default_params, nullptr, TunerFunc, &timer); EXPECT_EQ(expect, res); default_params[0] = 2; - res = Tuner::Get()->template TuneOrRun("SimpleRun", default_params, nullptr, TunerFunc); + res = Tuner::Get()->template TuneOrRun("SimpleRun", default_params, nullptr, TunerFunc, &timer); EXPECT_EQ(expect+1, res); } @@ -54,11 +55,12 @@ TEST_F(TunerTest, SimpleTune) { return {{1}, {2}, {3}, {4}}; }; // tune - int res = Tuner::Get()->template TuneOrRun("SimpleRun", default_params, *params_generator, TunerFunc); + WallClockTimer timer; + int res = Tuner::Get()->template TuneOrRun("SimpleRun", default_params, *params_generator, TunerFunc, &timer); EXPECT_EQ(expect, res); // run - res = Tuner::Get()->template TuneOrRun("SimpleRun", default_params, nullptr, TunerFunc); + res = Tuner::Get()->template TuneOrRun("SimpleRun", default_params, nullptr, TunerFunc, &timer); EXPECT_EQ(expect, res); } diff --git a/tools/bazel-adb-run.sh b/tools/bazel-adb-run.sh index b41d4d140303d8b682c49d40d23a35abe81b68c3..4926ad2b95197be8772e988d07fddcbac02608fe 100755 --- a/tools/bazel-adb-run.sh +++ b/tools/bazel-adb-run.sh @@ -21,10 +21,13 @@ ANDROID_ABI=armeabi-v7a ANDROID_ABI=arm64-v8a STRIP="" STRIP="--strip always" +VLOG_LEVEL=0 +PROFILINE="--define profiling=true" -# for profiling -bazel build -c opt $STRIP --verbose_failures $BAZEL_TARGET --crosstool_top=//external:android/crosstool --host_crosstool_top=@bazel_tools//tools/cpp:toolchain --cpu=$ANDROID_ABI --define profiling=true -#bazel build -c opt $STRIP --verbose_failures $BAZEL_TARGET --crosstool_top=//external:android/crosstool --host_crosstool_top=@bazel_tools//tools/cpp:toolchain --cpu=$ANDROID_ABI +BRANCH=$(git symbolic-ref --short HEAD) +COMMIT_ID=$(git rev-parse --short HEAD) + +bazel build -c opt $STRIP --verbose_failures $BAZEL_TARGET --crosstool_top=//external:android/crosstool --host_crosstool_top=@bazel_tools//tools/cpp:toolchain --cpu=$ANDROID_ABI if [ $? 
-ne 0 ]; then exit 1 @@ -39,5 +42,5 @@ for device in `adb devices | grep "^[A-Za-z0-9]\+[[:space:]]\+device$"| cut -f1` adb -s ${device} shell "mkdir -p $DEVICE_PATH" adb -s ${device} push $CL_PATH $DEVICE_CL_PATH && \ adb -s ${device} push $BAZEL_BIN_PATH/$BIN_NAME $DEVICE_PATH && \ - adb -s ${device} shell "MACE_KERNEL_PATH=$DEVICE_CL_PATH $DEVICE_PATH/$BIN_NAME $@" + adb -s ${device} shell "MACE_KERNEL_PATH=$DEVICE_CL_PATH MACE_CPP_MIN_VLOG_LEVEL=0$VLOG_LEVEL $DEVICE_PATH/$BIN_NAME $@" done diff --git a/tools/validate_gcn.sh b/tools/validate_gcn.sh index 99a245a191ae45e4d59ec768df7cc6e12c4055e0..04e0f0a5e4d9be430c64ecda1571f80bcc2fadd0 100644 --- a/tools/validate_gcn.sh +++ b/tools/validate_gcn.sh @@ -55,7 +55,7 @@ bazel build -c opt --strip always mace/examples:mace_run \ adb shell "mkdir -p ${PHONE_DATA_DIR}" adb shell "mkdir -p ${KERNEL_DIR}" -adb push mace/kernels/opencl/cl/* ${KERNEL_DIR} +adb push mace/kernels/opencl/cl/ ${KERNEL_DIR} adb push ${MODEL_DIR}/${INPUT_FILE_NAME} ${PHONE_DATA_DIR} adb push bazel-bin/mace/examples/mace_run ${PHONE_DATA_DIR}