提交 0a07d780 编写于 作者: A Alexander Alekhin

ocl: OpenCL SVM support

上级 58ad952b
......@@ -162,6 +162,7 @@ OCV_OPTION(WITH_XIMEA "Include XIMEA cameras support" OFF
OCV_OPTION(WITH_XINE "Include Xine support (GPL)" OFF IF (UNIX AND NOT APPLE AND NOT ANDROID) )
OCV_OPTION(WITH_CLP "Include Clp support (EPL)" OFF)
OCV_OPTION(WITH_OPENCL "Include OpenCL Runtime support" ON IF (NOT IOS) )
OCV_OPTION(WITH_OPENCL_SVM "Include OpenCL Shared Virtual Memory support" OFF ) # experimental
OCV_OPTION(WITH_OPENCLAMDFFT "Include AMD OpenCL FFT library support" ON IF (NOT ANDROID AND NOT IOS) )
OCV_OPTION(WITH_OPENCLAMDBLAS "Include AMD OpenCL BLAS library support" ON IF (NOT ANDROID AND NOT IOS) )
OCV_OPTION(WITH_DIRECTX "Include DirectX support" ON IF WIN32 )
......
......@@ -26,6 +26,10 @@ if(OPENCL_FOUND)
set(HAVE_OPENCL 1)
if(WITH_OPENCL_SVM)
set(HAVE_OPENCL_SVM 1)
endif()
if(HAVE_OPENCL_STATIC)
set(OPENCL_LIBRARIES "${OPENCL_LIBRARY}")
else()
......
......@@ -122,6 +122,7 @@
/* OpenCL Support */
#cmakedefine HAVE_OPENCL
#cmakedefine HAVE_OPENCL_STATIC
#cmakedefine HAVE_OPENCL_SVM
/* OpenEXR codec */
#cmakedefine HAVE_OPENEXR
......
......@@ -415,7 +415,7 @@ public:
const size_t dstofs[], const size_t dststep[], bool sync) const;
// default implementation returns DummyBufferPoolController
virtual BufferPoolController* getBufferPoolController() const;
virtual BufferPoolController* getBufferPoolController(const char* id = NULL) const;
};
......@@ -481,7 +481,7 @@ struct CV_EXPORTS UMatData
int refcount;
uchar* data;
uchar* origdata;
size_t size, capacity;
size_t size;
int flags;
void* handle;
......
......@@ -56,6 +56,8 @@ CV_EXPORTS_W bool haveAmdFft();
CV_EXPORTS_W void setUseOpenCL(bool flag);
CV_EXPORTS_W void finish();
CV_EXPORTS bool haveSVM();
class CV_EXPORTS Context;
class CV_EXPORTS Device;
class CV_EXPORTS Kernel;
......@@ -248,7 +250,10 @@ public:
void* ptr() const;
friend void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device);
protected:
bool useSVM() const;
void setUseSVM(bool enabled);
struct Impl;
Impl* p;
};
......@@ -666,8 +671,17 @@ protected:
CV_EXPORTS MatAllocator* getOpenCLAllocator();
CV_EXPORTS_W bool isPerformanceCheckBypassed();
#define OCL_PERFORMANCE_CHECK(condition) (cv::ocl::isPerformanceCheckBypassed() || (condition))
#ifdef __OPENCV_BUILD
namespace internal {
CV_EXPORTS bool isPerformanceCheckBypassed();
#define OCL_PERFORMANCE_CHECK(condition) (cv::ocl::internal::isPerformanceCheckBypassed() || (condition))
CV_EXPORTS bool isCLBuffer(UMat& u);
} // namespace internal
#endif
//! @}
......
/* See LICENSE file in the root OpenCV directory */
#ifndef __OPENCV_CORE_OPENCL_SVM_HPP__
#define __OPENCV_CORE_OPENCL_SVM_HPP__
//
// Internal usage only (binary compatibility is not guaranteed)
//
#ifndef __OPENCV_BUILD
#error Internal header file
#endif
#if defined(HAVE_OPENCL) && defined(HAVE_OPENCL_SVM)
#include "runtime/opencl_core.hpp"
#include "runtime/opencl_svm_20.hpp"
#include "runtime/opencl_svm_hsa_extension.hpp"
namespace cv { namespace ocl { namespace svm {
/**
 * Small value type wrapping a bit set of Shared Virtual Memory capabilities.
 *
 * The wrapped integer is an OR-ed combination of the Value bits; a value of
 * zero means that no SVM capability is available. The object converts
 * implicitly from and to int.
 */
struct SVMCapabilities
{
    //! Individual capability bits that can be combined in value_.
    enum Value
    {
        SVM_COARSE_GRAIN_BUFFER = 0x1,
        SVM_FINE_GRAIN_BUFFER   = 0x2,
        SVM_FINE_GRAIN_SYSTEM   = 0x4,
        SVM_ATOMICS             = 0x8,
    };

    //! Raw capability bits (combination of Value entries).
    int value_;

    //! Wraps the given capability bits; the default is "no capabilities".
    SVMCapabilities(int capabilities = 0)
        : value_(capabilities)
    {
    }

    //! Returns the raw capability bits.
    operator int() const
    {
        return value_;
    }

    //! True when no capability bit is set at all.
    inline bool isNoSVMSupport() const
    {
        return !value_;
    }

    //! True when the coarse-grain buffer bit is set.
    inline bool isSupportCoarseGrainBuffer() const
    {
        return (value_ & SVM_COARSE_GRAIN_BUFFER) == SVM_COARSE_GRAIN_BUFFER;
    }

    //! True when the fine-grain buffer bit is set.
    inline bool isSupportFineGrainBuffer() const
    {
        return (value_ & SVM_FINE_GRAIN_BUFFER) == SVM_FINE_GRAIN_BUFFER;
    }

    //! True when the fine-grain system bit is set.
    inline bool isSupportFineGrainSystem() const
    {
        return (value_ & SVM_FINE_GRAIN_SYSTEM) == SVM_FINE_GRAIN_SYSTEM;
    }

    //! True when the atomics bit is set.
    inline bool isSupportAtomics() const
    {
        return (value_ & SVM_ATOMICS) == SVM_ATOMICS;
    }
};
CV_EXPORTS const SVMCapabilities getSVMCapabilitites(const ocl::Context& context);
/**
 * Table of SVM entry points used for a context.
 *
 * Every pointer starts out as NULL. The clSetKernelExecInfo and
 * clEnqueueSVMFree slots are currently disabled (commented out), so they are
 * neither stored nor checked by isValid().
 */
struct SVMFunctions
{
    clSVMAllocAMD_fn fn_clSVMAlloc;
    clSVMFreeAMD_fn fn_clSVMFree;
    clSetKernelArgSVMPointerAMD_fn fn_clSetKernelArgSVMPointer;
    //clSetKernelExecInfoAMD_fn fn_clSetKernelExecInfo;
    //clEnqueueSVMFreeAMD_fn fn_clEnqueueSVMFree;
    clEnqueueSVMMemcpyAMD_fn fn_clEnqueueSVMMemcpy;
    clEnqueueSVMMemFillAMD_fn fn_clEnqueueSVMMemFill;
    clEnqueueSVMMapAMD_fn fn_clEnqueueSVMMap;
    clEnqueueSVMUnmapAMD_fn fn_clEnqueueSVMUnmap;

    //! Creates an empty table: all entry points are NULL.
    inline SVMFunctions()
        : fn_clSVMAlloc(NULL)
        , fn_clSVMFree(NULL)
        , fn_clSetKernelArgSVMPointer(NULL)
        //, fn_clSetKernelExecInfo(NULL)
        //, fn_clEnqueueSVMFree(NULL)
        , fn_clEnqueueSVMMemcpy(NULL)
        , fn_clEnqueueSVMMemFill(NULL)
        , fn_clEnqueueSVMMap(NULL)
        , fn_clEnqueueSVMUnmap(NULL)
    {
    }

    //! Returns true only when every active entry point is non-NULL.
    inline bool isValid() const
    {
        if (fn_clSVMAlloc == NULL || fn_clSVMFree == NULL || fn_clSetKernelArgSVMPointer == NULL)
            return false;
        // fn_clSetKernelExecInfo / fn_clEnqueueSVMFree are not part of the check (disabled)
        if (fn_clEnqueueSVMMemcpy == NULL || fn_clEnqueueSVMMemFill == NULL)
            return false;
        return fn_clEnqueueSVMMap != NULL && fn_clEnqueueSVMUnmap != NULL;
    }
};
// The SVMFunctions object must live at least as long as the context it belongs to
CV_EXPORTS const SVMFunctions* getSVMFunctions(const ocl::Context& context);
CV_EXPORTS bool useSVM(UMatUsageFlags usageFlags);
}}} //namespace cv::ocl::svm
#endif
#endif // __OPENCV_CORE_OPENCL_SVM_HPP__
/* End of file. */
......@@ -62,6 +62,18 @@
#endif
#endif
#ifdef HAVE_OPENCL_SVM
#define clSVMAlloc clSVMAlloc_
#define clSVMFree clSVMFree_
#define clSetKernelArgSVMPointer clSetKernelArgSVMPointer_
#define clSetKernelExecInfo clSetKernelExecInfo_
#define clEnqueueSVMFree clEnqueueSVMFree_
#define clEnqueueSVMMemcpy clEnqueueSVMMemcpy_
#define clEnqueueSVMMemFill clEnqueueSVMMemFill_
#define clEnqueueSVMMap clEnqueueSVMMap_
#define clEnqueueSVMUnmap clEnqueueSVMUnmap_
#endif
#include "autogenerated/opencl_core.hpp"
#endif // HAVE_OPENCL_STATIC
......
/* See LICENSE file in the root OpenCV directory */
#ifndef __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP__
#define __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP__
#if defined(HAVE_OPENCL_SVM)
#include "opencl_core.hpp"
#include "opencl_svm_definitions.hpp"
#ifndef HAVE_OPENCL_STATIC
#undef clSVMAlloc
#define clSVMAlloc clSVMAlloc_pfn
#undef clSVMFree
#define clSVMFree clSVMFree_pfn
#undef clSetKernelArgSVMPointer
#define clSetKernelArgSVMPointer clSetKernelArgSVMPointer_pfn
#undef clSetKernelExecInfo
//#define clSetKernelExecInfo clSetKernelExecInfo_pfn
#undef clEnqueueSVMFree
//#define clEnqueueSVMFree clEnqueueSVMFree_pfn
#undef clEnqueueSVMMemcpy
#define clEnqueueSVMMemcpy clEnqueueSVMMemcpy_pfn
#undef clEnqueueSVMMemFill
#define clEnqueueSVMMemFill clEnqueueSVMMemFill_pfn
#undef clEnqueueSVMMap
#define clEnqueueSVMMap clEnqueueSVMMap_pfn
#undef clEnqueueSVMUnmap
#define clEnqueueSVMUnmap clEnqueueSVMUnmap_pfn
extern CL_RUNTIME_EXPORT void* (CL_API_CALL *clSVMAlloc)(cl_context context, cl_svm_mem_flags flags, size_t size, unsigned int alignment);
extern CL_RUNTIME_EXPORT void (CL_API_CALL *clSVMFree)(cl_context context, void* svm_pointer);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clSetKernelArgSVMPointer)(cl_kernel kernel, cl_uint arg_index, const void* arg_value);
//extern CL_RUNTIME_EXPORT void* (CL_API_CALL *clSetKernelExecInfo)(cl_kernel kernel, cl_kernel_exec_info param_name, size_t param_value_size, const void* param_value);
//extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMFree)(cl_command_queue command_queue, cl_uint num_svm_pointers, void* svm_pointers[],
// void (CL_CALLBACK *pfn_free_func)(cl_command_queue queue, cl_uint num_svm_pointers, void* svm_pointers[], void* user_data), void* user_data,
// cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMMemcpy)(cl_command_queue command_queue, cl_bool blocking_copy, void* dst_ptr, const void* src_ptr, size_t size,
cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMMemFill)(cl_command_queue command_queue, void* svm_ptr, const void* pattern, size_t pattern_size, size_t size,
cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMMap)(cl_command_queue command_queue, cl_bool blocking_map, cl_map_flags map_flags, void* svm_ptr, size_t size,
cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMUnmap)(cl_command_queue command_queue, void* svm_ptr,
cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
#endif // HAVE_OPENCL_STATIC
#endif // HAVE_OPENCL_SVM
#endif // __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP__
/* See LICENSE file in the root OpenCV directory */
#ifndef __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP__
#define __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP__
#if defined(HAVE_OPENCL_SVM)
#if defined(CL_VERSION_2_0)
// OpenCL 2.0 contains SVM definitions
#else
typedef cl_bitfield cl_device_svm_capabilities;
typedef cl_bitfield cl_svm_mem_flags;
typedef cl_uint cl_kernel_exec_info;
//
// TODO Add real values after OpenCL 2.0 release
//
#ifndef CL_DEVICE_SVM_CAPABILITIES
#define CL_DEVICE_SVM_CAPABILITIES 0x1053
#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER (1 << 0)
#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER (1 << 1)
#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM (1 << 2)
#define CL_DEVICE_SVM_ATOMICS (1 << 3)
#endif
#ifndef CL_MEM_SVM_FINE_GRAIN_BUFFER
#define CL_MEM_SVM_FINE_GRAIN_BUFFER (1 << 10)
#endif
#ifndef CL_MEM_SVM_ATOMICS
#define CL_MEM_SVM_ATOMICS (1 << 11)
#endif
#endif // CL_VERSION_2_0
#endif // HAVE_OPENCL_SVM
#endif // __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP__
/* See LICENSE file in the root OpenCV directory */
#ifndef __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP__
#define __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP__
#if defined(HAVE_OPENCL_SVM)
#include "opencl_core.hpp"
#ifndef CL_DEVICE_SVM_CAPABILITIES_AMD
//
// Part of the file is an extract from the cl_ext.h file from AMD APP SDK package.
// Below is the original copyright.
//
/*******************************************************************************
* Copyright (c) 2008-2013 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
/*******************************************
* Shared Virtual Memory (SVM) extension
*******************************************/
typedef cl_bitfield cl_device_svm_capabilities_amd;
typedef cl_bitfield cl_svm_mem_flags_amd;
typedef cl_uint cl_kernel_exec_info_amd;
/* cl_device_info */
#define CL_DEVICE_SVM_CAPABILITIES_AMD 0x1053
#define CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT_AMD 0x1054
/* cl_device_svm_capabilities_amd */
#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_AMD (1 << 0)
#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER_AMD (1 << 1)
#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM_AMD (1 << 2)
#define CL_DEVICE_SVM_ATOMICS_AMD (1 << 3)
/* cl_svm_mem_flags_amd */
#define CL_MEM_SVM_FINE_GRAIN_BUFFER_AMD (1 << 10)
#define CL_MEM_SVM_ATOMICS_AMD (1 << 11)
/* cl_mem_info */
#define CL_MEM_USES_SVM_POINTER_AMD 0x1109
/* cl_kernel_exec_info_amd */
#define CL_KERNEL_EXEC_INFO_SVM_PTRS_AMD 0x11B6
#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_AMD 0x11B7
/* cl_command_type */
#define CL_COMMAND_SVM_FREE_AMD 0x1209
#define CL_COMMAND_SVM_MEMCPY_AMD 0x120A
#define CL_COMMAND_SVM_MEMFILL_AMD 0x120B
#define CL_COMMAND_SVM_MAP_AMD 0x120C
#define CL_COMMAND_SVM_UNMAP_AMD 0x120D
typedef CL_API_ENTRY void*
(CL_API_CALL * clSVMAllocAMD_fn)(
cl_context /* context */,
cl_svm_mem_flags_amd /* flags */,
size_t /* size */,
unsigned int /* alignment */
) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY void
(CL_API_CALL * clSVMFreeAMD_fn)(
cl_context /* context */,
void* /* svm_pointer */
) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int
(CL_API_CALL * clEnqueueSVMFreeAMD_fn)(
cl_command_queue /* command_queue */,
cl_uint /* num_svm_pointers */,
void** /* svm_pointers */,
void (CL_CALLBACK *)( /*pfn_free_func*/
cl_command_queue /* queue */,
cl_uint /* num_svm_pointers */,
void** /* svm_pointers */,
void* /* user_data */),
void* /* user_data */,
cl_uint /* num_events_in_wait_list */,
const cl_event* /* event_wait_list */,
cl_event* /* event */
) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int
(CL_API_CALL * clEnqueueSVMMemcpyAMD_fn)(
cl_command_queue /* command_queue */,
cl_bool /* blocking_copy */,
void* /* dst_ptr */,
const void* /* src_ptr */,
size_t /* size */,
cl_uint /* num_events_in_wait_list */,
const cl_event* /* event_wait_list */,
cl_event* /* event */
) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int
(CL_API_CALL * clEnqueueSVMMemFillAMD_fn)(
cl_command_queue /* command_queue */,
void* /* svm_ptr */,
const void* /* pattern */,
size_t /* pattern_size */,
size_t /* size */,
cl_uint /* num_events_in_wait_list */,
const cl_event* /* event_wait_list */,
cl_event* /* event */
) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int
(CL_API_CALL * clEnqueueSVMMapAMD_fn)(
cl_command_queue /* command_queue */,
cl_bool /* blocking_map */,
cl_map_flags /* map_flags */,
void* /* svm_ptr */,
size_t /* size */,
cl_uint /* num_events_in_wait_list */,
const cl_event* /* event_wait_list */,
cl_event* /* event */
) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int
(CL_API_CALL * clEnqueueSVMUnmapAMD_fn)(
cl_command_queue /* command_queue */,
void* /* svm_ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event* /* event_wait_list */,
cl_event* /* event */
) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int
(CL_API_CALL * clSetKernelArgSVMPointerAMD_fn)(
cl_kernel /* kernel */,
cl_uint /* arg_index */,
const void * /* arg_value */
) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int
(CL_API_CALL * clSetKernelExecInfoAMD_fn)(
cl_kernel /* kernel */,
cl_kernel_exec_info_amd /* param_name */,
size_t /* param_value_size */,
const void * /* param_value */
) CL_EXT_SUFFIX__VERSION_1_2;
#endif
#endif // HAVE_OPENCL_SVM
#endif // __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP__
......@@ -721,6 +721,16 @@ static bool ocl_gemm_amdblas( InputArray matA, InputArray matB, double alpha,
return false;
UMat A = matA.getUMat(), B = matB.getUMat(), D = matD.getUMat();
if (!ocl::internal::isCLBuffer(A) || !ocl::internal::isCLBuffer(B) || !ocl::internal::isCLBuffer(D))
{
return false;
}
if (haveC)
{
UMat C = matC.getUMat();
if (!ocl::internal::isCLBuffer(C))
return false;
}
if (haveC)
ctrans ? transpose(matC, D) : matC.copyTo(D);
else
......
......@@ -159,8 +159,9 @@ void MatAllocator::copy(UMatData* usrc, UMatData* udst, int dims, const size_t s
memcpy(ptrs[1], ptrs[0], planesz);
}
BufferPoolController* MatAllocator::getBufferPoolController() const
// Default implementation: the pool identifier is ignored and every call
// returns the address of the same function-local DummyBufferPoolController.
BufferPoolController* MatAllocator::getBufferPoolController(const char* id) const
{
    static DummyBufferPoolController sharedDummyController;
    (void)id; // not used by the default implementation
    return &sharedDummyController;
}
......
......@@ -48,6 +48,8 @@
#define CV_OPENCL_ALWAYS_SHOW_BUILD_LOG 0
#define CV_OPENCL_SHOW_RUN_ERRORS 0
#define CV_OPENCL_SHOW_SVM_ERROR_LOG 1
#define CV_OPENCL_SHOW_SVM_LOG 0
#include "opencv2/core/bufferpool.hpp"
#ifndef LOG_BUFFER_POOL
......@@ -111,6 +113,20 @@ static size_t getConfigurationParameterForSize(const char* name, size_t defaultV
CV_ErrorNoReturn(cv::Error::StsBadArg, cv::format("Invalid value for %s parameter: %s", name, value.c_str()));
}
#if CV_OPENCL_SHOW_SVM_LOG
// TODO add timestamp logging
// printf-style trace macro: prints a "line N (ocl.cpp): " prefix followed by
// the formatted message. It is written as ONE comma expression (instead of two
// separate statements) so that it stays a single statement when used as the
// body of an unbraced if/else or loop.
#define CV_OPENCL_SVM_TRACE_P(...) ((void)printf("line %d (ocl.cpp): ", __LINE__), (void)printf(__VA_ARGS__))
#else
// Tracing disabled: the macro and its arguments expand to nothing.
#define CV_OPENCL_SVM_TRACE_P(...)
#endif
#if CV_OPENCL_SHOW_SVM_ERROR_LOG
// TODO add timestamp logging
// printf-style error trace macro: prints an "Error on line N (ocl.cpp): "
// prefix followed by the formatted message. It is written as ONE comma
// expression (instead of two separate statements) so that it stays a single
// statement when used as the body of an unbraced if/else or loop.
#define CV_OPENCL_SVM_TRACE_ERROR_P(...) ((void)printf("Error on line %d (ocl.cpp): ", __LINE__), (void)printf(__VA_ARGS__))
#else
// Error tracing disabled: the macro and its arguments expand to nothing.
#define CV_OPENCL_SVM_TRACE_ERROR_P(...)
#endif
#include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp"
#include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
......@@ -920,6 +936,7 @@ OCL_FUNC(cl_int, clGetSupportedImageFormats,
cl_uint * num_image_formats),
(context, flags, image_type, num_entries, image_formats, num_image_formats))
/*
OCL_FUNC(cl_int, clGetMemObjectInfo,
(cl_mem memobj,
......@@ -1342,6 +1359,12 @@ static bool isRaiseError()
#define CV_OclDbgAssert(expr) do { if (isRaiseError()) { CV_Assert(expr); } else { (void)(expr); } } while ((void)0, 0)
#endif
#ifdef HAVE_OPENCL_SVM
#include "opencv2/core/opencl/runtime/opencl_svm_20.hpp"
#include "opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp"
#include "opencv2/core/opencl/opencl_svm.hpp"
#endif
namespace cv { namespace ocl {
struct UMat2D
......@@ -1627,6 +1650,15 @@ bool haveAmdFft()
#endif
// Reports whether the library was compiled with OpenCL SVM support
// (HAVE_OPENCL_SVM defined at build time). This is a compile-time answer only;
// it does not query any device.
bool haveSVM()
{
#ifdef HAVE_OPENCL_SVM
    const bool svmCompiledIn = true;
#else
    const bool svmCompiledIn = false;
#endif
    return svmCompiledIn;
}
void finish()
{
Queue::getDefault().finish();
......@@ -2357,12 +2389,86 @@ not_found:
}
#endif
#ifdef HAVE_OPENCL_SVM
namespace svm {
// SVM-related allocator flag bits (tested against UMatData::allocatorFlags_
// elsewhere in this file). The low 16 bits must stay unused by these values.
enum AllocatorFlags {
    OPENCL_SVM_COARSE_GRAIN_BUFFER = 0x10000, // clSVMAlloc + SVM map/unmap
    OPENCL_SVM_FINE_GRAIN_BUFFER   = 0x20000, // clSVMAlloc
    OPENCL_SVM_FINE_GRAIN_SYSTEM   = 0x30000, // direct access
    OPENCL_SVM_BUFFER_MASK         = 0x30000, // covers the three buffer kinds above
    OPENCL_SVM_BUFFER_MAP          = 0x40000
};
// Returns the boolean parameter OPENCV_OPENCL_SVM_FORCE_UMAT_USAGE
// (default: false). The value is obtained from getBoolParameter() on the
// first successful call and served from a static cache afterwards.
static bool checkForceSVMUmatUsage()
{
    static bool isCached = false;
    static bool forceUsage = false;
    if (isCached)
        return forceUsage;
    forceUsage = getBoolParameter("OPENCV_OPENCL_SVM_FORCE_UMAT_USAGE", false);
    isCached = true;
    return forceUsage;
}
// Returns the boolean parameter OPENCV_OPENCL_SVM_DISABLE_UMAT_USAGE
// (default: false). The value is obtained from getBoolParameter() on the
// first successful call and served from a static cache afterwards.
static bool checkDisableSVMUMatUsage()
{
    static bool isCached = false;
    static bool disableUsage = false;
    if (isCached)
        return disableUsage;
    disableUsage = getBoolParameter("OPENCV_OPENCL_SVM_DISABLE_UMAT_USAGE", false);
    isCached = true;
    return disableUsage;
}
// Returns the boolean parameter OPENCV_OPENCL_SVM_DISABLE (default: false).
// The value is obtained from getBoolParameter() on the first successful call
// and served from a static cache afterwards.
static bool checkDisableSVM()
{
    static bool isCached = false;
    static bool disableSVM = false;
    if (isCached)
        return disableSVM;
    disableSVM = getBoolParameter("OPENCV_OPENCL_SVM_DISABLE", false);
    isCached = true;
    return disableSVM;
}
// see SVMCapabilities
// Returns the bit mask that is AND-ed with the detected SVM capabilities
// (bit layout: see SVMCapabilities).
//
// The mask is read once from the OPENCV_OPENCL_SVM_CAPABILITIES_MASK
// environment variable (parsed with atoi). When the variable is not set, all
// bits are 1, i.e. no capability is filtered out. The result of the first call
// is cached in both cases, so every later call returns the same value.
static unsigned int getSVMCapabilitiesMask()
{
    static bool initialized = false;
    static unsigned int mask = ~0U; // all bits 1 unless overridden below
    if (!initialized)
    {
        const char* envValue = getenv("OPENCV_OPENCL_SVM_CAPABILITIES_MASK");
        if (envValue != NULL)
        {
            mask = atoi(envValue);
        }
        // Cache the default as well, so the environment is consulted only once.
        initialized = true;
    }
    return mask;
}
} // namespace
#endif
struct Context::Impl
{
Impl()
static Context::Impl* get(Context& context) { return context.p; }
// Common field initialisation shared by the constructors: resets the SVM
// "initialized" flag (SVM builds only), clears the handle and sets the
// reference count to 1.
// NOTE(review): identifiers containing a double underscore are reserved in
// C++; renaming would require touching every caller, so the name is kept.
void __init()
{
#ifdef HAVE_OPENCL_SVM
    svmInitialized = false; // SVM detection has not run yet for this object
#endif
    handle = 0;
    refcount = 1;
}
// Default constructor: only performs the common field initialisation
// done by __init().
Impl()
{
__init();
}
void setDefault()
......@@ -2401,8 +2507,7 @@ struct Context::Impl
Impl(int dtype0)
{
refcount = 1;
handle = 0;
__init();
cl_int retval = 0;
cl_platform_id pl = (cl_platform_id)Platform::getDefault().ptr();
......@@ -2419,7 +2524,7 @@ struct Context::Impl
AutoBuffer<void*> dlistbuf(nd0*2+1);
cl_device_id* dlist = (cl_device_id*)(void**)dlistbuf;
cl_device_id* dlist_new = dlist + nd0;
CV_OclDbgAssert(clGetDeviceIDs( pl, dtype, nd0, dlist, &nd0 ) == CL_SUCCESS);
CV_OclDbgAssert(clGetDeviceIDs( pl, dtype, nd0, dlist, &nd0 ) == CL_SUCCESS);
String name0;
for(i = 0; i < nd0; i++)
......@@ -2496,6 +2601,144 @@ struct Context::Impl
};
typedef std::map<HashKey, Program> phash_t;
phash_t phash;
#ifdef HAVE_OPENCL_SVM
bool svmInitialized;
bool svmAvailable;
bool svmEnabled;
svm::SVMCapabilities svmCapabilities;
svm::SVMFunctions svmFunctions;
// Detects Shared Virtual Memory support for this context and fills in
// svmCapabilities, svmFunctions, svmAvailable and svmEnabled.
// svmInitialized is set to true on every normal exit path (success or noSVM).
//
// Steps:
//  1. Query CL_DEVICE_SVM_CAPABILITIES of devices[0] and translate the result
//     into svm::SVMCapabilities bits, filtered by svm::getSVMCapabilitiesMask().
//  2. Try the core entry points (clSVMAlloc & co.) with a small
//     allocate / map / write / unmap / free round trip.
//  3. If step 2 throws, fall back to the "cl_amd_svm" extension and resolve the
//     *AMD entry points through clGetExtensionFunctionAddressForPlatform.
//  4. On any failure jump to noSVM, which marks SVM as unavailable.
//
// Requires a valid context handle (asserted below).
void svmInit()
{
CV_Assert(handle != NULL);
// Only the first device of the context is inspected.
const Device& device = devices[0];
cl_device_svm_capabilities deviceCaps = 0;
CV_Assert(((void)0, CL_DEVICE_SVM_CAPABILITIES == CL_DEVICE_SVM_CAPABILITIES_AMD)); // Check assumption
cl_int status = clGetDeviceInfo((cl_device_id)device.ptr(), CL_DEVICE_SVM_CAPABILITIES, sizeof(deviceCaps), &deviceCaps, NULL);
if (status != CL_SUCCESS)
{
CV_OPENCL_SVM_TRACE_ERROR_P("CL_DEVICE_SVM_CAPABILITIES via clGetDeviceInfo failed: %d\n", status);
goto noSVM;
}
CV_OPENCL_SVM_TRACE_P("CL_DEVICE_SVM_CAPABILITIES returned: 0x%x\n", (int)deviceCaps);
CV_Assert(((void)0, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER == CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_AMD)); // Check assumption
// Translate the OpenCL capability bits into the library's own bit set.
svmCapabilities.value_ =
((deviceCaps & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER) ? svm::SVMCapabilities::SVM_COARSE_GRAIN_BUFFER : 0) |
((deviceCaps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) ? svm::SVMCapabilities::SVM_FINE_GRAIN_BUFFER : 0) |
((deviceCaps & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM) ? svm::SVMCapabilities::SVM_FINE_GRAIN_SYSTEM : 0) |
((deviceCaps & CL_DEVICE_SVM_ATOMICS) ? svm::SVMCapabilities::SVM_ATOMICS : 0);
// Apply the user-configurable filter (OPENCV_OPENCL_SVM_CAPABILITIES_MASK).
svmCapabilities.value_ &= svm::getSVMCapabilitiesMask();
if (svmCapabilities.value_ == 0)
{
CV_OPENCL_SVM_TRACE_ERROR_P("svmCapabilities is empty\n");
goto noSVM;
}
try
{
// Try OpenCL 2.0
CV_OPENCL_SVM_TRACE_P("Try SVM from OpenCL 2.0 ...\n");
// Smoke test: allocate a 100-byte SVM buffer with default alignment (0).
void* ptr = clSVMAlloc(handle, CL_MEM_READ_WRITE, 100, 0);
if (!ptr)
{
CV_OPENCL_SVM_TRACE_ERROR_P("clSVMAlloc returned NULL...\n");
CV_ErrorNoReturn(Error::StsBadArg, "clSVMAlloc returned NULL");
}
try
{
bool error = false;
cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
// Blocking map for writing, followed by an explicit clFinish.
if (CL_SUCCESS != clEnqueueSVMMap(q, CL_TRUE, CL_MAP_WRITE, ptr, 100, 0, NULL, NULL))
{
CV_OPENCL_SVM_TRACE_ERROR_P("clEnqueueSVMMap failed...\n");
CV_ErrorNoReturn(Error::StsBadArg, "clEnqueueSVMMap FAILED");
}
clFinish(q);
// NOTE(review): this handler only sees C++ exceptions; whether an invalid
// memory access is reported that way depends on the platform/compiler - confirm.
try
{
((int*)ptr)[0] = 100;
}
catch (...)
{
CV_OPENCL_SVM_TRACE_ERROR_P("SVM buffer access test FAILED\n");
error = true;
}
// The buffer is unmapped even when the write test failed; the failure is
// reported afterwards through the 'error' flag.
if (CL_SUCCESS != clEnqueueSVMUnmap(q, ptr, 0, NULL, NULL))
{
CV_OPENCL_SVM_TRACE_ERROR_P("clEnqueueSVMUnmap failed...\n");
CV_ErrorNoReturn(Error::StsBadArg, "clEnqueueSVMUnmap FAILED");
}
clFinish(q);
if (error)
{
CV_ErrorNoReturn(Error::StsBadArg, "OpenCL SVM buffer access test was FAILED");
}
}
catch (...)
{
// Release the test buffer before propagating the failure to the outer handler.
CV_OPENCL_SVM_TRACE_ERROR_P("OpenCL SVM buffer access test was FAILED\n");
clSVMFree(handle, ptr);
throw;
}
clSVMFree(handle, ptr);
// The round trip succeeded: publish the core entry points.
svmFunctions.fn_clSVMAlloc = clSVMAlloc;
svmFunctions.fn_clSVMFree = clSVMFree;
svmFunctions.fn_clSetKernelArgSVMPointer = clSetKernelArgSVMPointer;
//svmFunctions.fn_clSetKernelExecInfo = clSetKernelExecInfo;
//svmFunctions.fn_clEnqueueSVMFree = clEnqueueSVMFree;
svmFunctions.fn_clEnqueueSVMMemcpy = clEnqueueSVMMemcpy;
svmFunctions.fn_clEnqueueSVMMemFill = clEnqueueSVMMemFill;
svmFunctions.fn_clEnqueueSVMMap = clEnqueueSVMMap;
svmFunctions.fn_clEnqueueSVMUnmap = clEnqueueSVMUnmap;
}
catch (...)
{
// Any exception from the block above (not only a clSVMAlloc failure) leads here.
CV_OPENCL_SVM_TRACE_P("clSVMAlloc failed, trying HSA extension...\n");
try
{
// Try HSA extension
String extensions = device.extensions();
if (extensions.find("cl_amd_svm") == String::npos)
{
CV_OPENCL_SVM_TRACE_P("Device extension doesn't have cl_amd_svm: %s\n", extensions.c_str());
goto noSVM;
}
// Extension entry points are resolved per platform, so look up the device's platform first.
cl_platform_id p = NULL;
status = clGetDeviceInfo((cl_device_id)device.ptr(), CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &p, NULL);
CV_Assert(status == CL_SUCCESS);
svmFunctions.fn_clSVMAlloc = (clSVMAllocAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSVMAllocAMD");
svmFunctions.fn_clSVMFree = (clSVMFreeAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSVMFreeAMD");
svmFunctions.fn_clSetKernelArgSVMPointer = (clSetKernelArgSVMPointerAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSetKernelArgSVMPointerAMD");
//svmFunctions.fn_clSetKernelExecInfo = (clSetKernelExecInfoAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSetKernelExecInfoAMD");
//svmFunctions.fn_clEnqueueSVMFree = (clEnqueueSVMFreeAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMFreeAMD");
svmFunctions.fn_clEnqueueSVMMemcpy = (clEnqueueSVMMemcpyAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMemcpyAMD");
svmFunctions.fn_clEnqueueSVMMemFill = (clEnqueueSVMMemFillAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMemFillAMD");
svmFunctions.fn_clEnqueueSVMMap = (clEnqueueSVMMapAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMapAMD");
svmFunctions.fn_clEnqueueSVMUnmap = (clEnqueueSVMUnmapAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMUnmapAMD");
// Every active entry point must have been resolved; a failed assert is
// handled by the catch below and ends in noSVM.
CV_Assert(svmFunctions.isValid());
}
catch (...)
{
CV_OPENCL_SVM_TRACE_P("Something is totally wrong\n");
goto noSVM;
}
}
// Success: SVM is available; it is enabled unless OPENCV_OPENCL_SVM_DISABLE is set.
svmAvailable = true;
svmEnabled = !svm::checkDisableSVM();
svmInitialized = true;
CV_OPENCL_SVM_TRACE_P("OpenCV OpenCL SVM support initialized\n");
return;
noSVM:
// Failure path: SVM is reported as unavailable. Only fn_clSVMAlloc is reset
// here; the other function pointers keep whatever value they had.
CV_OPENCL_SVM_TRACE_P("OpenCL SVM is not detected\n");
svmAvailable = false;
svmEnabled = false;
svmCapabilities.value_ = 0;
svmInitialized = true;
svmFunctions.fn_clSVMAlloc = NULL;
return;
}
#endif
};
......@@ -2610,6 +2853,71 @@ Program Context::getProg(const ProgramSource& prog,
return p ? p->getProg(prog, buildopts, errmsg) : Program();
}
#ifdef HAVE_OPENCL_SVM
bool Context::useSVM() const
{
Context::Impl* i = p;
CV_Assert(i);
if (!i->svmInitialized)
i->svmInit();
return i->svmEnabled;
}
void Context::setUseSVM(bool enabled)
{
Context::Impl* i = p;
CV_Assert(i);
if (!i->svmInitialized)
i->svmInit();
if (enabled && !i->svmAvailable)
{
CV_ErrorNoReturn(Error::StsError, "OpenCL Shared Virtual Memory (SVM) is not supported by OpenCL device");
}
i->svmEnabled = enabled;
}
#else
// Build without HAVE_OPENCL_SVM: SVM is never in use.
bool Context::useSVM() const
{
    return false;
}
// Build without HAVE_OPENCL_SVM: only "disable" is accepted; a request to
// enable SVM fails the assertion.
void Context::setUseSVM(bool enabled)
{
    CV_Assert(!enabled);
}
#endif
#ifdef HAVE_OPENCL_SVM
namespace svm {
// Returns a copy of the SVM capabilities stored for the given context. SVM
// detection (Impl::svmInit) is run lazily on first use. Asserts that the
// context has an implementation object.
const SVMCapabilities getSVMCapabilitites(const ocl::Context& context)
{
    Context::Impl* const impl = context.p;
    CV_Assert(impl);
    const bool needsDetection = !impl->svmInitialized;
    if (needsDetection)
    {
        impl->svmInit();
    }
    return impl->svmCapabilities;
}
// Returns a pointer to the SVM function table stored inside the context's
// implementation object. Unlike getSVMCapabilitites(), this does NOT trigger
// SVM detection: it asserts that detection has already run and that
// fn_clSVMAlloc is set.
CV_EXPORTS const SVMFunctions* getSVMFunctions(const ocl::Context& context)
{
    Context::Impl* const impl = context.p;
    CV_Assert(impl);
    CV_Assert(impl->svmInitialized); // getSVMCapabilitites() must be called first
    const SVMFunctions& table = impl->svmFunctions;
    CV_Assert(table.fn_clSVMAlloc != NULL);
    return &table;
}
// Decides whether a buffer with the given usage flags should be SVM-backed.
// Order of precedence:
//   1. checkForceSVMUmatUsage()   -> always use SVM
//   2. checkDisableSVMUMatUsage() -> never use SVM
//   3. otherwise SVM is used only when USAGE_ALLOCATE_SHARED_MEMORY is set
//      (SVM is not used by default).
CV_EXPORTS bool useSVM(UMatUsageFlags usageFlags)
{
    if (checkForceSVMUmatUsage())
    {
        return true;
    }
    if (checkDisableSVMUMatUsage())
    {
        return false;
    }
    const bool sharedMemoryRequested = (usageFlags & USAGE_ALLOCATE_SHARED_MEMORY) != 0;
    return sharedMemoryRequested;
}
} // namespace cv::ocl::svm
#endif // HAVE_OPENCL_SVM
void initializeContextFromHandle(Context& ctx, void* platform, void* _context, void* _device)
{
cl_context context = (cl_context)_context;
......@@ -2979,12 +3287,33 @@ int Kernel::set(int i, const KernelArg& arg)
return -1;
}
#ifdef HAVE_OPENCL_SVM
if ((arg.m->u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
{
const Context& ctx = Context::getDefault();
const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
uchar*& svmDataPtr = (uchar*&)arg.m->u->handle;
CV_OPENCL_SVM_TRACE_P("clSetKernelArgSVMPointer: %p\n", svmDataPtr);
#if 1 // TODO
cl_int status = svmFns->fn_clSetKernelArgSVMPointer(p->handle, (cl_uint)i, svmDataPtr);
#else
cl_int status = svmFns->fn_clSetKernelArgSVMPointer(p->handle, (cl_uint)i, &svmDataPtr);
#endif
CV_Assert(status == CL_SUCCESS);
}
else
#endif
{
CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h) == CL_SUCCESS);
}
if (ptronly)
CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i++, sizeof(h), &h) == CL_SUCCESS);
{
i++;
}
else if( arg.m->dims <= 2 )
{
UMat2D u2d(*arg.m);
CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h) == CL_SUCCESS);
CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u2d.step), &u2d.step) == CL_SUCCESS);
CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u2d.offset), &u2d.offset) == CL_SUCCESS);
i += 3;
......@@ -3000,7 +3329,6 @@ int Kernel::set(int i, const KernelArg& arg)
else
{
UMat3D u3d(*arg.m);
CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h) == CL_SUCCESS);
CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.slicestep), &u3d.slicestep) == CL_SUCCESS);
CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.step), &u3d.step) == CL_SUCCESS);
CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+3), sizeof(u3d.offset), &u3d.offset) == CL_SUCCESS);
......@@ -3433,39 +3761,55 @@ ProgramSource::hash_t ProgramSource::hash() const
//////////////////////////////////////////// OpenCLAllocator //////////////////////////////////////////////////
template<typename T>
class OpenCLBufferPool
{
protected:
~OpenCLBufferPool() { }
public:
virtual cl_mem allocate(size_t size, CV_OUT size_t& capacity) = 0;
virtual void release(cl_mem handle, size_t capacity) = 0;
virtual T allocate(size_t size) = 0;
virtual void release(T buffer) = 0;
};
class OpenCLBufferPoolImpl : public BufferPoolController, public OpenCLBufferPool
template <typename Derived, typename BufferEntry, typename T>
class OpenCLBufferPoolBaseImpl : public BufferPoolController, public OpenCLBufferPool<T>
{
public:
struct BufferEntry
{
cl_mem clBuffer_;
size_t capacity_;
};
private:
inline Derived& derived() { return *static_cast<Derived*>(this); }
protected:
Mutex mutex_;
size_t currentReservedSize;
size_t maxReservedSize;
std::list<BufferEntry> reservedEntries_; // LRU order
std::list<BufferEntry> allocatedEntries_; // Allocated and used entries
std::list<BufferEntry> reservedEntries_; // LRU order. Allocated, but not used entries
// synchronized
// Looks up the first entry of allocatedEntries_ whose clBuffer_ equals
// 'buffer'. When found, the entry is copied into 'entry', removed from the
// list and true is returned; otherwise 'entry' is left untouched and false is
// returned.
bool _findAndRemoveEntryFromAllocatedList(CV_OUT BufferEntry& entry, T buffer)
{
    typename std::list<BufferEntry>::iterator pos = allocatedEntries_.begin();
    const typename std::list<BufferEntry>::iterator last = allocatedEntries_.end();
    while (pos != last && !(pos->clBuffer_ == buffer))
    {
        ++pos;
    }
    if (pos == last)
    {
        return false;
    }
    entry = *pos; // copy out before the node is destroyed
    allocatedEntries_.erase(pos);
    return true;
}
// synchronized
bool _findAndRemoveEntryFromReservedList(CV_OUT BufferEntry& entry, const size_t size)
{
if (reservedEntries_.empty())
return false;
std::list<BufferEntry>::iterator i = reservedEntries_.begin();
std::list<BufferEntry>::iterator result_pos = reservedEntries_.end();
BufferEntry result = {NULL, 0};
typename std::list<BufferEntry>::iterator i = reservedEntries_.begin();
typename std::list<BufferEntry>::iterator result_pos = reservedEntries_.end();
BufferEntry result;
size_t minDiff = (size_t)(-1);
for (; i != reservedEntries_.end(); ++i)
{
......@@ -3489,6 +3833,7 @@ protected:
reservedEntries_.erase(result_pos);
entry = result;
currentReservedSize -= entry.capacity_;
allocatedEntries_.push_back(entry);
return true;
}
return false;
......@@ -3503,7 +3848,7 @@ protected:
const BufferEntry& entry = reservedEntries_.back();
CV_DbgAssert(currentReservedSize >= entry.capacity_);
currentReservedSize -= entry.capacity_;
_releaseBufferEntry(entry);
derived()._releaseBufferEntry(entry);
reservedEntries_.pop_back();
}
}
......@@ -3523,72 +3868,45 @@ protected:
return 1024*1024;
}
void _allocateBufferEntry(BufferEntry& entry, size_t size)
{
CV_DbgAssert(entry.clBuffer_ == NULL);
entry.capacity_ = alignSize(size, (int)_allocationGranularity(size));
Context& ctx = Context::getDefault();
cl_int retval = CL_SUCCESS;
entry.clBuffer_ = clCreateBuffer((cl_context)ctx.ptr(), CL_MEM_READ_WRITE, entry.capacity_, 0, &retval);
CV_Assert(retval == CL_SUCCESS);
CV_Assert(entry.clBuffer_ != NULL);
if(retval == CL_SUCCESS)
{
CV_IMPL_ADD(CV_IMPL_OCL);
}
LOG_BUFFER_POOL("OpenCL allocate %lld (0x%llx) bytes: %p\n",
(long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_);
}
// Releases the OpenCL memory object held by `entry`.
// The entry must be populated (non-zero capacity_, non-NULL clBuffer_); CV_Assert fires otherwise.
void _releaseBufferEntry(const BufferEntry& entry)
{
    CV_Assert(entry.capacity_ != 0);
    CV_Assert(entry.clBuffer_ != NULL);

    LOG_BUFFER_POOL("OpenCL release buffer: %p, %lld (0x%llx) bytes\n",
                    entry.clBuffer_,
                    (long long)entry.capacity_,
                    (long long)entry.capacity_);

    clReleaseMemObject(entry.clBuffer_);
}
public:
OpenCLBufferPoolImpl()
: currentReservedSize(0), maxReservedSize(0)
OpenCLBufferPoolBaseImpl()
: currentReservedSize(0),
maxReservedSize(0)
{
int poolSize = ocl::Device::getDefault().isIntel() ? 1 << 27 : 0;
maxReservedSize = getConfigurationParameterForSize("OPENCV_OPENCL_BUFFERPOOL_LIMIT", poolSize);
// nothing
}
virtual ~OpenCLBufferPoolImpl()
virtual ~OpenCLBufferPoolBaseImpl()
{
freeAllReservedBuffers();
CV_Assert(reservedEntries_.empty());
}
public:
virtual cl_mem allocate(size_t size, CV_OUT size_t& capacity)
virtual T allocate(size_t size)
{
BufferEntry entry = {NULL, 0};
if (maxReservedSize > 0)
AutoLock locker(mutex_);
BufferEntry entry;
if (maxReservedSize > 0 && _findAndRemoveEntryFromReservedList(entry, size))
{
AutoLock locker(mutex_);
if (_findAndRemoveEntryFromReservedList(entry, size))
{
CV_DbgAssert(size <= entry.capacity_);
LOG_BUFFER_POOL("Reuse reserved buffer: %p\n", entry.clBuffer_);
capacity = entry.capacity_;
return entry.clBuffer_;
}
CV_DbgAssert(size <= entry.capacity_);
LOG_BUFFER_POOL("Reuse reserved buffer: %p\n", entry.clBuffer_);
}
else
{
derived()._allocateBufferEntry(entry, size);
}
_allocateBufferEntry(entry, size);
capacity = entry.capacity_;
return entry.clBuffer_;
}
virtual void release(cl_mem handle, size_t capacity)
virtual void release(T buffer)
{
BufferEntry entry = {handle, capacity};
AutoLock locker(mutex_);
BufferEntry entry;
CV_Assert(_findAndRemoveEntryFromAllocatedList(entry, buffer));
if (maxReservedSize == 0 || entry.capacity_ > maxReservedSize / 8)
{
_releaseBufferEntry(entry);
derived()._releaseBufferEntry(entry);
}
else
{
AutoLock locker(mutex_);
reservedEntries_.push_front(entry);
currentReservedSize += entry.capacity_;
_checkSizeOfReservedEntries();
......@@ -3604,7 +3922,7 @@ public:
maxReservedSize = size;
if (maxReservedSize < oldMaxReservedSize)
{
std::list<BufferEntry>::iterator i = reservedEntries_.begin();
typename std::list<BufferEntry>::iterator i = reservedEntries_.begin();
for (; i != reservedEntries_.end();)
{
const BufferEntry& entry = *i;
......@@ -3612,7 +3930,7 @@ public:
{
CV_DbgAssert(currentReservedSize >= entry.capacity_);
currentReservedSize -= entry.capacity_;
_releaseBufferEntry(entry);
derived()._releaseBufferEntry(entry);
i = reservedEntries_.erase(i);
continue;
}
......@@ -3624,16 +3942,123 @@ public:
virtual void freeAllReservedBuffers()
{
AutoLock locker(mutex_);
std::list<BufferEntry>::const_iterator i = reservedEntries_.begin();
typename std::list<BufferEntry>::const_iterator i = reservedEntries_.begin();
for (; i != reservedEntries_.end(); ++i)
{
const BufferEntry& entry = *i;
_releaseBufferEntry(entry);
derived()._releaseBufferEntry(entry);
}
reservedEntries_.clear();
}
};
// Pool record for one cl_mem buffer: the handle and the capacity recorded for it.
// A default-constructed entry is empty (NULL handle, zero capacity).
struct CLBufferEntry
{
    cl_mem clBuffer_;  // OpenCL memory object; NULL while the entry is empty
    size_t capacity_;  // capacity recorded for clBuffer_; 0 while the entry is empty

    CLBufferEntry()
        : clBuffer_((cl_mem)NULL),
          capacity_(0)
    {
    }
};
// Buffer pool whose entries are cl_mem objects created with clCreateBuffer().
// The base class calls _allocateBufferEntry()/_releaseBufferEntry() through derived().
class OpenCLBufferPoolImpl : public OpenCLBufferPoolBaseImpl<OpenCLBufferPoolImpl, CLBufferEntry, cl_mem>
{
public:
    typedef struct CLBufferEntry BufferEntry;

protected:
    // Extra flag bits OR-ed into CL_MEM_READ_WRITE for every buffer this pool creates.
    int createFlags_;

public:
    OpenCLBufferPoolImpl(int createFlags = 0)
        : createFlags_(createFlags)
    {
    }

    // Creates a new cl_mem for `entry` in the default context.
    // entry.capacity_ becomes alignSize(size, _allocationGranularity(size)); the filled entry
    // is appended to allocatedEntries_. Asserts if buffer creation does not succeed.
    void _allocateBufferEntry(BufferEntry& entry, size_t size)
    {
        // The entry must not already own a buffer.
        CV_DbgAssert(entry.clBuffer_ == NULL);

        const int granularity = (int)_allocationGranularity(size);
        entry.capacity_ = alignSize(size, granularity);

        Context& context = Context::getDefault();
        cl_int retval = CL_SUCCESS;
        entry.clBuffer_ = clCreateBuffer((cl_context)context.ptr(),
                                         CL_MEM_READ_WRITE|createFlags_,
                                         entry.capacity_, 0, &retval);
        CV_Assert(retval == CL_SUCCESS);
        CV_Assert(entry.clBuffer_ != NULL);
        if (retval == CL_SUCCESS)
        {
            CV_IMPL_ADD(CV_IMPL_OCL);
        }

        LOG_BUFFER_POOL("OpenCL allocate %lld (0x%llx) bytes: %p\n",
                        (long long)entry.capacity_,
                        (long long)entry.capacity_,
                        entry.clBuffer_);

        // Track the buffer so it can be looked up by handle later.
        allocatedEntries_.push_back(entry);
    }

    // Releases the cl_mem held by `entry`; the entry must be populated
    // (non-zero capacity_, non-NULL clBuffer_), otherwise CV_Assert fires.
    void _releaseBufferEntry(const BufferEntry& entry)
    {
        CV_Assert(entry.capacity_ != 0);
        CV_Assert(entry.clBuffer_ != NULL);

        LOG_BUFFER_POOL("OpenCL release buffer: %p, %lld (0x%llx) bytes\n",
                        entry.clBuffer_,
                        (long long)entry.capacity_,
                        (long long)entry.capacity_);

        clReleaseMemObject(entry.clBuffer_);
    }
};
#ifdef HAVE_OPENCL_SVM
// Pool record for one SVM allocation: the raw pointer and the capacity recorded for it.
// A default-constructed entry is empty (NULL pointer, zero capacity).
struct CLSVMBufferEntry
{
    void* clBuffer_;   // SVM pointer; NULL while the entry is empty
    size_t capacity_;  // capacity recorded for clBuffer_; 0 while the entry is empty

    CLSVMBufferEntry()
        : clBuffer_(NULL),
          capacity_(0)
    {
    }
};
// Buffer pool whose entries are SVM pointers obtained through the context's SVM function table
// (fn_clSVMAlloc / fn_clSVMFree). The base class calls the two hooks below through derived().
class OpenCLSVMBufferPoolImpl : public OpenCLBufferPoolBaseImpl<OpenCLSVMBufferPoolImpl, CLSVMBufferEntry, void*>
{
public:
    typedef struct CLSVMBufferEntry BufferEntry;

public:
    OpenCLSVMBufferPoolImpl()
    {
    }

    // Allocates SVM memory for `entry` in the default context.
    // entry.capacity_ becomes alignSize(size, _allocationGranularity(size)); the fine-grain
    // buffer flag is requested when the context reports support for it. The filled entry is
    // appended to allocatedEntries_. Asserts if the allocation returns NULL.
    void _allocateBufferEntry(BufferEntry& entry, size_t size)
    {
        // The entry must not already own a buffer.
        CV_DbgAssert(entry.clBuffer_ == NULL);

        const int granularity = (int)_allocationGranularity(size);
        entry.capacity_ = alignSize(size, granularity);

        Context& context = Context::getDefault();
        const svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(context);

        cl_svm_mem_flags memFlags = CL_MEM_READ_WRITE;
        if (svmCaps.isSupportFineGrainBuffer())
            memFlags |= CL_MEM_SVM_FINE_GRAIN_BUFFER;

        const svm::SVMFunctions* svmFns = svm::getSVMFunctions(context);
        CV_DbgAssert(svmFns->isValid());

        CV_OPENCL_SVM_TRACE_P("clSVMAlloc: %d\n", (int)entry.capacity_);
        void *buf = svmFns->fn_clSVMAlloc((cl_context)context.ptr(), memFlags, entry.capacity_, 0);
        CV_Assert(buf);
        entry.clBuffer_ = buf;

        CV_IMPL_ADD(CV_IMPL_OCL);

        LOG_BUFFER_POOL("OpenCL SVM allocate %lld (0x%llx) bytes: %p\n",
                        (long long)entry.capacity_,
                        (long long)entry.capacity_,
                        entry.clBuffer_);

        // Track the buffer so it can be looked up by pointer later.
        allocatedEntries_.push_back(entry);
    }

    // Frees the SVM memory held by `entry` via fn_clSVMFree on the default context.
    // The entry must be populated (non-zero capacity_, non-NULL clBuffer_).
    void _releaseBufferEntry(const BufferEntry& entry)
    {
        CV_Assert(entry.capacity_ != 0);
        CV_Assert(entry.clBuffer_ != NULL);

        LOG_BUFFER_POOL("OpenCL release SVM buffer: %p, %lld (0x%llx) bytes\n",
                        entry.clBuffer_,
                        (long long)entry.capacity_,
                        (long long)entry.capacity_);

        Context& context = Context::getDefault();
        const svm::SVMFunctions* svmFns = svm::getSVMFunctions(context);
        CV_DbgAssert(svmFns->isValid());

        CV_OPENCL_SVM_TRACE_P("clSVMFree: %p\n", entry.clBuffer_);
        svmFns->fn_clSVMFree((cl_context)context.ptr(), entry.clBuffer_);
    }
};
#endif
#if defined _MSC_VER
#pragma warning(disable:4127) // conditional expression is constant
#endif
......@@ -3697,12 +4122,37 @@ private:
class OpenCLAllocator : public MatAllocator
{
mutable OpenCLBufferPoolImpl bufferPool;
mutable OpenCLBufferPoolImpl bufferPoolHostPtr;
#ifdef HAVE_OPENCL_SVM
mutable OpenCLSVMBufferPoolImpl bufferPoolSVM;
#endif
enum AllocatorFlags
{
ALLOCATOR_FLAGS_BUFFER_POOL_USED = 1 << 0
ALLOCATOR_FLAGS_BUFFER_POOL_USED = 1 << 0,
ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED = 1 << 1
#ifdef HAVE_OPENCL_SVM
,ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED = 1 << 2
#endif
};
public:
OpenCLAllocator() { matStdAllocator = Mat::getStdAllocator(); }
// Constructs the allocator's buffer pools: the plain pool is created with no extra creation
// flags, the host-pointer pool with CL_MEM_ALLOC_HOST_PTR. Each pool's maximum reserved size
// is obtained from getConfigurationParameterForSize() under its own parameter name, with a
// default of 1 << 27 when the default device is Intel and 0 otherwise.
OpenCLAllocator()
    : bufferPool(0),
      bufferPoolHostPtr(CL_MEM_ALLOC_HOST_PTR)
{
    const bool onIntelDevice = ocl::Device::getDefault().isIntel();
    const size_t defaultPoolSize = onIntelDevice ? 1 << 27 : 0;

    bufferPool.setMaxReservedSize(
        getConfigurationParameterForSize("OPENCV_OPENCL_BUFFERPOOL_LIMIT", defaultPoolSize));
    bufferPoolHostPtr.setMaxReservedSize(
        getConfigurationParameterForSize("OPENCV_OPENCL_HOST_PTR_BUFFERPOOL_LIMIT", defaultPoolSize));
#ifdef HAVE_OPENCL_SVM
    bufferPoolSVM.setMaxReservedSize(
        getConfigurationParameterForSize("OPENCV_OPENCL_SVM_BUFFERPOOL_LIMIT", defaultPoolSize));
#endif

    matStdAllocator = Mat::getStdAllocator();
}
UMatData* defaultAllocate(int dims, const int* sizes, int type, void* data, size_t* step,
int flags, UMatUsageFlags usageFlags) const
......@@ -3739,33 +4189,47 @@ public:
}
Context& ctx = Context::getDefault();
int createFlags = 0, flags0 = 0;
getBestFlags(ctx, flags, usageFlags, createFlags, flags0);
size_t capacity = 0;
void* handle = NULL;
int allocatorFlags = 0;
#ifdef HAVE_OPENCL_SVM
const svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx);
if (ctx.useSVM() && svm::useSVM(usageFlags) && !svmCaps.isNoSVMSupport())
{
allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED;
handle = bufferPoolSVM.allocate(total);
// this property is constant, so single buffer pool can be used here
bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer();
allocatorFlags |= isFineGrainBuffer ? svm::OPENCL_SVM_FINE_GRAIN_BUFFER : svm::OPENCL_SVM_COARSE_GRAIN_BUFFER;
}
else
#endif
if (createFlags == 0)
{
handle = bufferPool.allocate(total, capacity);
if (!handle)
return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_USED;
handle = bufferPool.allocate(total);
}
else if (createFlags == CL_MEM_ALLOC_HOST_PTR)
{
allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED;
handle = bufferPoolHostPtr.allocate(total);
}
else
{
capacity = total;
cl_int retval = 0;
handle = clCreateBuffer((cl_context)ctx.ptr(),
CL_MEM_READ_WRITE|createFlags, total, 0, &retval);
if( !handle || retval != CL_SUCCESS )
return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
CV_IMPL_ADD(CV_IMPL_OCL)
CV_Assert(handle != NULL); // Unsupported, throw
}
if (!handle)
return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
UMatData* u = new UMatData(this);
u->data = 0;
u->size = total;
u->capacity = capacity;
u->handle = handle;
u->flags = flags0;
u->allocatorFlags_ = allocatorFlags;
......@@ -3788,22 +4252,81 @@ public:
getBestFlags(ctx, accessFlags, usageFlags, createFlags, flags0);
cl_context ctx_handle = (cl_context)ctx.ptr();
cl_int retval = 0;
int tempUMatFlags = UMatData::TEMP_UMAT;
u->handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|CL_MEM_READ_WRITE,
u->size, u->origdata, &retval);
if((!u->handle || retval != CL_SUCCESS) && !(accessFlags & ACCESS_FAST))
int allocatorFlags = 0;
int tempUMatFlags = 0;
void* handle = NULL;
cl_int retval = CL_SUCCESS;
#ifdef HAVE_OPENCL_SVM
svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx);
bool useSVM = ctx.useSVM() && svm::useSVM(usageFlags);
if (useSVM && svmCaps.isSupportFineGrainSystem())
{
u->handle = clCreateBuffer(ctx_handle, CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags,
u->size, u->origdata, &retval);
tempUMatFlags = UMatData::TEMP_COPIED_UMAT;
allocatorFlags = svm::OPENCL_SVM_FINE_GRAIN_SYSTEM;
tempUMatFlags = UMatData::TEMP_UMAT;
handle = u->origdata;
CV_OPENCL_SVM_TRACE_P("Use fine grain system: %d (%p)\n", (int)u->size, handle);
}
else if (useSVM && (svmCaps.isSupportFineGrainBuffer() || svmCaps.isSupportCoarseGrainBuffer()))
{
if (!(accessFlags & ACCESS_FAST)) // memcpy used
{
bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer();
cl_svm_mem_flags memFlags = createFlags |
(isFineGrainBuffer ? CL_MEM_SVM_FINE_GRAIN_BUFFER : 0);
const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
CV_DbgAssert(svmFns->isValid());
CV_OPENCL_SVM_TRACE_P("clSVMAlloc + copy: %d\n", (int)u->size);
handle = svmFns->fn_clSVMAlloc((cl_context)ctx.ptr(), memFlags, u->size, 0);
CV_Assert(handle);
cl_command_queue q = NULL;
if (!isFineGrainBuffer)
{
q = (cl_command_queue)Queue::getDefault().ptr();
CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", handle, (int)u->size);
cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_TRUE, CL_MAP_WRITE,
handle, u->size,
0, NULL, NULL);
CV_Assert(status == CL_SUCCESS);
}
memcpy(handle, u->origdata, u->size);
if (!isFineGrainBuffer)
{
CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", handle);
cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, handle, 0, NULL, NULL);
CV_Assert(status == CL_SUCCESS);
}
tempUMatFlags = UMatData::TEMP_UMAT | UMatData::TEMP_COPIED_UMAT;
allocatorFlags |= isFineGrainBuffer ? svm::OPENCL_SVM_FINE_GRAIN_BUFFER
: svm::OPENCL_SVM_COARSE_GRAIN_BUFFER;
}
}
else
#endif
{
tempUMatFlags = UMatData::TEMP_UMAT;
handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|createFlags,
u->size, u->origdata, &retval);
if((!handle || retval < 0) && !(accessFlags & ACCESS_FAST))
{
handle = clCreateBuffer(ctx_handle, CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags,
u->size, u->origdata, &retval);
tempUMatFlags |= UMatData::TEMP_COPIED_UMAT;
}
}
if(!u->handle || retval != CL_SUCCESS)
if(!handle || retval != CL_SUCCESS)
return false;
u->handle = handle;
u->prevAllocator = u->currAllocator;
u->currAllocator = this;
u->flags |= tempUMatFlags;
u->allocatorFlags_ = allocatorFlags;
}
if(accessFlags & ACCESS_WRITE)
u->markHostCopyObsolete(true);
......@@ -3848,34 +4371,93 @@ public:
CV_Assert(u->urefcount >= 0);
CV_Assert(u->refcount >= 0);
// TODO: !!! when we add Shared Virtual Memory Support,
// this function (as well as the others) should be corrected
CV_Assert(u->handle != 0 && u->urefcount == 0);
if(u->tempUMat())
{
// UMatDataAutoLock lock(u);
if( u->hostCopyObsolete() && u->refcount > 0 )
{
cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
if( u->tempCopiedUMat() )
#ifdef HAVE_OPENCL_SVM
if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
{
AlignedDataPtr<false, true> alignedPtr(u->origdata, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
CV_OclDbgAssert(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
u->size, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS);
Context& ctx = Context::getDefault();
const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
CV_DbgAssert(svmFns->isValid());
if( u->tempCopiedUMat() )
{
CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
(u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER);
bool isFineGrainBuffer = (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER;
cl_command_queue q = NULL;
if (!isFineGrainBuffer)
{
CV_DbgAssert(((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0));
q = (cl_command_queue)Queue::getDefault().ptr();
CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ,
u->handle, u->size,
0, NULL, NULL);
CV_Assert(status == CL_SUCCESS);
}
clFinish(q);
memcpy(u->origdata, u->handle, u->size);
if (!isFineGrainBuffer)
{
CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle, 0, NULL, NULL);
CV_Assert(status == CL_SUCCESS);
}
}
else
{
CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM);
// nothing
}
}
else
#endif
{
cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
if( u->tempCopiedUMat() )
{
AlignedDataPtr<false, true> alignedPtr(u->origdata, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
CV_OclDbgAssert(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
u->size, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS);
}
else
{
// TODO Is it really needed for clCreateBuffer with CL_MEM_USE_HOST_PTR?
cl_int retval = 0;
void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
(CL_MAP_READ | CL_MAP_WRITE),
0, u->size, 0, 0, 0, &retval);
CV_OclDbgAssert(retval == CL_SUCCESS);
CV_OclDbgAssert(clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0) == CL_SUCCESS);
CV_OclDbgAssert(clFinish(q) == CL_SUCCESS);
}
}
u->markHostCopyObsolete(false);
}
#ifdef HAVE_OPENCL_SVM
if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
{
if( u->tempCopiedUMat() )
{
cl_int retval = 0;
void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
(CL_MAP_READ | CL_MAP_WRITE),
0, u->size, 0, 0, 0, &retval);
CV_OclDbgAssert(retval == CL_SUCCESS);
CV_OclDbgAssert(clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0) == CL_SUCCESS);
CV_OclDbgAssert(clFinish(q) == CL_SUCCESS);
Context& ctx = Context::getDefault();
const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
CV_DbgAssert(svmFns->isValid());
CV_OPENCL_SVM_TRACE_P("clSVMFree: %p\n", u->handle);
svmFns->fn_clSVMFree((cl_context)ctx.ptr(), u->handle);
}
}
u->markHostCopyObsolete(false);
clReleaseMemObject((cl_mem)u->handle);
else
#endif
{
clReleaseMemObject((cl_mem)u->handle);
}
u->handle = 0;
u->currAllocator = u->prevAllocator;
if(u->data && u->copyOnMap() && !(u->flags & UMatData::USER_ALLOCATED))
......@@ -3894,14 +4476,42 @@ public:
}
if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_USED)
{
bufferPool.release((cl_mem)u->handle, u->capacity);
bufferPool.release((cl_mem)u->handle);
}
else if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED)
{
bufferPoolHostPtr.release((cl_mem)u->handle);
}
#ifdef HAVE_OPENCL_SVM
else if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED)
{
if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
{
//nothing
}
else if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
(u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
{
Context& ctx = Context::getDefault();
const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
CV_DbgAssert(svmFns->isValid());
cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) != 0)
{
CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle, 0, NULL, NULL);
CV_Assert(status == CL_SUCCESS);
}
}
bufferPoolSVM.release((void*)u->handle);
}
#endif
else
{
clReleaseMemObject((cl_mem)u->handle);
}
u->handle = 0;
u->capacity = 0;
delete u;
}
}
......@@ -3925,13 +4535,41 @@ public:
{
if( !u->copyOnMap() )
{
// TODO
// because there can be other map requests for the same UMat with different access flags,
// we use the universal (read-write) access mode.
#ifdef HAVE_OPENCL_SVM
if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
{
if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
{
Context& ctx = Context::getDefault();
const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
CV_DbgAssert(svmFns->isValid());
if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0)
{
CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ | CL_MAP_WRITE,
u->handle, u->size,
0, NULL, NULL);
CV_Assert(status == CL_SUCCESS);
u->allocatorFlags_ |= svm::OPENCL_SVM_BUFFER_MAP;
}
}
clFinish(q);
u->data = (uchar*)u->handle;
u->markHostCopyObsolete(false);
u->markDeviceMemMapped(true);
return;
}
#endif
if (u->data) // FIXIT Workaround for UMat synchronization issue
{
//CV_Assert(u->hostCopyObsolete() == false);
return;
}
// because there can be other map requests for the same UMat with different access flags,
// we use the universal (read-write) access mode.
cl_int retval = 0;
u->data = (uchar*)clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
(CL_MAP_READ | CL_MAP_WRITE),
......@@ -3943,6 +4581,7 @@ public:
return;
}
// TODO Is it really a good idea and was it tested well?
// if map failed, switch to copy-on-map mode for the particular buffer
u->flags |= UMatData::COPY_ON_MAP;
}
......@@ -3957,6 +4596,9 @@ public:
if( (accessFlags & ACCESS_READ) != 0 && u->hostCopyObsolete() )
{
AlignedDataPtr<false, true> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
#ifdef HAVE_OPENCL_SVM
CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == 0);
#endif
CV_Assert( clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
u->size, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS );
u->markHostCopyObsolete(false);
......@@ -3983,6 +4625,31 @@ public:
{
CV_Assert(u->data != NULL);
u->markDeviceMemMapped(false);
#ifdef HAVE_OPENCL_SVM
if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
{
if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
{
Context& ctx = Context::getDefault();
const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
CV_DbgAssert(svmFns->isValid());
CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) != 0);
{
CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle,
0, NULL, NULL);
CV_Assert(status == CL_SUCCESS);
clFinish(q);
u->allocatorFlags_ &= ~svm::OPENCL_SVM_BUFFER_MAP;
}
}
u->data = 0;
u->markDeviceCopyObsolete(false);
u->markHostCopyObsolete(false);
return;
}
#endif
CV_Assert( (retval = clEnqueueUnmapMemObject(q,
(cl_mem)u->handle, u->data, 0, 0, 0)) == CL_SUCCESS );
if (Device::getDefault().isAMD())
......@@ -3995,6 +4662,9 @@ public:
else if( u->copyOnMap() && u->deviceCopyObsolete() )
{
AlignedDataPtr<true, false> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
#ifdef HAVE_OPENCL_SVM
CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == 0);
#endif
CV_Assert( (retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
u->size, alignedPtr.getAlignedPtr(), 0, 0, 0)) == CL_SUCCESS );
}
......@@ -4102,17 +4772,78 @@ public:
srcrawofs, new_srcofs, new_srcstep,
dstrawofs, new_dstofs, new_dststep);
AlignedDataPtr<false, true> alignedPtr((uchar*)dstptr, sz[0] * dststep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
if( iscontinuous )
#ifdef HAVE_OPENCL_SVM
if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
{
CV_Assert( clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
srcrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS );
CV_DbgAssert(u->data == NULL || u->data == u->handle);
Context& ctx = Context::getDefault();
const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
CV_DbgAssert(svmFns->isValid());
CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0);
if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
{
CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ,
u->handle, u->size,
0, NULL, NULL);
CV_Assert(status == CL_SUCCESS);
}
clFinish(q);
if( iscontinuous )
{
memcpy(dstptr, (uchar*)u->handle + srcrawofs, total);
}
else
{
// This code is from MatAllocator::download()
int isz[CV_MAX_DIM];
uchar* srcptr = (uchar*)u->handle;
for( int i = 0; i < dims; i++ )
{
CV_Assert( sz[i] <= (size_t)INT_MAX );
if( sz[i] == 0 )
return;
if( srcofs )
srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1);
isz[i] = (int)sz[i];
}
Mat src(dims, isz, CV_8U, srcptr, srcstep);
Mat dst(dims, isz, CV_8U, dstptr, dststep);
const Mat* arrays[] = { &src, &dst };
uchar* ptrs[2];
NAryMatIterator it(arrays, ptrs, 2);
size_t j, planesz = it.size;
for( j = 0; j < it.nplanes; j++, ++it )
memcpy(ptrs[1], ptrs[0], planesz);
}
if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
{
CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle,
0, NULL, NULL);
CV_Assert(status == CL_SUCCESS);
clFinish(q);
}
}
else
#endif
{
CV_Assert( clEnqueueReadBufferRect(q, (cl_mem)u->handle, CL_TRUE,
new_srcofs, new_dstofs, new_sz, new_srcstep[0], new_srcstep[1],
new_dststep[0], new_dststep[1], alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS );
AlignedDataPtr<false, true> alignedPtr((uchar*)dstptr, sz[0] * dststep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
if( iscontinuous )
{
CV_Assert( clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
srcrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0) >= 0 );
}
else
{
CV_Assert( clEnqueueReadBufferRect(q, (cl_mem)u->handle, CL_TRUE,
new_srcofs, new_dstofs, new_sz, new_srcstep[0], new_srcstep[1],
new_dststep[0], new_dststep[1], alignedPtr.getAlignedPtr(), 0, 0, 0) >= 0 );
}
}
}
......@@ -4153,20 +4884,91 @@ public:
CV_Assert( u->handle != 0 );
cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
AlignedDataPtr<true, false> alignedPtr((uchar*)srcptr, sz[0] * srcstep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
if( iscontinuous )
#ifdef HAVE_OPENCL_SVM
if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
{
CV_Assert( clEnqueueWriteBuffer(q, (cl_mem)u->handle,
CL_TRUE, dstrawofs, total, srcptr, 0, 0, 0) == CL_SUCCESS );
CV_DbgAssert(u->data == NULL || u->data == u->handle);
Context& ctx = Context::getDefault();
const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
CV_DbgAssert(svmFns->isValid());
CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0);
if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
{
CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_WRITE,
u->handle, u->size,
0, NULL, NULL);
CV_Assert(status == CL_SUCCESS);
}
clFinish(q);
if( iscontinuous )
{
memcpy((uchar*)u->handle + dstrawofs, srcptr, total);
}
else
{
// This code is from MatAllocator::upload()
int isz[CV_MAX_DIM];
uchar* dstptr = (uchar*)u->handle;
for( int i = 0; i < dims; i++ )
{
CV_Assert( sz[i] <= (size_t)INT_MAX );
if( sz[i] == 0 )
return;
if( dstofs )
dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1);
isz[i] = (int)sz[i];
}
Mat src(dims, isz, CV_8U, (void*)srcptr, srcstep);
Mat dst(dims, isz, CV_8U, dstptr, dststep);
const Mat* arrays[] = { &src, &dst };
uchar* ptrs[2];
NAryMatIterator it(arrays, ptrs, 2);
size_t j, planesz = it.size;
for( j = 0; j < it.nplanes; j++, ++it )
memcpy(ptrs[1], ptrs[0], planesz);
}
if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
{
CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle,
0, NULL, NULL);
CV_Assert(status == CL_SUCCESS);
clFinish(q);
}
}
else
#endif
{
CV_Assert( clEnqueueWriteBufferRect(q, (cl_mem)u->handle, CL_TRUE,
new_dstofs, new_srcofs, new_sz, new_dststep[0], new_dststep[1],
new_srcstep[0], new_srcstep[1], srcptr, 0, 0, 0) == CL_SUCCESS );
AlignedDataPtr<true, false> alignedPtr((uchar*)srcptr, sz[0] * srcstep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
if( iscontinuous )
{
CV_Assert( clEnqueueWriteBuffer(q, (cl_mem)u->handle,
CL_TRUE, dstrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0) >= 0 );
}
else
{
CV_Assert( clEnqueueWriteBufferRect(q, (cl_mem)u->handle, CL_TRUE,
new_dstofs, new_srcofs, new_sz, new_dststep[0], new_dststep[1],
new_srcstep[0], new_srcstep[1], alignedPtr.getAlignedPtr(), 0, 0, 0) >= 0 );
}
}
u->markHostCopyObsolete(true);
#ifdef HAVE_OPENCL_SVM
if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
(u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
{
// nothing
}
else
#endif
{
u->markHostCopyObsolete(true);
}
u->markDeviceCopyObsolete(false);
}
......@@ -4198,7 +5000,17 @@ public:
{
download(src, dst->data + dstrawofs, dims, sz, srcofs, srcstep, dststep);
dst->markHostCopyObsolete(false);
dst->markDeviceCopyObsolete(true);
#ifdef HAVE_OPENCL_SVM
if ((dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
(dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
{
// nothing
}
else
#endif
{
dst->markDeviceCopyObsolete(true);
}
return;
}
......@@ -4206,26 +5018,110 @@ public:
CV_Assert(dst->refcount == 0);
cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
cl_int retval;
if( iscontinuous )
cl_int retval = CL_SUCCESS;
#ifdef HAVE_OPENCL_SVM
if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0 ||
(dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
{
CV_Assert( (retval = clEnqueueCopyBuffer(q, (cl_mem)src->handle, (cl_mem)dst->handle,
srcrawofs, dstrawofs, total, 0, 0, 0)) == CL_SUCCESS );
if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0 &&
(dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
{
Context& ctx = Context::getDefault();
const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
CV_DbgAssert(svmFns->isValid());
if( iscontinuous )
{
CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMemcpy: %p <-- %p (%d)\n",
(uchar*)dst->handle + dstrawofs, (uchar*)src->handle + srcrawofs, (int)total);
cl_int status = svmFns->fn_clEnqueueSVMMemcpy(q, CL_TRUE,
(uchar*)dst->handle + dstrawofs, (uchar*)src->handle + srcrawofs,
total, 0, NULL, NULL);
CV_Assert(status == CL_SUCCESS);
}
else
{
clFinish(q);
// This code is from MatAllocator::download()/upload()
int isz[CV_MAX_DIM];
uchar* srcptr = (uchar*)src->handle;
for( int i = 0; i < dims; i++ )
{
CV_Assert( sz[i] <= (size_t)INT_MAX );
if( sz[i] == 0 )
return;
if( srcofs )
srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1);
isz[i] = (int)sz[i];
}
Mat m_src(dims, isz, CV_8U, srcptr, srcstep);
uchar* dstptr = (uchar*)dst->handle;
for( int i = 0; i < dims; i++ )
{
if( dstofs )
dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1);
}
Mat m_dst(dims, isz, CV_8U, dstptr, dststep);
const Mat* arrays[] = { &m_src, &m_dst };
uchar* ptrs[2];
NAryMatIterator it(arrays, ptrs, 2);
size_t j, planesz = it.size;
for( j = 0; j < it.nplanes; j++, ++it )
memcpy(ptrs[1], ptrs[0], planesz);
}
}
else
{
if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
{
map(src, ACCESS_READ);
upload(dst, src->data + srcrawofs, dims, sz, dstofs, dststep, srcstep);
unmap(src);
}
else
{
map(dst, ACCESS_WRITE);
download(src, dst->data + dstrawofs, dims, sz, srcofs, srcstep, dststep);
unmap(dst);
}
}
}
else
#endif
{
CV_Assert( (retval = clEnqueueCopyBufferRect(q, (cl_mem)src->handle, (cl_mem)dst->handle,
new_srcofs, new_dstofs, new_sz,
new_srcstep[0], new_srcstep[1],
new_dststep[0], new_dststep[1],
0, 0, 0)) == CL_SUCCESS );
if( iscontinuous )
{
CV_Assert( (retval = clEnqueueCopyBuffer(q, (cl_mem)src->handle, (cl_mem)dst->handle,
srcrawofs, dstrawofs, total, 0, 0, 0)) == CL_SUCCESS );
}
else
{
CV_Assert( (retval = clEnqueueCopyBufferRect(q, (cl_mem)src->handle, (cl_mem)dst->handle,
new_srcofs, new_dstofs, new_sz,
new_srcstep[0], new_srcstep[1],
new_dststep[0], new_dststep[1],
0, 0, 0)) == CL_SUCCESS );
}
}
if(retval == CL_SUCCESS)
if (retval == CL_SUCCESS)
{
CV_IMPL_ADD(CV_IMPL_OCL)
}
dst->markHostCopyObsolete(true);
#ifdef HAVE_OPENCL_SVM
if ((dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
(dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
{
// nothing
}
else
#endif
{
dst->markHostCopyObsolete(true);
}
dst->markDeviceCopyObsolete(false);
if( _sync )
......@@ -4234,7 +5130,23 @@ public:
}
}
BufferPoolController* getBufferPoolController() const { return &bufferPool; }
// Selects a buffer pool controller by textual ID:
//   NULL or "OCL"  -> bufferPool (in SVM builds: bufferPoolSVM when svm::checkForceSVMUmatUsage() is true)
//   "SVM"          -> bufferPoolSVM (SVM builds only)
//   "HOST_ALLOC"   -> bufferPoolHostPtr
// Any other ID raises cv::Error::StsBadArg.
BufferPoolController* getBufferPoolController(const char* id) const
{
    const bool isDefaultId = (id == NULL) || (strcmp(id, "OCL") == 0);
#ifdef HAVE_OPENCL_SVM
    const bool isSvmId = (id != NULL) && (strcmp(id, "SVM") == 0);
    if ((svm::checkForceSVMUmatUsage() && isDefaultId) || isSvmId)
        return &bufferPoolSVM;
#endif
    if (isDefaultId)
        return &bufferPool;

    // From here on `id` is non-NULL and is not "OCL".
    if (strcmp(id, "HOST_ALLOC") == 0)
        return &bufferPoolHostPtr;

    CV_ErrorNoReturn(cv::Error::StsBadArg, "getBufferPoolController(): unknown BufferPool ID\n");
    return &bufferPool;
}
MatAllocator* matStdAllocator;
};
......@@ -4818,7 +5730,7 @@ void* Image2D::ptr() const
return p ? p->handle : 0;
}
bool isPerformanceCheckBypassed()
bool internal::isPerformanceCheckBypassed()
{
static bool initialized = false;
static bool value = false;
......@@ -4830,4 +5742,22 @@ bool isPerformanceCheckBypassed()
return value;
}
// Reports whether `u` should be treated as a regular OpenCL buffer.
// A UMat with no handle yields true. Otherwise the result is false when any of the upper
// 16 bits of allocatorFlags_ are set, and true when none are.
bool internal::isCLBuffer(UMat& u)
{
    void* handle = u.handle(ACCESS_RW);
    if (handle == NULL)
        return true;

    CV_DbgAssert(u.u->currAllocator == getOpenCLAllocator());
#if 1
    // OpenCL SVM flags are stored here
    const bool hasSvmFlags = (u.u->allocatorFlags_ & 0xffff0000) != 0;
    return !hasSvmFlags;
#else
    // Alternative check (disabled): query the memory object type from the OpenCL runtime.
    cl_mem_object_type type = 0;
    cl_int ret = clGetMemObjectInfo((cl_mem)handle, CL_MEM_TYPE, sizeof(type), &type, NULL);
    return ret == CL_SUCCESS && type == CL_MEM_OBJECT_BUFFER;
#endif
}
}}
......@@ -182,6 +182,65 @@ static void* opencl_check_fn(int ID);
#define CUSTOM_FUNCTION_ID 1000
#ifdef HAVE_OPENCL_SVM
#include "opencv2/core/opencl/runtime/opencl_svm_20.hpp"
#define SVM_FUNCTION_ID_START CUSTOM_FUNCTION_ID
#define SVM_FUNCTION_ID_END CUSTOM_FUNCTION_ID + 100
// Identifiers of the OpenCL SVM entry points, numbered consecutively starting at
// SVM_FUNCTION_ID_START. opencl_check_fn() uses (ID - SVM_FUNCTION_ID_START) as
// an index into opencl_svm_fn_list, so the enumerator order here must stay in
// step with the order of the entries in that table.
enum OPENCL_FN_SVM_ID
{
OPENCL_FN_clSVMAlloc = SVM_FUNCTION_ID_START,
OPENCL_FN_clSVMFree,
OPENCL_FN_clSetKernelArgSVMPointer,
OPENCL_FN_clSetKernelExecInfo, // corresponding table slot is currently NULL
OPENCL_FN_clEnqueueSVMFree, // corresponding table slot is currently NULL
OPENCL_FN_clEnqueueSVMMemcpy,
OPENCL_FN_clEnqueueSVMMemFill,
OPENCL_FN_clEnqueueSVMMap,
OPENCL_FN_clEnqueueSVMUnmap,
};
// Function pointers for the OpenCL SVM API. Each pointer starts out bound to the
// switch_fn member of an opencl_fnN<ID, ReturnType, Args...> instantiation and is
// paired with a DynamicFnEntry holding the symbol name and the address of the
// pointer itself.
// NOTE(review): switch_fn presumably resolves the real symbol through
// opencl_check_fn() on first use and rebinds the pointer - confirm against the
// opencl_fnN templates, which are not visible here.

// Allocates a shared virtual memory buffer.
void* (CL_API_CALL *clSVMAlloc)(cl_context context, cl_svm_mem_flags flags, size_t size, unsigned int alignment) =
opencl_fn4<OPENCL_FN_clSVMAlloc, void*, cl_context, cl_svm_mem_flags, size_t, unsigned int>::switch_fn;
static const struct DynamicFnEntry _clSVMAlloc_definition = { "clSVMAlloc", (void**)&clSVMAlloc};
// Frees a buffer obtained from clSVMAlloc.
void (CL_API_CALL *clSVMFree)(cl_context context, void* svm_pointer) =
opencl_fn2<OPENCL_FN_clSVMFree, void, cl_context, void*>::switch_fn;
static const struct DynamicFnEntry _clSVMFree_definition = { "clSVMFree", (void**)&clSVMFree};
// Passes an SVM pointer as a kernel argument.
cl_int (CL_API_CALL *clSetKernelArgSVMPointer)(cl_kernel kernel, cl_uint arg_index, const void* arg_value) =
opencl_fn3<OPENCL_FN_clSetKernelArgSVMPointer, cl_int, cl_kernel, cl_uint, const void*>::switch_fn;
static const struct DynamicFnEntry _clSetKernelArgSVMPointer_definition = { "clSetKernelArgSVMPointer", (void**)&clSetKernelArgSVMPointer};
// Not wired up yet; the table keeps a NULL placeholder for this ID.
// (clSetKernelExecInfo returns cl_int in the OpenCL 2.0 API.)
//cl_int (CL_API_CALL *clSetKernelExecInfo)(cl_kernel kernel, cl_kernel_exec_info param_name, size_t param_value_size, const void* param_value) =
// opencl_fn4<OPENCL_FN_clSetKernelExecInfo, cl_int, cl_kernel, cl_kernel_exec_info, size_t, const void*>::switch_fn;
//static const struct DynamicFnEntry _clSetKernelExecInfo_definition = { "clSetKernelExecInfo", (void**)&clSetKernelExecInfo};
// Not wired up yet; the table keeps a NULL placeholder for this ID.
//cl_int (CL_API_CALL *clEnqueueSVMFree)(...) =
// opencl_fn8<OPENCL_FN_clEnqueueSVMFree, cl_int, ...>::switch_fn;
//static const struct DynamicFnEntry _clEnqueueSVMFree_definition = { "clEnqueueSVMFree", (void**)&clEnqueueSVMFree};
// Enqueues a memcpy where source and/or destination are SVM pointers.
cl_int (CL_API_CALL *clEnqueueSVMMemcpy)(cl_command_queue command_queue, cl_bool blocking_copy, void* dst_ptr, const void* src_ptr, size_t size, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) =
opencl_fn8<OPENCL_FN_clEnqueueSVMMemcpy, cl_int, cl_command_queue, cl_bool, void*, const void*, size_t, cl_uint, const cl_event*, cl_event*>::switch_fn;
static const struct DynamicFnEntry _clEnqueueSVMMemcpy_definition = { "clEnqueueSVMMemcpy", (void**)&clEnqueueSVMMemcpy};
// Enqueues a pattern fill of an SVM region.
cl_int (CL_API_CALL *clEnqueueSVMMemFill)(cl_command_queue command_queue, void* svm_ptr, const void* pattern, size_t pattern_size, size_t size, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) =
opencl_fn8<OPENCL_FN_clEnqueueSVMMemFill, cl_int, cl_command_queue, void*, const void*, size_t, size_t, cl_uint, const cl_event*, cl_event*>::switch_fn;
static const struct DynamicFnEntry _clEnqueueSVMMemFill_definition = { "clEnqueueSVMMemFill", (void**)&clEnqueueSVMMemFill};
// Enqueues a map of an SVM region for host access.
cl_int (CL_API_CALL *clEnqueueSVMMap)(cl_command_queue command_queue, cl_bool blocking_map, cl_map_flags map_flags, void* svm_ptr, size_t size, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) =
opencl_fn8<OPENCL_FN_clEnqueueSVMMap, cl_int, cl_command_queue, cl_bool, cl_map_flags, void*, size_t, cl_uint, const cl_event*, cl_event*>::switch_fn;
static const struct DynamicFnEntry _clEnqueueSVMMap_definition = { "clEnqueueSVMMap", (void**)&clEnqueueSVMMap};
// Enqueues the unmap that ends host access to a mapped SVM region.
cl_int (CL_API_CALL *clEnqueueSVMUnmap)(cl_command_queue command_queue, void* svm_ptr, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) =
opencl_fn5<OPENCL_FN_clEnqueueSVMUnmap, cl_int, cl_command_queue, void*, cl_uint, const cl_event*, cl_event*>::switch_fn;
static const struct DynamicFnEntry _clEnqueueSVMUnmap_definition = { "clEnqueueSVMUnmap", (void**)&clEnqueueSVMUnmap};
// Lookup table for the SVM entry points. opencl_check_fn() indexes it with
// (ID - SVM_FUNCTION_ID_START), so the entries must follow the enumerator order
// of OPENCL_FN_SVM_ID exactly.
// The NULL slots are placeholders that keep the indices aligned for the two
// functions whose pointer definitions are commented out.
// NOTE(review): a lookup with one of those two IDs yields a NULL entry; how the
// caller copes with that is not visible here - confirm before enabling them.
static const struct DynamicFnEntry* opencl_svm_fn_list[] = {
&_clSVMAlloc_definition,
&_clSVMFree_definition,
&_clSetKernelArgSVMPointer_definition,
NULL/*&_clSetKernelExecInfo_definition*/,
NULL/*&_clEnqueueSVMFree_definition*/,
&_clEnqueueSVMMemcpy_definition,
&_clEnqueueSVMMemFill_definition,
&_clEnqueueSVMMap_definition,
&_clEnqueueSVMUnmap_definition,
};
#endif // HAVE_OPENCL_SVM
//
// END OF CUSTOM FUNCTIONS HERE
//
......@@ -194,6 +253,14 @@ static void* opencl_check_fn(int ID)
assert(ID >= 0 && ID < (int)(sizeof(opencl_fn_list)/sizeof(opencl_fn_list[0])));
e = opencl_fn_list[ID];
}
#ifdef HAVE_OPENCL_SVM
else if (ID >= SVM_FUNCTION_ID_START && ID < SVM_FUNCTION_ID_END)
{
ID = ID - SVM_FUNCTION_ID_START;
assert(ID >= 0 && ID < (int)(sizeof(opencl_svm_fn_list)/sizeof(opencl_svm_fn_list[0])));
e = opencl_svm_fn_list[ID];
}
#endif
else
{
CV_ErrorNoReturn(cv::Error::StsBadArg, "Invalid function ID");
......
......@@ -55,7 +55,7 @@ UMatData::UMatData(const MatAllocator* allocator)
prevAllocator = currAllocator = allocator;
urefcount = refcount = 0;
data = origdata = 0;
size = 0; capacity = 0;
size = 0;
flags = 0;
handle = 0;
userdata = 0;
......@@ -67,7 +67,7 @@ UMatData::~UMatData()
prevAllocator = currAllocator = 0;
urefcount = refcount = 0;
data = origdata = 0;
size = 0; capacity = 0;
size = 0;
flags = 0;
handle = 0;
userdata = 0;
......@@ -221,7 +221,7 @@ UMat Mat::getUMat(int accessFlags, UMatUsageFlags usageFlags) const
temp_u = a->allocate(dims, size.p, type(), data, step.p, accessFlags, usageFlags);
temp_u->refcount = 1;
}
UMat::getStdAllocator()->allocate(temp_u, accessFlags, usageFlags);
UMat::getStdAllocator()->allocate(temp_u, accessFlags, usageFlags); // TODO result is not checked
hdr.flags = flags;
setSize(hdr, dims, size.p, step.p);
finalizeHdr(hdr);
......@@ -575,7 +575,7 @@ Mat UMat::getMat(int accessFlags) const
{
if(!u)
return Mat();
u->currAllocator->map(u, accessFlags | ACCESS_READ);
u->currAllocator->map(u, accessFlags | ACCESS_READ); // TODO Support ACCESS_WRITE without unnecessary data transfers
CV_Assert(u->data != 0);
Mat hdr(dims, size.p, type(), u->data + offset, step.p);
hdr.flags = flags;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册