Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
0a07d780
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
0a07d780
编写于
1月 02, 2015
作者:
A
Alexander Alekhin
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
ocl: OpenCL SVM support
上级
58ad952b
变更
15
隐藏空白更改
内联
并排
Showing
15 changed file
with
1535 addition
and
154 deletion
+1535
-154
CMakeLists.txt
CMakeLists.txt
+1
-0
cmake/OpenCVDetectOpenCL.cmake
cmake/OpenCVDetectOpenCL.cmake
+4
-0
cmake/templates/cvconfig.h.in
cmake/templates/cvconfig.h.in
+1
-0
modules/core/include/opencv2/core/mat.hpp
modules/core/include/opencv2/core/mat.hpp
+2
-2
modules/core/include/opencv2/core/ocl.hpp
modules/core/include/opencv2/core/ocl.hpp
+17
-3
modules/core/include/opencv2/core/opencl/opencl_svm.hpp
modules/core/include/opencv2/core/opencl/opencl_svm.hpp
+81
-0
modules/core/include/opencv2/core/opencl/runtime/opencl_core.hpp
.../core/include/opencv2/core/opencl/runtime/opencl_core.hpp
+12
-0
modules/core/include/opencv2/core/opencl/runtime/opencl_svm_20.hpp
...ore/include/opencv2/core/opencl/runtime/opencl_svm_20.hpp
+52
-0
modules/core/include/opencv2/core/opencl/runtime/opencl_svm_definitions.hpp
...de/opencv2/core/opencl/runtime/opencl_svm_definitions.hpp
+42
-0
modules/core/include/opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp
.../opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp
+166
-0
modules/core/src/matmul.cpp
modules/core/src/matmul.cpp
+10
-0
modules/core/src/matrix.cpp
modules/core/src/matrix.cpp
+2
-1
modules/core/src/ocl.cpp
modules/core/src/ocl.cpp
+1074
-144
modules/core/src/opencl/runtime/opencl_core.cpp
modules/core/src/opencl/runtime/opencl_core.cpp
+67
-0
modules/core/src/umatrix.cpp
modules/core/src/umatrix.cpp
+4
-4
未找到文件。
CMakeLists.txt
浏览文件 @
0a07d780
...
...
@@ -162,6 +162,7 @@ OCV_OPTION(WITH_XIMEA "Include XIMEA cameras support" OFF
OCV_OPTION
(
WITH_XINE
"Include Xine support (GPL)"
OFF
IF
(
UNIX AND NOT APPLE AND NOT ANDROID
)
)
OCV_OPTION
(
WITH_CLP
"Include Clp support (EPL)"
OFF
)
OCV_OPTION
(
WITH_OPENCL
"Include OpenCL Runtime support"
ON
IF
(
NOT IOS
)
)
OCV_OPTION
(
WITH_OPENCL_SVM
"Include OpenCL Shared Virtual Memory support"
OFF
)
# experimental
OCV_OPTION
(
WITH_OPENCLAMDFFT
"Include AMD OpenCL FFT library support"
ON
IF
(
NOT ANDROID AND NOT IOS
)
)
OCV_OPTION
(
WITH_OPENCLAMDBLAS
"Include AMD OpenCL BLAS library support"
ON
IF
(
NOT ANDROID AND NOT IOS
)
)
OCV_OPTION
(
WITH_DIRECTX
"Include DirectX support"
ON IF WIN32
)
...
...
cmake/OpenCVDetectOpenCL.cmake
浏览文件 @
0a07d780
...
...
@@ -26,6 +26,10 @@ if(OPENCL_FOUND)
set
(
HAVE_OPENCL 1
)
if
(
WITH_OPENCL_SVM
)
set
(
HAVE_OPENCL_SVM 1
)
endif
()
if
(
HAVE_OPENCL_STATIC
)
set
(
OPENCL_LIBRARIES
"
${
OPENCL_LIBRARY
}
"
)
else
()
...
...
cmake/templates/cvconfig.h.in
浏览文件 @
0a07d780
...
...
@@ -122,6 +122,7 @@
/* OpenCL Support */
#cmakedefine HAVE_OPENCL
#cmakedefine HAVE_OPENCL_STATIC
#cmakedefine HAVE_OPENCL_SVM
/* OpenEXR codec */
#cmakedefine HAVE_OPENEXR
...
...
modules/core/include/opencv2/core/mat.hpp
浏览文件 @
0a07d780
...
...
@@ -415,7 +415,7 @@ public:
const
size_t
dstofs
[],
const
size_t
dststep
[],
bool
sync
)
const
;
// default implementation returns DummyBufferPoolController
virtual
BufferPoolController
*
getBufferPoolController
()
const
;
virtual
BufferPoolController
*
getBufferPoolController
(
const
char
*
id
=
NULL
)
const
;
};
...
...
@@ -481,7 +481,7 @@ struct CV_EXPORTS UMatData
int
refcount
;
uchar
*
data
;
uchar
*
origdata
;
size_t
size
,
capacity
;
size_t
size
;
int
flags
;
void
*
handle
;
...
...
modules/core/include/opencv2/core/ocl.hpp
浏览文件 @
0a07d780
...
...
@@ -56,6 +56,8 @@ CV_EXPORTS_W bool haveAmdFft();
CV_EXPORTS_W
void
setUseOpenCL
(
bool
flag
);
CV_EXPORTS_W
void
finish
();
CV_EXPORTS
bool
haveSVM
();
class
CV_EXPORTS
Context
;
class
CV_EXPORTS
Device
;
class
CV_EXPORTS
Kernel
;
...
...
@@ -248,7 +250,10 @@ public:
void
*
ptr
()
const
;
friend
void
initializeContextFromHandle
(
Context
&
ctx
,
void
*
platform
,
void
*
context
,
void
*
device
);
protected:
bool
useSVM
()
const
;
void
setUseSVM
(
bool
enabled
);
struct
Impl
;
Impl
*
p
;
};
...
...
@@ -666,8 +671,17 @@ protected:
CV_EXPORTS
MatAllocator
*
getOpenCLAllocator
();
CV_EXPORTS_W
bool
isPerformanceCheckBypassed
();
#define OCL_PERFORMANCE_CHECK(condition) (cv::ocl::isPerformanceCheckBypassed() || (condition))
#ifdef __OPENCV_BUILD
namespace
internal
{
CV_EXPORTS
bool
isPerformanceCheckBypassed
();
#define OCL_PERFORMANCE_CHECK(condition) (cv::ocl::internal::isPerformanceCheckBypassed() || (condition))
CV_EXPORTS
bool
isCLBuffer
(
UMat
&
u
);
}
// namespace internal
#endif
//! @}
...
...
modules/core/include/opencv2/core/opencl/opencl_svm.hpp
0 → 100644
浏览文件 @
0a07d780
/* See LICENSE file in the root OpenCV directory */
#ifndef __OPENCV_CORE_OPENCL_SVM_HPP__
#define __OPENCV_CORE_OPENCL_SVM_HPP__
//
// Internal usage only (binary compatibility is not guaranteed)
//
#ifndef __OPENCV_BUILD
#error Internal header file
#endif
#if defined(HAVE_OPENCL) && defined(HAVE_OPENCL_SVM)
#include "runtime/opencl_core.hpp"
#include "runtime/opencl_svm_20.hpp"
#include "runtime/opencl_svm_hsa_extension.hpp"
namespace
cv
{
namespace
ocl
{
namespace
svm
{
struct
SVMCapabilities
{
enum
Value
{
SVM_COARSE_GRAIN_BUFFER
=
(
1
<<
0
),
SVM_FINE_GRAIN_BUFFER
=
(
1
<<
1
),
SVM_FINE_GRAIN_SYSTEM
=
(
1
<<
2
),
SVM_ATOMICS
=
(
1
<<
3
),
};
int
value_
;
SVMCapabilities
(
int
capabilities
=
0
)
:
value_
(
capabilities
)
{
}
operator
int
()
const
{
return
value_
;
}
inline
bool
isNoSVMSupport
()
const
{
return
value_
==
0
;
}
inline
bool
isSupportCoarseGrainBuffer
()
const
{
return
(
value_
&
SVM_COARSE_GRAIN_BUFFER
)
!=
0
;
}
inline
bool
isSupportFineGrainBuffer
()
const
{
return
(
value_
&
SVM_FINE_GRAIN_BUFFER
)
!=
0
;
}
inline
bool
isSupportFineGrainSystem
()
const
{
return
(
value_
&
SVM_FINE_GRAIN_SYSTEM
)
!=
0
;
}
inline
bool
isSupportAtomics
()
const
{
return
(
value_
&
SVM_ATOMICS
)
!=
0
;
}
};
CV_EXPORTS
const
SVMCapabilities
getSVMCapabilitites
(
const
ocl
::
Context
&
context
);
struct
SVMFunctions
{
clSVMAllocAMD_fn
fn_clSVMAlloc
;
clSVMFreeAMD_fn
fn_clSVMFree
;
clSetKernelArgSVMPointerAMD_fn
fn_clSetKernelArgSVMPointer
;
//clSetKernelExecInfoAMD_fn fn_clSetKernelExecInfo;
//clEnqueueSVMFreeAMD_fn fn_clEnqueueSVMFree;
clEnqueueSVMMemcpyAMD_fn
fn_clEnqueueSVMMemcpy
;
clEnqueueSVMMemFillAMD_fn
fn_clEnqueueSVMMemFill
;
clEnqueueSVMMapAMD_fn
fn_clEnqueueSVMMap
;
clEnqueueSVMUnmapAMD_fn
fn_clEnqueueSVMUnmap
;
inline
SVMFunctions
()
:
fn_clSVMAlloc
(
NULL
),
fn_clSVMFree
(
NULL
),
fn_clSetKernelArgSVMPointer
(
NULL
),
/*fn_clSetKernelExecInfo(NULL),*/
/*fn_clEnqueueSVMFree(NULL),*/
fn_clEnqueueSVMMemcpy
(
NULL
),
fn_clEnqueueSVMMemFill
(
NULL
),
fn_clEnqueueSVMMap
(
NULL
),
fn_clEnqueueSVMUnmap
(
NULL
)
{
// nothing
}
inline
bool
isValid
()
const
{
return
fn_clSVMAlloc
!=
NULL
&&
fn_clSVMFree
&&
fn_clSetKernelArgSVMPointer
&&
/*fn_clSetKernelExecInfo && fn_clEnqueueSVMFree &&*/
fn_clEnqueueSVMMemcpy
&&
fn_clEnqueueSVMMemFill
&&
fn_clEnqueueSVMMap
&&
fn_clEnqueueSVMUnmap
;
}
};
// We should guarantee that SVMFunctions lifetime is not less than context's lifetime
CV_EXPORTS
const
SVMFunctions
*
getSVMFunctions
(
const
ocl
::
Context
&
context
);
CV_EXPORTS
bool
useSVM
(
UMatUsageFlags
usageFlags
);
}}}
//namespace cv::ocl::svm
#endif
#endif // __OPENCV_CORE_OPENCL_SVM_HPP__
/* End of file. */
modules/core/include/opencv2/core/opencl/runtime/opencl_core.hpp
浏览文件 @
0a07d780
...
...
@@ -62,6 +62,18 @@
#endif
#endif
#ifdef HAVE_OPENCL_SVM
#define clSVMAlloc clSVMAlloc_
#define clSVMFree clSVMFree_
#define clSetKernelArgSVMPointer clSetKernelArgSVMPointer_
#define clSetKernelExecInfo clSetKernelExecInfo_
#define clEnqueueSVMFree clEnqueueSVMFree_
#define clEnqueueSVMMemcpy clEnqueueSVMMemcpy_
#define clEnqueueSVMMemFill clEnqueueSVMMemFill_
#define clEnqueueSVMMap clEnqueueSVMMap_
#define clEnqueueSVMUnmap clEnqueueSVMUnmap_
#endif
#include "autogenerated/opencl_core.hpp"
#endif // HAVE_OPENCL_STATIC
...
...
modules/core/include/opencv2/core/opencl/runtime/opencl_svm_20.hpp
0 → 100644
浏览文件 @
0a07d780
/* See LICENSE file in the root OpenCV directory */
#ifndef __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP__
#define __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP__
#if defined(HAVE_OPENCL_SVM)
#include "opencl_core.hpp"
#include "opencl_svm_definitions.hpp"
#ifndef HAVE_OPENCL_STATIC
#undef clSVMAlloc
#define clSVMAlloc clSVMAlloc_pfn
#undef clSVMFree
#define clSVMFree clSVMFree_pfn
#undef clSetKernelArgSVMPointer
#define clSetKernelArgSVMPointer clSetKernelArgSVMPointer_pfn
#undef clSetKernelExecInfo
//#define clSetKernelExecInfo clSetKernelExecInfo_pfn
#undef clEnqueueSVMFree
//#define clEnqueueSVMFree clEnqueueSVMFree_pfn
#undef clEnqueueSVMMemcpy
#define clEnqueueSVMMemcpy clEnqueueSVMMemcpy_pfn
#undef clEnqueueSVMMemFill
#define clEnqueueSVMMemFill clEnqueueSVMMemFill_pfn
#undef clEnqueueSVMMap
#define clEnqueueSVMMap clEnqueueSVMMap_pfn
#undef clEnqueueSVMUnmap
#define clEnqueueSVMUnmap clEnqueueSVMUnmap_pfn
extern
CL_RUNTIME_EXPORT
void
*
(
CL_API_CALL
*
clSVMAlloc
)(
cl_context
context
,
cl_svm_mem_flags
flags
,
size_t
size
,
unsigned
int
alignment
);
extern
CL_RUNTIME_EXPORT
void
(
CL_API_CALL
*
clSVMFree
)(
cl_context
context
,
void
*
svm_pointer
);
extern
CL_RUNTIME_EXPORT
cl_int
(
CL_API_CALL
*
clSetKernelArgSVMPointer
)(
cl_kernel
kernel
,
cl_uint
arg_index
,
const
void
*
arg_value
);
//extern CL_RUNTIME_EXPORT void* (CL_API_CALL *clSetKernelExecInfo)(cl_kernel kernel, cl_kernel_exec_info param_name, size_t param_value_size, const void* param_value);
//extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMFree)(cl_command_queue command_queue, cl_uint num_svm_pointers, void* svm_pointers[],
// void (CL_CALLBACK *pfn_free_func)(cl_command_queue queue, cl_uint num_svm_pointers, void* svm_pointers[], void* user_data), void* user_data,
// cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
extern
CL_RUNTIME_EXPORT
cl_int
(
CL_API_CALL
*
clEnqueueSVMMemcpy
)(
cl_command_queue
command_queue
,
cl_bool
blocking_copy
,
void
*
dst_ptr
,
const
void
*
src_ptr
,
size_t
size
,
cl_uint
num_events_in_wait_list
,
const
cl_event
*
event_wait_list
,
cl_event
*
event
);
extern
CL_RUNTIME_EXPORT
cl_int
(
CL_API_CALL
*
clEnqueueSVMMemFill
)(
cl_command_queue
command_queue
,
void
*
svm_ptr
,
const
void
*
pattern
,
size_t
pattern_size
,
size_t
size
,
cl_uint
num_events_in_wait_list
,
const
cl_event
*
event_wait_list
,
cl_event
*
event
);
extern
CL_RUNTIME_EXPORT
cl_int
(
CL_API_CALL
*
clEnqueueSVMMap
)(
cl_command_queue
command_queue
,
cl_bool
blocking_map
,
cl_map_flags
map_flags
,
void
*
svm_ptr
,
size_t
size
,
cl_uint
num_events_in_wait_list
,
const
cl_event
*
event_wait_list
,
cl_event
*
event
);
extern
CL_RUNTIME_EXPORT
cl_int
(
CL_API_CALL
*
clEnqueueSVMUnmap
)(
cl_command_queue
command_queue
,
void
*
svm_ptr
,
cl_uint
num_events_in_wait_list
,
const
cl_event
*
event_wait_list
,
cl_event
*
event
);
#endif // HAVE_OPENCL_STATIC
#endif // HAVE_OPENCL_SVM
#endif // __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP__
modules/core/include/opencv2/core/opencl/runtime/opencl_svm_definitions.hpp
0 → 100644
浏览文件 @
0a07d780
/* See LICENSE file in the root OpenCV directory */
#ifndef __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP__
#define __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP__
#if defined(HAVE_OPENCL_SVM)
#if defined(CL_VERSION_2_0)
// OpenCL 2.0 contains SVM definitions
#else
typedef
cl_bitfield
cl_device_svm_capabilities
;
typedef
cl_bitfield
cl_svm_mem_flags
;
typedef
cl_uint
cl_kernel_exec_info
;
//
// TODO Add real values after OpenCL 2.0 release
//
#ifndef CL_DEVICE_SVM_CAPABILITIES
#define CL_DEVICE_SVM_CAPABILITIES 0x1053
#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER (1 << 0)
#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER (1 << 1)
#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM (1 << 2)
#define CL_DEVICE_SVM_ATOMICS (1 << 3)
#endif
#ifndef CL_MEM_SVM_FINE_GRAIN_BUFFER
#define CL_MEM_SVM_FINE_GRAIN_BUFFER (1 << 10)
#endif
#ifndef CL_MEM_SVM_ATOMICS
#define CL_MEM_SVM_ATOMICS (1 << 11)
#endif
#endif // CL_VERSION_2_0
#endif // HAVE_OPENCL_SVM
#endif // __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP__
modules/core/include/opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp
0 → 100644
浏览文件 @
0a07d780
/* See LICENSE file in the root OpenCV directory */
#ifndef __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP__
#define __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP__
#if defined(HAVE_OPENCL_SVM)
#include "opencl_core.hpp"
#ifndef CL_DEVICE_SVM_CAPABILITIES_AMD
//
// Part of the file is an extract from the cl_ext.h file from AMD APP SDK package.
// Below is the original copyright.
//
/*******************************************************************************
* Copyright (c) 2008-2013 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
/*******************************************
* Shared Virtual Memory (SVM) extension
*******************************************/
typedef
cl_bitfield
cl_device_svm_capabilities_amd
;
typedef
cl_bitfield
cl_svm_mem_flags_amd
;
typedef
cl_uint
cl_kernel_exec_info_amd
;
/* cl_device_info */
#define CL_DEVICE_SVM_CAPABILITIES_AMD 0x1053
#define CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT_AMD 0x1054
/* cl_device_svm_capabilities_amd */
#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_AMD (1 << 0)
#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER_AMD (1 << 1)
#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM_AMD (1 << 2)
#define CL_DEVICE_SVM_ATOMICS_AMD (1 << 3)
/* cl_svm_mem_flags_amd */
#define CL_MEM_SVM_FINE_GRAIN_BUFFER_AMD (1 << 10)
#define CL_MEM_SVM_ATOMICS_AMD (1 << 11)
/* cl_mem_info */
#define CL_MEM_USES_SVM_POINTER_AMD 0x1109
/* cl_kernel_exec_info_amd */
#define CL_KERNEL_EXEC_INFO_SVM_PTRS_AMD 0x11B6
#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_AMD 0x11B7
/* cl_command_type */
#define CL_COMMAND_SVM_FREE_AMD 0x1209
#define CL_COMMAND_SVM_MEMCPY_AMD 0x120A
#define CL_COMMAND_SVM_MEMFILL_AMD 0x120B
#define CL_COMMAND_SVM_MAP_AMD 0x120C
#define CL_COMMAND_SVM_UNMAP_AMD 0x120D
typedef
CL_API_ENTRY
void
*
(
CL_API_CALL
*
clSVMAllocAMD_fn
)(
cl_context
/* context */
,
cl_svm_mem_flags_amd
/* flags */
,
size_t
/* size */
,
unsigned
int
/* alignment */
)
CL_EXT_SUFFIX__VERSION_1_2
;
typedef
CL_API_ENTRY
void
(
CL_API_CALL
*
clSVMFreeAMD_fn
)(
cl_context
/* context */
,
void
*
/* svm_pointer */
)
CL_EXT_SUFFIX__VERSION_1_2
;
typedef
CL_API_ENTRY
cl_int
(
CL_API_CALL
*
clEnqueueSVMFreeAMD_fn
)(
cl_command_queue
/* command_queue */
,
cl_uint
/* num_svm_pointers */
,
void
**
/* svm_pointers */
,
void
(
CL_CALLBACK
*
)(
/*pfn_free_func*/
cl_command_queue
/* queue */
,
cl_uint
/* num_svm_pointers */
,
void
**
/* svm_pointers */
,
void
*
/* user_data */
),
void
*
/* user_data */
,
cl_uint
/* num_events_in_wait_list */
,
const
cl_event
*
/* event_wait_list */
,
cl_event
*
/* event */
)
CL_EXT_SUFFIX__VERSION_1_2
;
typedef
CL_API_ENTRY
cl_int
(
CL_API_CALL
*
clEnqueueSVMMemcpyAMD_fn
)(
cl_command_queue
/* command_queue */
,
cl_bool
/* blocking_copy */
,
void
*
/* dst_ptr */
,
const
void
*
/* src_ptr */
,
size_t
/* size */
,
cl_uint
/* num_events_in_wait_list */
,
const
cl_event
*
/* event_wait_list */
,
cl_event
*
/* event */
)
CL_EXT_SUFFIX__VERSION_1_2
;
typedef
CL_API_ENTRY
cl_int
(
CL_API_CALL
*
clEnqueueSVMMemFillAMD_fn
)(
cl_command_queue
/* command_queue */
,
void
*
/* svm_ptr */
,
const
void
*
/* pattern */
,
size_t
/* pattern_size */
,
size_t
/* size */
,
cl_uint
/* num_events_in_wait_list */
,
const
cl_event
*
/* event_wait_list */
,
cl_event
*
/* event */
)
CL_EXT_SUFFIX__VERSION_1_2
;
typedef
CL_API_ENTRY
cl_int
(
CL_API_CALL
*
clEnqueueSVMMapAMD_fn
)(
cl_command_queue
/* command_queue */
,
cl_bool
/* blocking_map */
,
cl_map_flags
/* map_flags */
,
void
*
/* svm_ptr */
,
size_t
/* size */
,
cl_uint
/* num_events_in_wait_list */
,
const
cl_event
*
/* event_wait_list */
,
cl_event
*
/* event */
)
CL_EXT_SUFFIX__VERSION_1_2
;
typedef
CL_API_ENTRY
cl_int
(
CL_API_CALL
*
clEnqueueSVMUnmapAMD_fn
)(
cl_command_queue
/* command_queue */
,
void
*
/* svm_ptr */
,
cl_uint
/* num_events_in_wait_list */
,
const
cl_event
*
/* event_wait_list */
,
cl_event
*
/* event */
)
CL_EXT_SUFFIX__VERSION_1_2
;
typedef
CL_API_ENTRY
cl_int
(
CL_API_CALL
*
clSetKernelArgSVMPointerAMD_fn
)(
cl_kernel
/* kernel */
,
cl_uint
/* arg_index */
,
const
void
*
/* arg_value */
)
CL_EXT_SUFFIX__VERSION_1_2
;
typedef
CL_API_ENTRY
cl_int
(
CL_API_CALL
*
clSetKernelExecInfoAMD_fn
)(
cl_kernel
/* kernel */
,
cl_kernel_exec_info_amd
/* param_name */
,
size_t
/* param_value_size */
,
const
void
*
/* param_value */
)
CL_EXT_SUFFIX__VERSION_1_2
;
#endif
#endif // HAVE_OPENCL_SVM
#endif // __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP__
modules/core/src/matmul.cpp
浏览文件 @
0a07d780
...
...
@@ -721,6 +721,16 @@ static bool ocl_gemm_amdblas( InputArray matA, InputArray matB, double alpha,
return
false
;
UMat
A
=
matA
.
getUMat
(),
B
=
matB
.
getUMat
(),
D
=
matD
.
getUMat
();
if
(
!
ocl
::
internal
::
isCLBuffer
(
A
)
||
!
ocl
::
internal
::
isCLBuffer
(
B
)
||
!
ocl
::
internal
::
isCLBuffer
(
D
))
{
return
false
;
}
if
(
haveC
)
{
UMat
C
=
matC
.
getUMat
();
if
(
!
ocl
::
internal
::
isCLBuffer
(
C
))
return
false
;
}
if
(
haveC
)
ctrans
?
transpose
(
matC
,
D
)
:
matC
.
copyTo
(
D
);
else
...
...
modules/core/src/matrix.cpp
浏览文件 @
0a07d780
...
...
@@ -159,8 +159,9 @@ void MatAllocator::copy(UMatData* usrc, UMatData* udst, int dims, const size_t s
memcpy
(
ptrs
[
1
],
ptrs
[
0
],
planesz
);
}
BufferPoolController
*
MatAllocator
::
getBufferPoolController
()
const
BufferPoolController
*
MatAllocator
::
getBufferPoolController
(
const
char
*
id
)
const
{
(
void
)
id
;
static
DummyBufferPoolController
dummy
;
return
&
dummy
;
}
...
...
modules/core/src/ocl.cpp
浏览文件 @
0a07d780
...
...
@@ -48,6 +48,8 @@
#define CV_OPENCL_ALWAYS_SHOW_BUILD_LOG 0
#define CV_OPENCL_SHOW_RUN_ERRORS 0
#define CV_OPENCL_SHOW_SVM_ERROR_LOG 1
#define CV_OPENCL_SHOW_SVM_LOG 0
#include "opencv2/core/bufferpool.hpp"
#ifndef LOG_BUFFER_POOL
...
...
@@ -111,6 +113,20 @@ static size_t getConfigurationParameterForSize(const char* name, size_t defaultV
CV_ErrorNoReturn
(
cv
::
Error
::
StsBadArg
,
cv
::
format
(
"Invalid value for %s parameter: %s"
,
name
,
value
.
c_str
()));
}
#if CV_OPENCL_SHOW_SVM_LOG
// TODO add timestamp logging
#define CV_OPENCL_SVM_TRACE_P printf("line %d (ocl.cpp): ", __LINE__); printf
#else
#define CV_OPENCL_SVM_TRACE_P(...)
#endif
#if CV_OPENCL_SHOW_SVM_ERROR_LOG
// TODO add timestamp logging
#define CV_OPENCL_SVM_TRACE_ERROR_P printf("Error on line %d (ocl.cpp): ", __LINE__); printf
#else
#define CV_OPENCL_SVM_TRACE_ERROR_P(...)
#endif
#include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp"
#include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
...
...
@@ -920,6 +936,7 @@ OCL_FUNC(cl_int, clGetSupportedImageFormats,
cl_uint
*
num_image_formats
),
(
context
,
flags
,
image_type
,
num_entries
,
image_formats
,
num_image_formats
))
/*
OCL_FUNC(cl_int, clGetMemObjectInfo,
(cl_mem memobj,
...
...
@@ -1342,6 +1359,12 @@ static bool isRaiseError()
#define CV_OclDbgAssert(expr) do { if (isRaiseError()) { CV_Assert(expr); } else { (void)(expr); } } while ((void)0, 0)
#endif
#ifdef HAVE_OPENCL_SVM
#include "opencv2/core/opencl/runtime/opencl_svm_20.hpp"
#include "opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp"
#include "opencv2/core/opencl/opencl_svm.hpp"
#endif
namespace
cv
{
namespace
ocl
{
struct
UMat2D
...
...
@@ -1627,6 +1650,15 @@ bool haveAmdFft()
#endif
bool
haveSVM
()
{
#ifdef HAVE_OPENCL_SVM
return
true
;
#else
return
false
;
#endif
}
void
finish
()
{
Queue
::
getDefault
().
finish
();
...
...
@@ -2357,12 +2389,86 @@ not_found:
}
#endif
#ifdef HAVE_OPENCL_SVM
namespace
svm
{
enum
AllocatorFlags
{
// don't use first 16 bits
OPENCL_SVM_COARSE_GRAIN_BUFFER
=
1
<<
16
,
// clSVMAlloc + SVM map/unmap
OPENCL_SVM_FINE_GRAIN_BUFFER
=
2
<<
16
,
// clSVMAlloc
OPENCL_SVM_FINE_GRAIN_SYSTEM
=
3
<<
16
,
// direct access
OPENCL_SVM_BUFFER_MASK
=
3
<<
16
,
OPENCL_SVM_BUFFER_MAP
=
4
<<
16
};
static
bool
checkForceSVMUmatUsage
()
{
static
bool
initialized
=
false
;
static
bool
force
=
false
;
if
(
!
initialized
)
{
force
=
getBoolParameter
(
"OPENCV_OPENCL_SVM_FORCE_UMAT_USAGE"
,
false
);
initialized
=
true
;
}
return
force
;
}
static
bool
checkDisableSVMUMatUsage
()
{
static
bool
initialized
=
false
;
static
bool
force
=
false
;
if
(
!
initialized
)
{
force
=
getBoolParameter
(
"OPENCV_OPENCL_SVM_DISABLE_UMAT_USAGE"
,
false
);
initialized
=
true
;
}
return
force
;
}
static
bool
checkDisableSVM
()
{
static
bool
initialized
=
false
;
static
bool
force
=
false
;
if
(
!
initialized
)
{
force
=
getBoolParameter
(
"OPENCV_OPENCL_SVM_DISABLE"
,
false
);
initialized
=
true
;
}
return
force
;
}
// see SVMCapabilities
static
unsigned
int
getSVMCapabilitiesMask
()
{
static
bool
initialized
=
false
;
static
unsigned
int
mask
=
0
;
if
(
!
initialized
)
{
const
char
*
envValue
=
getenv
(
"OPENCV_OPENCL_SVM_CAPABILITIES_MASK"
);
if
(
envValue
==
NULL
)
{
return
~
0U
;
// all bits 1
}
mask
=
atoi
(
envValue
);
initialized
=
true
;
}
return
mask
;
}
}
// namespace
#endif
struct
Context
::
Impl
{
Impl
()
static
Context
::
Impl
*
get
(
Context
&
context
)
{
return
context
.
p
;
}
void
__init
()
{
refcount
=
1
;
handle
=
0
;
#ifdef HAVE_OPENCL_SVM
svmInitialized
=
false
;
#endif
}
Impl
()
{
__init
();
}
void
setDefault
()
...
...
@@ -2401,8 +2507,7 @@ struct Context::Impl
Impl
(
int
dtype0
)
{
refcount
=
1
;
handle
=
0
;
__init
();
cl_int
retval
=
0
;
cl_platform_id
pl
=
(
cl_platform_id
)
Platform
::
getDefault
().
ptr
();
...
...
@@ -2419,7 +2524,7 @@ struct Context::Impl
AutoBuffer
<
void
*>
dlistbuf
(
nd0
*
2
+
1
);
cl_device_id
*
dlist
=
(
cl_device_id
*
)(
void
**
)
dlistbuf
;
cl_device_id
*
dlist_new
=
dlist
+
nd0
;
CV_OclDbgAssert
(
clGetDeviceIDs
(
pl
,
dtype
,
nd0
,
dlist
,
&
nd0
)
==
CL_SUCCESS
);
CV_OclDbgAssert
(
clGetDeviceIDs
(
pl
,
dtype
,
nd0
,
dlist
,
&
nd0
)
==
CL_SUCCESS
);
String
name0
;
for
(
i
=
0
;
i
<
nd0
;
i
++
)
...
...
@@ -2496,6 +2601,144 @@ struct Context::Impl
};
typedef
std
::
map
<
HashKey
,
Program
>
phash_t
;
phash_t
phash
;
#ifdef HAVE_OPENCL_SVM
bool
svmInitialized
;
bool
svmAvailable
;
bool
svmEnabled
;
svm
::
SVMCapabilities
svmCapabilities
;
svm
::
SVMFunctions
svmFunctions
;
void
svmInit
()
{
CV_Assert
(
handle
!=
NULL
);
const
Device
&
device
=
devices
[
0
];
cl_device_svm_capabilities
deviceCaps
=
0
;
CV_Assert
(((
void
)
0
,
CL_DEVICE_SVM_CAPABILITIES
==
CL_DEVICE_SVM_CAPABILITIES_AMD
));
// Check assumption
cl_int
status
=
clGetDeviceInfo
((
cl_device_id
)
device
.
ptr
(),
CL_DEVICE_SVM_CAPABILITIES
,
sizeof
(
deviceCaps
),
&
deviceCaps
,
NULL
);
if
(
status
!=
CL_SUCCESS
)
{
CV_OPENCL_SVM_TRACE_ERROR_P
(
"CL_DEVICE_SVM_CAPABILITIES via clGetDeviceInfo failed: %d
\n
"
,
status
);
goto
noSVM
;
}
CV_OPENCL_SVM_TRACE_P
(
"CL_DEVICE_SVM_CAPABILITIES returned: 0x%x
\n
"
,
(
int
)
deviceCaps
);
CV_Assert
(((
void
)
0
,
CL_DEVICE_SVM_COARSE_GRAIN_BUFFER
==
CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_AMD
));
// Check assumption
svmCapabilities
.
value_
=
((
deviceCaps
&
CL_DEVICE_SVM_COARSE_GRAIN_BUFFER
)
?
svm
::
SVMCapabilities
::
SVM_COARSE_GRAIN_BUFFER
:
0
)
|
((
deviceCaps
&
CL_DEVICE_SVM_FINE_GRAIN_BUFFER
)
?
svm
::
SVMCapabilities
::
SVM_FINE_GRAIN_BUFFER
:
0
)
|
((
deviceCaps
&
CL_DEVICE_SVM_FINE_GRAIN_SYSTEM
)
?
svm
::
SVMCapabilities
::
SVM_FINE_GRAIN_SYSTEM
:
0
)
|
((
deviceCaps
&
CL_DEVICE_SVM_ATOMICS
)
?
svm
::
SVMCapabilities
::
SVM_ATOMICS
:
0
);
svmCapabilities
.
value_
&=
svm
::
getSVMCapabilitiesMask
();
if
(
svmCapabilities
.
value_
==
0
)
{
CV_OPENCL_SVM_TRACE_ERROR_P
(
"svmCapabilities is empty
\n
"
);
goto
noSVM
;
}
try
{
// Try OpenCL 2.0
CV_OPENCL_SVM_TRACE_P
(
"Try SVM from OpenCL 2.0 ...
\n
"
);
void
*
ptr
=
clSVMAlloc
(
handle
,
CL_MEM_READ_WRITE
,
100
,
0
);
if
(
!
ptr
)
{
CV_OPENCL_SVM_TRACE_ERROR_P
(
"clSVMAlloc returned NULL...
\n
"
);
CV_ErrorNoReturn
(
Error
::
StsBadArg
,
"clSVMAlloc returned NULL"
);
}
try
{
bool
error
=
false
;
cl_command_queue
q
=
(
cl_command_queue
)
Queue
::
getDefault
().
ptr
();
if
(
CL_SUCCESS
!=
clEnqueueSVMMap
(
q
,
CL_TRUE
,
CL_MAP_WRITE
,
ptr
,
100
,
0
,
NULL
,
NULL
))
{
CV_OPENCL_SVM_TRACE_ERROR_P
(
"clEnqueueSVMMap failed...
\n
"
);
CV_ErrorNoReturn
(
Error
::
StsBadArg
,
"clEnqueueSVMMap FAILED"
);
}
clFinish
(
q
);
try
{
((
int
*
)
ptr
)[
0
]
=
100
;
}
catch
(...)
{
CV_OPENCL_SVM_TRACE_ERROR_P
(
"SVM buffer access test FAILED
\n
"
);
error
=
true
;
}
if
(
CL_SUCCESS
!=
clEnqueueSVMUnmap
(
q
,
ptr
,
0
,
NULL
,
NULL
))
{
CV_OPENCL_SVM_TRACE_ERROR_P
(
"clEnqueueSVMUnmap failed...
\n
"
);
CV_ErrorNoReturn
(
Error
::
StsBadArg
,
"clEnqueueSVMUnmap FAILED"
);
}
clFinish
(
q
);
if
(
error
)
{
CV_ErrorNoReturn
(
Error
::
StsBadArg
,
"OpenCL SVM buffer access test was FAILED"
);
}
}
catch
(...)
{
CV_OPENCL_SVM_TRACE_ERROR_P
(
"OpenCL SVM buffer access test was FAILED
\n
"
);
clSVMFree
(
handle
,
ptr
);
throw
;
}
clSVMFree
(
handle
,
ptr
);
svmFunctions
.
fn_clSVMAlloc
=
clSVMAlloc
;
svmFunctions
.
fn_clSVMFree
=
clSVMFree
;
svmFunctions
.
fn_clSetKernelArgSVMPointer
=
clSetKernelArgSVMPointer
;
//svmFunctions.fn_clSetKernelExecInfo = clSetKernelExecInfo;
//svmFunctions.fn_clEnqueueSVMFree = clEnqueueSVMFree;
svmFunctions
.
fn_clEnqueueSVMMemcpy
=
clEnqueueSVMMemcpy
;
svmFunctions
.
fn_clEnqueueSVMMemFill
=
clEnqueueSVMMemFill
;
svmFunctions
.
fn_clEnqueueSVMMap
=
clEnqueueSVMMap
;
svmFunctions
.
fn_clEnqueueSVMUnmap
=
clEnqueueSVMUnmap
;
}
catch
(...)
{
CV_OPENCL_SVM_TRACE_P
(
"clSVMAlloc failed, trying HSA extension...
\n
"
);
try
{
// Try HSA extension
String
extensions
=
device
.
extensions
();
if
(
extensions
.
find
(
"cl_amd_svm"
)
==
String
::
npos
)
{
CV_OPENCL_SVM_TRACE_P
(
"Device extension doesn't have cl_amd_svm: %s
\n
"
,
extensions
.
c_str
());
goto
noSVM
;
}
cl_platform_id
p
=
NULL
;
status
=
clGetDeviceInfo
((
cl_device_id
)
device
.
ptr
(),
CL_DEVICE_PLATFORM
,
sizeof
(
cl_platform_id
),
&
p
,
NULL
);
CV_Assert
(
status
==
CL_SUCCESS
);
svmFunctions
.
fn_clSVMAlloc
=
(
clSVMAllocAMD_fn
)
clGetExtensionFunctionAddressForPlatform
(
p
,
"clSVMAllocAMD"
);
svmFunctions
.
fn_clSVMFree
=
(
clSVMFreeAMD_fn
)
clGetExtensionFunctionAddressForPlatform
(
p
,
"clSVMFreeAMD"
);
svmFunctions
.
fn_clSetKernelArgSVMPointer
=
(
clSetKernelArgSVMPointerAMD_fn
)
clGetExtensionFunctionAddressForPlatform
(
p
,
"clSetKernelArgSVMPointerAMD"
);
//svmFunctions.fn_clSetKernelExecInfo = (clSetKernelExecInfoAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSetKernelExecInfoAMD");
//svmFunctions.fn_clEnqueueSVMFree = (clEnqueueSVMFreeAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMFreeAMD");
svmFunctions
.
fn_clEnqueueSVMMemcpy
=
(
clEnqueueSVMMemcpyAMD_fn
)
clGetExtensionFunctionAddressForPlatform
(
p
,
"clEnqueueSVMMemcpyAMD"
);
svmFunctions
.
fn_clEnqueueSVMMemFill
=
(
clEnqueueSVMMemFillAMD_fn
)
clGetExtensionFunctionAddressForPlatform
(
p
,
"clEnqueueSVMMemFillAMD"
);
svmFunctions
.
fn_clEnqueueSVMMap
=
(
clEnqueueSVMMapAMD_fn
)
clGetExtensionFunctionAddressForPlatform
(
p
,
"clEnqueueSVMMapAMD"
);
svmFunctions
.
fn_clEnqueueSVMUnmap
=
(
clEnqueueSVMUnmapAMD_fn
)
clGetExtensionFunctionAddressForPlatform
(
p
,
"clEnqueueSVMUnmapAMD"
);
CV_Assert
(
svmFunctions
.
isValid
());
}
catch
(...)
{
CV_OPENCL_SVM_TRACE_P
(
"Something is totally wrong
\n
"
);
goto
noSVM
;
}
}
svmAvailable
=
true
;
svmEnabled
=
!
svm
::
checkDisableSVM
();
svmInitialized
=
true
;
CV_OPENCL_SVM_TRACE_P
(
"OpenCV OpenCL SVM support initialized
\n
"
);
return
;
noSVM:
CV_OPENCL_SVM_TRACE_P
(
"OpenCL SVM is not detected
\n
"
);
svmAvailable
=
false
;
svmEnabled
=
false
;
svmCapabilities
.
value_
=
0
;
svmInitialized
=
true
;
svmFunctions
.
fn_clSVMAlloc
=
NULL
;
return
;
}
#endif
};
...
...
@@ -2610,6 +2853,71 @@ Program Context::getProg(const ProgramSource& prog,
return
p
?
p
->
getProg
(
prog
,
buildopts
,
errmsg
)
:
Program
();
}
#ifdef HAVE_OPENCL_SVM
bool
Context
::
useSVM
()
const
{
Context
::
Impl
*
i
=
p
;
CV_Assert
(
i
);
if
(
!
i
->
svmInitialized
)
i
->
svmInit
();
return
i
->
svmEnabled
;
}
void
Context
::
setUseSVM
(
bool
enabled
)
{
Context
::
Impl
*
i
=
p
;
CV_Assert
(
i
);
if
(
!
i
->
svmInitialized
)
i
->
svmInit
();
if
(
enabled
&&
!
i
->
svmAvailable
)
{
CV_ErrorNoReturn
(
Error
::
StsError
,
"OpenCL Shared Virtual Memory (SVM) is not supported by OpenCL device"
);
}
i
->
svmEnabled
=
enabled
;
}
#else
bool
Context
::
useSVM
()
const
{
return
false
;
}
void
Context
::
setUseSVM
(
bool
enabled
)
{
CV_Assert
(
!
enabled
);
}
#endif
#ifdef HAVE_OPENCL_SVM
namespace
svm
{
const
SVMCapabilities
getSVMCapabilitites
(
const
ocl
::
Context
&
context
)
{
Context
::
Impl
*
i
=
context
.
p
;
CV_Assert
(
i
);
if
(
!
i
->
svmInitialized
)
i
->
svmInit
();
return
i
->
svmCapabilities
;
}
CV_EXPORTS
const
SVMFunctions
*
getSVMFunctions
(
const
ocl
::
Context
&
context
)
{
Context
::
Impl
*
i
=
context
.
p
;
CV_Assert
(
i
);
CV_Assert
(
i
->
svmInitialized
);
// getSVMCapabilitites() must be called first
CV_Assert
(
i
->
svmFunctions
.
fn_clSVMAlloc
!=
NULL
);
return
&
i
->
svmFunctions
;
}
CV_EXPORTS
bool
useSVM
(
UMatUsageFlags
usageFlags
)
{
if
(
checkForceSVMUmatUsage
())
return
true
;
if
(
checkDisableSVMUMatUsage
())
return
false
;
if
((
usageFlags
&
USAGE_ALLOCATE_SHARED_MEMORY
)
!=
0
)
return
true
;
return
false
;
// don't use SVM by default
}
}
// namespace cv::ocl::svm
#endif // HAVE_OPENCL_SVM
void
initializeContextFromHandle
(
Context
&
ctx
,
void
*
platform
,
void
*
_context
,
void
*
_device
)
{
cl_context
context
=
(
cl_context
)
_context
;
...
...
@@ -2979,12 +3287,33 @@ int Kernel::set(int i, const KernelArg& arg)
return
-
1
;
}
#ifdef HAVE_OPENCL_SVM
if
((
arg
.
m
->
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
!=
0
)
{
const
Context
&
ctx
=
Context
::
getDefault
();
const
svm
::
SVMFunctions
*
svmFns
=
svm
::
getSVMFunctions
(
ctx
);
uchar
*&
svmDataPtr
=
(
uchar
*&
)
arg
.
m
->
u
->
handle
;
CV_OPENCL_SVM_TRACE_P
(
"clSetKernelArgSVMPointer: %p
\n
"
,
svmDataPtr
);
#if 1 // TODO
cl_int
status
=
svmFns
->
fn_clSetKernelArgSVMPointer
(
p
->
handle
,
(
cl_uint
)
i
,
svmDataPtr
);
#else
cl_int
status
=
svmFns
->
fn_clSetKernelArgSVMPointer
(
p
->
handle
,
(
cl_uint
)
i
,
&
svmDataPtr
);
#endif
CV_Assert
(
status
==
CL_SUCCESS
);
}
else
#endif
{
CV_OclDbgAssert
(
clSetKernelArg
(
p
->
handle
,
(
cl_uint
)
i
,
sizeof
(
h
),
&
h
)
==
CL_SUCCESS
);
}
if
(
ptronly
)
CV_OclDbgAssert
(
clSetKernelArg
(
p
->
handle
,
(
cl_uint
)
i
++
,
sizeof
(
h
),
&
h
)
==
CL_SUCCESS
);
{
i
++
;
}
else
if
(
arg
.
m
->
dims
<=
2
)
{
UMat2D
u2d
(
*
arg
.
m
);
CV_OclDbgAssert
(
clSetKernelArg
(
p
->
handle
,
(
cl_uint
)
i
,
sizeof
(
h
),
&
h
)
==
CL_SUCCESS
);
CV_OclDbgAssert
(
clSetKernelArg
(
p
->
handle
,
(
cl_uint
)(
i
+
1
),
sizeof
(
u2d
.
step
),
&
u2d
.
step
)
==
CL_SUCCESS
);
CV_OclDbgAssert
(
clSetKernelArg
(
p
->
handle
,
(
cl_uint
)(
i
+
2
),
sizeof
(
u2d
.
offset
),
&
u2d
.
offset
)
==
CL_SUCCESS
);
i
+=
3
;
...
...
@@ -3000,7 +3329,6 @@ int Kernel::set(int i, const KernelArg& arg)
else
{
UMat3D
u3d
(
*
arg
.
m
);
CV_OclDbgAssert
(
clSetKernelArg
(
p
->
handle
,
(
cl_uint
)
i
,
sizeof
(
h
),
&
h
)
==
CL_SUCCESS
);
CV_OclDbgAssert
(
clSetKernelArg
(
p
->
handle
,
(
cl_uint
)(
i
+
1
),
sizeof
(
u3d
.
slicestep
),
&
u3d
.
slicestep
)
==
CL_SUCCESS
);
CV_OclDbgAssert
(
clSetKernelArg
(
p
->
handle
,
(
cl_uint
)(
i
+
2
),
sizeof
(
u3d
.
step
),
&
u3d
.
step
)
==
CL_SUCCESS
);
CV_OclDbgAssert
(
clSetKernelArg
(
p
->
handle
,
(
cl_uint
)(
i
+
3
),
sizeof
(
u3d
.
offset
),
&
u3d
.
offset
)
==
CL_SUCCESS
);
...
...
@@ -3433,39 +3761,55 @@ ProgramSource::hash_t ProgramSource::hash() const
//////////////////////////////////////////// OpenCLAllocator //////////////////////////////////////////////////
template
<
typename
T
>
class
OpenCLBufferPool
{
protected:
~
OpenCLBufferPool
()
{
}
public:
virtual
cl_mem
allocate
(
size_t
size
,
CV_OUT
size_t
&
capacity
)
=
0
;
virtual
void
release
(
cl_mem
handle
,
size_t
capacity
)
=
0
;
virtual
T
allocate
(
size_t
size
)
=
0
;
virtual
void
release
(
T
buffer
)
=
0
;
};
class
OpenCLBufferPoolImpl
:
public
BufferPoolController
,
public
OpenCLBufferPool
template
<
typename
Derived
,
typename
BufferEntry
,
typename
T
>
class
OpenCLBufferPoolBaseImpl
:
public
BufferPoolController
,
public
OpenCLBufferPool
<
T
>
{
public:
struct
BufferEntry
{
cl_mem
clBuffer_
;
size_t
capacity_
;
};
private:
inline
Derived
&
derived
()
{
return
*
static_cast
<
Derived
*>
(
this
);
}
protected:
Mutex
mutex_
;
size_t
currentReservedSize
;
size_t
maxReservedSize
;
std
::
list
<
BufferEntry
>
reservedEntries_
;
// LRU order
std
::
list
<
BufferEntry
>
allocatedEntries_
;
// Allocated and used entries
std
::
list
<
BufferEntry
>
reservedEntries_
;
// LRU order. Allocated, but not used entries
// synchronized
bool
_findAndRemoveEntryFromAllocatedList
(
CV_OUT
BufferEntry
&
entry
,
T
buffer
)
{
typename
std
::
list
<
BufferEntry
>::
iterator
i
=
allocatedEntries_
.
begin
();
for
(;
i
!=
allocatedEntries_
.
end
();
++
i
)
{
BufferEntry
&
e
=
*
i
;
if
(
e
.
clBuffer_
==
buffer
)
{
entry
=
e
;
allocatedEntries_
.
erase
(
i
);
return
true
;
}
}
return
false
;
}
// synchronized
bool
_findAndRemoveEntryFromReservedList
(
CV_OUT
BufferEntry
&
entry
,
const
size_t
size
)
{
if
(
reservedEntries_
.
empty
())
return
false
;
std
::
list
<
BufferEntry
>::
iterator
i
=
reservedEntries_
.
begin
();
std
::
list
<
BufferEntry
>::
iterator
result_pos
=
reservedEntries_
.
end
();
BufferEntry
result
=
{
NULL
,
0
}
;
typename
std
::
list
<
BufferEntry
>::
iterator
i
=
reservedEntries_
.
begin
();
typename
std
::
list
<
BufferEntry
>::
iterator
result_pos
=
reservedEntries_
.
end
();
BufferEntry
result
;
size_t
minDiff
=
(
size_t
)(
-
1
);
for
(;
i
!=
reservedEntries_
.
end
();
++
i
)
{
...
...
@@ -3489,6 +3833,7 @@ protected:
reservedEntries_
.
erase
(
result_pos
);
entry
=
result
;
currentReservedSize
-=
entry
.
capacity_
;
allocatedEntries_
.
push_back
(
entry
);
return
true
;
}
return
false
;
...
...
@@ -3503,7 +3848,7 @@ protected:
const
BufferEntry
&
entry
=
reservedEntries_
.
back
();
CV_DbgAssert
(
currentReservedSize
>=
entry
.
capacity_
);
currentReservedSize
-=
entry
.
capacity_
;
_releaseBufferEntry
(
entry
);
derived
().
_releaseBufferEntry
(
entry
);
reservedEntries_
.
pop_back
();
}
}
...
...
@@ -3523,72 +3868,45 @@ protected:
return
1024
*
1024
;
}
void
_allocateBufferEntry
(
BufferEntry
&
entry
,
size_t
size
)
{
CV_DbgAssert
(
entry
.
clBuffer_
==
NULL
);
entry
.
capacity_
=
alignSize
(
size
,
(
int
)
_allocationGranularity
(
size
));
Context
&
ctx
=
Context
::
getDefault
();
cl_int
retval
=
CL_SUCCESS
;
entry
.
clBuffer_
=
clCreateBuffer
((
cl_context
)
ctx
.
ptr
(),
CL_MEM_READ_WRITE
,
entry
.
capacity_
,
0
,
&
retval
);
CV_Assert
(
retval
==
CL_SUCCESS
);
CV_Assert
(
entry
.
clBuffer_
!=
NULL
);
if
(
retval
==
CL_SUCCESS
)
{
CV_IMPL_ADD
(
CV_IMPL_OCL
);
}
LOG_BUFFER_POOL
(
"OpenCL allocate %lld (0x%llx) bytes: %p
\n
"
,
(
long
long
)
entry
.
capacity_
,
(
long
long
)
entry
.
capacity_
,
entry
.
clBuffer_
);
}
void
_releaseBufferEntry
(
const
BufferEntry
&
entry
)
{
CV_Assert
(
entry
.
capacity_
!=
0
);
CV_Assert
(
entry
.
clBuffer_
!=
NULL
);
LOG_BUFFER_POOL
(
"OpenCL release buffer: %p, %lld (0x%llx) bytes
\n
"
,
entry
.
clBuffer_
,
(
long
long
)
entry
.
capacity_
,
(
long
long
)
entry
.
capacity_
);
clReleaseMemObject
(
entry
.
clBuffer_
);
}
public:
OpenCLBufferPoolImpl
()
:
currentReservedSize
(
0
),
maxReservedSize
(
0
)
OpenCLBufferPoolBaseImpl
()
:
currentReservedSize
(
0
),
maxReservedSize
(
0
)
{
int
poolSize
=
ocl
::
Device
::
getDefault
().
isIntel
()
?
1
<<
27
:
0
;
maxReservedSize
=
getConfigurationParameterForSize
(
"OPENCV_OPENCL_BUFFERPOOL_LIMIT"
,
poolSize
);
// nothing
}
virtual
~
OpenCLBufferPoolImpl
()
virtual
~
OpenCLBufferPool
Base
Impl
()
{
freeAllReservedBuffers
();
CV_Assert
(
reservedEntries_
.
empty
());
}
public:
virtual
cl_mem
allocate
(
size_t
size
,
CV_OUT
size_t
&
capacity
)
virtual
T
allocate
(
size_t
size
)
{
BufferEntry
entry
=
{
NULL
,
0
};
if
(
maxReservedSize
>
0
)
AutoLock
locker
(
mutex_
);
BufferEntry
entry
;
if
(
maxReservedSize
>
0
&&
_findAndRemoveEntryFromReservedList
(
entry
,
size
))
{
AutoLock
locker
(
mutex_
);
if
(
_findAndRemoveEntryFromReservedList
(
entry
,
size
))
{
CV_DbgAssert
(
size
<=
entry
.
capacity_
);
LOG_BUFFER_POOL
(
"Reuse reserved buffer: %p
\n
"
,
entry
.
clBuffer_
);
capacity
=
entry
.
capacity_
;
return
entry
.
clBuffer_
;
}
CV_DbgAssert
(
size
<=
entry
.
capacity_
);
LOG_BUFFER_POOL
(
"Reuse reserved buffer: %p
\n
"
,
entry
.
clBuffer_
);
}
else
{
derived
().
_allocateBufferEntry
(
entry
,
size
);
}
_allocateBufferEntry
(
entry
,
size
);
capacity
=
entry
.
capacity_
;
return
entry
.
clBuffer_
;
}
virtual
void
release
(
cl_mem
handle
,
size_t
capacity
)
virtual
void
release
(
T
buffer
)
{
BufferEntry
entry
=
{
handle
,
capacity
};
AutoLock
locker
(
mutex_
);
BufferEntry
entry
;
CV_Assert
(
_findAndRemoveEntryFromAllocatedList
(
entry
,
buffer
));
if
(
maxReservedSize
==
0
||
entry
.
capacity_
>
maxReservedSize
/
8
)
{
_releaseBufferEntry
(
entry
);
derived
().
_releaseBufferEntry
(
entry
);
}
else
{
AutoLock
locker
(
mutex_
);
reservedEntries_
.
push_front
(
entry
);
currentReservedSize
+=
entry
.
capacity_
;
_checkSizeOfReservedEntries
();
...
...
@@ -3604,7 +3922,7 @@ public:
maxReservedSize
=
size
;
if
(
maxReservedSize
<
oldMaxReservedSize
)
{
std
::
list
<
BufferEntry
>::
iterator
i
=
reservedEntries_
.
begin
();
typename
std
::
list
<
BufferEntry
>::
iterator
i
=
reservedEntries_
.
begin
();
for
(;
i
!=
reservedEntries_
.
end
();)
{
const
BufferEntry
&
entry
=
*
i
;
...
...
@@ -3612,7 +3930,7 @@ public:
{
CV_DbgAssert
(
currentReservedSize
>=
entry
.
capacity_
);
currentReservedSize
-=
entry
.
capacity_
;
_releaseBufferEntry
(
entry
);
derived
().
_releaseBufferEntry
(
entry
);
i
=
reservedEntries_
.
erase
(
i
);
continue
;
}
...
...
@@ -3624,16 +3942,123 @@ public:
virtual
void
freeAllReservedBuffers
()
{
AutoLock
locker
(
mutex_
);
std
::
list
<
BufferEntry
>::
const_iterator
i
=
reservedEntries_
.
begin
();
typename
std
::
list
<
BufferEntry
>::
const_iterator
i
=
reservedEntries_
.
begin
();
for
(;
i
!=
reservedEntries_
.
end
();
++
i
)
{
const
BufferEntry
&
entry
=
*
i
;
_releaseBufferEntry
(
entry
);
derived
().
_releaseBufferEntry
(
entry
);
}
reservedEntries_
.
clear
();
}
};
// Bookkeeping record for one pooled OpenCL buffer: the cl_mem handle and
// the capacity it was allocated with.
struct CLBufferEntry
{
    cl_mem clBuffer_;
    size_t capacity_;

    CLBufferEntry()
        : clBuffer_((cl_mem)NULL),
          capacity_(0)
    {
        // empty: an entry starts unbound
    }
};
// Buffer pool backed by regular OpenCL cl_mem buffers.
// Provides the _allocateBufferEntry/_releaseBufferEntry hooks that the
// CRTP base (OpenCLBufferPoolBaseImpl) calls; extra creation flags
// (e.g. CL_MEM_ALLOC_HOST_PTR) can be mixed in via the constructor.
class OpenCLBufferPoolImpl : public OpenCLBufferPoolBaseImpl<OpenCLBufferPoolImpl, CLBufferEntry, cl_mem>
{
public:
    typedef struct CLBufferEntry BufferEntry;
protected:
    int createFlags_;  // extra cl_mem_flags OR-ed into every clCreateBuffer call
public:
    OpenCLBufferPoolImpl(int createFlags = 0)
        : createFlags_(createFlags)
    {
    }

    // Allocate a fresh cl_mem buffer for 'size' bytes (rounded up to the
    // pool's allocation granularity) and register it in allocatedEntries_.
    void _allocateBufferEntry(BufferEntry& entry, size_t size)
    {
        CV_DbgAssert(entry.clBuffer_ == NULL);
        // Round the capacity up so similar-sized requests can reuse entries.
        entry.capacity_ = alignSize(size, (int)_allocationGranularity(size));
        Context& ctx = Context::getDefault();
        cl_int retval = CL_SUCCESS;
        entry.clBuffer_ = clCreateBuffer((cl_context)ctx.ptr(), CL_MEM_READ_WRITE|createFlags_, entry.capacity_, 0, &retval);
        CV_Assert(retval == CL_SUCCESS);
        CV_Assert(entry.clBuffer_ != NULL);
        if (retval == CL_SUCCESS)
        {
            CV_IMPL_ADD(CV_IMPL_OCL);
        }
        LOG_BUFFER_POOL("OpenCL allocate %lld (0x%llx) bytes: %p\n",
                (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_);
        allocatedEntries_.push_back(entry);
    }

    // Destroy the cl_mem buffer held by 'entry' (entry must be valid).
    void _releaseBufferEntry(const BufferEntry& entry)
    {
        CV_Assert(entry.capacity_ != 0);
        CV_Assert(entry.clBuffer_ != NULL);
        LOG_BUFFER_POOL("OpenCL release buffer: %p, %lld (0x%llx) bytes\n",
                entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_);
        clReleaseMemObject(entry.clBuffer_);
    }
};
#ifdef HAVE_OPENCL_SVM
// Bookkeeping record for one pooled SVM allocation: the host-visible
// pointer returned by clSVMAlloc and the capacity it was allocated with.
struct CLSVMBufferEntry
{
    void* clBuffer_;
    size_t capacity_;

    CLSVMBufferEntry()
        : clBuffer_(NULL),
          capacity_(0)
    {
        // empty: an entry starts unbound
    }
};
// Buffer pool backed by OpenCL 2.0 Shared Virtual Memory allocations.
// Same CRTP hooks as OpenCLBufferPoolImpl, but buffers are obtained via
// clSVMAlloc/clSVMFree through the per-context SVM function table.
class OpenCLSVMBufferPoolImpl : public OpenCLBufferPoolBaseImpl<OpenCLSVMBufferPoolImpl, CLSVMBufferEntry, void*>
{
public:
    typedef struct CLSVMBufferEntry BufferEntry;
public:
    OpenCLSVMBufferPoolImpl()
    {
    }

    // Allocate an SVM buffer for 'size' bytes (rounded up to the pool's
    // allocation granularity) and register it in allocatedEntries_.
    void _allocateBufferEntry(BufferEntry& entry, size_t size)
    {
        CV_DbgAssert(entry.clBuffer_ == NULL);
        // Round the capacity up so similar-sized requests can reuse entries.
        entry.capacity_ = alignSize(size, (int)_allocationGranularity(size));

        Context& ctx = Context::getDefault();
        const svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx);
        bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer();
        // Request a fine-grain SVM buffer when the device reports support for it.
        cl_svm_mem_flags memFlags = CL_MEM_READ_WRITE |
                (isFineGrainBuffer ? CL_MEM_SVM_FINE_GRAIN_BUFFER : 0);

        const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
        CV_DbgAssert(svmFns->isValid());

        CV_OPENCL_SVM_TRACE_P("clSVMAlloc: %d\n", (int)entry.capacity_);
        void *buf = svmFns->fn_clSVMAlloc((cl_context)ctx.ptr(), memFlags, entry.capacity_, 0);
        CV_Assert(buf);

        entry.clBuffer_ = buf;
        {
            CV_IMPL_ADD(CV_IMPL_OCL);
        }
        LOG_BUFFER_POOL("OpenCL SVM allocate %lld (0x%llx) bytes: %p\n",
                (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_);
        allocatedEntries_.push_back(entry);
    }

    // Free the SVM pointer held by 'entry' via clSVMFree (entry must be valid).
    void _releaseBufferEntry(const BufferEntry& entry)
    {
        CV_Assert(entry.capacity_ != 0);
        CV_Assert(entry.clBuffer_ != NULL);
        LOG_BUFFER_POOL("OpenCL release SVM buffer: %p, %lld (0x%llx) bytes\n",
                entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_);
        Context& ctx = Context::getDefault();
        const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
        CV_DbgAssert(svmFns->isValid());
        CV_OPENCL_SVM_TRACE_P("clSVMFree: %p\n", entry.clBuffer_);
        svmFns->fn_clSVMFree((cl_context)ctx.ptr(), entry.clBuffer_);
    }
};
#endif
#if defined _MSC_VER
#pragma warning(disable:4127) // conditional expression is constant
#endif
...
...
@@ -3697,12 +4122,37 @@ private:
class
OpenCLAllocator
:
public
MatAllocator
{
mutable
OpenCLBufferPoolImpl
bufferPool
;
mutable
OpenCLBufferPoolImpl
bufferPoolHostPtr
;
#ifdef HAVE_OPENCL_SVM
mutable
OpenCLSVMBufferPoolImpl
bufferPoolSVM
;
#endif
enum
AllocatorFlags
{
ALLOCATOR_FLAGS_BUFFER_POOL_USED
=
1
<<
0
ALLOCATOR_FLAGS_BUFFER_POOL_USED
=
1
<<
0
,
ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED
=
1
<<
1
#ifdef HAVE_OPENCL_SVM
,
ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED
=
1
<<
2
#endif
};
public:
OpenCLAllocator
()
{
matStdAllocator
=
Mat
::
getStdAllocator
();
}
OpenCLAllocator
()
:
bufferPool
(
0
),
bufferPoolHostPtr
(
CL_MEM_ALLOC_HOST_PTR
)
{
size_t
defaultPoolSize
,
poolSize
;
defaultPoolSize
=
ocl
::
Device
::
getDefault
().
isIntel
()
?
1
<<
27
:
0
;
poolSize
=
getConfigurationParameterForSize
(
"OPENCV_OPENCL_BUFFERPOOL_LIMIT"
,
defaultPoolSize
);
bufferPool
.
setMaxReservedSize
(
poolSize
);
poolSize
=
getConfigurationParameterForSize
(
"OPENCV_OPENCL_HOST_PTR_BUFFERPOOL_LIMIT"
,
defaultPoolSize
);
bufferPoolHostPtr
.
setMaxReservedSize
(
poolSize
);
#ifdef HAVE_OPENCL_SVM
poolSize
=
getConfigurationParameterForSize
(
"OPENCV_OPENCL_SVM_BUFFERPOOL_LIMIT"
,
defaultPoolSize
);
bufferPoolSVM
.
setMaxReservedSize
(
poolSize
);
#endif
matStdAllocator
=
Mat
::
getStdAllocator
();
}
UMatData
*
defaultAllocate
(
int
dims
,
const
int
*
sizes
,
int
type
,
void
*
data
,
size_t
*
step
,
int
flags
,
UMatUsageFlags
usageFlags
)
const
...
...
@@ -3739,33 +4189,47 @@ public:
}
Context
&
ctx
=
Context
::
getDefault
();
int
createFlags
=
0
,
flags0
=
0
;
getBestFlags
(
ctx
,
flags
,
usageFlags
,
createFlags
,
flags0
);
size_t
capacity
=
0
;
void
*
handle
=
NULL
;
int
allocatorFlags
=
0
;
#ifdef HAVE_OPENCL_SVM
const
svm
::
SVMCapabilities
svmCaps
=
svm
::
getSVMCapabilitites
(
ctx
);
if
(
ctx
.
useSVM
()
&&
svm
::
useSVM
(
usageFlags
)
&&
!
svmCaps
.
isNoSVMSupport
())
{
allocatorFlags
=
ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED
;
handle
=
bufferPoolSVM
.
allocate
(
total
);
// this property is constant, so single buffer pool can be used here
bool
isFineGrainBuffer
=
svmCaps
.
isSupportFineGrainBuffer
();
allocatorFlags
|=
isFineGrainBuffer
?
svm
::
OPENCL_SVM_FINE_GRAIN_BUFFER
:
svm
::
OPENCL_SVM_COARSE_GRAIN_BUFFER
;
}
else
#endif
if
(
createFlags
==
0
)
{
handle
=
bufferPool
.
allocate
(
total
,
capacity
);
if
(
!
handle
)
return
defaultAllocate
(
dims
,
sizes
,
type
,
data
,
step
,
flags
,
usageFlags
);
allocatorFlags
=
ALLOCATOR_FLAGS_BUFFER_POOL_USED
;
handle
=
bufferPool
.
allocate
(
total
);
}
else
if
(
createFlags
==
CL_MEM_ALLOC_HOST_PTR
)
{
allocatorFlags
=
ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED
;
handle
=
bufferPoolHostPtr
.
allocate
(
total
);
}
else
{
capacity
=
total
;
cl_int
retval
=
0
;
handle
=
clCreateBuffer
((
cl_context
)
ctx
.
ptr
(),
CL_MEM_READ_WRITE
|
createFlags
,
total
,
0
,
&
retval
);
if
(
!
handle
||
retval
!=
CL_SUCCESS
)
return
defaultAllocate
(
dims
,
sizes
,
type
,
data
,
step
,
flags
,
usageFlags
);
CV_IMPL_ADD
(
CV_IMPL_OCL
)
CV_Assert
(
handle
!=
NULL
);
// Unsupported, throw
}
if
(
!
handle
)
return
defaultAllocate
(
dims
,
sizes
,
type
,
data
,
step
,
flags
,
usageFlags
);
UMatData
*
u
=
new
UMatData
(
this
);
u
->
data
=
0
;
u
->
size
=
total
;
u
->
capacity
=
capacity
;
u
->
handle
=
handle
;
u
->
flags
=
flags0
;
u
->
allocatorFlags_
=
allocatorFlags
;
...
...
@@ -3788,22 +4252,81 @@ public:
getBestFlags
(
ctx
,
accessFlags
,
usageFlags
,
createFlags
,
flags0
);
cl_context
ctx_handle
=
(
cl_context
)
ctx
.
ptr
();
cl_int
retval
=
0
;
int
tempUMatFlags
=
UMatData
::
TEMP_UMAT
;
u
->
handle
=
clCreateBuffer
(
ctx_handle
,
CL_MEM_USE_HOST_PTR
|
CL_MEM_READ_WRITE
,
u
->
size
,
u
->
origdata
,
&
retval
);
if
((
!
u
->
handle
||
retval
!=
CL_SUCCESS
)
&&
!
(
accessFlags
&
ACCESS_FAST
))
int
allocatorFlags
=
0
;
int
tempUMatFlags
=
0
;
void
*
handle
=
NULL
;
cl_int
retval
=
CL_SUCCESS
;
#ifdef HAVE_OPENCL_SVM
svm
::
SVMCapabilities
svmCaps
=
svm
::
getSVMCapabilitites
(
ctx
);
bool
useSVM
=
ctx
.
useSVM
()
&&
svm
::
useSVM
(
usageFlags
);
if
(
useSVM
&&
svmCaps
.
isSupportFineGrainSystem
())
{
u
->
handle
=
clCreateBuffer
(
ctx_handle
,
CL_MEM_COPY_HOST_PTR
|
CL_MEM_READ_WRITE
|
createFlags
,
u
->
size
,
u
->
origdata
,
&
retval
);
tempUMatFlags
=
UMatData
::
TEMP_COPIED_UMAT
;
allocatorFlags
=
svm
::
OPENCL_SVM_FINE_GRAIN_SYSTEM
;
tempUMatFlags
=
UMatData
::
TEMP_UMAT
;
handle
=
u
->
origdata
;
CV_OPENCL_SVM_TRACE_P
(
"Use fine grain system: %d (%p)
\n
"
,
(
int
)
u
->
size
,
handle
);
}
else
if
(
useSVM
&&
(
svmCaps
.
isSupportFineGrainBuffer
()
||
svmCaps
.
isSupportCoarseGrainBuffer
()))
{
if
(
!
(
accessFlags
&
ACCESS_FAST
))
// memcpy used
{
bool
isFineGrainBuffer
=
svmCaps
.
isSupportFineGrainBuffer
();
cl_svm_mem_flags
memFlags
=
createFlags
|
(
isFineGrainBuffer
?
CL_MEM_SVM_FINE_GRAIN_BUFFER
:
0
);
const
svm
::
SVMFunctions
*
svmFns
=
svm
::
getSVMFunctions
(
ctx
);
CV_DbgAssert
(
svmFns
->
isValid
());
CV_OPENCL_SVM_TRACE_P
(
"clSVMAlloc + copy: %d
\n
"
,
(
int
)
u
->
size
);
handle
=
svmFns
->
fn_clSVMAlloc
((
cl_context
)
ctx
.
ptr
(),
memFlags
,
u
->
size
,
0
);
CV_Assert
(
handle
);
cl_command_queue
q
=
NULL
;
if
(
!
isFineGrainBuffer
)
{
q
=
(
cl_command_queue
)
Queue
::
getDefault
().
ptr
();
CV_OPENCL_SVM_TRACE_P
(
"clEnqueueSVMMap: %p (%d)
\n
"
,
handle
,
(
int
)
u
->
size
);
cl_int
status
=
svmFns
->
fn_clEnqueueSVMMap
(
q
,
CL_TRUE
,
CL_MAP_WRITE
,
handle
,
u
->
size
,
0
,
NULL
,
NULL
);
CV_Assert
(
status
==
CL_SUCCESS
);
}
memcpy
(
handle
,
u
->
origdata
,
u
->
size
);
if
(
!
isFineGrainBuffer
)
{
CV_OPENCL_SVM_TRACE_P
(
"clEnqueueSVMUnmap: %p
\n
"
,
handle
);
cl_int
status
=
svmFns
->
fn_clEnqueueSVMUnmap
(
q
,
handle
,
0
,
NULL
,
NULL
);
CV_Assert
(
status
==
CL_SUCCESS
);
}
tempUMatFlags
=
UMatData
::
TEMP_UMAT
|
UMatData
::
TEMP_COPIED_UMAT
;
allocatorFlags
|=
isFineGrainBuffer
?
svm
::
OPENCL_SVM_FINE_GRAIN_BUFFER
:
svm
::
OPENCL_SVM_COARSE_GRAIN_BUFFER
;
}
}
else
#endif
{
tempUMatFlags
=
UMatData
::
TEMP_UMAT
;
handle
=
clCreateBuffer
(
ctx_handle
,
CL_MEM_USE_HOST_PTR
|
createFlags
,
u
->
size
,
u
->
origdata
,
&
retval
);
if
((
!
handle
||
retval
<
0
)
&&
!
(
accessFlags
&
ACCESS_FAST
))
{
handle
=
clCreateBuffer
(
ctx_handle
,
CL_MEM_COPY_HOST_PTR
|
CL_MEM_READ_WRITE
|
createFlags
,
u
->
size
,
u
->
origdata
,
&
retval
);
tempUMatFlags
|=
UMatData
::
TEMP_COPIED_UMAT
;
}
}
if
(
!
u
->
handle
||
retval
!=
CL_SUCCESS
)
if
(
!
handle
||
retval
!=
CL_SUCCESS
)
return
false
;
u
->
handle
=
handle
;
u
->
prevAllocator
=
u
->
currAllocator
;
u
->
currAllocator
=
this
;
u
->
flags
|=
tempUMatFlags
;
u
->
allocatorFlags_
=
allocatorFlags
;
}
if
(
accessFlags
&
ACCESS_WRITE
)
u
->
markHostCopyObsolete
(
true
);
...
...
@@ -3848,34 +4371,93 @@ public:
CV_Assert
(
u
->
urefcount
>=
0
);
CV_Assert
(
u
->
refcount
>=
0
);
// TODO: !!! when we add Shared Virtual Memory Support,
// this function (as well as the others) should be corrected
CV_Assert
(
u
->
handle
!=
0
&&
u
->
urefcount
==
0
);
if
(
u
->
tempUMat
())
{
// UMatDataAutoLock lock(u);
if
(
u
->
hostCopyObsolete
()
&&
u
->
refcount
>
0
)
{
cl_command_queue
q
=
(
cl_command_queue
)
Queue
::
getDefault
().
ptr
();
if
(
u
->
tempCopiedUMat
()
)
#ifdef HAVE_OPENCL_SVM
if
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
!=
0
)
{
AlignedDataPtr
<
false
,
true
>
alignedPtr
(
u
->
origdata
,
u
->
size
,
CV_OPENCL_DATA_PTR_ALIGNMENT
);
CV_OclDbgAssert
(
clEnqueueReadBuffer
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
0
,
u
->
size
,
alignedPtr
.
getAlignedPtr
(),
0
,
0
,
0
)
==
CL_SUCCESS
);
Context
&
ctx
=
Context
::
getDefault
();
const
svm
::
SVMFunctions
*
svmFns
=
svm
::
getSVMFunctions
(
ctx
);
CV_DbgAssert
(
svmFns
->
isValid
());
if
(
u
->
tempCopiedUMat
()
)
{
CV_DbgAssert
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_FINE_GRAIN_BUFFER
||
(
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_COARSE_GRAIN_BUFFER
);
bool
isFineGrainBuffer
=
(
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_FINE_GRAIN_BUFFER
;
cl_command_queue
q
=
NULL
;
if
(
!
isFineGrainBuffer
)
{
CV_DbgAssert
(((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MAP
)
==
0
));
q
=
(
cl_command_queue
)
Queue
::
getDefault
().
ptr
();
CV_OPENCL_SVM_TRACE_P
(
"clEnqueueSVMMap: %p (%d)
\n
"
,
u
->
handle
,
(
int
)
u
->
size
);
cl_int
status
=
svmFns
->
fn_clEnqueueSVMMap
(
q
,
CL_FALSE
,
CL_MAP_READ
,
u
->
handle
,
u
->
size
,
0
,
NULL
,
NULL
);
CV_Assert
(
status
==
CL_SUCCESS
);
}
clFinish
(
q
);
memcpy
(
u
->
origdata
,
u
->
handle
,
u
->
size
);
if
(
!
isFineGrainBuffer
)
{
CV_OPENCL_SVM_TRACE_P
(
"clEnqueueSVMUnmap: %p
\n
"
,
u
->
handle
);
cl_int
status
=
svmFns
->
fn_clEnqueueSVMUnmap
(
q
,
u
->
handle
,
0
,
NULL
,
NULL
);
CV_Assert
(
status
==
CL_SUCCESS
);
}
}
else
{
CV_DbgAssert
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_FINE_GRAIN_SYSTEM
);
// nothing
}
}
else
#endif
{
cl_command_queue
q
=
(
cl_command_queue
)
Queue
::
getDefault
().
ptr
();
if
(
u
->
tempCopiedUMat
()
)
{
AlignedDataPtr
<
false
,
true
>
alignedPtr
(
u
->
origdata
,
u
->
size
,
CV_OPENCL_DATA_PTR_ALIGNMENT
);
CV_OclDbgAssert
(
clEnqueueReadBuffer
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
0
,
u
->
size
,
alignedPtr
.
getAlignedPtr
(),
0
,
0
,
0
)
==
CL_SUCCESS
);
}
else
{
// TODO Is it really needed for clCreateBuffer with CL_MEM_USE_HOST_PTR?
cl_int
retval
=
0
;
void
*
data
=
clEnqueueMapBuffer
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
(
CL_MAP_READ
|
CL_MAP_WRITE
),
0
,
u
->
size
,
0
,
0
,
0
,
&
retval
);
CV_OclDbgAssert
(
retval
==
CL_SUCCESS
);
CV_OclDbgAssert
(
clEnqueueUnmapMemObject
(
q
,
(
cl_mem
)
u
->
handle
,
data
,
0
,
0
,
0
)
==
CL_SUCCESS
);
CV_OclDbgAssert
(
clFinish
(
q
)
==
CL_SUCCESS
);
}
}
u
->
markHostCopyObsolete
(
false
);
}
#ifdef HAVE_OPENCL_SVM
if
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
!=
0
)
{
if
(
u
->
tempCopiedUMat
()
)
{
cl_int
retval
=
0
;
void
*
data
=
clEnqueueMapBuffer
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
(
CL_MAP_READ
|
CL_MAP_WRITE
),
0
,
u
->
size
,
0
,
0
,
0
,
&
retval
);
CV_OclDbgAssert
(
retval
==
CL_SUCCESS
);
CV_OclDbgAssert
(
clEnqueueUnmapMemObject
(
q
,
(
cl_mem
)
u
->
handle
,
data
,
0
,
0
,
0
)
==
CL_SUCCESS
);
CV_OclDbgAssert
(
clFinish
(
q
)
==
CL_SUCCESS
);
Context
&
ctx
=
Context
::
getDefault
();
const
svm
::
SVMFunctions
*
svmFns
=
svm
::
getSVMFunctions
(
ctx
);
CV_DbgAssert
(
svmFns
->
isValid
());
CV_OPENCL_SVM_TRACE_P
(
"clSVMFree: %p
\n
"
,
u
->
handle
);
svmFns
->
fn_clSVMFree
((
cl_context
)
ctx
.
ptr
(),
u
->
handle
);
}
}
u
->
markHostCopyObsolete
(
false
);
clReleaseMemObject
((
cl_mem
)
u
->
handle
);
else
#endif
{
clReleaseMemObject
((
cl_mem
)
u
->
handle
);
}
u
->
handle
=
0
;
u
->
currAllocator
=
u
->
prevAllocator
;
if
(
u
->
data
&&
u
->
copyOnMap
()
&&
!
(
u
->
flags
&
UMatData
::
USER_ALLOCATED
))
...
...
@@ -3894,14 +4476,42 @@ public:
}
if
(
u
->
allocatorFlags_
&
ALLOCATOR_FLAGS_BUFFER_POOL_USED
)
{
bufferPool
.
release
((
cl_mem
)
u
->
handle
,
u
->
capacity
);
bufferPool
.
release
((
cl_mem
)
u
->
handle
);
}
else
if
(
u
->
allocatorFlags_
&
ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED
)
{
bufferPoolHostPtr
.
release
((
cl_mem
)
u
->
handle
);
}
#ifdef HAVE_OPENCL_SVM
else
if
(
u
->
allocatorFlags_
&
ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED
)
{
if
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_FINE_GRAIN_SYSTEM
)
{
//nothing
}
else
if
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_FINE_GRAIN_BUFFER
||
(
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_COARSE_GRAIN_BUFFER
)
{
Context
&
ctx
=
Context
::
getDefault
();
const
svm
::
SVMFunctions
*
svmFns
=
svm
::
getSVMFunctions
(
ctx
);
CV_DbgAssert
(
svmFns
->
isValid
());
cl_command_queue
q
=
(
cl_command_queue
)
Queue
::
getDefault
().
ptr
();
if
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MAP
)
!=
0
)
{
CV_OPENCL_SVM_TRACE_P
(
"clEnqueueSVMUnmap: %p
\n
"
,
u
->
handle
);
cl_int
status
=
svmFns
->
fn_clEnqueueSVMUnmap
(
q
,
u
->
handle
,
0
,
NULL
,
NULL
);
CV_Assert
(
status
==
CL_SUCCESS
);
}
}
bufferPoolSVM
.
release
((
void
*
)
u
->
handle
);
}
#endif
else
{
clReleaseMemObject
((
cl_mem
)
u
->
handle
);
}
u
->
handle
=
0
;
u
->
capacity
=
0
;
delete
u
;
}
}
...
...
@@ -3925,13 +4535,41 @@ public:
{
if
(
!
u
->
copyOnMap
()
)
{
// TODO
// because there can be other map requests for the same UMat with different access flags,
// we use the universal (read-write) access mode.
#ifdef HAVE_OPENCL_SVM
if
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
!=
0
)
{
if
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_COARSE_GRAIN_BUFFER
)
{
Context
&
ctx
=
Context
::
getDefault
();
const
svm
::
SVMFunctions
*
svmFns
=
svm
::
getSVMFunctions
(
ctx
);
CV_DbgAssert
(
svmFns
->
isValid
());
if
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MAP
)
==
0
)
{
CV_OPENCL_SVM_TRACE_P
(
"clEnqueueSVMMap: %p (%d)
\n
"
,
u
->
handle
,
(
int
)
u
->
size
);
cl_int
status
=
svmFns
->
fn_clEnqueueSVMMap
(
q
,
CL_FALSE
,
CL_MAP_READ
|
CL_MAP_WRITE
,
u
->
handle
,
u
->
size
,
0
,
NULL
,
NULL
);
CV_Assert
(
status
==
CL_SUCCESS
);
u
->
allocatorFlags_
|=
svm
::
OPENCL_SVM_BUFFER_MAP
;
}
}
clFinish
(
q
);
u
->
data
=
(
uchar
*
)
u
->
handle
;
u
->
markHostCopyObsolete
(
false
);
u
->
markDeviceMemMapped
(
true
);
return
;
}
#endif
if
(
u
->
data
)
// FIXIT Workaround for UMat synchronization issue
{
//CV_Assert(u->hostCopyObsolete() == false);
return
;
}
// because there can be other map requests for the same UMat with different access flags,
// we use the universal (read-write) access mode.
cl_int
retval
=
0
;
u
->
data
=
(
uchar
*
)
clEnqueueMapBuffer
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
(
CL_MAP_READ
|
CL_MAP_WRITE
),
...
...
@@ -3943,6 +4581,7 @@ public:
return
;
}
// TODO Is it really a good idea and was it tested well?
// if map failed, switch to copy-on-map mode for the particular buffer
u
->
flags
|=
UMatData
::
COPY_ON_MAP
;
}
...
...
@@ -3957,6 +4596,9 @@ public:
if
(
(
accessFlags
&
ACCESS_READ
)
!=
0
&&
u
->
hostCopyObsolete
()
)
{
AlignedDataPtr
<
false
,
true
>
alignedPtr
(
u
->
data
,
u
->
size
,
CV_OPENCL_DATA_PTR_ALIGNMENT
);
#ifdef HAVE_OPENCL_SVM
CV_DbgAssert
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
0
);
#endif
CV_Assert
(
clEnqueueReadBuffer
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
0
,
u
->
size
,
alignedPtr
.
getAlignedPtr
(),
0
,
0
,
0
)
==
CL_SUCCESS
);
u
->
markHostCopyObsolete
(
false
);
...
...
@@ -3983,6 +4625,31 @@ public:
{
CV_Assert
(
u
->
data
!=
NULL
);
u
->
markDeviceMemMapped
(
false
);
#ifdef HAVE_OPENCL_SVM
if
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
!=
0
)
{
if
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_COARSE_GRAIN_BUFFER
)
{
Context
&
ctx
=
Context
::
getDefault
();
const
svm
::
SVMFunctions
*
svmFns
=
svm
::
getSVMFunctions
(
ctx
);
CV_DbgAssert
(
svmFns
->
isValid
());
CV_DbgAssert
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MAP
)
!=
0
);
{
CV_OPENCL_SVM_TRACE_P
(
"clEnqueueSVMUnmap: %p
\n
"
,
u
->
handle
);
cl_int
status
=
svmFns
->
fn_clEnqueueSVMUnmap
(
q
,
u
->
handle
,
0
,
NULL
,
NULL
);
CV_Assert
(
status
==
CL_SUCCESS
);
clFinish
(
q
);
u
->
allocatorFlags_
&=
~
svm
::
OPENCL_SVM_BUFFER_MAP
;
}
}
u
->
data
=
0
;
u
->
markDeviceCopyObsolete
(
false
);
u
->
markHostCopyObsolete
(
false
);
return
;
}
#endif
CV_Assert
(
(
retval
=
clEnqueueUnmapMemObject
(
q
,
(
cl_mem
)
u
->
handle
,
u
->
data
,
0
,
0
,
0
))
==
CL_SUCCESS
);
if
(
Device
::
getDefault
().
isAMD
())
...
...
@@ -3995,6 +4662,9 @@ public:
else
if
(
u
->
copyOnMap
()
&&
u
->
deviceCopyObsolete
()
)
{
AlignedDataPtr
<
true
,
false
>
alignedPtr
(
u
->
data
,
u
->
size
,
CV_OPENCL_DATA_PTR_ALIGNMENT
);
#ifdef HAVE_OPENCL_SVM
CV_DbgAssert
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
0
);
#endif
CV_Assert
(
(
retval
=
clEnqueueWriteBuffer
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
0
,
u
->
size
,
alignedPtr
.
getAlignedPtr
(),
0
,
0
,
0
))
==
CL_SUCCESS
);
}
...
...
@@ -4102,17 +4772,78 @@ public:
srcrawofs
,
new_srcofs
,
new_srcstep
,
dstrawofs
,
new_dstofs
,
new_dststep
);
AlignedDataPtr
<
false
,
true
>
alignedPtr
((
uchar
*
)
dstptr
,
sz
[
0
]
*
dststep
[
0
],
CV_OPENCL_DATA_PTR_ALIGNMENT
);
if
(
iscontinuous
)
#ifdef HAVE_OPENCL_SVM
if
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
!=
0
)
{
CV_Assert
(
clEnqueueReadBuffer
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
srcrawofs
,
total
,
alignedPtr
.
getAlignedPtr
(),
0
,
0
,
0
)
==
CL_SUCCESS
);
CV_DbgAssert
(
u
->
data
==
NULL
||
u
->
data
==
u
->
handle
);
Context
&
ctx
=
Context
::
getDefault
();
const
svm
::
SVMFunctions
*
svmFns
=
svm
::
getSVMFunctions
(
ctx
);
CV_DbgAssert
(
svmFns
->
isValid
());
CV_DbgAssert
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MAP
)
==
0
);
if
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_COARSE_GRAIN_BUFFER
)
{
CV_OPENCL_SVM_TRACE_P
(
"clEnqueueSVMMap: %p (%d)
\n
"
,
u
->
handle
,
(
int
)
u
->
size
);
cl_int
status
=
svmFns
->
fn_clEnqueueSVMMap
(
q
,
CL_FALSE
,
CL_MAP_READ
,
u
->
handle
,
u
->
size
,
0
,
NULL
,
NULL
);
CV_Assert
(
status
==
CL_SUCCESS
);
}
clFinish
(
q
);
if
(
iscontinuous
)
{
memcpy
(
dstptr
,
(
uchar
*
)
u
->
handle
+
srcrawofs
,
total
);
}
else
{
// This code is from MatAllocator::download()
int
isz
[
CV_MAX_DIM
];
uchar
*
srcptr
=
(
uchar
*
)
u
->
handle
;
for
(
int
i
=
0
;
i
<
dims
;
i
++
)
{
CV_Assert
(
sz
[
i
]
<=
(
size_t
)
INT_MAX
);
if
(
sz
[
i
]
==
0
)
return
;
if
(
srcofs
)
srcptr
+=
srcofs
[
i
]
*
(
i
<=
dims
-
2
?
srcstep
[
i
]
:
1
);
isz
[
i
]
=
(
int
)
sz
[
i
];
}
Mat
src
(
dims
,
isz
,
CV_8U
,
srcptr
,
srcstep
);
Mat
dst
(
dims
,
isz
,
CV_8U
,
dstptr
,
dststep
);
const
Mat
*
arrays
[]
=
{
&
src
,
&
dst
};
uchar
*
ptrs
[
2
];
NAryMatIterator
it
(
arrays
,
ptrs
,
2
);
size_t
j
,
planesz
=
it
.
size
;
for
(
j
=
0
;
j
<
it
.
nplanes
;
j
++
,
++
it
)
memcpy
(
ptrs
[
1
],
ptrs
[
0
],
planesz
);
}
if
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_COARSE_GRAIN_BUFFER
)
{
CV_OPENCL_SVM_TRACE_P
(
"clEnqueueSVMUnmap: %p
\n
"
,
u
->
handle
);
cl_int
status
=
svmFns
->
fn_clEnqueueSVMUnmap
(
q
,
u
->
handle
,
0
,
NULL
,
NULL
);
CV_Assert
(
status
==
CL_SUCCESS
);
clFinish
(
q
);
}
}
else
#endif
{
CV_Assert
(
clEnqueueReadBufferRect
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
new_srcofs
,
new_dstofs
,
new_sz
,
new_srcstep
[
0
],
new_srcstep
[
1
],
new_dststep
[
0
],
new_dststep
[
1
],
alignedPtr
.
getAlignedPtr
(),
0
,
0
,
0
)
==
CL_SUCCESS
);
AlignedDataPtr
<
false
,
true
>
alignedPtr
((
uchar
*
)
dstptr
,
sz
[
0
]
*
dststep
[
0
],
CV_OPENCL_DATA_PTR_ALIGNMENT
);
if
(
iscontinuous
)
{
CV_Assert
(
clEnqueueReadBuffer
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
srcrawofs
,
total
,
alignedPtr
.
getAlignedPtr
(),
0
,
0
,
0
)
>=
0
);
}
else
{
CV_Assert
(
clEnqueueReadBufferRect
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
new_srcofs
,
new_dstofs
,
new_sz
,
new_srcstep
[
0
],
new_srcstep
[
1
],
new_dststep
[
0
],
new_dststep
[
1
],
alignedPtr
.
getAlignedPtr
(),
0
,
0
,
0
)
>=
0
);
}
}
}
...
...
@@ -4153,20 +4884,91 @@ public:
CV_Assert
(
u
->
handle
!=
0
);
cl_command_queue
q
=
(
cl_command_queue
)
Queue
::
getDefault
().
ptr
();
AlignedDataPtr
<
true
,
false
>
alignedPtr
((
uchar
*
)
srcptr
,
sz
[
0
]
*
srcstep
[
0
],
CV_OPENCL_DATA_PTR_ALIGNMENT
);
if
(
iscontinuous
)
#ifdef HAVE_OPENCL_SVM
if
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
!=
0
)
{
CV_Assert
(
clEnqueueWriteBuffer
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
dstrawofs
,
total
,
srcptr
,
0
,
0
,
0
)
==
CL_SUCCESS
);
CV_DbgAssert
(
u
->
data
==
NULL
||
u
->
data
==
u
->
handle
);
Context
&
ctx
=
Context
::
getDefault
();
const
svm
::
SVMFunctions
*
svmFns
=
svm
::
getSVMFunctions
(
ctx
);
CV_DbgAssert
(
svmFns
->
isValid
());
CV_DbgAssert
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MAP
)
==
0
);
if
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_COARSE_GRAIN_BUFFER
)
{
CV_OPENCL_SVM_TRACE_P
(
"clEnqueueSVMMap: %p (%d)
\n
"
,
u
->
handle
,
(
int
)
u
->
size
);
cl_int
status
=
svmFns
->
fn_clEnqueueSVMMap
(
q
,
CL_FALSE
,
CL_MAP_WRITE
,
u
->
handle
,
u
->
size
,
0
,
NULL
,
NULL
);
CV_Assert
(
status
==
CL_SUCCESS
);
}
clFinish
(
q
);
if
(
iscontinuous
)
{
memcpy
((
uchar
*
)
u
->
handle
+
dstrawofs
,
srcptr
,
total
);
}
else
{
// This code is from MatAllocator::upload()
int
isz
[
CV_MAX_DIM
];
uchar
*
dstptr
=
(
uchar
*
)
u
->
handle
;
for
(
int
i
=
0
;
i
<
dims
;
i
++
)
{
CV_Assert
(
sz
[
i
]
<=
(
size_t
)
INT_MAX
);
if
(
sz
[
i
]
==
0
)
return
;
if
(
dstofs
)
dstptr
+=
dstofs
[
i
]
*
(
i
<=
dims
-
2
?
dststep
[
i
]
:
1
);
isz
[
i
]
=
(
int
)
sz
[
i
];
}
Mat
src
(
dims
,
isz
,
CV_8U
,
(
void
*
)
srcptr
,
srcstep
);
Mat
dst
(
dims
,
isz
,
CV_8U
,
dstptr
,
dststep
);
const
Mat
*
arrays
[]
=
{
&
src
,
&
dst
};
uchar
*
ptrs
[
2
];
NAryMatIterator
it
(
arrays
,
ptrs
,
2
);
size_t
j
,
planesz
=
it
.
size
;
for
(
j
=
0
;
j
<
it
.
nplanes
;
j
++
,
++
it
)
memcpy
(
ptrs
[
1
],
ptrs
[
0
],
planesz
);
}
if
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_COARSE_GRAIN_BUFFER
)
{
CV_OPENCL_SVM_TRACE_P
(
"clEnqueueSVMUnmap: %p
\n
"
,
u
->
handle
);
cl_int
status
=
svmFns
->
fn_clEnqueueSVMUnmap
(
q
,
u
->
handle
,
0
,
NULL
,
NULL
);
CV_Assert
(
status
==
CL_SUCCESS
);
clFinish
(
q
);
}
}
else
#endif
{
CV_Assert
(
clEnqueueWriteBufferRect
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
new_dstofs
,
new_srcofs
,
new_sz
,
new_dststep
[
0
],
new_dststep
[
1
],
new_srcstep
[
0
],
new_srcstep
[
1
],
srcptr
,
0
,
0
,
0
)
==
CL_SUCCESS
);
AlignedDataPtr
<
true
,
false
>
alignedPtr
((
uchar
*
)
srcptr
,
sz
[
0
]
*
srcstep
[
0
],
CV_OPENCL_DATA_PTR_ALIGNMENT
);
if
(
iscontinuous
)
{
CV_Assert
(
clEnqueueWriteBuffer
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
dstrawofs
,
total
,
alignedPtr
.
getAlignedPtr
(),
0
,
0
,
0
)
>=
0
);
}
else
{
CV_Assert
(
clEnqueueWriteBufferRect
(
q
,
(
cl_mem
)
u
->
handle
,
CL_TRUE
,
new_dstofs
,
new_srcofs
,
new_sz
,
new_dststep
[
0
],
new_dststep
[
1
],
new_srcstep
[
0
],
new_srcstep
[
1
],
alignedPtr
.
getAlignedPtr
(),
0
,
0
,
0
)
>=
0
);
}
}
u
->
markHostCopyObsolete
(
true
);
#ifdef HAVE_OPENCL_SVM
if
((
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_FINE_GRAIN_BUFFER
||
(
u
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_FINE_GRAIN_SYSTEM
)
{
// nothing
}
else
#endif
{
u
->
markHostCopyObsolete
(
true
);
}
u
->
markDeviceCopyObsolete
(
false
);
}
...
...
@@ -4198,7 +5000,17 @@ public:
{
download
(
src
,
dst
->
data
+
dstrawofs
,
dims
,
sz
,
srcofs
,
srcstep
,
dststep
);
dst
->
markHostCopyObsolete
(
false
);
dst
->
markDeviceCopyObsolete
(
true
);
#ifdef HAVE_OPENCL_SVM
if
((
dst
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_FINE_GRAIN_BUFFER
||
(
dst
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_FINE_GRAIN_SYSTEM
)
{
// nothing
}
else
#endif
{
dst
->
markDeviceCopyObsolete
(
true
);
}
return
;
}
...
...
@@ -4206,26 +5018,110 @@ public:
CV_Assert
(
dst
->
refcount
==
0
);
cl_command_queue
q
=
(
cl_command_queue
)
Queue
::
getDefault
().
ptr
();
cl_int
retval
;
if
(
iscontinuous
)
cl_int
retval
=
CL_SUCCESS
;
#ifdef HAVE_OPENCL_SVM
if
((
src
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
!=
0
||
(
dst
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
!=
0
)
{
CV_Assert
(
(
retval
=
clEnqueueCopyBuffer
(
q
,
(
cl_mem
)
src
->
handle
,
(
cl_mem
)
dst
->
handle
,
srcrawofs
,
dstrawofs
,
total
,
0
,
0
,
0
))
==
CL_SUCCESS
);
if
((
src
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
!=
0
&&
(
dst
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
!=
0
)
{
Context
&
ctx
=
Context
::
getDefault
();
const
svm
::
SVMFunctions
*
svmFns
=
svm
::
getSVMFunctions
(
ctx
);
CV_DbgAssert
(
svmFns
->
isValid
());
if
(
iscontinuous
)
{
CV_OPENCL_SVM_TRACE_P
(
"clEnqueueSVMMemcpy: %p <-- %p (%d)
\n
"
,
(
uchar
*
)
dst
->
handle
+
dstrawofs
,
(
uchar
*
)
src
->
handle
+
srcrawofs
,
(
int
)
total
);
cl_int
status
=
svmFns
->
fn_clEnqueueSVMMemcpy
(
q
,
CL_TRUE
,
(
uchar
*
)
dst
->
handle
+
dstrawofs
,
(
uchar
*
)
src
->
handle
+
srcrawofs
,
total
,
0
,
NULL
,
NULL
);
CV_Assert
(
status
==
CL_SUCCESS
);
}
else
{
clFinish
(
q
);
// This code is from MatAllocator::download()/upload()
int
isz
[
CV_MAX_DIM
];
uchar
*
srcptr
=
(
uchar
*
)
src
->
handle
;
for
(
int
i
=
0
;
i
<
dims
;
i
++
)
{
CV_Assert
(
sz
[
i
]
<=
(
size_t
)
INT_MAX
);
if
(
sz
[
i
]
==
0
)
return
;
if
(
srcofs
)
srcptr
+=
srcofs
[
i
]
*
(
i
<=
dims
-
2
?
srcstep
[
i
]
:
1
);
isz
[
i
]
=
(
int
)
sz
[
i
];
}
Mat
m_src
(
dims
,
isz
,
CV_8U
,
srcptr
,
srcstep
);
uchar
*
dstptr
=
(
uchar
*
)
dst
->
handle
;
for
(
int
i
=
0
;
i
<
dims
;
i
++
)
{
if
(
dstofs
)
dstptr
+=
dstofs
[
i
]
*
(
i
<=
dims
-
2
?
dststep
[
i
]
:
1
);
}
Mat
m_dst
(
dims
,
isz
,
CV_8U
,
dstptr
,
dststep
);
const
Mat
*
arrays
[]
=
{
&
m_src
,
&
m_dst
};
uchar
*
ptrs
[
2
];
NAryMatIterator
it
(
arrays
,
ptrs
,
2
);
size_t
j
,
planesz
=
it
.
size
;
for
(
j
=
0
;
j
<
it
.
nplanes
;
j
++
,
++
it
)
memcpy
(
ptrs
[
1
],
ptrs
[
0
],
planesz
);
}
}
else
{
if
((
src
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
!=
0
)
{
map
(
src
,
ACCESS_READ
);
upload
(
dst
,
src
->
data
+
srcrawofs
,
dims
,
sz
,
dstofs
,
dststep
,
srcstep
);
unmap
(
src
);
}
else
{
map
(
dst
,
ACCESS_WRITE
);
download
(
src
,
dst
->
data
+
dstrawofs
,
dims
,
sz
,
srcofs
,
srcstep
,
dststep
);
unmap
(
dst
);
}
}
}
else
#endif
{
CV_Assert
(
(
retval
=
clEnqueueCopyBufferRect
(
q
,
(
cl_mem
)
src
->
handle
,
(
cl_mem
)
dst
->
handle
,
new_srcofs
,
new_dstofs
,
new_sz
,
new_srcstep
[
0
],
new_srcstep
[
1
],
new_dststep
[
0
],
new_dststep
[
1
],
0
,
0
,
0
))
==
CL_SUCCESS
);
if
(
iscontinuous
)
{
CV_Assert
(
(
retval
=
clEnqueueCopyBuffer
(
q
,
(
cl_mem
)
src
->
handle
,
(
cl_mem
)
dst
->
handle
,
srcrawofs
,
dstrawofs
,
total
,
0
,
0
,
0
))
==
CL_SUCCESS
);
}
else
{
CV_Assert
(
(
retval
=
clEnqueueCopyBufferRect
(
q
,
(
cl_mem
)
src
->
handle
,
(
cl_mem
)
dst
->
handle
,
new_srcofs
,
new_dstofs
,
new_sz
,
new_srcstep
[
0
],
new_srcstep
[
1
],
new_dststep
[
0
],
new_dststep
[
1
],
0
,
0
,
0
))
==
CL_SUCCESS
);
}
}
if
(
retval
==
CL_SUCCESS
)
if
(
retval
==
CL_SUCCESS
)
{
CV_IMPL_ADD
(
CV_IMPL_OCL
)
}
dst
->
markHostCopyObsolete
(
true
);
#ifdef HAVE_OPENCL_SVM
if
((
dst
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_FINE_GRAIN_BUFFER
||
(
dst
->
allocatorFlags_
&
svm
::
OPENCL_SVM_BUFFER_MASK
)
==
svm
::
OPENCL_SVM_FINE_GRAIN_SYSTEM
)
{
// nothing
}
else
#endif
{
dst
->
markHostCopyObsolete
(
true
);
}
dst
->
markDeviceCopyObsolete
(
false
);
if
(
_sync
)
...
...
@@ -4234,7 +5130,23 @@ public:
}
}
BufferPoolController
*
getBufferPoolController
()
const
{
return
&
bufferPool
;
}
BufferPoolController
*
getBufferPoolController
(
const
char
*
id
)
const
{
#ifdef HAVE_OPENCL_SVM
if
((
svm
::
checkForceSVMUmatUsage
()
&&
(
id
==
NULL
||
strcmp
(
id
,
"OCL"
)
==
0
))
||
(
id
!=
NULL
&&
strcmp
(
id
,
"SVM"
)
==
0
))
{
return
&
bufferPoolSVM
;
}
#endif
if
(
id
!=
NULL
&&
strcmp
(
id
,
"HOST_ALLOC"
)
==
0
)
{
return
&
bufferPoolHostPtr
;
}
if
(
id
!=
NULL
&&
strcmp
(
id
,
"OCL"
)
!=
0
)
{
CV_ErrorNoReturn
(
cv
::
Error
::
StsBadArg
,
"getBufferPoolController(): unknown BufferPool ID
\n
"
);
}
return
&
bufferPool
;
}
MatAllocator
*
matStdAllocator
;
};
...
...
@@ -4818,7 +5730,7 @@ void* Image2D::ptr() const
return
p
?
p
->
handle
:
0
;
}
bool
isPerformanceCheckBypassed
()
bool
i
nternal
::
i
sPerformanceCheckBypassed
()
{
static
bool
initialized
=
false
;
static
bool
value
=
false
;
...
...
@@ -4830,4 +5742,22 @@ bool isPerformanceCheckBypassed()
return
value
;
}
bool
internal
::
isCLBuffer
(
UMat
&
u
)
{
void
*
h
=
u
.
handle
(
ACCESS_RW
);
if
(
!
h
)
return
true
;
CV_DbgAssert
(
u
.
u
->
currAllocator
==
getOpenCLAllocator
());
#if 1
if
((
u
.
u
->
allocatorFlags_
&
0xffff0000
)
!=
0
)
// OpenCL SVM flags are stored here
return
false
;
#else
cl_mem_object_type
type
=
0
;
cl_int
ret
=
clGetMemObjectInfo
((
cl_mem
)
h
,
CL_MEM_TYPE
,
sizeof
(
type
),
&
type
,
NULL
);
if
(
ret
!=
CL_SUCCESS
||
type
!=
CL_MEM_OBJECT_BUFFER
)
return
false
;
#endif
return
true
;
}
}}
modules/core/src/opencl/runtime/opencl_core.cpp
浏览文件 @
0a07d780
...
...
@@ -182,6 +182,65 @@ static void* opencl_check_fn(int ID);
#define CUSTOM_FUNCTION_ID 1000
#ifdef HAVE_OPENCL_SVM
#include "opencv2/core/opencl/runtime/opencl_svm_20.hpp"
#define SVM_FUNCTION_ID_START CUSTOM_FUNCTION_ID
#define SVM_FUNCTION_ID_END CUSTOM_FUNCTION_ID + 100
enum
OPENCL_FN_SVM_ID
{
OPENCL_FN_clSVMAlloc
=
SVM_FUNCTION_ID_START
,
OPENCL_FN_clSVMFree
,
OPENCL_FN_clSetKernelArgSVMPointer
,
OPENCL_FN_clSetKernelExecInfo
,
OPENCL_FN_clEnqueueSVMFree
,
OPENCL_FN_clEnqueueSVMMemcpy
,
OPENCL_FN_clEnqueueSVMMemFill
,
OPENCL_FN_clEnqueueSVMMap
,
OPENCL_FN_clEnqueueSVMUnmap
,
};
void
*
(
CL_API_CALL
*
clSVMAlloc
)(
cl_context
context
,
cl_svm_mem_flags
flags
,
size_t
size
,
unsigned
int
alignment
)
=
opencl_fn4
<
OPENCL_FN_clSVMAlloc
,
void
*
,
cl_context
,
cl_svm_mem_flags
,
size_t
,
unsigned
int
>::
switch_fn
;
static
const
struct
DynamicFnEntry
_clSVMAlloc_definition
=
{
"clSVMAlloc"
,
(
void
**
)
&
clSVMAlloc
};
void
(
CL_API_CALL
*
clSVMFree
)(
cl_context
context
,
void
*
svm_pointer
)
=
opencl_fn2
<
OPENCL_FN_clSVMFree
,
void
,
cl_context
,
void
*>::
switch_fn
;
static
const
struct
DynamicFnEntry
_clSVMFree_definition
=
{
"clSVMFree"
,
(
void
**
)
&
clSVMFree
};
cl_int
(
CL_API_CALL
*
clSetKernelArgSVMPointer
)(
cl_kernel
kernel
,
cl_uint
arg_index
,
const
void
*
arg_value
)
=
opencl_fn3
<
OPENCL_FN_clSetKernelArgSVMPointer
,
cl_int
,
cl_kernel
,
cl_uint
,
const
void
*>::
switch_fn
;
static
const
struct
DynamicFnEntry
_clSetKernelArgSVMPointer_definition
=
{
"clSetKernelArgSVMPointer"
,
(
void
**
)
&
clSetKernelArgSVMPointer
};
//void* (CL_API_CALL *clSetKernelExecInfo)(cl_kernel kernel, cl_kernel_exec_info param_name, size_t param_value_size, const void* param_value) =
// opencl_fn4<OPENCL_FN_clSetKernelExecInfo, void*, cl_kernel, cl_kernel_exec_info, size_t, const void*>::switch_fn;
//static const struct DynamicFnEntry _clSetKernelExecInfo_definition = { "clSetKernelExecInfo", (void**)&clSetKernelExecInfo};
//cl_int (CL_API_CALL *clEnqueueSVMFree)(...) =
// opencl_fn8<OPENCL_FN_clEnqueueSVMFree, cl_int, ...>::switch_fn;
//static const struct DynamicFnEntry _clEnqueueSVMFree_definition = { "clEnqueueSVMFree", (void**)&clEnqueueSVMFree};
cl_int
(
CL_API_CALL
*
clEnqueueSVMMemcpy
)(
cl_command_queue
command_queue
,
cl_bool
blocking_copy
,
void
*
dst_ptr
,
const
void
*
src_ptr
,
size_t
size
,
cl_uint
num_events_in_wait_list
,
const
cl_event
*
event_wait_list
,
cl_event
*
event
)
=
opencl_fn8
<
OPENCL_FN_clEnqueueSVMMemcpy
,
cl_int
,
cl_command_queue
,
cl_bool
,
void
*
,
const
void
*
,
size_t
,
cl_uint
,
const
cl_event
*
,
cl_event
*>::
switch_fn
;
static
const
struct
DynamicFnEntry
_clEnqueueSVMMemcpy_definition
=
{
"clEnqueueSVMMemcpy"
,
(
void
**
)
&
clEnqueueSVMMemcpy
};
cl_int
(
CL_API_CALL
*
clEnqueueSVMMemFill
)(
cl_command_queue
command_queue
,
void
*
svm_ptr
,
const
void
*
pattern
,
size_t
pattern_size
,
size_t
size
,
cl_uint
num_events_in_wait_list
,
const
cl_event
*
event_wait_list
,
cl_event
*
event
)
=
opencl_fn8
<
OPENCL_FN_clEnqueueSVMMemFill
,
cl_int
,
cl_command_queue
,
void
*
,
const
void
*
,
size_t
,
size_t
,
cl_uint
,
const
cl_event
*
,
cl_event
*>::
switch_fn
;
static
const
struct
DynamicFnEntry
_clEnqueueSVMMemFill_definition
=
{
"clEnqueueSVMMemFill"
,
(
void
**
)
&
clEnqueueSVMMemFill
};
cl_int
(
CL_API_CALL
*
clEnqueueSVMMap
)(
cl_command_queue
command_queue
,
cl_bool
blocking_map
,
cl_map_flags
map_flags
,
void
*
svm_ptr
,
size_t
size
,
cl_uint
num_events_in_wait_list
,
const
cl_event
*
event_wait_list
,
cl_event
*
event
)
=
opencl_fn8
<
OPENCL_FN_clEnqueueSVMMap
,
cl_int
,
cl_command_queue
,
cl_bool
,
cl_map_flags
,
void
*
,
size_t
,
cl_uint
,
const
cl_event
*
,
cl_event
*>::
switch_fn
;
static
const
struct
DynamicFnEntry
_clEnqueueSVMMap_definition
=
{
"clEnqueueSVMMap"
,
(
void
**
)
&
clEnqueueSVMMap
};
cl_int
(
CL_API_CALL
*
clEnqueueSVMUnmap
)(
cl_command_queue
command_queue
,
void
*
svm_ptr
,
cl_uint
num_events_in_wait_list
,
const
cl_event
*
event_wait_list
,
cl_event
*
event
)
=
opencl_fn5
<
OPENCL_FN_clEnqueueSVMUnmap
,
cl_int
,
cl_command_queue
,
void
*
,
cl_uint
,
const
cl_event
*
,
cl_event
*>::
switch_fn
;
static
const
struct
DynamicFnEntry
_clEnqueueSVMUnmap_definition
=
{
"clEnqueueSVMUnmap"
,
(
void
**
)
&
clEnqueueSVMUnmap
};
static
const
struct
DynamicFnEntry
*
opencl_svm_fn_list
[]
=
{
&
_clSVMAlloc_definition
,
&
_clSVMFree_definition
,
&
_clSetKernelArgSVMPointer_definition
,
NULL
/*&_clSetKernelExecInfo_definition*/
,
NULL
/*&_clEnqueueSVMFree_definition*/
,
&
_clEnqueueSVMMemcpy_definition
,
&
_clEnqueueSVMMemFill_definition
,
&
_clEnqueueSVMMap_definition
,
&
_clEnqueueSVMUnmap_definition
,
};
#endif // HAVE_OPENCL_SVM
//
// END OF CUSTOM FUNCTIONS HERE
//
...
...
@@ -194,6 +253,14 @@ static void* opencl_check_fn(int ID)
assert
(
ID
>=
0
&&
ID
<
(
int
)(
sizeof
(
opencl_fn_list
)
/
sizeof
(
opencl_fn_list
[
0
])));
e
=
opencl_fn_list
[
ID
];
}
#ifdef HAVE_OPENCL_SVM
else
if
(
ID
>=
SVM_FUNCTION_ID_START
&&
ID
<
SVM_FUNCTION_ID_END
)
{
ID
=
ID
-
SVM_FUNCTION_ID_START
;
assert
(
ID
>=
0
&&
ID
<
(
int
)(
sizeof
(
opencl_svm_fn_list
)
/
sizeof
(
opencl_svm_fn_list
[
0
])));
e
=
opencl_svm_fn_list
[
ID
];
}
#endif
else
{
CV_ErrorNoReturn
(
cv
::
Error
::
StsBadArg
,
"Invalid function ID"
);
...
...
modules/core/src/umatrix.cpp
浏览文件 @
0a07d780
...
...
@@ -55,7 +55,7 @@ UMatData::UMatData(const MatAllocator* allocator)
prevAllocator
=
currAllocator
=
allocator
;
urefcount
=
refcount
=
0
;
data
=
origdata
=
0
;
size
=
0
;
capacity
=
0
;
size
=
0
;
flags
=
0
;
handle
=
0
;
userdata
=
0
;
...
...
@@ -67,7 +67,7 @@ UMatData::~UMatData()
prevAllocator
=
currAllocator
=
0
;
urefcount
=
refcount
=
0
;
data
=
origdata
=
0
;
size
=
0
;
capacity
=
0
;
size
=
0
;
flags
=
0
;
handle
=
0
;
userdata
=
0
;
...
...
@@ -221,7 +221,7 @@ UMat Mat::getUMat(int accessFlags, UMatUsageFlags usageFlags) const
temp_u
=
a
->
allocate
(
dims
,
size
.
p
,
type
(),
data
,
step
.
p
,
accessFlags
,
usageFlags
);
temp_u
->
refcount
=
1
;
}
UMat
::
getStdAllocator
()
->
allocate
(
temp_u
,
accessFlags
,
usageFlags
);
UMat
::
getStdAllocator
()
->
allocate
(
temp_u
,
accessFlags
,
usageFlags
);
// TODO result is not checked
hdr
.
flags
=
flags
;
setSize
(
hdr
,
dims
,
size
.
p
,
step
.
p
);
finalizeHdr
(
hdr
);
...
...
@@ -575,7 +575,7 @@ Mat UMat::getMat(int accessFlags) const
{
if
(
!
u
)
return
Mat
();
u
->
currAllocator
->
map
(
u
,
accessFlags
|
ACCESS_READ
);
u
->
currAllocator
->
map
(
u
,
accessFlags
|
ACCESS_READ
);
// TODO Support ACCESS_WRITE without unnecessary data transfers
CV_Assert
(
u
->
data
!=
0
);
Mat
hdr
(
dims
,
size
.
p
,
type
(),
u
->
data
+
offset
,
step
.
p
);
hdr
.
flags
=
flags
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录