Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
0a07d780
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
0a07d780
编写于
1月 02, 2015
作者:
A
Alexander Alekhin
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
ocl: OpenCL SVM support
上级
58ad952b
变更
15
展开全部
隐藏空白更改
内联
并排
Showing
15 changed file
with
1535 addition
and
154 deletion
+1535
-154
CMakeLists.txt
CMakeLists.txt
+1
-0
cmake/OpenCVDetectOpenCL.cmake
cmake/OpenCVDetectOpenCL.cmake
+4
-0
cmake/templates/cvconfig.h.in
cmake/templates/cvconfig.h.in
+1
-0
modules/core/include/opencv2/core/mat.hpp
modules/core/include/opencv2/core/mat.hpp
+2
-2
modules/core/include/opencv2/core/ocl.hpp
modules/core/include/opencv2/core/ocl.hpp
+17
-3
modules/core/include/opencv2/core/opencl/opencl_svm.hpp
modules/core/include/opencv2/core/opencl/opencl_svm.hpp
+81
-0
modules/core/include/opencv2/core/opencl/runtime/opencl_core.hpp
.../core/include/opencv2/core/opencl/runtime/opencl_core.hpp
+12
-0
modules/core/include/opencv2/core/opencl/runtime/opencl_svm_20.hpp
...ore/include/opencv2/core/opencl/runtime/opencl_svm_20.hpp
+52
-0
modules/core/include/opencv2/core/opencl/runtime/opencl_svm_definitions.hpp
...de/opencv2/core/opencl/runtime/opencl_svm_definitions.hpp
+42
-0
modules/core/include/opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp
.../opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp
+166
-0
modules/core/src/matmul.cpp
modules/core/src/matmul.cpp
+10
-0
modules/core/src/matrix.cpp
modules/core/src/matrix.cpp
+2
-1
modules/core/src/ocl.cpp
modules/core/src/ocl.cpp
+1074
-144
modules/core/src/opencl/runtime/opencl_core.cpp
modules/core/src/opencl/runtime/opencl_core.cpp
+67
-0
modules/core/src/umatrix.cpp
modules/core/src/umatrix.cpp
+4
-4
未找到文件。
CMakeLists.txt
浏览文件 @
0a07d780
...
...
@@ -162,6 +162,7 @@ OCV_OPTION(WITH_XIMEA "Include XIMEA cameras support" OFF
OCV_OPTION
(
WITH_XINE
"Include Xine support (GPL)"
OFF
IF
(
UNIX AND NOT APPLE AND NOT ANDROID
)
)
OCV_OPTION
(
WITH_CLP
"Include Clp support (EPL)"
OFF
)
OCV_OPTION
(
WITH_OPENCL
"Include OpenCL Runtime support"
ON
IF
(
NOT IOS
)
)
OCV_OPTION
(
WITH_OPENCL_SVM
"Include OpenCL Shared Virtual Memory support"
OFF
)
# experimental
OCV_OPTION
(
WITH_OPENCLAMDFFT
"Include AMD OpenCL FFT library support"
ON
IF
(
NOT ANDROID AND NOT IOS
)
)
OCV_OPTION
(
WITH_OPENCLAMDBLAS
"Include AMD OpenCL BLAS library support"
ON
IF
(
NOT ANDROID AND NOT IOS
)
)
OCV_OPTION
(
WITH_DIRECTX
"Include DirectX support"
ON IF WIN32
)
...
...
cmake/OpenCVDetectOpenCL.cmake
浏览文件 @
0a07d780
...
...
@@ -26,6 +26,10 @@ if(OPENCL_FOUND)
set
(
HAVE_OPENCL 1
)
if
(
WITH_OPENCL_SVM
)
set
(
HAVE_OPENCL_SVM 1
)
endif
()
if
(
HAVE_OPENCL_STATIC
)
set
(
OPENCL_LIBRARIES
"
${
OPENCL_LIBRARY
}
"
)
else
()
...
...
cmake/templates/cvconfig.h.in
浏览文件 @
0a07d780
...
...
@@ -122,6 +122,7 @@
/* OpenCL Support */
#cmakedefine HAVE_OPENCL
#cmakedefine HAVE_OPENCL_STATIC
#cmakedefine HAVE_OPENCL_SVM
/* OpenEXR codec */
#cmakedefine HAVE_OPENEXR
...
...
modules/core/include/opencv2/core/mat.hpp
浏览文件 @
0a07d780
...
...
@@ -415,7 +415,7 @@ public:
const
size_t
dstofs
[],
const
size_t
dststep
[],
bool
sync
)
const
;
// default implementation returns DummyBufferPoolController
virtual
BufferPoolController
*
getBufferPoolController
()
const
;
virtual
BufferPoolController
*
getBufferPoolController
(
const
char
*
id
=
NULL
)
const
;
};
...
...
@@ -481,7 +481,7 @@ struct CV_EXPORTS UMatData
int
refcount
;
uchar
*
data
;
uchar
*
origdata
;
size_t
size
,
capacity
;
size_t
size
;
int
flags
;
void
*
handle
;
...
...
modules/core/include/opencv2/core/ocl.hpp
浏览文件 @
0a07d780
...
...
@@ -56,6 +56,8 @@ CV_EXPORTS_W bool haveAmdFft();
CV_EXPORTS_W
void
setUseOpenCL
(
bool
flag
);
CV_EXPORTS_W
void
finish
();
CV_EXPORTS
bool
haveSVM
();
class
CV_EXPORTS
Context
;
class
CV_EXPORTS
Device
;
class
CV_EXPORTS
Kernel
;
...
...
@@ -248,7 +250,10 @@ public:
void
*
ptr
()
const
;
friend
void
initializeContextFromHandle
(
Context
&
ctx
,
void
*
platform
,
void
*
context
,
void
*
device
);
protected:
bool
useSVM
()
const
;
void
setUseSVM
(
bool
enabled
);
struct
Impl
;
Impl
*
p
;
};
...
...
@@ -666,8 +671,17 @@ protected:
CV_EXPORTS
MatAllocator
*
getOpenCLAllocator
();
CV_EXPORTS_W
bool
isPerformanceCheckBypassed
();
#define OCL_PERFORMANCE_CHECK(condition) (cv::ocl::isPerformanceCheckBypassed() || (condition))
#ifdef __OPENCV_BUILD
namespace
internal
{
CV_EXPORTS
bool
isPerformanceCheckBypassed
();
#define OCL_PERFORMANCE_CHECK(condition) (cv::ocl::internal::isPerformanceCheckBypassed() || (condition))
CV_EXPORTS
bool
isCLBuffer
(
UMat
&
u
);
}
// namespace internal
#endif
//! @}
...
...
modules/core/include/opencv2/core/opencl/opencl_svm.hpp
0 → 100644
浏览文件 @
0a07d780
/* See LICENSE file in the root OpenCV directory */
#ifndef __OPENCV_CORE_OPENCL_SVM_HPP__
#define __OPENCV_CORE_OPENCL_SVM_HPP__
//
// Internal usage only (binary compatibility is not guaranteed)
//
#ifndef __OPENCV_BUILD
#error Internal header file
#endif
#if defined(HAVE_OPENCL) && defined(HAVE_OPENCL_SVM)
#include "runtime/opencl_core.hpp"
#include "runtime/opencl_svm_20.hpp"
#include "runtime/opencl_svm_hsa_extension.hpp"
namespace
cv
{
namespace
ocl
{
namespace
svm
{
/**
 * Bit-mask wrapper describing which Shared Virtual Memory (SVM) features are available.
 *
 * The object is implicitly constructible from an int mask and implicitly
 * convertible back to int, so it can be used wherever the raw mask is expected.
 */
struct SVMCapabilities
{
    // Individual capability bits; value_ holds any OR-combination of them.
    enum Value
    {
        SVM_COARSE_GRAIN_BUFFER = 0x1,
        SVM_FINE_GRAIN_BUFFER   = 0x2,
        SVM_FINE_GRAIN_SYSTEM   = 0x4,
        SVM_ATOMICS             = 0x8
    };

    int value_; // raw capability mask (0 means no bits set)

    // Wraps a raw mask; defaults to an empty mask.
    SVMCapabilities(int capabilities = 0)
        : value_(capabilities)
    {
    }

    // Exposes the raw mask.
    operator int() const { return value_; }

    // True when no capability bit is set.
    bool isNoSVMSupport() const { return !value_; }

    // Single-bit queries: true when the corresponding bit is present in the mask.
    bool isSupportCoarseGrainBuffer() const { return 0 != (value_ & SVM_COARSE_GRAIN_BUFFER); }
    bool isSupportFineGrainBuffer() const { return 0 != (value_ & SVM_FINE_GRAIN_BUFFER); }
    bool isSupportFineGrainSystem() const { return 0 != (value_ & SVM_FINE_GRAIN_SYSTEM); }
    bool isSupportAtomics() const { return 0 != (value_ & SVM_ATOMICS); }
};
// Returns the SVMCapabilities mask associated with the given ocl::Context (by value).
// NOTE(review): the exported name is spelled "Capabilitites"; renaming it would
// require updating every caller, so the spelling is kept as-is here.
CV_EXPORTS
const
SVMCapabilities
getSVMCapabilitites
(
const
ocl
::
Context
&
context
);
/**
 * Table of SVM entry points, stored as function pointers using the AMD
 * extension pointer types.
 *
 * Every pointer starts out as NULL; isValid() reports whether all of them
 * have been filled in.
 */
struct SVMFunctions
{
    clSVMAllocAMD_fn fn_clSVMAlloc;
    clSVMFreeAMD_fn fn_clSVMFree;
    clSetKernelArgSVMPointerAMD_fn fn_clSetKernelArgSVMPointer;
    //clSetKernelExecInfoAMD_fn fn_clSetKernelExecInfo;
    //clEnqueueSVMFreeAMD_fn fn_clEnqueueSVMFree;
    clEnqueueSVMMemcpyAMD_fn fn_clEnqueueSVMMemcpy;
    clEnqueueSVMMemFillAMD_fn fn_clEnqueueSVMMemFill;
    clEnqueueSVMMapAMD_fn fn_clEnqueueSVMMap;
    clEnqueueSVMUnmapAMD_fn fn_clEnqueueSVMUnmap;

    // Starts with an empty table (all entries NULL).
    SVMFunctions()
        : fn_clSVMAlloc(NULL),
          fn_clSVMFree(NULL),
          fn_clSetKernelArgSVMPointer(NULL),
          /*fn_clSetKernelExecInfo(NULL),*/
          /*fn_clEnqueueSVMFree(NULL),*/
          fn_clEnqueueSVMMemcpy(NULL),
          fn_clEnqueueSVMMemFill(NULL),
          fn_clEnqueueSVMMap(NULL),
          fn_clEnqueueSVMUnmap(NULL)
    {
        // nothing
    }

    // True only when every entry of the table is non-NULL.
    // The two disabled entries (clSetKernelExecInfo, clEnqueueSVMFree) are not members
    // and therefore are not checked.
    bool isValid() const
    {
        if (fn_clSVMAlloc == NULL || fn_clSVMFree == NULL || fn_clSetKernelArgSVMPointer == NULL)
            return false;
        /*fn_clSetKernelExecInfo && fn_clEnqueueSVMFree &&*/
        if (fn_clEnqueueSVMMemcpy == NULL || fn_clEnqueueSVMMemFill == NULL)
            return false;
        if (fn_clEnqueueSVMMap == NULL || fn_clEnqueueSVMUnmap == NULL)
            return false;
        return true;
    }
};
// Returns a pointer to the (read-only) SVMFunctions table for the given ocl::Context.
// Lifetime requirement: the SVMFunctions object must live at least as long as the
// context it belongs to, so the returned pointer must not be used after the context is gone.
CV_EXPORTS
const
SVMFunctions
*
getSVMFunctions
(
const
ocl
::
Context
&
context
);
// Takes UMat usage flags and returns a bool.
// NOTE(review): presumably reports whether SVM should be used for an allocation with
// these usage flags -- the definition is not visible here, confirm against ocl.cpp.
CV_EXPORTS
bool
useSVM
(
UMatUsageFlags
usageFlags
);
}}}
//namespace cv::ocl::svm
#endif
#endif // __OPENCV_CORE_OPENCL_SVM_HPP__
/* End of file. */
modules/core/include/opencv2/core/opencl/runtime/opencl_core.hpp
浏览文件 @
0a07d780
...
...
@@ -62,6 +62,18 @@
#endif
#endif
#ifdef HAVE_OPENCL_SVM
#define clSVMAlloc clSVMAlloc_
#define clSVMFree clSVMFree_
#define clSetKernelArgSVMPointer clSetKernelArgSVMPointer_
#define clSetKernelExecInfo clSetKernelExecInfo_
#define clEnqueueSVMFree clEnqueueSVMFree_
#define clEnqueueSVMMemcpy clEnqueueSVMMemcpy_
#define clEnqueueSVMMemFill clEnqueueSVMMemFill_
#define clEnqueueSVMMap clEnqueueSVMMap_
#define clEnqueueSVMUnmap clEnqueueSVMUnmap_
#endif
#include "autogenerated/opencl_core.hpp"
#endif // HAVE_OPENCL_STATIC
...
...
modules/core/include/opencv2/core/opencl/runtime/opencl_svm_20.hpp
0 → 100644
浏览文件 @
0a07d780
/* See LICENSE file in the root OpenCV directory */
#ifndef __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP__
#define __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP__
#if defined(HAVE_OPENCL_SVM)
#include "opencl_core.hpp"
#include "opencl_svm_definitions.hpp"
#ifndef HAVE_OPENCL_STATIC
#undef clSVMAlloc
#define clSVMAlloc clSVMAlloc_pfn
#undef clSVMFree
#define clSVMFree clSVMFree_pfn
#undef clSetKernelArgSVMPointer
#define clSetKernelArgSVMPointer clSetKernelArgSVMPointer_pfn
#undef clSetKernelExecInfo
//#define clSetKernelExecInfo clSetKernelExecInfo_pfn
#undef clEnqueueSVMFree
//#define clEnqueueSVMFree clEnqueueSVMFree_pfn
#undef clEnqueueSVMMemcpy
#define clEnqueueSVMMemcpy clEnqueueSVMMemcpy_pfn
#undef clEnqueueSVMMemFill
#define clEnqueueSVMMemFill clEnqueueSVMMemFill_pfn
#undef clEnqueueSVMMap
#define clEnqueueSVMMap clEnqueueSVMMap_pfn
#undef clEnqueueSVMUnmap
#define clEnqueueSVMUnmap clEnqueueSVMUnmap_pfn
extern
CL_RUNTIME_EXPORT
void
*
(
CL_API_CALL
*
clSVMAlloc
)(
cl_context
context
,
cl_svm_mem_flags
flags
,
size_t
size
,
unsigned
int
alignment
);
extern
CL_RUNTIME_EXPORT
void
(
CL_API_CALL
*
clSVMFree
)(
cl_context
context
,
void
*
svm_pointer
);
extern
CL_RUNTIME_EXPORT
cl_int
(
CL_API_CALL
*
clSetKernelArgSVMPointer
)(
cl_kernel
kernel
,
cl_uint
arg_index
,
const
void
*
arg_value
);
//extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clSetKernelExecInfo)(cl_kernel kernel, cl_kernel_exec_info param_name, size_t param_value_size, const void* param_value);
//extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMFree)(cl_command_queue command_queue, cl_uint num_svm_pointers, void* svm_pointers[],
// void (CL_CALLBACK *pfn_free_func)(cl_command_queue queue, cl_uint num_svm_pointers, void* svm_pointers[], void* user_data), void* user_data,
// cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
extern
CL_RUNTIME_EXPORT
cl_int
(
CL_API_CALL
*
clEnqueueSVMMemcpy
)(
cl_command_queue
command_queue
,
cl_bool
blocking_copy
,
void
*
dst_ptr
,
const
void
*
src_ptr
,
size_t
size
,
cl_uint
num_events_in_wait_list
,
const
cl_event
*
event_wait_list
,
cl_event
*
event
);
extern
CL_RUNTIME_EXPORT
cl_int
(
CL_API_CALL
*
clEnqueueSVMMemFill
)(
cl_command_queue
command_queue
,
void
*
svm_ptr
,
const
void
*
pattern
,
size_t
pattern_size
,
size_t
size
,
cl_uint
num_events_in_wait_list
,
const
cl_event
*
event_wait_list
,
cl_event
*
event
);
extern
CL_RUNTIME_EXPORT
cl_int
(
CL_API_CALL
*
clEnqueueSVMMap
)(
cl_command_queue
command_queue
,
cl_bool
blocking_map
,
cl_map_flags
map_flags
,
void
*
svm_ptr
,
size_t
size
,
cl_uint
num_events_in_wait_list
,
const
cl_event
*
event_wait_list
,
cl_event
*
event
);
extern
CL_RUNTIME_EXPORT
cl_int
(
CL_API_CALL
*
clEnqueueSVMUnmap
)(
cl_command_queue
command_queue
,
void
*
svm_ptr
,
cl_uint
num_events_in_wait_list
,
const
cl_event
*
event_wait_list
,
cl_event
*
event
);
#endif // HAVE_OPENCL_STATIC
#endif // HAVE_OPENCL_SVM
#endif // __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP__
modules/core/include/opencv2/core/opencl/runtime/opencl_svm_definitions.hpp
0 → 100644
浏览文件 @
0a07d780
/* See LICENSE file in the root OpenCV directory */
#ifndef __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP__
#define __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP__
#if defined(HAVE_OPENCL_SVM)
#if defined(CL_VERSION_2_0)
// OpenCL 2.0 contains SVM definitions
#else
typedef
cl_bitfield
cl_device_svm_capabilities
;
typedef
cl_bitfield
cl_svm_mem_flags
;
typedef
cl_uint
cl_kernel_exec_info
;
//
// TODO Add real values after OpenCL 2.0 release
//
#ifndef CL_DEVICE_SVM_CAPABILITIES
#define CL_DEVICE_SVM_CAPABILITIES 0x1053
#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER (1 << 0)
#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER (1 << 1)
#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM (1 << 2)
#define CL_DEVICE_SVM_ATOMICS (1 << 3)
#endif
#ifndef CL_MEM_SVM_FINE_GRAIN_BUFFER
#define CL_MEM_SVM_FINE_GRAIN_BUFFER (1 << 10)
#endif
#ifndef CL_MEM_SVM_ATOMICS
#define CL_MEM_SVM_ATOMICS (1 << 11)
#endif
#endif // CL_VERSION_2_0
#endif // HAVE_OPENCL_SVM
#endif // __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP__
modules/core/include/opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp
0 → 100644
浏览文件 @
0a07d780
/* See LICENSE file in the root OpenCV directory */
#ifndef __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP__
#define __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP__
#if defined(HAVE_OPENCL_SVM)
#include "opencl_core.hpp"
#ifndef CL_DEVICE_SVM_CAPABILITIES_AMD
//
// Part of the file is an extract from the cl_ext.h file from AMD APP SDK package.
// Below is the original copyright.
//
/*******************************************************************************
* Copyright (c) 2008-2013 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
/*******************************************
* Shared Virtual Memory (SVM) extension
*******************************************/
typedef
cl_bitfield
cl_device_svm_capabilities_amd
;
typedef
cl_bitfield
cl_svm_mem_flags_amd
;
typedef
cl_uint
cl_kernel_exec_info_amd
;
/* cl_device_info */
#define CL_DEVICE_SVM_CAPABILITIES_AMD 0x1053
#define CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT_AMD 0x1054
/* cl_device_svm_capabilities_amd */
#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_AMD (1 << 0)
#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER_AMD (1 << 1)
#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM_AMD (1 << 2)
#define CL_DEVICE_SVM_ATOMICS_AMD (1 << 3)
/* cl_svm_mem_flags_amd */
#define CL_MEM_SVM_FINE_GRAIN_BUFFER_AMD (1 << 10)
#define CL_MEM_SVM_ATOMICS_AMD (1 << 11)
/* cl_mem_info */
#define CL_MEM_USES_SVM_POINTER_AMD 0x1109
/* cl_kernel_exec_info_amd */
#define CL_KERNEL_EXEC_INFO_SVM_PTRS_AMD 0x11B6
#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_AMD 0x11B7
/* cl_command_type */
#define CL_COMMAND_SVM_FREE_AMD 0x1209
#define CL_COMMAND_SVM_MEMCPY_AMD 0x120A
#define CL_COMMAND_SVM_MEMFILL_AMD 0x120B
#define CL_COMMAND_SVM_MAP_AMD 0x120C
#define CL_COMMAND_SVM_UNMAP_AMD 0x120D
typedef
CL_API_ENTRY
void
*
(
CL_API_CALL
*
clSVMAllocAMD_fn
)(
cl_context
/* context */
,
cl_svm_mem_flags_amd
/* flags */
,
size_t
/* size */
,
unsigned
int
/* alignment */
)
CL_EXT_SUFFIX__VERSION_1_2
;
typedef
CL_API_ENTRY
void
(
CL_API_CALL
*
clSVMFreeAMD_fn
)(
cl_context
/* context */
,
void
*
/* svm_pointer */
)
CL_EXT_SUFFIX__VERSION_1_2
;
typedef
CL_API_ENTRY
cl_int
(
CL_API_CALL
*
clEnqueueSVMFreeAMD_fn
)(
cl_command_queue
/* command_queue */
,
cl_uint
/* num_svm_pointers */
,
void
**
/* svm_pointers */
,
void
(
CL_CALLBACK
*
)(
/*pfn_free_func*/
cl_command_queue
/* queue */
,
cl_uint
/* num_svm_pointers */
,
void
**
/* svm_pointers */
,
void
*
/* user_data */
),
void
*
/* user_data */
,
cl_uint
/* num_events_in_wait_list */
,
const
cl_event
*
/* event_wait_list */
,
cl_event
*
/* event */
)
CL_EXT_SUFFIX__VERSION_1_2
;
typedef
CL_API_ENTRY
cl_int
(
CL_API_CALL
*
clEnqueueSVMMemcpyAMD_fn
)(
cl_command_queue
/* command_queue */
,
cl_bool
/* blocking_copy */
,
void
*
/* dst_ptr */
,
const
void
*
/* src_ptr */
,
size_t
/* size */
,
cl_uint
/* num_events_in_wait_list */
,
const
cl_event
*
/* event_wait_list */
,
cl_event
*
/* event */
)
CL_EXT_SUFFIX__VERSION_1_2
;
typedef
CL_API_ENTRY
cl_int
(
CL_API_CALL
*
clEnqueueSVMMemFillAMD_fn
)(
cl_command_queue
/* command_queue */
,
void
*
/* svm_ptr */
,
const
void
*
/* pattern */
,
size_t
/* pattern_size */
,
size_t
/* size */
,
cl_uint
/* num_events_in_wait_list */
,
const
cl_event
*
/* event_wait_list */
,
cl_event
*
/* event */
)
CL_EXT_SUFFIX__VERSION_1_2
;
typedef
CL_API_ENTRY
cl_int
(
CL_API_CALL
*
clEnqueueSVMMapAMD_fn
)(
cl_command_queue
/* command_queue */
,
cl_bool
/* blocking_map */
,
cl_map_flags
/* map_flags */
,
void
*
/* svm_ptr */
,
size_t
/* size */
,
cl_uint
/* num_events_in_wait_list */
,
const
cl_event
*
/* event_wait_list */
,
cl_event
*
/* event */
)
CL_EXT_SUFFIX__VERSION_1_2
;
typedef
CL_API_ENTRY
cl_int
(
CL_API_CALL
*
clEnqueueSVMUnmapAMD_fn
)(
cl_command_queue
/* command_queue */
,
void
*
/* svm_ptr */
,
cl_uint
/* num_events_in_wait_list */
,
const
cl_event
*
/* event_wait_list */
,
cl_event
*
/* event */
)
CL_EXT_SUFFIX__VERSION_1_2
;
typedef
CL_API_ENTRY
cl_int
(
CL_API_CALL
*
clSetKernelArgSVMPointerAMD_fn
)(
cl_kernel
/* kernel */
,
cl_uint
/* arg_index */
,
const
void
*
/* arg_value */
)
CL_EXT_SUFFIX__VERSION_1_2
;
typedef
CL_API_ENTRY
cl_int
(
CL_API_CALL
*
clSetKernelExecInfoAMD_fn
)(
cl_kernel
/* kernel */
,
cl_kernel_exec_info_amd
/* param_name */
,
size_t
/* param_value_size */
,
const
void
*
/* param_value */
)
CL_EXT_SUFFIX__VERSION_1_2
;
#endif
#endif // HAVE_OPENCL_SVM
#endif // __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP__
modules/core/src/matmul.cpp
浏览文件 @
0a07d780
...
...
@@ -721,6 +721,16 @@ static bool ocl_gemm_amdblas( InputArray matA, InputArray matB, double alpha,
return
false
;
UMat
A
=
matA
.
getUMat
(),
B
=
matB
.
getUMat
(),
D
=
matD
.
getUMat
();
if
(
!
ocl
::
internal
::
isCLBuffer
(
A
)
||
!
ocl
::
internal
::
isCLBuffer
(
B
)
||
!
ocl
::
internal
::
isCLBuffer
(
D
))
{
return
false
;
}
if
(
haveC
)
{
UMat
C
=
matC
.
getUMat
();
if
(
!
ocl
::
internal
::
isCLBuffer
(
C
))
return
false
;
}
if
(
haveC
)
ctrans
?
transpose
(
matC
,
D
)
:
matC
.
copyTo
(
D
);
else
...
...
modules/core/src/matrix.cpp
浏览文件 @
0a07d780
...
...
@@ -159,8 +159,9 @@ void MatAllocator::copy(UMatData* usrc, UMatData* udst, int dims, const size_t s
memcpy
(
ptrs
[
1
],
ptrs
[
0
],
planesz
);
}
BufferPoolController
*
MatAllocator
::
getBufferPoolController
()
const
BufferPoolController
*
MatAllocator
::
getBufferPoolController
(
const
char
*
id
)
const
{
(
void
)
id
;
static
DummyBufferPoolController
dummy
;
return
&
dummy
;
}
...
...
modules/core/src/ocl.cpp
浏览文件 @
0a07d780
此差异已折叠。
点击以展开。
modules/core/src/opencl/runtime/opencl_core.cpp
浏览文件 @
0a07d780
...
...
@@ -182,6 +182,65 @@ static void* opencl_check_fn(int ID);
#define CUSTOM_FUNCTION_ID 1000
#ifdef HAVE_OPENCL_SVM
#include "opencv2/core/opencl/runtime/opencl_svm_20.hpp"
// ID range reserved for the dynamically resolved SVM entry points: [START, END).
// Both expansions are parenthesized so they behave as a single operand inside any
// surrounding expression (e.g. `2 * SVM_FUNCTION_ID_END` or `x % SVM_FUNCTION_ID_END`).
#define SVM_FUNCTION_ID_START (CUSTOM_FUNCTION_ID)
#define SVM_FUNCTION_ID_END (CUSTOM_FUNCTION_ID + 100)
// Function IDs for the SVM entry points, numbered consecutively from SVM_FUNCTION_ID_START.
// opencl_check_fn() subtracts SVM_FUNCTION_ID_START from the ID and uses the result as an
// index into opencl_svm_fn_list, so the enumerator order here must match the order of that
// list exactly (including the two entries whose definitions are currently commented out).
enum
OPENCL_FN_SVM_ID
{
OPENCL_FN_clSVMAlloc
=
SVM_FUNCTION_ID_START
,
OPENCL_FN_clSVMFree
,
OPENCL_FN_clSetKernelArgSVMPointer
,
OPENCL_FN_clSetKernelExecInfo
,
OPENCL_FN_clEnqueueSVMFree
,
OPENCL_FN_clEnqueueSVMMemcpy
,
OPENCL_FN_clEnqueueSVMMemFill
,
OPENCL_FN_clEnqueueSVMMap
,
OPENCL_FN_clEnqueueSVMUnmap
,
};
void
*
(
CL_API_CALL
*
clSVMAlloc
)(
cl_context
context
,
cl_svm_mem_flags
flags
,
size_t
size
,
unsigned
int
alignment
)
=
opencl_fn4
<
OPENCL_FN_clSVMAlloc
,
void
*
,
cl_context
,
cl_svm_mem_flags
,
size_t
,
unsigned
int
>::
switch_fn
;
static
const
struct
DynamicFnEntry
_clSVMAlloc_definition
=
{
"clSVMAlloc"
,
(
void
**
)
&
clSVMAlloc
};
void
(
CL_API_CALL
*
clSVMFree
)(
cl_context
context
,
void
*
svm_pointer
)
=
opencl_fn2
<
OPENCL_FN_clSVMFree
,
void
,
cl_context
,
void
*>::
switch_fn
;
static
const
struct
DynamicFnEntry
_clSVMFree_definition
=
{
"clSVMFree"
,
(
void
**
)
&
clSVMFree
};
cl_int
(
CL_API_CALL
*
clSetKernelArgSVMPointer
)(
cl_kernel
kernel
,
cl_uint
arg_index
,
const
void
*
arg_value
)
=
opencl_fn3
<
OPENCL_FN_clSetKernelArgSVMPointer
,
cl_int
,
cl_kernel
,
cl_uint
,
const
void
*>::
switch_fn
;
static
const
struct
DynamicFnEntry
_clSetKernelArgSVMPointer_definition
=
{
"clSetKernelArgSVMPointer"
,
(
void
**
)
&
clSetKernelArgSVMPointer
};
//cl_int (CL_API_CALL *clSetKernelExecInfo)(cl_kernel kernel, cl_kernel_exec_info param_name, size_t param_value_size, const void* param_value) =
// opencl_fn4<OPENCL_FN_clSetKernelExecInfo, cl_int, cl_kernel, cl_kernel_exec_info, size_t, const void*>::switch_fn;
//static const struct DynamicFnEntry _clSetKernelExecInfo_definition = { "clSetKernelExecInfo", (void**)&clSetKernelExecInfo};
//cl_int (CL_API_CALL *clEnqueueSVMFree)(...) =
// opencl_fn8<OPENCL_FN_clEnqueueSVMFree, cl_int, ...>::switch_fn;
//static const struct DynamicFnEntry _clEnqueueSVMFree_definition = { "clEnqueueSVMFree", (void**)&clEnqueueSVMFree};
cl_int
(
CL_API_CALL
*
clEnqueueSVMMemcpy
)(
cl_command_queue
command_queue
,
cl_bool
blocking_copy
,
void
*
dst_ptr
,
const
void
*
src_ptr
,
size_t
size
,
cl_uint
num_events_in_wait_list
,
const
cl_event
*
event_wait_list
,
cl_event
*
event
)
=
opencl_fn8
<
OPENCL_FN_clEnqueueSVMMemcpy
,
cl_int
,
cl_command_queue
,
cl_bool
,
void
*
,
const
void
*
,
size_t
,
cl_uint
,
const
cl_event
*
,
cl_event
*>::
switch_fn
;
static
const
struct
DynamicFnEntry
_clEnqueueSVMMemcpy_definition
=
{
"clEnqueueSVMMemcpy"
,
(
void
**
)
&
clEnqueueSVMMemcpy
};
cl_int
(
CL_API_CALL
*
clEnqueueSVMMemFill
)(
cl_command_queue
command_queue
,
void
*
svm_ptr
,
const
void
*
pattern
,
size_t
pattern_size
,
size_t
size
,
cl_uint
num_events_in_wait_list
,
const
cl_event
*
event_wait_list
,
cl_event
*
event
)
=
opencl_fn8
<
OPENCL_FN_clEnqueueSVMMemFill
,
cl_int
,
cl_command_queue
,
void
*
,
const
void
*
,
size_t
,
size_t
,
cl_uint
,
const
cl_event
*
,
cl_event
*>::
switch_fn
;
static
const
struct
DynamicFnEntry
_clEnqueueSVMMemFill_definition
=
{
"clEnqueueSVMMemFill"
,
(
void
**
)
&
clEnqueueSVMMemFill
};
cl_int
(
CL_API_CALL
*
clEnqueueSVMMap
)(
cl_command_queue
command_queue
,
cl_bool
blocking_map
,
cl_map_flags
map_flags
,
void
*
svm_ptr
,
size_t
size
,
cl_uint
num_events_in_wait_list
,
const
cl_event
*
event_wait_list
,
cl_event
*
event
)
=
opencl_fn8
<
OPENCL_FN_clEnqueueSVMMap
,
cl_int
,
cl_command_queue
,
cl_bool
,
cl_map_flags
,
void
*
,
size_t
,
cl_uint
,
const
cl_event
*
,
cl_event
*>::
switch_fn
;
static
const
struct
DynamicFnEntry
_clEnqueueSVMMap_definition
=
{
"clEnqueueSVMMap"
,
(
void
**
)
&
clEnqueueSVMMap
};
cl_int
(
CL_API_CALL
*
clEnqueueSVMUnmap
)(
cl_command_queue
command_queue
,
void
*
svm_ptr
,
cl_uint
num_events_in_wait_list
,
const
cl_event
*
event_wait_list
,
cl_event
*
event
)
=
opencl_fn5
<
OPENCL_FN_clEnqueueSVMUnmap
,
cl_int
,
cl_command_queue
,
void
*
,
cl_uint
,
const
cl_event
*
,
cl_event
*>::
switch_fn
;
static
const
struct
DynamicFnEntry
_clEnqueueSVMUnmap_definition
=
{
"clEnqueueSVMUnmap"
,
(
void
**
)
&
clEnqueueSVMUnmap
};
// Lookup table of SVM entry-point descriptors, indexed by (ID - SVM_FUNCTION_ID_START)
// in opencl_check_fn(). The order mirrors enum OPENCL_FN_SVM_ID.
// The two NULL slots stand in for clSetKernelExecInfo and clEnqueueSVMFree, whose
// definitions are commented out; they keep the following entries at the right index.
// NOTE(review): confirm opencl_check_fn() copes with a NULL entry if one of those IDs
// is ever requested.
static
const
struct
DynamicFnEntry
*
opencl_svm_fn_list
[]
=
{
&
_clSVMAlloc_definition
,
&
_clSVMFree_definition
,
&
_clSetKernelArgSVMPointer_definition
,
NULL
/*&_clSetKernelExecInfo_definition*/
,
NULL
/*&_clEnqueueSVMFree_definition*/
,
&
_clEnqueueSVMMemcpy_definition
,
&
_clEnqueueSVMMemFill_definition
,
&
_clEnqueueSVMMap_definition
,
&
_clEnqueueSVMUnmap_definition
,
};
#endif // HAVE_OPENCL_SVM
//
// END OF CUSTOM FUNCTIONS HERE
//
...
...
@@ -194,6 +253,14 @@ static void* opencl_check_fn(int ID)
assert
(
ID
>=
0
&&
ID
<
(
int
)(
sizeof
(
opencl_fn_list
)
/
sizeof
(
opencl_fn_list
[
0
])));
e
=
opencl_fn_list
[
ID
];
}
#ifdef HAVE_OPENCL_SVM
else
if
(
ID
>=
SVM_FUNCTION_ID_START
&&
ID
<
SVM_FUNCTION_ID_END
)
{
ID
=
ID
-
SVM_FUNCTION_ID_START
;
assert
(
ID
>=
0
&&
ID
<
(
int
)(
sizeof
(
opencl_svm_fn_list
)
/
sizeof
(
opencl_svm_fn_list
[
0
])));
e
=
opencl_svm_fn_list
[
ID
];
}
#endif
else
{
CV_ErrorNoReturn
(
cv
::
Error
::
StsBadArg
,
"Invalid function ID"
);
...
...
modules/core/src/umatrix.cpp
浏览文件 @
0a07d780
...
...
@@ -55,7 +55,7 @@ UMatData::UMatData(const MatAllocator* allocator)
prevAllocator
=
currAllocator
=
allocator
;
urefcount
=
refcount
=
0
;
data
=
origdata
=
0
;
size
=
0
;
capacity
=
0
;
size
=
0
;
flags
=
0
;
handle
=
0
;
userdata
=
0
;
...
...
@@ -67,7 +67,7 @@ UMatData::~UMatData()
prevAllocator
=
currAllocator
=
0
;
urefcount
=
refcount
=
0
;
data
=
origdata
=
0
;
size
=
0
;
capacity
=
0
;
size
=
0
;
flags
=
0
;
handle
=
0
;
userdata
=
0
;
...
...
@@ -221,7 +221,7 @@ UMat Mat::getUMat(int accessFlags, UMatUsageFlags usageFlags) const
temp_u
=
a
->
allocate
(
dims
,
size
.
p
,
type
(),
data
,
step
.
p
,
accessFlags
,
usageFlags
);
temp_u
->
refcount
=
1
;
}
UMat
::
getStdAllocator
()
->
allocate
(
temp_u
,
accessFlags
,
usageFlags
);
UMat
::
getStdAllocator
()
->
allocate
(
temp_u
,
accessFlags
,
usageFlags
);
// TODO result is not checked
hdr
.
flags
=
flags
;
setSize
(
hdr
,
dims
,
size
.
p
,
step
.
p
);
finalizeHdr
(
hdr
);
...
...
@@ -575,7 +575,7 @@ Mat UMat::getMat(int accessFlags) const
{
if
(
!
u
)
return
Mat
();
u
->
currAllocator
->
map
(
u
,
accessFlags
|
ACCESS_READ
);
u
->
currAllocator
->
map
(
u
,
accessFlags
|
ACCESS_READ
);
// TODO Support ACCESS_WRITE without unnecessary data transfers
CV_Assert
(
u
->
data
!=
0
);
Mat
hdr
(
dims
,
size
.
p
,
type
(),
u
->
data
+
offset
,
step
.
p
);
hdr
.
flags
=
flags
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录