提交 485d36d3 编写于 作者: V Vadim Pisarevsky

changed InputArray's enumeration constant for UMat, since it may potentially...

Changed InputArray's enumeration constant for UMat, since it may potentially conflict with the existing OpenCL module. Refactored Kernel's setArg API (a UMat is now passed to a kernel as a structure, as Alexander A suggested). Removed Kernel's cleanup callback from the external API; each kernel now keeps track of the matrices it uses, and they are dereferenced after the kernel completes.
上级 6416c330
......@@ -83,9 +83,9 @@ public:
CUDA_MEM = 8 << KIND_SHIFT,
GPU_MAT = 9 << KIND_SHIFT,
OCL_MAT =10 << KIND_SHIFT,
UMAT =OCL_MAT,
STD_VECTOR_UMAT =11 << KIND_SHIFT,
UEXPR =12 << KIND_SHIFT
UMAT =11 << KIND_SHIFT,
STD_VECTOR_UMAT =12 << KIND_SHIFT,
UEXPR =13 << KIND_SHIFT
};
_InputArray();
......
......@@ -264,13 +264,6 @@ public:
class CV_EXPORTS Kernel
{
public:
class CV_EXPORTS Callback
{
public:
virtual ~Callback() {}
virtual void operator()() = 0;
};
Kernel();
Kernel(const char* kname, const Program& prog);
Kernel(const char* kname, const ProgramSource& prog,
......@@ -283,118 +276,115 @@ public:
bool create(const char* kname, const ProgramSource& prog,
const String& buildopts, String& errmsg);
int set(int i, const void* value, size_t sz);
int set(int i, const UMat& m);
int set(int i, const KernelArg& arg);
template<typename _Tp> int set(int i, const _Tp& value)
void set(int i, const void* value, size_t sz);
void set(int i, const UMat& m);
void set(int i, const KernelArg& arg);
template<typename _Tp> void set(int i, const _Tp& value)
{ return set(i, &value, sizeof(value)); }
template<typename _Tp1>
Kernel& args(_Tp1 a1)
template<typename _Tp0>
Kernel& args(const _Tp0& a0)
{
set(0, a1); return *this;
set(0, a0); return *this;
}
template<typename _Tp1, typename _Tp2>
Kernel& args(_Tp1 a1, _Tp2 a2)
template<typename _Tp0, typename _Tp1>
Kernel& args(const _Tp0& a0, const _Tp1& a1)
{
int i = set(0, a1); set(i, a2); return *this;
set(0, a0); set(1, a1); return *this;
}
template<typename _Tp1, typename _Tp2, typename _Tp3>
Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3)
template<typename _Tp0, typename _Tp1, typename _Tp2>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2)
{
int i = set(0, a1); i = set(i, a2); set(i, a3); return *this;
set(0, a0); set(1, a1); set(2, a2); return *this;
}
template<typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4>
Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3, _Tp4 a4)
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3)
{
int i = set(0, a1); i = set(i, a2); i = set(i, a3); set(i, a4);
return *this;
set(0, a0); set(1, a1); set(2, a2); set(3, a3); return *this;
}
template<typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4, typename _Tp5>
Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3, _Tp4 a4, _Tp5 a5)
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2,
const _Tp3& a3, const _Tp4& a4)
{
int i = set(0, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); set(i, a5);
return *this;
set(0, a0); set(1, a1); set(2, a2); set(3, a3); set(4, a4); return *this;
}
template<typename _Tp1, typename _Tp2, typename _Tp3,
typename _Tp4, typename _Tp5, typename _Tp6>
Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3, _Tp4 a4, _Tp5 a5, _Tp6 a6)
template<typename _Tp0, typename _Tp1, typename _Tp2,
typename _Tp3, typename _Tp4, typename _Tp5>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2,
const _Tp3& a3, const _Tp4& a4, const _Tp5& a5)
{
int i = set(0, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4);
i = set(i, a5); set(i, a6); return *this;
set(0, a0); set(1, a1); set(2, a2);
set(3, a3); set(4, a4); set(5, a5); return *this;
}
template<typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4,
typename _Tp5, typename _Tp6, typename _Tp7>
Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3, _Tp4 a4, _Tp5 a5, _Tp6 a6, _Tp7 a7)
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
typename _Tp4, typename _Tp5, typename _Tp6>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6)
{
int i = set(0, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4);
i = set(i, a5); i = set(i, a6); set(i, a7); return *this;
set(0, a0); set(1, a1); set(2, a2); set(3, a3);
set(4, a4); set(5, a5); set(6, a6); return *this;
}
template<typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4,
typename _Tp5, typename _Tp6, typename _Tp7, typename _Tp8>
Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3, _Tp4 a4, _Tp5 a5, _Tp6 a6, _Tp7 a7, _Tp8 a8)
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7)
{
int i = set(0, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4);
i = set(i, a5); i = set(i, a6); i = set(i, a7); set(i, a8);
return *this;
set(0, a0); set(1, a1); set(2, a2); set(3, a3);
set(4, a4); set(5, a5); set(6, a6); set(7, a7); return *this;
}
template<typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4, typename _Tp5,
typename _Tp6, typename _Tp7, typename _Tp8, typename _Tp9>
Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3, _Tp4 a4, _Tp5 a5, _Tp6 a6, _Tp7 a7, _Tp8 a8, _Tp9 a9)
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4,
typename _Tp5, typename _Tp6, typename _Tp7, typename _Tp8>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
const _Tp8& a8)
{
int i = set(0, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4);
i = set(i, a5); i = set(i, a6); i = set(i, a7); i = set(i, a8);
set(i, a9); return *this;
set(0, a0); set(1, a1); set(2, a2); set(3, a3); set(4, a4);
set(5, a5); set(6, a6); set(7, a7); set(8, a8); return *this;
}
template<typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4, typename _Tp5,
typename _Tp6, typename _Tp7, typename _Tp8, typename _Tp9, typename _Tp10>
Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3, _Tp4 a4, _Tp5 a5, _Tp6 a6, _Tp7 a7,
_Tp8 a8, _Tp9 a9, _Tp10 a10)
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4,
typename _Tp5, typename _Tp6, typename _Tp7, typename _Tp8, typename _Tp9>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
const _Tp8& a8, const _Tp9& a9)
{
int i = set(0, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4);
i = set(i, a5); i = set(i, a6); i = set(i, a7); i = set(i, a8);
i = set(i, a9); set(i, a10); return *this;
set(0, a0); set(1, a1); set(2, a2); set(3, a3); set(4, a4); set(5, a5);
set(6, a6); set(7, a7); set(8, a8); set(9, a9); return *this;
}
template<typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4, typename _Tp5,
typename _Tp6, typename _Tp7, typename _Tp8, typename _Tp9,
typename _Tp10, typename _Tp11>
Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3, _Tp4 a4, _Tp5 a5, _Tp6 a6, _Tp7 a7,
_Tp8 a8, _Tp9 a9, _Tp10 a10, _Tp11 a11)
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
typename _Tp8, typename _Tp9, typename _Tp10>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
const _Tp8& a8, const _Tp9& a9, const _Tp10& a10)
{
int i = set(0, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4);
i = set(i, a5); i = set(i, a6); i = set(i, a7); i = set(i, a8);
i = set(i, a9); i = set(i, a10); set(i, a11); return *this;
set(0, a0); set(1, a1); set(2, a2); set(3, a3); set(4, a4); set(5, a5);
set(6, a6); set(7, a7); set(8, a8); set(9, a9); set(10, a10); return *this;
}
template<typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4, typename _Tp5,
typename _Tp6, typename _Tp7, typename _Tp8, typename _Tp9,
typename _Tp10, typename _Tp11, typename _Tp12>
Kernel& args(_Tp1 a1, _Tp2 a2, _Tp3 a3, _Tp4 a4, _Tp5 a5, _Tp6 a6, _Tp7 a7,
_Tp8 a8, _Tp9 a9, _Tp10 a10, _Tp11 a11, _Tp12 a12)
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11)
{
int i = set(0, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4);
i = set(i, a5); i = set(i, a6); i = set(i, a7); i = set(i, a8);
i = set(i, a9); i = set(i, a10); i = set(i, a11); set(i, a12);
return *this;
set(0, a0); set(1, a1); set(2, a2); set(3, a3); set(4, a4); set(5, a5);
set(6, a6); set(7, a7); set(8, a8); set(9, a9); set(10, a10); set(11, a11); return *this;
}
void run(int dims, size_t offset[],
size_t globalsize[], size_t localsize[], bool sync,
const Ptr<Callback>& cleanupCallback=Ptr<Callback>(),
const Queue& q=Queue());
void runTask(bool sync,
const Ptr<Callback>& cleanupCallback=Ptr<Callback>(),
const Queue& q=Queue());
void run(int dims, size_t offset[], size_t globalsize[],
size_t localsize[], bool sync, const Queue& q=Queue());
void runTask(bool sync, const Queue& q=Queue());
size_t workGroupSize() const;
bool compileWorkGroupSize(size_t wsz[]) const;
......
......@@ -1210,6 +1210,46 @@ OCL_FUNC(cl_int, clReleaseEvent, (cl_event event), (event))
namespace cv { namespace ocl {
struct UMat2D
{
UMat2D(const UMat& m, int accessFlags)
{
CV_Assert(m.dims == 2);
data = (cl_mem)m.handle(accessFlags);
offset = m.offset;
step = m.step;
rows = m.rows;
cols = m.cols;
}
cl_mem data;
size_t offset;
size_t step;
int rows;
int cols;
};
struct UMat3D
{
UMat3D(const UMat& m, int accessFlags)
{
CV_Assert(m.dims == 3);
data = (cl_mem)m.handle(accessFlags);
offset = m.offset;
step = m.step.p[1];
slicestep = m.step.p[0];
slices = m.size.p[0];
rows = m.size.p[1];
cols = m.size.p[2];
}
cl_mem data;
size_t offset;
size_t slicestep;
size_t step;
int slices;
int rows;
int cols;
};
// Computes 64-bit "cyclic redundancy check" sum, as specified in ECMA-182
static uint64 crc64( const uchar* data, size_t size, uint64 crc0=0 )
{
......@@ -1266,6 +1306,15 @@ bool useOpenCL()
return data->useOpenCL > 0;
}
void setUseOpenCL(bool flag)
{
if( haveOpenCL() )
{
TLSData* data = TLSData::get();
data->useOpenCL = flag ? 1 : 0;
}
}
void finish()
{
Queue::getDefault().finish();
......@@ -1980,10 +2029,33 @@ struct Kernel::Impl
cl_int retval = 0;
handle = ph != 0 ?
clCreateKernel(ph, kname, &retval) : 0;
for( int i = 0; i < MAX_ARRS; i++ )
u[i] = 0;
}
void cleanupUMats()
{
for( int i = 0; i < MAX_ARRS; i++ )
if( u[i] )
{
if( CV_XADD(&u[i]->urefcount, -1) == 1 )
u[i]->currAllocator->deallocate(u[i]);
u[i] = 0;
}
nu = 0;
}
void addUMat(const UMat& m)
{
CV_Assert(nu < MAX_ARRS && m.u && m.u->urefcount > 0);
u[nu] = m.u;
CV_XADD(&m.u->urefcount, 1);
nu++;
}
void finit()
{
if(!f.empty()) f->operator()();
cleanupUMats();
if(e) { clReleaseEvent(e); e = 0; }
release();
}
......@@ -1998,7 +2070,9 @@ struct Kernel::Impl
cl_kernel handle;
cl_event e;
Ptr<Kernel::Callback> f;
enum { MAX_ARRS = 16 };
UMatData* u[MAX_ARRS];
int nu;
};
}}
......@@ -2086,51 +2160,48 @@ void* Kernel::ptr() const
return p ? p->handle : 0;
}
int Kernel::set(int i, const void* value, size_t sz)
void Kernel::set(int i, const void* value, size_t sz)
{
CV_Assert( p && clSetKernelArg(p->handle, (cl_uint)i, sz, value) >= 0 );
return i+1;
if( i == 0 )
p->cleanupUMats();
}
int Kernel::set(int i, const UMat& m)
void Kernel::set(int i, const UMat& m)
{
return set(i, KernelArg(KernelArg::READ_WRITE, (UMat*)&m, 0, 0));
set(i, KernelArg(KernelArg::READ_WRITE, (UMat*)&m, 0, 0));
}
int Kernel::set(int i, const KernelArg& arg)
void Kernel::set(int i, const KernelArg& arg)
{
CV_Assert( p && p->handle );
if( i == 0 )
p->cleanupUMats();
if( arg.m )
{
int dims = arg.m->dims;
void* h = arg.m->handle(((arg.flags & KernelArg::READ_ONLY) ? ACCESS_READ : 0) +
((arg.flags & KernelArg::WRITE_ONLY) ? ACCESS_WRITE : 0));
clSetKernelArg(p->handle, (cl_uint)i, sizeof(cl_mem), &h);
clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(size_t), &arg.m->offset);
if( dims <= 2 )
int accessFlags = ((arg.flags & KernelArg::READ_ONLY) ? ACCESS_READ : 0) +
((arg.flags & KernelArg::WRITE_ONLY) ? ACCESS_WRITE : 0);
if( arg.m->dims <= 2 )
{
clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(size_t), &arg.m->step.p[0]);
clSetKernelArg(p->handle, (cl_uint)(i+3), sizeof(arg.m->rows), &arg.m->rows);
clSetKernelArg(p->handle, (cl_uint)(i+4), sizeof(arg.m->cols), &arg.m->cols);
return i + 5;
UMat2D u2d(*arg.m, accessFlags);
clSetKernelArg(p->handle, (cl_uint)i, sizeof(u2d), &u2d);
}
else
{
clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(size_t)*(dims-1), &arg.m->step.p[0]);
clSetKernelArg(p->handle, (cl_uint)(i+3), sizeof(cl_int)*dims, &arg.m->size.p[0]);
return i + 4;
UMat3D u3d(*arg.m, accessFlags);
clSetKernelArg(p->handle, (cl_uint)i, sizeof(u3d), &u3d);
}
p->addUMat(*arg.m);
}
else
{
clSetKernelArg(p->handle, (cl_uint)i, arg.sz, arg.obj);
return i+1;
}
}
void Kernel::run(int dims, size_t offset[], size_t globalsize[], size_t localsize[],
bool sync, const Ptr<Callback>& cleanupCallback, const Queue& q)
bool sync, const Queue& q)
{
CV_Assert(p && p->handle && p->e == 0);
cl_command_queue qq = getQueue(q);
......@@ -2140,18 +2211,16 @@ void Kernel::run(int dims, size_t offset[], size_t globalsize[], size_t localsiz
if( sync )
{
clFinish(qq);
if( !cleanupCallback.empty() )
cleanupCallback->operator()();
p->cleanupUMats();
}
else
{
p->f = cleanupCallback;
p->addref();
clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p);
}
}
void Kernel::runTask(bool sync, const Ptr<Callback>& cleanupCallback, const Queue& q)
void Kernel::runTask(bool sync, const Queue& q)
{
CV_Assert(p && p->handle && p->e == 0);
cl_command_queue qq = getQueue(q);
......@@ -2159,12 +2228,10 @@ void Kernel::runTask(bool sync, const Ptr<Callback>& cleanupCallback, const Queu
if( sync )
{
clFinish(qq);
if( !cleanupCallback.empty() )
cleanupCallback->operator()();
p->cleanupUMats();
}
else
{
p->f = cleanupCallback;
p->addref();
clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p);
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册