提交 81c6b46f 编写于 作者: R Roman Donchenko 提交者: OpenCV Buildbot

Merge pull request #974 from jet47:gpu-core-refactoring

......@@ -201,9 +201,9 @@ def process_module(module, path):
hdrlist.append(os.path.join(root, filename))
if module == "gpu":
hdrlist.append(os.path.join(path, "..", "core", "include", "opencv2", "core", "cuda_devptrs.hpp"))
hdrlist.append(os.path.join(path, "..", "core", "include", "opencv2", "core", "gpumat.hpp"))
hdrlist.append(os.path.join(path, "..", "core", "include", "opencv2", "core", "stream_accessor.hpp"))
hdrlist.append(os.path.join(path, "..", "core", "include", "opencv2", "core", "gpu_types.hpp"))
hdrlist.append(os.path.join(path, "..", "core", "include", "opencv2", "core", "gpu.hpp"))
hdrlist.append(os.path.join(path, "..", "core", "include", "opencv2", "core", "gpu_stream_accessor.hpp"))
decls = []
for hname in hdrlist:
......
......@@ -493,6 +493,9 @@ namespace ogl
namespace gpu
{
class CV_EXPORTS GpuMat;
class CV_EXPORTS CudaMem;
class CV_EXPORTS Stream;
class CV_EXPORTS Event;
}
} // cv
......
......@@ -44,7 +44,7 @@
#define __OPENCV_GPU_COMMON_HPP__
#include <cuda_runtime.h>
#include "opencv2/core/cuda_devptrs.hpp"
#include "opencv2/core/gpu_types.hpp"
#include "opencv2/core/cvdef.h"
#include "opencv2/core/base.hpp"
......
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_CORE_GPUINL_HPP__
#define __OPENCV_CORE_GPUINL_HPP__
#include "opencv2/core/gpu.hpp"
namespace cv { namespace gpu {
//////////////////////////////// GpuMat ///////////////////////////////
inline
GpuMat::GpuMat()
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{}
inline
GpuMat::GpuMat(int rows_, int cols_, int type_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
if (rows_ > 0 && cols_ > 0)
create(rows_, cols_, type_);
}
inline
GpuMat::GpuMat(Size size_, int type_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
if (size_.height > 0 && size_.width > 0)
create(size_.height, size_.width, type_);
}
inline
GpuMat::GpuMat(int rows_, int cols_, int type_, Scalar s_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
if (rows_ > 0 && cols_ > 0)
{
create(rows_, cols_, type_);
setTo(s_);
}
}
inline
GpuMat::GpuMat(Size size_, int type_, Scalar s_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
if (size_.height > 0 && size_.width > 0)
{
create(size_.height, size_.width, type_);
setTo(s_);
}
}
inline
GpuMat::GpuMat(const GpuMat& m)
: flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend)
{
if (refcount)
CV_XADD(refcount, 1);
}
inline
GpuMat::GpuMat(InputArray arr) :
flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
upload(arr);
}
inline
GpuMat::~GpuMat()
{
release();
}
inline
GpuMat& GpuMat::operator =(const GpuMat& m)
{
if (this != &m)
{
GpuMat temp(m);
swap(temp);
}
return *this;
}
inline
void GpuMat::create(Size size_, int type_)
{
create(size_.height, size_.width, type_);
}
inline
void GpuMat::swap(GpuMat& b)
{
std::swap(flags, b.flags);
std::swap(rows, b.rows);
std::swap(cols, b.cols);
std::swap(step, b.step);
std::swap(data, b.data);
std::swap(datastart, b.datastart);
std::swap(dataend, b.dataend);
std::swap(refcount, b.refcount);
}
inline
GpuMat GpuMat::clone() const
{
GpuMat m;
copyTo(m);
return m;
}
inline
void GpuMat::copyTo(OutputArray dst, InputArray mask) const
{
copyTo(dst, mask, Stream::Null());
}
inline
GpuMat& GpuMat::setTo(Scalar s)
{
return setTo(s, Stream::Null());
}
inline
GpuMat& GpuMat::setTo(Scalar s, InputArray mask)
{
return setTo(s, mask, Stream::Null());
}
inline
void GpuMat::convertTo(OutputArray dst, int rtype) const
{
convertTo(dst, rtype, Stream::Null());
}
inline
void GpuMat::convertTo(OutputArray dst, int rtype, double alpha, double beta) const
{
convertTo(dst, rtype, alpha, beta, Stream::Null());
}
inline
void GpuMat::convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const
{
convertTo(dst, rtype, alpha, 0.0, stream);
}
inline
void GpuMat::assignTo(GpuMat& m, int _type) const
{
if (_type < 0)
m = *this;
else
convertTo(m, _type);
}
inline
uchar* GpuMat::ptr(int y)
{
CV_DbgAssert( (unsigned)y < (unsigned)rows );
return data + step * y;
}
inline
const uchar* GpuMat::ptr(int y) const
{
CV_DbgAssert( (unsigned)y < (unsigned)rows );
return data + step * y;
}
template<typename _Tp> inline
_Tp* GpuMat::ptr(int y)
{
return (_Tp*)ptr(y);
}
template<typename _Tp> inline
const _Tp* GpuMat::ptr(int y) const
{
return (const _Tp*)ptr(y);
}
template <class T> inline
GpuMat::operator PtrStepSz<T>() const
{
return PtrStepSz<T>(rows, cols, (T*)data, step);
}
template <class T> inline
GpuMat::operator PtrStep<T>() const
{
return PtrStep<T>((T*)data, step);
}
inline
GpuMat GpuMat::row(int y) const
{
return GpuMat(*this, Range(y, y+1), Range::all());
}
inline
GpuMat GpuMat::col(int x) const
{
return GpuMat(*this, Range::all(), Range(x, x+1));
}
inline
GpuMat GpuMat::rowRange(int startrow, int endrow) const
{
return GpuMat(*this, Range(startrow, endrow), Range::all());
}
inline
GpuMat GpuMat::rowRange(Range r) const
{
return GpuMat(*this, r, Range::all());
}
inline
GpuMat GpuMat::colRange(int startcol, int endcol) const
{
return GpuMat(*this, Range::all(), Range(startcol, endcol));
}
inline
GpuMat GpuMat::colRange(Range r) const
{
return GpuMat(*this, Range::all(), r);
}
inline
GpuMat GpuMat::operator ()(Range rowRange_, Range colRange_) const
{
return GpuMat(*this, rowRange_, colRange_);
}
inline
GpuMat GpuMat::operator ()(Rect roi) const
{
return GpuMat(*this, roi);
}
inline
bool GpuMat::isContinuous() const
{
return (flags & Mat::CONTINUOUS_FLAG) != 0;
}
inline
size_t GpuMat::elemSize() const
{
return CV_ELEM_SIZE(flags);
}
inline
size_t GpuMat::elemSize1() const
{
return CV_ELEM_SIZE1(flags);
}
inline
int GpuMat::type() const
{
return CV_MAT_TYPE(flags);
}
inline
int GpuMat::depth() const
{
return CV_MAT_DEPTH(flags);
}
inline
int GpuMat::channels() const
{
return CV_MAT_CN(flags);
}
inline
size_t GpuMat::step1() const
{
return step / elemSize1();
}
inline
Size GpuMat::size() const
{
return Size(cols, rows);
}
inline
bool GpuMat::empty() const
{
return data == 0;
}
static inline
GpuMat createContinuous(int rows, int cols, int type)
{
GpuMat m;
createContinuous(rows, cols, type, m);
return m;
}
static inline
void createContinuous(Size size, int type, OutputArray arr)
{
createContinuous(size.height, size.width, type, arr);
}
static inline
GpuMat createContinuous(Size size, int type)
{
GpuMat m;
createContinuous(size, type, m);
return m;
}
static inline
void ensureSizeIsEnough(Size size, int type, OutputArray arr)
{
ensureSizeIsEnough(size.height, size.width, type, arr);
}
static inline
void swap(GpuMat& a, GpuMat& b)
{
a.swap(b);
}
//////////////////////////////// CudaMem ////////////////////////////////
inline
CudaMem::CudaMem(AllocType alloc_type_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
{
}
inline
CudaMem::CudaMem(const CudaMem& m)
: flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), alloc_type(m.alloc_type)
{
if( refcount )
CV_XADD(refcount, 1);
}
inline
CudaMem::CudaMem(int rows_, int cols_, int type_, AllocType alloc_type_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
{
if (rows_ > 0 && cols_ > 0)
create(rows_, cols_, type_);
}
inline
CudaMem::CudaMem(Size size_, int type_, AllocType alloc_type_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
{
if (size_.height > 0 && size_.width > 0)
create(size_.height, size_.width, type_);
}
inline
CudaMem::CudaMem(InputArray arr, AllocType alloc_type_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
{
arr.getMat().copyTo(*this);
}
inline
CudaMem::~CudaMem()
{
release();
}
inline
CudaMem& CudaMem::operator =(const CudaMem& m)
{
if (this != &m)
{
CudaMem temp(m);
swap(temp);
}
return *this;
}
inline
void CudaMem::swap(CudaMem& b)
{
std::swap(flags, b.flags);
std::swap(rows, b.rows);
std::swap(cols, b.cols);
std::swap(step, b.step);
std::swap(data, b.data);
std::swap(datastart, b.datastart);
std::swap(dataend, b.dataend);
std::swap(refcount, b.refcount);
std::swap(alloc_type, b.alloc_type);
}
inline
CudaMem CudaMem::clone() const
{
CudaMem m(size(), type(), alloc_type);
createMatHeader().copyTo(m);
return m;
}
inline
void CudaMem::create(Size size_, int type_)
{
create(size_.height, size_.width, type_);
}
inline
Mat CudaMem::createMatHeader() const
{
return Mat(size(), type(), data, step);
}
inline
bool CudaMem::isContinuous() const
{
return (flags & Mat::CONTINUOUS_FLAG) != 0;
}
inline
size_t CudaMem::elemSize() const
{
return CV_ELEM_SIZE(flags);
}
inline
size_t CudaMem::elemSize1() const
{
return CV_ELEM_SIZE1(flags);
}
inline
int CudaMem::type() const
{
return CV_MAT_TYPE(flags);
}
inline
int CudaMem::depth() const
{
return CV_MAT_DEPTH(flags);
}
inline
int CudaMem::channels() const
{
return CV_MAT_CN(flags);
}
inline
size_t CudaMem::step1() const
{
return step / elemSize1();
}
inline
Size CudaMem::size() const
{
return Size(cols, rows);
}
inline
bool CudaMem::empty() const
{
return data == 0;
}
static inline
void swap(CudaMem& a, CudaMem& b)
{
a.swap(b);
}
//////////////////////////////// Stream ///////////////////////////////
inline
void Stream::enqueueDownload(const GpuMat& src, OutputArray dst)
{
src.download(dst, *this);
}
inline
void Stream::enqueueUpload(InputArray src, GpuMat& dst)
{
dst.upload(src, *this);
}
inline
void Stream::enqueueCopy(const GpuMat& src, OutputArray dst)
{
src.copyTo(dst, *this);
}
inline
void Stream::enqueueMemSet(GpuMat& src, Scalar val)
{
src.setTo(val, *this);
}
inline
void Stream::enqueueMemSet(GpuMat& src, Scalar val, InputArray mask)
{
src.setTo(val, mask, *this);
}
inline
void Stream::enqueueConvert(const GpuMat& src, OutputArray dst, int dtype, double alpha, double beta)
{
src.convertTo(dst, dtype, alpha, beta, *this);
}
inline
Stream::Stream(const Ptr<Impl>& impl)
: impl_(impl)
{
}
//////////////////////////////// Initialization & Info ////////////////////////
inline
bool TargetArchs::has(int major, int minor)
{
return hasPtx(major, minor) || hasBin(major, minor);
}
inline
bool TargetArchs::hasEqualOrGreater(int major, int minor)
{
return hasEqualOrGreaterPtx(major, minor) || hasEqualOrGreaterBin(major, minor);
}
inline
DeviceInfo::DeviceInfo()
{
device_id_ = getDevice();
}
inline
DeviceInfo::DeviceInfo(int device_id)
{
CV_Assert( device_id >= 0 && device_id < getCudaEnabledDeviceCount() );
device_id_ = device_id;
}
inline
int DeviceInfo::deviceID() const
{
return device_id_;
}
inline
size_t DeviceInfo::freeMemory() const
{
size_t _totalMemory, _freeMemory;
queryMemory(_totalMemory, _freeMemory);
return _freeMemory;
}
inline
size_t DeviceInfo::totalMemory() const
{
size_t _totalMemory, _freeMemory;
queryMemory(_totalMemory, _freeMemory);
return _totalMemory;
}
inline
bool DeviceInfo::supports(FeatureSet feature_set) const
{
int version = major() * 10 + minor();
return version >= feature_set;
}
}} // namespace cv { namespace gpu {
//////////////////////////////// Mat ////////////////////////////////
namespace cv {
inline
Mat::Mat(const gpu::GpuMat& m)
: flags(0), dims(0), rows(0), cols(0), data(0), refcount(0), datastart(0), dataend(0), datalimit(0), allocator(0), size(&rows)
{
m.download(*this);
}
}
#endif // __OPENCV_CORE_GPUINL_HPP__
......@@ -40,28 +40,38 @@
//
//M*/
#ifndef __OPENCV_CUDA_STREAM_ACCESSOR_HPP__
#define __OPENCV_CUDA_STREAM_ACCESSOR_HPP__
#ifndef __OPENCV_CORE_GPU_STREAM_ACCESSOR_HPP__
#define __OPENCV_CORE_GPU_STREAM_ACCESSOR_HPP__
#include <cuda_runtime.h>
#include "opencv2/core/cvdef.h"
#ifndef __cplusplus
# error gpu_stream_accessor.hpp header must be compiled as C++
#endif
// This is only header file that depends on Cuda. All other headers are independent.
// So if you use OpenCV binaries you do noot need to install Cuda Toolkit.
// But of you wanna use GPU by yourself, may get cuda stream instance using the class below.
// In this case you have to install Cuda Toolkit.
#include <cuda_runtime.h>
#include "opencv2/core/cvdef.h"
namespace cv
{
namespace gpu
{
class Stream;
class Event;
struct StreamAccessor
{
CV_EXPORTS static cudaStream_t getStream(const Stream& stream);
};
struct EventAccessor
{
CV_EXPORTS static cudaEvent_t getEvent(const Event& event);
};
}
}
#endif /* __OPENCV_CUDA_STREAM_ACCESSOR_HPP__ */
#endif /* __OPENCV_CORE_GPU_STREAM_ACCESSOR_HPP__ */
......@@ -40,10 +40,12 @@
//
//M*/
#ifndef __OPENCV_CORE_DEVPTRS_HPP__
#define __OPENCV_CORE_DEVPTRS_HPP__
#ifndef __OPENCV_CORE_GPU_TYPES_HPP__
#define __OPENCV_CORE_GPU_TYPES_HPP__
#ifdef __cplusplus
#ifndef __cplusplus
# error gpu_types.hpp header must be compiled as C++
#endif
#ifdef __CUDACC__
#define __CV_GPU_HOST_DEVICE__ __host__ __device__ __forceinline__
......@@ -58,7 +60,7 @@ namespace cv
// Simple lightweight structures that encapsulates information about an image on device.
// It is intended to pass to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile
template<typename T> struct DevPtr
template <typename T> struct DevPtr
{
typedef T elem_type;
typedef int index_type;
......@@ -75,7 +77,7 @@ namespace cv
__CV_GPU_HOST_DEVICE__ operator const T*() const { return data; }
};
template<typename T> struct PtrSz : public DevPtr<T>
template <typename T> struct PtrSz : public DevPtr<T>
{
__CV_GPU_HOST_DEVICE__ PtrSz() : size(0) {}
__CV_GPU_HOST_DEVICE__ PtrSz(T* data_, size_t size_) : DevPtr<T>(data_), size(size_) {}
......@@ -83,12 +85,12 @@ namespace cv
size_t size;
};
template<typename T> struct PtrStep : public DevPtr<T>
template <typename T> struct PtrStep : public DevPtr<T>
{
__CV_GPU_HOST_DEVICE__ PtrStep() : step(0) {}
__CV_GPU_HOST_DEVICE__ PtrStep(T* data_, size_t step_) : DevPtr<T>(data_), step(step_) {}
/** \brief stride between two consecutive rows in bytes. Step is stored always and everywhere in bytes!!! */
//! stride between two consecutive rows in bytes. Step is stored always and everywhere in bytes!!!
size_t step;
__CV_GPU_HOST_DEVICE__ T* ptr(int y = 0) { return ( T*)( ( char*)DevPtr<T>::data + y * step); }
......@@ -118,36 +120,7 @@ namespace cv
typedef PtrStep<unsigned char> PtrStepb;
typedef PtrStep<float> PtrStepf;
typedef PtrStep<int> PtrStepi;
#if defined __GNUC__
#define __CV_GPU_DEPR_BEFORE__
#define __CV_GPU_DEPR_AFTER__ __attribute__ ((deprecated))
#elif defined(__MSVC__) //|| defined(__CUDACC__)
#pragma deprecated(DevMem2D_)
#define __CV_GPU_DEPR_BEFORE__ __declspec(deprecated)
#define __CV_GPU_DEPR_AFTER__
#else
#define __CV_GPU_DEPR_BEFORE__
#define __CV_GPU_DEPR_AFTER__
#endif
template <typename T> struct __CV_GPU_DEPR_BEFORE__ DevMem2D_ : public PtrStepSz<T>
{
DevMem2D_() {}
DevMem2D_(int rows_, int cols_, T* data_, size_t step_) : PtrStepSz<T>(rows_, cols_, data_, step_) {}
template <typename U>
explicit __CV_GPU_DEPR_BEFORE__ DevMem2D_(const DevMem2D_<U>& d) : PtrStepSz<T>(d.rows, d.cols, (T*)d.data, d.step) {}
} __CV_GPU_DEPR_AFTER__ ;
typedef DevMem2D_<unsigned char> DevMem2Db;
typedef DevMem2Db DevMem2D;
typedef DevMem2D_<float> DevMem2Df;
typedef DevMem2D_<int> DevMem2Di;
}
}
#endif // __cplusplus
#endif /* __OPENCV_CORE_DEVPTRS_HPP__ */
#endif /* __OPENCV_CORE_GPU_TYPES_HPP__ */
......@@ -77,7 +77,7 @@ public:
STD_VECTOR_MAT = 5 << KIND_SHIFT,
EXPR = 6 << KIND_SHIFT,
OPENGL_BUFFER = 7 << KIND_SHIFT,
OPENGL_TEXTURE = 8 << KIND_SHIFT,
CUDA_MEM = 8 << KIND_SHIFT,
GPU_MAT = 9 << KIND_SHIFT
};
......@@ -94,13 +94,12 @@ public:
_InputArray(const double& val);
_InputArray(const gpu::GpuMat& d_mat);
_InputArray(const ogl::Buffer& buf);
_InputArray(const ogl::Texture2D& tex);
_InputArray(const gpu::CudaMem& cuda_mem);
virtual Mat getMat(int i=-1) const;
virtual void getMatVector(std::vector<Mat>& mv) const;
virtual gpu::GpuMat getGpuMat() const;
virtual ogl::Buffer getOGlBuffer() const;
virtual ogl::Texture2D getOGlTexture2D() const;
virtual int kind() const;
virtual Size size(int i=-1) const;
......@@ -143,7 +142,7 @@ public:
_OutputArray(std::vector<Mat>& vec);
_OutputArray(gpu::GpuMat& d_mat);
_OutputArray(ogl::Buffer& buf);
_OutputArray(ogl::Texture2D& tex);
_OutputArray(gpu::CudaMem& cuda_mem);
template<typename _Tp> _OutputArray(std::vector<_Tp>& vec);
template<typename _Tp> _OutputArray(std::vector<std::vector<_Tp> >& vec);
template<typename _Tp> _OutputArray(std::vector<Mat_<_Tp> >& vec);
......@@ -155,7 +154,7 @@ public:
_OutputArray(const std::vector<Mat>& vec);
_OutputArray(const gpu::GpuMat& d_mat);
_OutputArray(const ogl::Buffer& buf);
_OutputArray(const ogl::Texture2D& tex);
_OutputArray(const gpu::CudaMem& cuda_mem);
template<typename _Tp> _OutputArray(const std::vector<_Tp>& vec);
template<typename _Tp> _OutputArray(const std::vector<std::vector<_Tp> >& vec);
template<typename _Tp> _OutputArray(const std::vector<Mat_<_Tp> >& vec);
......@@ -169,7 +168,7 @@ public:
virtual Mat& getMatRef(int i=-1) const;
virtual gpu::GpuMat& getGpuMatRef() const;
virtual ogl::Buffer& getOGlBufferRef() const;
virtual ogl::Texture2D& getOGlTexture2DRef() const;
virtual gpu::CudaMem& getCudaMemRef() const;
virtual void create(Size sz, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
virtual void create(int rows, int cols, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
virtual void create(int dims, const int* size, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
......
......@@ -40,8 +40,12 @@
//
//M*/
#ifndef __OPENCV_OPENGL_INTEROP_HPP__
#define __OPENCV_OPENGL_INTEROP_HPP__
#ifndef __OPENCV_CORE_OPENGL_HPP__
#define __OPENCV_CORE_OPENGL_HPP__
#ifndef __cplusplus
# error opengl.hpp header must be compiled as C++
#endif
#include "opencv2/core.hpp"
......@@ -84,7 +88,7 @@ public:
//! create buffer
void create(int arows, int acols, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false);
void create(Size asize, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false) { create(asize.height, asize.width, atype, target, autoRelease); }
void create(Size asize, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false);
//! release memory and delete buffer object
void release();
......@@ -92,11 +96,15 @@ public:
//! set auto release mode (if true, release will be called in object's destructor)
void setAutoRelease(bool flag);
//! copy from host/device memory
//! copy from host/device memory (blocking)
void copyFrom(InputArray arr, Target target = ARRAY_BUFFER, bool autoRelease = false);
//! copy from device memory (non blocking)
void copyFrom(InputArray arr, gpu::Stream& stream, Target target = ARRAY_BUFFER, bool autoRelease = false);
//! copy to host/device memory
void copyTo(OutputArray arr, Target target = ARRAY_BUFFER, bool autoRelease = false) const;
//! copy to host/device memory (blocking)
void copyTo(OutputArray arr) const;
//! copy to device memory (non blocking)
void copyTo(OutputArray arr, gpu::Stream& stream) const;
//! create copy of current buffer
Buffer clone(Target target = ARRAY_BUFFER, bool autoRelease = false) const;
......@@ -111,21 +119,26 @@ public:
Mat mapHost(Access access);
void unmapHost();
//! map to device memory
//! map to device memory (blocking)
gpu::GpuMat mapDevice();
void unmapDevice();
int rows() const { return rows_; }
int cols() const { return cols_; }
Size size() const { return Size(cols_, rows_); }
bool empty() const { return rows_ == 0 || cols_ == 0; }
//! map to device memory (non blocking)
gpu::GpuMat mapDevice(gpu::Stream& stream);
void unmapDevice(gpu::Stream& stream);
int type() const { return type_; }
int depth() const { return CV_MAT_DEPTH(type_); }
int channels() const { return CV_MAT_CN(type_); }
int elemSize() const { return CV_ELEM_SIZE(type_); }
int elemSize1() const { return CV_ELEM_SIZE1(type_); }
int rows() const;
int cols() const;
Size size() const;
bool empty() const;
int type() const;
int depth() const;
int channels() const;
int elemSize() const;
int elemSize1() const;
//! get OpenGL opject id
unsigned int bufId() const;
class Impl;
......@@ -165,7 +178,7 @@ public:
//! create texture
void create(int arows, int acols, Format aformat, bool autoRelease = false);
void create(Size asize, Format aformat, bool autoRelease = false) { create(asize.height, asize.width, aformat, autoRelease); }
void create(Size asize, Format aformat, bool autoRelease = false);
//! release memory and delete texture object
void release();
......@@ -182,13 +195,14 @@ public:
//! bind texture to current active texture unit for GL_TEXTURE_2D target
void bind() const;
int rows() const { return rows_; }
int cols() const { return cols_; }
Size size() const { return Size(cols_, rows_); }
bool empty() const { return rows_ == 0 || cols_ == 0; }
int rows() const;
int cols() const;
Size size() const;
bool empty() const;
Format format() const { return format_; }
Format format() const;
//! get OpenGL opject id
unsigned int texId() const;
class Impl;
......@@ -224,8 +238,8 @@ public:
void bind() const;
int size() const { return size_; }
bool empty() const { return size_ == 0; }
int size() const;
bool empty() const;
private:
int size_;
......@@ -260,14 +274,14 @@ enum {
CV_EXPORTS void render(const Arrays& arr, int mode = POINTS, Scalar color = Scalar::all(255));
CV_EXPORTS void render(const Arrays& arr, InputArray indices, int mode = POINTS, Scalar color = Scalar::all(255));
}} // namespace cv::gl
}} // namespace cv::ogl
namespace cv { namespace gpu {
//! set a CUDA device to use OpenGL interoperability
CV_EXPORTS void setGlDevice(int device = 0);
}} // cv::gpu
}}
namespace cv {
......@@ -276,4 +290,149 @@ template <> CV_EXPORTS void Ptr<cv::ogl::Texture2D::Impl>::delete_obj();
}
#endif // __OPENCV_OPENGL_INTEROP_HPP__
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
inline
cv::ogl::Buffer::Buffer(int arows, int acols, int atype, Target target, bool autoRelease) : rows_(0), cols_(0), type_(0)
{
create(arows, acols, atype, target, autoRelease);
}
inline
cv::ogl::Buffer::Buffer(Size asize, int atype, Target target, bool autoRelease) : rows_(0), cols_(0), type_(0)
{
create(asize, atype, target, autoRelease);
}
inline
void cv::ogl::Buffer::create(Size asize, int atype, Target target, bool autoRelease)
{
create(asize.height, asize.width, atype, target, autoRelease);
}
inline
int cv::ogl::Buffer::rows() const
{
return rows_;
}
inline
int cv::ogl::Buffer::cols() const
{
return cols_;
}
inline
cv::Size cv::ogl::Buffer::size() const
{
return Size(cols_, rows_);
}
inline
bool cv::ogl::Buffer::empty() const
{
return rows_ == 0 || cols_ == 0;
}
inline
int cv::ogl::Buffer::type() const
{
return type_;
}
inline
int cv::ogl::Buffer::depth() const
{
return CV_MAT_DEPTH(type_);
}
inline
int cv::ogl::Buffer::channels() const
{
return CV_MAT_CN(type_);
}
inline
int cv::ogl::Buffer::elemSize() const
{
return CV_ELEM_SIZE(type_);
}
inline
int cv::ogl::Buffer::elemSize1() const
{
return CV_ELEM_SIZE1(type_);
}
///////
inline
cv::ogl::Texture2D::Texture2D(int arows, int acols, Format aformat, bool autoRelease) : rows_(0), cols_(0), format_(NONE)
{
create(arows, acols, aformat, autoRelease);
}
inline
cv::ogl::Texture2D::Texture2D(Size asize, Format aformat, bool autoRelease) : rows_(0), cols_(0), format_(NONE)
{
create(asize, aformat, autoRelease);
}
inline
void cv::ogl::Texture2D::create(Size asize, Format aformat, bool autoRelease)
{
create(asize.height, asize.width, aformat, autoRelease);
}
inline
int cv::ogl::Texture2D::rows() const
{
return rows_;
}
inline
int cv::ogl::Texture2D::cols() const
{
return cols_;
}
inline
cv::Size cv::ogl::Texture2D::size() const
{
return Size(cols_, rows_);
}
inline
bool cv::ogl::Texture2D::empty() const
{
return rows_ == 0 || cols_ == 0;
}
inline
cv::ogl::Texture2D::Format cv::ogl::Texture2D::format() const
{
return format_;
}
///////
inline
cv::ogl::Arrays::Arrays() : size_(0)
{
}
inline
int cv::ogl::Arrays::size() const
{
return size_;
}
inline
bool cv::ogl::Arrays::empty() const
{
return size_ == 0;
}
#endif /* __OPENCV_CORE_OPENGL_HPP__ */
......@@ -41,8 +41,8 @@
//
//M*/
#ifndef __OPENCV_CORE_GPU_PRIVATE_HPP__
#define __OPENCV_CORE_GPU_PRIVATE_HPP__
#ifndef __OPENCV_CORE_PRIVATE_GPU_HPP__
#define __OPENCV_CORE_PRIVATE_GPU_HPP__
#ifndef __OPENCV_BUILD
# error this is a private header which should not be used from outside of the OpenCV library
......@@ -53,11 +53,13 @@
#include "opencv2/core/cvdef.h"
#include "opencv2/core/base.hpp"
#include "opencv2/core/gpu.hpp"
#ifdef HAVE_CUDA
# include <cuda.h>
# include <cuda_runtime.h>
# include <npp.h>
# include "opencv2/core/stream_accessor.hpp"
# include "opencv2/core/gpu_stream_accessor.hpp"
# include "opencv2/core/cuda/common.hpp"
# define NPP_VERSION (NPP_VERSION_MAJOR * 1000 + NPP_VERSION_MINOR * 100 + NPP_VERSION_BUILD)
......
......@@ -44,188 +44,113 @@
#include "opencv2/core/cuda/transform.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/type_traits.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
namespace cv { namespace gpu { namespace cudev
{
void writeScalar(const uchar*);
void writeScalar(const schar*);
void writeScalar(const ushort*);
void writeScalar(const short int*);
void writeScalar(const int*);
void writeScalar(const float*);
void writeScalar(const double*);
void copyToWithMask_gpu(PtrStepSzb src, PtrStepSzb dst, size_t elemSize1, int cn, PtrStepSzb mask, bool colorMask, cudaStream_t stream);
void convert_gpu(PtrStepSzb, int, PtrStepSzb, int, double, double, cudaStream_t);
}}}
#include "matrix_operations.hpp"
namespace cv { namespace gpu { namespace cudev
{
template <typename T> struct shift_and_sizeof;
template <> struct shift_and_sizeof<signed char> { enum { shift = 0 }; };
template <> struct shift_and_sizeof<unsigned char> { enum { shift = 0 }; };
template <> struct shift_and_sizeof<short> { enum { shift = 1 }; };
template <> struct shift_and_sizeof<unsigned short> { enum { shift = 1 }; };
template <> struct shift_and_sizeof<int> { enum { shift = 2 }; };
template <> struct shift_and_sizeof<float> { enum { shift = 2 }; };
template <> struct shift_and_sizeof<double> { enum { shift = 3 }; };
///////////////////////////////////////////////////////////////////////////
////////////////////////////////// CopyTo /////////////////////////////////
///////////////////////////////////////////////////////////////////////////
// copyWithMask
template <typename T> void copyToWithMask(PtrStepSzb src, PtrStepSzb dst, int cn, PtrStepSzb mask, bool colorMask, cudaStream_t stream)
template <typename T>
void copyWithMask(PtrStepSzb src, PtrStepSzb dst, int cn, PtrStepSzb mask, bool multiChannelMask, cudaStream_t stream)
{
if (colorMask)
cv::gpu::cudev::transform((PtrStepSz<T>)src, (PtrStepSz<T>)dst, identity<T>(), SingleMask(mask), stream);
if (multiChannelMask)
cv::gpu::cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, identity<T>(), SingleMask(mask), stream);
else
cv::gpu::cudev::transform((PtrStepSz<T>)src, (PtrStepSz<T>)dst, identity<T>(), SingleMaskChannels(mask, cn), stream);
cv::gpu::cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, identity<T>(), SingleMaskChannels(mask, cn), stream);
}
void copyToWithMask_gpu(PtrStepSzb src, PtrStepSzb dst, size_t elemSize1, int cn, PtrStepSzb mask, bool colorMask, cudaStream_t stream)
void copyWithMask(PtrStepSzb src, PtrStepSzb dst, size_t elemSize1, int cn, PtrStepSzb mask, bool multiChannelMask, cudaStream_t stream)
{
typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, int cn, PtrStepSzb mask, bool colorMask, cudaStream_t stream);
typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, int cn, PtrStepSzb mask, bool multiChannelMask, cudaStream_t stream);
static func_t tab[] =
static const func_t tab[] =
{
0,
copyToWithMask<unsigned char>,
copyToWithMask<unsigned short>,
copyWithMask<uchar>,
copyWithMask<ushort>,
0,
copyToWithMask<int>,
copyWithMask<int>,
0,
0,
0,
copyToWithMask<double>
copyWithMask<double>
};
tab[elemSize1](src, dst, cn, mask, colorMask, stream);
const func_t func = tab[elemSize1];
CV_DbgAssert( func != 0 );
func(src, dst, cn, mask, multiChannelMask, stream);
}
///////////////////////////////////////////////////////////////////////////
////////////////////////////////// SetTo //////////////////////////////////
///////////////////////////////////////////////////////////////////////////
__constant__ uchar scalar_8u[4];
__constant__ schar scalar_8s[4];
__constant__ ushort scalar_16u[4];
__constant__ short scalar_16s[4];
__constant__ int scalar_32s[4];
__constant__ float scalar_32f[4];
__constant__ double scalar_64f[4];
template <typename T> __device__ __forceinline__ T readScalar(int i);
template <> __device__ __forceinline__ uchar readScalar<uchar>(int i) {return scalar_8u[i];}
template <> __device__ __forceinline__ schar readScalar<schar>(int i) {return scalar_8s[i];}
template <> __device__ __forceinline__ ushort readScalar<ushort>(int i) {return scalar_16u[i];}
template <> __device__ __forceinline__ short readScalar<short>(int i) {return scalar_16s[i];}
template <> __device__ __forceinline__ int readScalar<int>(int i) {return scalar_32s[i];}
template <> __device__ __forceinline__ float readScalar<float>(int i) {return scalar_32f[i];}
template <> __device__ __forceinline__ double readScalar<double>(int i) {return scalar_64f[i];}
void writeScalar(const uchar* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_8u, vals, sizeof(uchar) * 4) );
}
void writeScalar(const schar* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_8s, vals, sizeof(schar) * 4) );
}
void writeScalar(const ushort* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_16u, vals, sizeof(ushort) * 4) );
}
void writeScalar(const short* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_16s, vals, sizeof(short) * 4) );
}
void writeScalar(const int* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_32s, vals, sizeof(int) * 4) );
}
void writeScalar(const float* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_32f, vals, sizeof(float) * 4) );
}
void writeScalar(const double* vals)
{
cudaSafeCall( cudaMemcpyToSymbol(scalar_64f, vals, sizeof(double) * 4) );
}
// set
template<typename T>
__global__ void set_to_without_mask(T* mat, int cols, int rows, size_t step, int channels)
template<typename T, class Mask>
__global__ void set(PtrStepSz<T> mat, const Mask mask, const int channels, const typename TypeVec<T, 4>::vec_type value)
{
size_t x = blockIdx.x * blockDim.x + threadIdx.x;
size_t y = blockIdx.y * blockDim.y + threadIdx.y;
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if ((x < cols * channels ) && (y < rows))
{
size_t idx = y * ( step >> shift_and_sizeof<T>::shift ) + x;
mat[idx] = readScalar<T>(x % channels);
}
}
if (x >= mat.cols * channels || y >= mat.rows)
return;
template<typename T>
__global__ void set_to_with_mask(T* mat, const uchar* mask, int cols, int rows, size_t step, int channels, size_t step_mask)
{
size_t x = blockIdx.x * blockDim.x + threadIdx.x;
size_t y = blockIdx.y * blockDim.y + threadIdx.y;
const T scalar[4] = {value.x, value.y, value.z, value.w};
if ((x < cols * channels ) && (y < rows))
if (mask[y * step_mask + x / channels] != 0)
{
size_t idx = y * ( step >> shift_and_sizeof<T>::shift ) + x;
mat[idx] = readScalar<T>(x % channels);
}
if (mask(y, x / channels))
mat(y, x) = scalar[x % channels];
}
template <typename T>
void set_to_gpu(PtrStepSzb mat, const T* scalar, PtrStepSzb mask, int channels, cudaStream_t stream)
void set(PtrStepSz<T> mat, const T* scalar, int channels, cudaStream_t stream)
{
writeScalar(scalar);
typedef typename TypeVec<T, 4>::vec_type scalar_t;
dim3 threadsPerBlock(32, 8, 1);
dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
dim3 block(32, 8);
dim3 grid(divUp(mat.cols * channels, block.x), divUp(mat.rows, block.y));
set_to_with_mask<T><<<numBlocks, threadsPerBlock, 0, stream>>>((T*)mat.data, (uchar*)mask.data, mat.cols, mat.rows, mat.step, channels, mask.step);
set<T><<<grid, block, 0, stream>>>(mat, WithOutMask(), channels, VecTraits<scalar_t>::make(scalar));
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall ( cudaDeviceSynchronize() );
}
template void set_to_gpu<uchar >(PtrStepSzb mat, const uchar* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set_to_gpu<schar >(PtrStepSzb mat, const schar* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set_to_gpu<ushort>(PtrStepSzb mat, const ushort* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set_to_gpu<short >(PtrStepSzb mat, const short* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set_to_gpu<int >(PtrStepSzb mat, const int* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set_to_gpu<float >(PtrStepSzb mat, const float* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set_to_gpu<double>(PtrStepSzb mat, const double* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set<uchar >(PtrStepSz<uchar > mat, const uchar* scalar, int channels, cudaStream_t stream);
template void set<schar >(PtrStepSz<schar > mat, const schar* scalar, int channels, cudaStream_t stream);
template void set<ushort>(PtrStepSz<ushort> mat, const ushort* scalar, int channels, cudaStream_t stream);
template void set<short >(PtrStepSz<short > mat, const short* scalar, int channels, cudaStream_t stream);
template void set<int >(PtrStepSz<int > mat, const int* scalar, int channels, cudaStream_t stream);
template void set<float >(PtrStepSz<float > mat, const float* scalar, int channels, cudaStream_t stream);
template void set<double>(PtrStepSz<double> mat, const double* scalar, int channels, cudaStream_t stream);
template <typename T>
void set_to_gpu(PtrStepSzb mat, const T* scalar, int channels, cudaStream_t stream)
void set(PtrStepSz<T> mat, const T* scalar, PtrStepSzb mask, int channels, cudaStream_t stream)
{
writeScalar(scalar);
typedef typename TypeVec<T, 4>::vec_type scalar_t;
dim3 threadsPerBlock(32, 8, 1);
dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
dim3 block(32, 8);
dim3 grid(divUp(mat.cols * channels, block.x), divUp(mat.rows, block.y));
set_to_without_mask<T><<<numBlocks, threadsPerBlock, 0, stream>>>((T*)mat.data, mat.cols, mat.rows, mat.step, channels);
set<T><<<grid, block, 0, stream>>>(mat, SingleMask(mask), channels, VecTraits<scalar_t>::make(scalar));
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall ( cudaDeviceSynchronize() );
}
template void set_to_gpu<uchar >(PtrStepSzb mat, const uchar* scalar, int channels, cudaStream_t stream);
template void set_to_gpu<schar >(PtrStepSzb mat, const schar* scalar, int channels, cudaStream_t stream);
template void set_to_gpu<ushort>(PtrStepSzb mat, const ushort* scalar, int channels, cudaStream_t stream);
template void set_to_gpu<short >(PtrStepSzb mat, const short* scalar, int channels, cudaStream_t stream);
template void set_to_gpu<int >(PtrStepSzb mat, const int* scalar, int channels, cudaStream_t stream);
template void set_to_gpu<float >(PtrStepSzb mat, const float* scalar, int channels, cudaStream_t stream);
template void set_to_gpu<double>(PtrStepSzb mat, const double* scalar, int channels, cudaStream_t stream);
template void set<uchar >(PtrStepSz<uchar > mat, const uchar* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set<schar >(PtrStepSz<schar > mat, const schar* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set<ushort>(PtrStepSz<ushort> mat, const ushort* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set<short >(PtrStepSz<short > mat, const short* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set<int >(PtrStepSz<int > mat, const int* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set<float >(PtrStepSz<float > mat, const float* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
template void set<double>(PtrStepSz<double> mat, const double* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
///////////////////////////////////////////////////////////////////////////
//////////////////////////////// ConvertTo ////////////////////////////////
///////////////////////////////////////////////////////////////////////////
// convert
template <typename T, typename D, typename S> struct Convertor : unary_function<T, D>
{
......@@ -290,18 +215,11 @@ namespace cv { namespace gpu { namespace cudev
template<typename T, typename D, typename S>
void cvt_(PtrStepSzb src, PtrStepSzb dst, double alpha, double beta, cudaStream_t stream)
{
cudaSafeCall( cudaSetDoubleForDevice(&alpha) );
cudaSafeCall( cudaSetDoubleForDevice(&beta) );
Convertor<T, D, S> op(static_cast<S>(alpha), static_cast<S>(beta));
cv::gpu::cudev::transform((PtrStepSz<T>)src, (PtrStepSz<D>)dst, op, WithOutMask(), stream);
}
#if defined __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wmissing-declarations"
#endif
void convert_gpu(PtrStepSzb src, int sdepth, PtrStepSzb dst, int ddepth, double alpha, double beta, cudaStream_t stream)
void convert(PtrStepSzb src, int sdepth, PtrStepSzb dst, int ddepth, double alpha, double beta, cudaStream_t stream)
{
typedef void (*caller_t)(PtrStepSzb src, PtrStepSzb dst, double alpha, double beta, cudaStream_t stream);
......@@ -372,11 +290,7 @@ namespace cv { namespace gpu { namespace cudev
}
};
caller_t func = tab[sdepth][ddepth];
const caller_t func = tab[sdepth][ddepth];
func(src, dst, alpha, beta, stream);
}
#if defined __clang__
# pragma clang diagnostic pop
#endif
}}} // namespace cv { namespace gpu { namespace cudev
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "opencv2/core/cuda/common.hpp"
namespace cv { namespace gpu { namespace cudev
{
void copyWithMask(PtrStepSzb src, PtrStepSzb dst, size_t elemSize1, int cn, PtrStepSzb mask, bool multiChannelMask, cudaStream_t stream);
template <typename T>
void set(PtrStepSz<T> mat, const T* scalar, int channels, cudaStream_t stream);
template <typename T>
void set(PtrStepSz<T> mat, const T* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
void convert(PtrStepSzb src, int sdepth, PtrStepSzb dst, int ddepth, double alpha, double beta, cudaStream_t stream);
}}}
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::gpu;
#if !defined (HAVE_CUDA)
cv::gpu::Stream::Stream() { throw_no_cuda(); }
cv::gpu::Stream::~Stream() {}
cv::gpu::Stream::Stream(const Stream&) { throw_no_cuda(); }
Stream& cv::gpu::Stream::operator=(const Stream&) { throw_no_cuda(); return *this; }
bool cv::gpu::Stream::queryIfComplete() { throw_no_cuda(); return false; }
void cv::gpu::Stream::waitForCompletion() { throw_no_cuda(); }
void cv::gpu::Stream::enqueueDownload(const GpuMat&, Mat&) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueDownload(const GpuMat&, CudaMem&) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueUpload(const CudaMem&, GpuMat&) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueUpload(const Mat&, GpuMat&) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueCopy(const GpuMat&, GpuMat&) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueMemSet(GpuMat&, Scalar) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueMemSet(GpuMat&, Scalar, const GpuMat&) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueConvert(const GpuMat&, GpuMat&, int, double, double) { throw_no_cuda(); }
void cv::gpu::Stream::enqueueHostCallback(StreamCallback, void*) { throw_no_cuda(); }
Stream& cv::gpu::Stream::Null() { throw_no_cuda(); static Stream s; return s; }
cv::gpu::Stream::operator bool() const { throw_no_cuda(); return false; }
cv::gpu::Stream::Stream(Impl*) { throw_no_cuda(); }
void cv::gpu::Stream::create() { throw_no_cuda(); }
void cv::gpu::Stream::release() { throw_no_cuda(); }
#else /* !defined (HAVE_CUDA) */
namespace cv { namespace gpu
{
void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream);
void convertTo(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream);
void setTo(GpuMat& src, Scalar s, cudaStream_t stream);
void setTo(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream);
}}
struct Stream::Impl
{
static cudaStream_t getStream(const Impl* impl)
{
return impl ? impl->stream : 0;
}
cudaStream_t stream;
int ref_counter;
};
cudaStream_t cv::gpu::StreamAccessor::getStream(const Stream& stream)
{
return Stream::Impl::getStream(stream.impl);
}
cv::gpu::Stream::Stream() : impl(0)
{
create();
}
cv::gpu::Stream::~Stream()
{
release();
}
cv::gpu::Stream::Stream(const Stream& stream) : impl(stream.impl)
{
if (impl)
CV_XADD(&impl->ref_counter, 1);
}
Stream& cv::gpu::Stream::operator =(const Stream& stream)
{
if (this != &stream)
{
release();
impl = stream.impl;
if (impl)
CV_XADD(&impl->ref_counter, 1);
}
return *this;
}
bool cv::gpu::Stream::queryIfComplete()
{
cudaStream_t stream = Impl::getStream(impl);
cudaError_t err = cudaStreamQuery(stream);
if (err == cudaErrorNotReady || err == cudaSuccess)
return err == cudaSuccess;
cudaSafeCall(err);
return false;
}
void cv::gpu::Stream::waitForCompletion()
{
cudaStream_t stream = Impl::getStream(impl);
cudaSafeCall( cudaStreamSynchronize(stream) );
}
void cv::gpu::Stream::enqueueDownload(const GpuMat& src, Mat& dst)
{
// if not -> allocation will be done, but after that dst will not point to page locked memory
CV_Assert( src.size() == dst.size() && src.type() == dst.type() );
cudaStream_t stream = Impl::getStream(impl);
size_t bwidth = src.cols * src.elemSize();
cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) );
}
void cv::gpu::Stream::enqueueDownload(const GpuMat& src, CudaMem& dst)
{
dst.create(src.size(), src.type(), CudaMem::ALLOC_PAGE_LOCKED);
cudaStream_t stream = Impl::getStream(impl);
size_t bwidth = src.cols * src.elemSize();
cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) );
}
void cv::gpu::Stream::enqueueUpload(const CudaMem& src, GpuMat& dst)
{
dst.create(src.size(), src.type());
cudaStream_t stream = Impl::getStream(impl);
size_t bwidth = src.cols * src.elemSize();
cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) );
}
void cv::gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst)
{
dst.create(src.size(), src.type());
cudaStream_t stream = Impl::getStream(impl);
size_t bwidth = src.cols * src.elemSize();
cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) );
}
void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst)
{
dst.create(src.size(), src.type());
cudaStream_t stream = Impl::getStream(impl);
size_t bwidth = src.cols * src.elemSize();
cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToDevice, stream) );
}
void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val)
{
const int sdepth = src.depth();
if (sdepth == CV_64F)
{
if (!deviceSupports(NATIVE_DOUBLE))
CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
}
cudaStream_t stream = Impl::getStream(impl);
if (val[0] == 0.0 && val[1] == 0.0 && val[2] == 0.0 && val[3] == 0.0)
{
cudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, stream) );
return;
}
if (sdepth == CV_8U)
{
int cn = src.channels();
if (cn == 1 || (cn == 2 && val[0] == val[1]) || (cn == 3 && val[0] == val[1] && val[0] == val[2]) || (cn == 4 && val[0] == val[1] && val[0] == val[2] && val[0] == val[3]))
{
int ival = saturate_cast<uchar>(val[0]);
cudaSafeCall( cudaMemset2DAsync(src.data, src.step, ival, src.cols * src.elemSize(), src.rows, stream) );
return;
}
}
setTo(src, val, stream);
}
void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask)
{
const int sdepth = src.depth();
if (sdepth == CV_64F)
{
if (!deviceSupports(NATIVE_DOUBLE))
CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
}
CV_Assert(mask.type() == CV_8UC1);
cudaStream_t stream = Impl::getStream(impl);
setTo(src, val, mask, stream);
}
void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int dtype, double alpha, double beta)
{
if (dtype < 0)
dtype = src.type();
else
dtype = CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels());
const int sdepth = src.depth();
const int ddepth = CV_MAT_DEPTH(dtype);
if (sdepth == CV_64F || ddepth == CV_64F)
{
if (!deviceSupports(NATIVE_DOUBLE))
CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
}
bool noScale = fabs(alpha - 1) < std::numeric_limits<double>::epsilon()
&& fabs(beta) < std::numeric_limits<double>::epsilon();
if (sdepth == ddepth && noScale)
{
enqueueCopy(src, dst);
return;
}
dst.create(src.size(), dtype);
cudaStream_t stream = Impl::getStream(impl);
convertTo(src, dst, alpha, beta, stream);
}
#if CUDART_VERSION >= 5000
namespace
{
struct CallbackData
{
cv::gpu::Stream::StreamCallback callback;
void* userData;
Stream stream;
};
void CUDART_CB cudaStreamCallback(cudaStream_t, cudaError_t status, void* userData)
{
CallbackData* data = reinterpret_cast<CallbackData*>(userData);
data->callback(data->stream, static_cast<int>(status), data->userData);
delete data;
}
}
#endif
void cv::gpu::Stream::enqueueHostCallback(StreamCallback callback, void* userData)
{
#if CUDART_VERSION >= 5000
CallbackData* data = new CallbackData;
data->callback = callback;
data->userData = userData;
data->stream = *this;
cudaStream_t stream = Impl::getStream(impl);
cudaSafeCall( cudaStreamAddCallback(stream, cudaStreamCallback, data, 0) );
#else
(void) callback;
(void) userData;
CV_Error(CV_StsNotImplemented, "This function requires CUDA 5.0");
#endif
}
cv::gpu::Stream& cv::gpu::Stream::Null()
{
static Stream s((Impl*) 0);
return s;
}
cv::gpu::Stream::operator bool() const
{
return impl && impl->stream;
}
cv::gpu::Stream::Stream(Impl* impl_) : impl(impl_)
{
}
void cv::gpu::Stream::create()
{
if (impl)
release();
cudaStream_t stream;
cudaSafeCall( cudaStreamCreate( &stream ) );
impl = (Stream::Impl*) fastMalloc(sizeof(Stream::Impl));
impl->stream = stream;
impl->ref_counter = 1;
}
void cv::gpu::Stream::release()
{
if (impl && CV_XADD(&impl->ref_counter, -1) == 1)
{
cudaSafeCall( cudaStreamDestroy(impl->stream) );
cv::fastFree(impl);
}
}
#endif /* !defined (HAVE_CUDA) */
......@@ -7,11 +7,12 @@
// copy or use the software.
//
//
// License Agreement
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
......@@ -45,217 +46,70 @@
using namespace cv;
using namespace cv::gpu;
cv::gpu::CudaMem::CudaMem()
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(0)
{
}
cv::gpu::CudaMem::CudaMem(int _rows, int _cols, int _type, int _alloc_type)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(0)
{
if( _rows > 0 && _cols > 0 )
create( _rows, _cols, _type, _alloc_type);
}
cv::gpu::CudaMem::CudaMem(Size _size, int _type, int _alloc_type)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(0)
{
if( _size.height > 0 && _size.width > 0 )
create( _size.height, _size.width, _type, _alloc_type);
}
cv::gpu::CudaMem::CudaMem(const CudaMem& m)
: flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), alloc_type(m.alloc_type)
{
if( refcount )
CV_XADD(refcount, 1);
}
cv::gpu::CudaMem::CudaMem(const Mat& m, int _alloc_type)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(0)
{
if( m.rows > 0 && m.cols > 0 )
create( m.size(), m.type(), _alloc_type);
Mat tmp = createMatHeader();
m.copyTo(tmp);
}
cv::gpu::CudaMem::~CudaMem()
{
release();
}
CudaMem& cv::gpu::CudaMem::operator = (const CudaMem& m)
{
if( this != &m )
{
if( m.refcount )
CV_XADD(m.refcount, 1);
release();
flags = m.flags;
rows = m.rows; cols = m.cols;
step = m.step; data = m.data;
datastart = m.datastart;
dataend = m.dataend;
refcount = m.refcount;
alloc_type = m.alloc_type;
}
return *this;
}
CudaMem cv::gpu::CudaMem::clone() const
{
CudaMem m(size(), type(), alloc_type);
Mat to = m;
Mat from = *this;
from.copyTo(to);
return m;
}
void cv::gpu::CudaMem::create(Size _size, int _type, int _alloc_type)
{
create(_size.height, _size.width, _type, _alloc_type);
}
Mat cv::gpu::CudaMem::createMatHeader() const
{
return Mat(size(), type(), data, step);
}
cv::gpu::CudaMem::operator Mat() const
{
return createMatHeader();
}
cv::gpu::CudaMem::operator GpuMat() const
{
return createGpuMatHeader();
}
bool cv::gpu::CudaMem::isContinuous() const
{
return (flags & Mat::CONTINUOUS_FLAG) != 0;
}
size_t cv::gpu::CudaMem::elemSize() const
{
return CV_ELEM_SIZE(flags);
}
size_t cv::gpu::CudaMem::elemSize1() const
{
return CV_ELEM_SIZE1(flags);
}
int cv::gpu::CudaMem::type() const
{
return CV_MAT_TYPE(flags);
}
int cv::gpu::CudaMem::depth() const
{
return CV_MAT_DEPTH(flags);
}
int cv::gpu::CudaMem::channels() const
{
return CV_MAT_CN(flags);
}
size_t cv::gpu::CudaMem::step1() const
{
return step/elemSize1();
}
Size cv::gpu::CudaMem::size() const
{
return Size(cols, rows);
}
bool cv::gpu::CudaMem::empty() const
{
return data == 0;
}
#if !defined (HAVE_CUDA)
void cv::gpu::registerPageLocked(Mat&) { throw_no_cuda(); }
void cv::gpu::unregisterPageLocked(Mat&) { throw_no_cuda(); }
void cv::gpu::CudaMem::create(int, int, int, int) { throw_no_cuda(); }
bool cv::gpu::CudaMem::canMapHostMemory() { throw_no_cuda(); return false; }
void cv::gpu::CudaMem::release() { throw_no_cuda(); }
GpuMat cv::gpu::CudaMem::createGpuMatHeader () const { throw_no_cuda(); return GpuMat(); }
#else /* !defined (HAVE_CUDA) */
void cv::gpu::registerPageLocked(Mat& m)
{
cudaSafeCall( cudaHostRegister(m.ptr(), m.step * m.rows, cudaHostRegisterPortable) );
}
void cv::gpu::unregisterPageLocked(Mat& m)
{
cudaSafeCall( cudaHostUnregister(m.ptr()) );
}
bool cv::gpu::CudaMem::canMapHostMemory()
{
cudaDeviceProp prop;
cudaSafeCall( cudaGetDeviceProperties(&prop, getDevice()) );
return (prop.canMapHostMemory != 0) ? true : false;
}
namespace
{
size_t alignUpStep(size_t what, size_t alignment)
{
size_t alignMask = alignment-1;
size_t alignMask = alignment - 1;
size_t inverseAlignMask = ~alignMask;
size_t res = (what + alignMask) & inverseAlignMask;
return res;
}
}
void cv::gpu::CudaMem::create(int _rows, int _cols, int _type, int _alloc_type)
void cv::gpu::CudaMem::create(int rows_, int cols_, int type_)
{
if (_alloc_type == ALLOC_ZEROCOPY && !canMapHostMemory())
CV_Error(cv::Error::GpuApiCallError, "ZeroCopy is not supported by current device");
#ifndef HAVE_CUDA
(void) rows_;
(void) cols_;
(void) type_;
throw_no_cuda();
#else
if (alloc_type == SHARED)
{
DeviceInfo devInfo;
CV_Assert( devInfo.canMapHostMemory() );
}
type_ &= Mat::TYPE_MASK;
_type &= Mat::TYPE_MASK;
if( rows == _rows && cols == _cols && type() == _type && data )
if (rows == rows_ && cols == cols_ && type() == type_ && data)
return;
if( data )
if (data)
release();
CV_DbgAssert( _rows >= 0 && _cols >= 0 );
if( _rows > 0 && _cols > 0 )
CV_DbgAssert( rows_ >= 0 && cols_ >= 0 );
if (rows_ > 0 && cols_ > 0)
{
flags = Mat::MAGIC_VAL + Mat::CONTINUOUS_FLAG + _type;
rows = _rows;
cols = _cols;
step = elemSize()*cols;
if (_alloc_type == ALLOC_ZEROCOPY)
flags = Mat::MAGIC_VAL + Mat::CONTINUOUS_FLAG + type_;
rows = rows_;
cols = cols_;
step = elemSize() * cols;
if (alloc_type == SHARED)
{
cudaDeviceProp prop;
cudaSafeCall( cudaGetDeviceProperties(&prop, getDevice()) );
step = alignUpStep(step, prop.textureAlignment);
DeviceInfo devInfo;
step = alignUpStep(step, devInfo.textureAlignment());
}
int64 _nettosize = (int64)step*rows;
size_t nettosize = (size_t)_nettosize;
if( _nettosize != (int64)nettosize )
CV_Error(CV_StsNoMem, "Too big buffer is allocated");
if (_nettosize != (int64)nettosize)
CV_Error(cv::Error::StsNoMem, "Too big buffer is allocated");
size_t datasize = alignSize(nettosize, (int)sizeof(*refcount));
//datastart = data = (uchar*)fastMalloc(datasize + sizeof(*refcount));
alloc_type = _alloc_type;
void *ptr = 0;
void* ptr = 0;
switch (alloc_type)
{
case ALLOC_PAGE_LOCKED: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocDefault) ); break;
case ALLOC_ZEROCOPY: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocMapped) ); break;
case ALLOC_WRITE_COMBINED: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocWriteCombined) ); break;
default: CV_Error(cv::Error::StsBadFlag, "Invalid alloc type");
case PAGE_LOCKED: cudaSafeCall( cudaHostAlloc(&ptr, datasize, cudaHostAllocDefault) ); break;
case SHARED: cudaSafeCall( cudaHostAlloc(&ptr, datasize, cudaHostAllocMapped) ); break;
case WRITE_COMBINED: cudaSafeCall( cudaHostAlloc(&ptr, datasize, cudaHostAllocWriteCombined) ); break;
default: CV_Error(cv::Error::StsBadFlag, "Invalid alloc type");
}
datastart = data = (uchar*)ptr;
......@@ -264,31 +118,98 @@ void cv::gpu::CudaMem::create(int _rows, int _cols, int _type, int _alloc_type)
refcount = (int*)cv::fastMalloc(sizeof(*refcount));
*refcount = 1;
}
#endif
}
GpuMat cv::gpu::CudaMem::createGpuMatHeader () const
CudaMem cv::gpu::CudaMem::reshape(int new_cn, int new_rows) const
{
CV_Assert( alloc_type == ALLOC_ZEROCOPY );
CudaMem hdr = *this;
GpuMat res;
int cn = channels();
if (new_cn == 0)
new_cn = cn;
void *pdev;
cudaSafeCall( cudaHostGetDevicePointer( &pdev, data, 0 ) );
res = GpuMat(rows, cols, type(), pdev, step);
int total_width = cols * cn;
if ((new_cn > total_width || total_width % new_cn != 0) && new_rows == 0)
new_rows = rows * total_width / new_cn;
if (new_rows != 0 && new_rows != rows)
{
int total_size = total_width * rows;
if (!isContinuous())
CV_Error(cv::Error::BadStep, "The matrix is not continuous, thus its number of rows can not be changed");
if ((unsigned)new_rows > (unsigned)total_size)
CV_Error(cv::Error::StsOutOfRange, "Bad new number of rows");
total_width = total_size / new_rows;
if (total_width * new_rows != total_size)
CV_Error(cv::Error::StsBadArg, "The total number of matrix elements is not divisible by the new number of rows");
hdr.rows = new_rows;
hdr.step = total_width * elemSize1();
}
int new_width = total_width / new_cn;
if (new_width * new_cn != total_width)
CV_Error(cv::Error::BadNumChannels, "The total width is not divisible by the new number of channels");
hdr.cols = new_width;
hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn - 1) << CV_CN_SHIFT);
return res;
return hdr;
}
void cv::gpu::CudaMem::release()
{
if( refcount && CV_XADD(refcount, -1) == 1 )
#ifdef HAVE_CUDA
if (refcount && CV_XADD(refcount, -1) == 1)
{
cudaSafeCall( cudaFreeHost(datastart ) );
cudaFreeHost(datastart);
fastFree(refcount);
}
data = datastart = dataend = 0;
step = rows = cols = 0;
refcount = 0;
#endif
}
GpuMat cv::gpu::CudaMem::createGpuMatHeader() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return GpuMat();
#else
CV_Assert( alloc_type == SHARED );
void *pdev;
cudaSafeCall( cudaHostGetDevicePointer(&pdev, data, 0) );
return GpuMat(rows, cols, type(), pdev, step);
#endif
}
#endif /* !defined (HAVE_CUDA) */
void cv::gpu::registerPageLocked(Mat& m)
{
#ifndef HAVE_CUDA
(void) m;
throw_no_cuda();
#else
CV_Assert( m.isContinuous() );
cudaSafeCall( cudaHostRegister(m.data, m.step * m.rows, cudaHostRegisterPortable) );
#endif
}
void cv::gpu::unregisterPageLocked(Mat& m)
{
#ifndef HAVE_CUDA
(void) m;
#else
cudaSafeCall( cudaHostUnregister(m.data) );
#endif
}
此差异已折叠。
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::gpu;
////////////////////////////////////////////////////////////////
// Stream
#ifndef HAVE_CUDA
class cv::gpu::Stream::Impl
{
public:
Impl(void* ptr = 0)
{
(void) ptr;
throw_no_cuda();
}
};
#else
class cv::gpu::Stream::Impl
{
public:
cudaStream_t stream;
Impl();
Impl(cudaStream_t stream);
~Impl();
};
cv::gpu::Stream::Impl::Impl() : stream(0)
{
cudaSafeCall( cudaStreamCreate(&stream) );
}
cv::gpu::Stream::Impl::Impl(cudaStream_t stream_) : stream(stream_)
{
}
cv::gpu::Stream::Impl::~Impl()
{
if (stream)
cudaStreamDestroy(stream);
}
cudaStream_t cv::gpu::StreamAccessor::getStream(const Stream& stream)
{
return stream.impl_->stream;
}
#endif
cv::gpu::Stream::Stream()
{
#ifndef HAVE_CUDA
throw_no_cuda();
#else
impl_ = new Impl;
#endif
}
bool cv::gpu::Stream::queryIfComplete() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return false;
#else
cudaError_t err = cudaStreamQuery(impl_->stream);
if (err == cudaErrorNotReady || err == cudaSuccess)
return err == cudaSuccess;
cudaSafeCall(err);
return false;
#endif
}
void cv::gpu::Stream::waitForCompletion()
{
#ifndef HAVE_CUDA
throw_no_cuda();
#else
cudaSafeCall( cudaStreamSynchronize(impl_->stream) );
#endif
}
void cv::gpu::Stream::waitEvent(const Event& event)
{
#ifndef HAVE_CUDA
(void) event;
throw_no_cuda();
#else
cudaSafeCall( cudaStreamWaitEvent(impl_->stream, EventAccessor::getEvent(event), 0) );
#endif
}
#if defined(HAVE_CUDA) && (CUDART_VERSION >= 5000)
namespace
{
struct CallbackData
{
Stream::StreamCallback callback;
void* userData;
CallbackData(Stream::StreamCallback callback_, void* userData_) : callback(callback_), userData(userData_) {}
};
void CUDART_CB cudaStreamCallback(cudaStream_t, cudaError_t status, void* userData)
{
CallbackData* data = reinterpret_cast<CallbackData*>(userData);
data->callback(static_cast<int>(status), data->userData);
delete data;
}
}
#endif
void cv::gpu::Stream::enqueueHostCallback(StreamCallback callback, void* userData)
{
#ifndef HAVE_CUDA
(void) callback;
(void) userData;
throw_no_cuda();
#else
#if CUDART_VERSION < 5000
(void) callback;
(void) userData;
CV_Error(cv::Error::StsNotImplemented, "This function requires CUDA 5.0");
#else
CallbackData* data = new CallbackData(callback, userData);
cudaSafeCall( cudaStreamAddCallback(impl_->stream, cudaStreamCallback, data, 0) );
#endif
#endif
}
Stream& cv::gpu::Stream::Null()
{
static Stream s(new Impl(0));
return s;
}
cv::gpu::Stream::operator bool_type() const
{
#ifndef HAVE_CUDA
return 0;
#else
return (impl_->stream != 0) ? &Stream::this_type_does_not_support_comparisons : 0;
#endif
}
template <> void cv::Ptr<Stream::Impl>::delete_obj()
{
if (obj) delete obj;
}
////////////////////////////////////////////////////////////////
// Stream
#ifndef HAVE_CUDA
class cv::gpu::Event::Impl
{
public:
Impl(unsigned int)
{
throw_no_cuda();
}
};
#else
class cv::gpu::Event::Impl
{
public:
cudaEvent_t event;
Impl(unsigned int flags);
~Impl();
};
cv::gpu::Event::Impl::Impl(unsigned int flags) : event(0)
{
cudaSafeCall( cudaEventCreateWithFlags(&event, flags) );
}
cv::gpu::Event::Impl::~Impl()
{
if (event)
cudaEventDestroy(event);
}
cudaEvent_t cv::gpu::EventAccessor::getEvent(const Event& event)
{
return event.impl_->event;
}
#endif
cv::gpu::Event::Event(CreateFlags flags)
{
#ifndef HAVE_CUDA
(void) flags;
throw_no_cuda();
#else
impl_ = new Impl(flags);
#endif
}
void cv::gpu::Event::record(Stream& stream)
{
#ifndef HAVE_CUDA
(void) stream;
throw_no_cuda();
#else
cudaSafeCall( cudaEventRecord(impl_->event, StreamAccessor::getStream(stream)) );
#endif
}
bool cv::gpu::Event::queryIfComplete() const
{
#ifndef HAVE_CUDA
throw_no_cuda();
return false;
#else
cudaError_t err = cudaEventQuery(impl_->event);
if (err == cudaErrorNotReady || err == cudaSuccess)
return err == cudaSuccess;
cudaSafeCall(err);
return false;
#endif
}
void cv::gpu::Event::waitForCompletion()
{
#ifndef HAVE_CUDA
throw_no_cuda();
#else
cudaSafeCall( cudaEventSynchronize(impl_->event) );
#endif
}
float cv::gpu::Event::elapsedTime(const Event& start, const Event& end)
{
#ifndef HAVE_CUDA
(void) start;
(void) end;
throw_no_cuda();
return 0.0f;
#else
float ms;
cudaSafeCall( cudaEventElapsedTime(&ms, start.impl_->event, end.impl_->event) );
return ms;
#endif
}
template <> void cv::Ptr<Event::Impl>::delete_obj()
{
if (obj) delete obj;
}
......@@ -41,8 +41,6 @@
//M*/
#include "precomp.hpp"
#include "opencv2/core/gpumat.hpp"
#include "opencv2/core/opengl.hpp"
/****************************************************************************************\
* [scaled] Identity matrix initialization *
......@@ -941,14 +939,15 @@ void scalarToRawData(const Scalar& s, void* _buf, int type, int unroll_to)
\*************************************************************************************************/
_InputArray::_InputArray() : flags(0), obj(0) {}
_InputArray::~_InputArray() {}
_InputArray::_InputArray(const Mat& m) : flags(MAT), obj((void*)&m) {}
_InputArray::_InputArray(const std::vector<Mat>& vec) : flags(STD_VECTOR_MAT), obj((void*)&vec) {}
_InputArray::_InputArray(const double& val) : flags(FIXED_TYPE + FIXED_SIZE + MATX + CV_64F), obj((void*)&val), sz(Size(1,1)) {}
_InputArray::_InputArray(const MatExpr& expr) : flags(FIXED_TYPE + FIXED_SIZE + EXPR), obj((void*)&expr) {}
_InputArray::_InputArray(const gpu::GpuMat& d_mat) : flags(GPU_MAT), obj((void*)&d_mat) {}
_InputArray::_InputArray(const ogl::Buffer& buf) : flags(OPENGL_BUFFER), obj((void*)&buf) {}
_InputArray::_InputArray(const ogl::Texture2D& tex) : flags(OPENGL_TEXTURE), obj((void*)&tex) {}
_InputArray::_InputArray(const gpu::CudaMem& cuda_mem) : flags(CUDA_MEM), obj((void*)&cuda_mem) {}
_InputArray::~_InputArray() {}
Mat _InputArray::getMat(int i) const
{
......@@ -996,14 +995,37 @@ Mat _InputArray::getMat(int i) const
return !v.empty() ? Mat(size(i), t, (void*)&v[0]) : Mat();
}
CV_Assert( k == STD_VECTOR_MAT );
//if( k == STD_VECTOR_MAT )
if( k == STD_VECTOR_MAT )
{
const std::vector<Mat>& v = *(const std::vector<Mat>*)obj;
CV_Assert( 0 <= i && i < (int)v.size() );
return v[i];
}
if( k == OPENGL_BUFFER )
{
CV_Assert( i < 0 );
CV_Error(cv::Error::StsNotImplemented, "You should explicitly call mapHost/unmapHost methods for ogl::Buffer object");
return Mat();
}
if( k == GPU_MAT )
{
CV_Assert( i < 0 );
CV_Error(cv::Error::StsNotImplemented, "You should explicitly call download method for gpu::GpuMat object");
return Mat();
}
CV_Assert( k == CUDA_MEM );
//if( k == CUDA_MEM )
{
CV_Assert( i < 0 );
const gpu::CudaMem* cuda_mem = (const gpu::CudaMem*)obj;
return cuda_mem->createMatHeader();
}
}
......@@ -1092,10 +1114,29 @@ gpu::GpuMat _InputArray::getGpuMat() const
{
int k = kind();
CV_Assert(k == GPU_MAT);
if (k == GPU_MAT)
{
const gpu::GpuMat* d_mat = (const gpu::GpuMat*)obj;
return *d_mat;
}
if (k == CUDA_MEM)
{
const gpu::CudaMem* cuda_mem = (const gpu::CudaMem*)obj;
return cuda_mem->createGpuMatHeader();
}
if (k == OPENGL_BUFFER)
{
CV_Error(cv::Error::StsNotImplemented, "You should explicitly call mapDevice/unmapDevice methods for ogl::Buffer object");
return gpu::GpuMat();
}
const gpu::GpuMat* d_mat = (const gpu::GpuMat*)obj;
return *d_mat;
if (k == NONE)
return gpu::GpuMat();
CV_Error(cv::Error::StsNotImplemented, "getGpuMat is available only for gpu::GpuMat and gpu::CudaMem");
return gpu::GpuMat();
}
ogl::Buffer _InputArray::getOGlBuffer() const
......@@ -1108,16 +1149,6 @@ ogl::Buffer _InputArray::getOGlBuffer() const
return *gl_buf;
}
ogl::Texture2D _InputArray::getOGlTexture2D() const
{
int k = kind();
CV_Assert(k == OPENGL_TEXTURE);
const ogl::Texture2D* gl_tex = (const ogl::Texture2D*)obj;
return *gl_tex;
}
int _InputArray::kind() const
{
return flags & KIND_MASK;
......@@ -1186,19 +1217,19 @@ Size _InputArray::size(int i) const
return buf->size();
}
if( k == OPENGL_TEXTURE )
if( k == GPU_MAT )
{
CV_Assert( i < 0 );
const ogl::Texture2D* tex = (const ogl::Texture2D*)obj;
return tex->size();
const gpu::GpuMat* d_mat = (const gpu::GpuMat*)obj;
return d_mat->size();
}
CV_Assert( k == GPU_MAT );
//if( k == GPU_MAT )
CV_Assert( k == CUDA_MEM );
//if( k == CUDA_MEM )
{
CV_Assert( i < 0 );
const gpu::GpuMat* d_mat = (const gpu::GpuMat*)obj;
return d_mat->size();
const gpu::CudaMem* cuda_mem = (const gpu::CudaMem*)obj;
return cuda_mem->size();
}
}
......@@ -1252,9 +1283,12 @@ int _InputArray::type(int i) const
if( k == OPENGL_BUFFER )
return ((const ogl::Buffer*)obj)->type();
CV_Assert( k == GPU_MAT );
//if( k == GPU_MAT )
if( k == GPU_MAT )
return ((const gpu::GpuMat*)obj)->type();
CV_Assert( k == CUDA_MEM );
//if( k == CUDA_MEM )
return ((const gpu::CudaMem*)obj)->type();
}
int _InputArray::depth(int i) const
......@@ -1304,29 +1338,29 @@ bool _InputArray::empty() const
if( k == OPENGL_BUFFER )
return ((const ogl::Buffer*)obj)->empty();
if( k == OPENGL_TEXTURE )
return ((const ogl::Texture2D*)obj)->empty();
CV_Assert( k == GPU_MAT );
//if( k == GPU_MAT )
if( k == GPU_MAT )
return ((const gpu::GpuMat*)obj)->empty();
CV_Assert( k == CUDA_MEM );
//if( k == CUDA_MEM )
return ((const gpu::CudaMem*)obj)->empty();
}
_OutputArray::_OutputArray() {}
_OutputArray::~_OutputArray() {}
_OutputArray::_OutputArray(Mat& m) : _InputArray(m) {}
_OutputArray::_OutputArray(std::vector<Mat>& vec) : _InputArray(vec) {}
_OutputArray::_OutputArray(gpu::GpuMat& d_mat) : _InputArray(d_mat) {}
_OutputArray::_OutputArray(ogl::Buffer& buf) : _InputArray(buf) {}
_OutputArray::_OutputArray(ogl::Texture2D& tex) : _InputArray(tex) {}
_OutputArray::_OutputArray(gpu::CudaMem& cuda_mem) : _InputArray(cuda_mem) {}
_OutputArray::_OutputArray(const Mat& m) : _InputArray(m) {flags |= FIXED_SIZE|FIXED_TYPE;}
_OutputArray::_OutputArray(const std::vector<Mat>& vec) : _InputArray(vec) {flags |= FIXED_SIZE;}
_OutputArray::_OutputArray(const gpu::GpuMat& d_mat) : _InputArray(d_mat) {flags |= FIXED_SIZE|FIXED_TYPE;}
_OutputArray::_OutputArray(const ogl::Buffer& buf) : _InputArray(buf) {flags |= FIXED_SIZE|FIXED_TYPE;}
_OutputArray::_OutputArray(const ogl::Texture2D& tex) : _InputArray(tex) {flags |= FIXED_SIZE|FIXED_TYPE;}
_OutputArray::_OutputArray(const gpu::CudaMem& cuda_mem) : _InputArray(cuda_mem) {flags |= FIXED_SIZE|FIXED_TYPE;}
_OutputArray::~_OutputArray() {}
bool _OutputArray::fixedSize() const
{
......@@ -1362,6 +1396,13 @@ void _OutputArray::create(Size _sz, int mtype, int i, bool allowTransposed, int
((ogl::Buffer*)obj)->create(_sz, mtype);
return;
}
if( k == CUDA_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 )
{
CV_Assert(!fixedSize() || ((gpu::CudaMem*)obj)->size() == _sz);
CV_Assert(!fixedType() || ((gpu::CudaMem*)obj)->type() == mtype);
((gpu::CudaMem*)obj)->create(_sz, mtype);
return;
}
int sizes[] = {_sz.height, _sz.width};
create(2, sizes, mtype, i, allowTransposed, fixedDepthMask);
}
......@@ -1390,6 +1431,13 @@ void _OutputArray::create(int rows, int cols, int mtype, int i, bool allowTransp
((ogl::Buffer*)obj)->create(rows, cols, mtype);
return;
}
if( k == CUDA_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 )
{
CV_Assert(!fixedSize() || ((gpu::CudaMem*)obj)->size() == Size(cols, rows));
CV_Assert(!fixedType() || ((gpu::CudaMem*)obj)->type() == mtype);
((gpu::CudaMem*)obj)->create(rows, cols, mtype);
return;
}
int sizes[] = {rows, cols};
create(2, sizes, mtype, i, allowTransposed, fixedDepthMask);
}
......@@ -1609,15 +1657,15 @@ void _OutputArray::release() const
return;
}
if( k == OPENGL_BUFFER )
if( k == CUDA_MEM )
{
((ogl::Buffer*)obj)->release();
((gpu::CudaMem*)obj)->release();
return;
}
if( k == OPENGL_TEXTURE )
if( k == OPENGL_BUFFER )
{
((ogl::Texture2D*)obj)->release();
((ogl::Buffer*)obj)->release();
return;
}
......@@ -1693,11 +1741,11 @@ ogl::Buffer& _OutputArray::getOGlBufferRef() const
return *(ogl::Buffer*)obj;
}
ogl::Texture2D& _OutputArray::getOGlTexture2DRef() const
gpu::CudaMem& _OutputArray::getCudaMemRef() const
{
int k = kind();
CV_Assert( k == OPENGL_TEXTURE );
return *(ogl::Texture2D*)obj;
CV_Assert( k == CUDA_MEM );
return *(gpu::CudaMem*)obj;
}
static _OutputArray _none;
......
此差异已折叠。
此差异已折叠。
......@@ -47,7 +47,7 @@
# error gpu.hpp header must be compiled as C++
#endif
#include "opencv2/core/gpumat.hpp"
#include "opencv2/core/gpu.hpp"
#if !defined(__OPENCV_BUILD) && !defined(OPENCV_GPU_SKIP_INCLUDE)
#include "opencv2/opencv_modules.hpp"
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册