Commit eaea6782 authored by Vladislav Vinogradov

added more assertions on device features to gpu functions and tests

moved TargetArchs and DeviceInfo to core
fixed bug in GpuMat::copy with mask (incorrect index in function tab)
Parent: e8fab91d
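The assertions this commit adds follow one pattern: check both what the module was compiled for (TargetArchs) and what the active device reports (DeviceInfo) before launching a kernel. A minimal sketch of that guard, assuming the cv::gpu API declared in the hunks below (the 2.x-era header name is an assumption):

#include <opencv2/gpu/gpu.hpp>

// Sketch of the guard placed in front of double-precision code paths;
// it mirrors the checks added to merge/split and meanStdDev in this commit.
static void requireNativeDouble()
{
    using namespace cv::gpu;
    if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
        CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
}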
@@ -50,6 +50,96 @@
namespace cv { namespace gpu
{
//////////////////////////////// Initialization & Info ////////////////////////
//! This is the only function that does not throw exceptions if the library is compiled without CUDA.
CV_EXPORTS int getCudaEnabledDeviceCount();
//! The functions below throw cv::Exception if the library is compiled without CUDA.
CV_EXPORTS void setDevice(int device);
CV_EXPORTS int getDevice();
//! Explicitly destroys and cleans up all resources associated with the current device in the current process.
//! Any subsequent API call to this device will reinitialize the device.
CV_EXPORTS void resetDevice();
enum FeatureSet
{
FEATURE_SET_COMPUTE_10 = 10,
FEATURE_SET_COMPUTE_11 = 11,
FEATURE_SET_COMPUTE_12 = 12,
FEATURE_SET_COMPUTE_13 = 13,
FEATURE_SET_COMPUTE_20 = 20,
FEATURE_SET_COMPUTE_21 = 21,
GLOBAL_ATOMICS = FEATURE_SET_COMPUTE_11,
SHARED_ATOMICS = FEATURE_SET_COMPUTE_12,
NATIVE_DOUBLE = FEATURE_SET_COMPUTE_13
};
// Gives information about what GPU archs this OpenCV GPU module was
// compiled for
class CV_EXPORTS TargetArchs
{
public:
static bool builtWith(FeatureSet feature_set);
static bool has(int major, int minor);
static bool hasPtx(int major, int minor);
static bool hasBin(int major, int minor);
static bool hasEqualOrLessPtx(int major, int minor);
static bool hasEqualOrGreater(int major, int minor);
static bool hasEqualOrGreaterPtx(int major, int minor);
static bool hasEqualOrGreaterBin(int major, int minor);
private:
TargetArchs();
};
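For example, build-time capability checks can be made without touching any device (an illustrative sketch, not part of the diff):

// True if the module embeds binary code for compute capability 2.0 or newer.
bool fermiBinary = cv::gpu::TargetArchs::hasEqualOrGreaterBin(2, 0);

// True if any embedded code targets an arch with native double support.
bool doubleBuilt = cv::gpu::TargetArchs::builtWith(cv::gpu::NATIVE_DOUBLE);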
// Gives information about the given GPU
class CV_EXPORTS DeviceInfo
{
public:
// Creates DeviceInfo object for the current GPU
DeviceInfo() : device_id_(getDevice()) { query(); }
// Creates DeviceInfo object for the given GPU
DeviceInfo(int device_id) : device_id_(device_id) { query(); }
std::string name() const { return name_; }
// Return compute capability versions
int majorVersion() const { return majorVersion_; }
int minorVersion() const { return minorVersion_; }
int multiProcessorCount() const { return multi_processor_count_; }
size_t freeMemory() const;
size_t totalMemory() const;
// Checks whether device supports the given feature
bool supports(FeatureSet feature_set) const;
// Checks whether the GPU module can be run on the given device
bool isCompatible() const;
int deviceID() const { return device_id_; }
private:
void query();
void queryMemory(size_t& free_memory, size_t& total_memory) const;
int device_id_;
std::string name_;
int multi_processor_count_;
int majorVersion_;
int minorVersion_;
};
CV_EXPORTS void printCudaDeviceInfo(int device);
CV_EXPORTS void printShortCudaDeviceInfo(int device);
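A short usage sketch tying the relocated classes together (illustrative only; the <opencv2/gpu/gpu.hpp> header name is an assumption for the 2.x API):

#include <cstdio>
#include <opencv2/gpu/gpu.hpp>

int main()
{
    int count = cv::gpu::getCudaEnabledDeviceCount();
    if (count <= 0)
        return 0; // 0 = no device / built without CUDA, -1 = insufficient driver

    for (int id = 0; id < count; ++id)
    {
        cv::gpu::DeviceInfo info(id);
        std::printf("%d: %s, CC %d.%d, %d MPs, compatible: %s\n",
                    id, info.name().c_str(),
                    info.majorVersion(), info.minorVersion(),
                    info.multiProcessorCount(),
                    info.isCompatible() ? "yes" : "no");
    }
    return 0;
}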
//////////////////////////////// GpuMat ///////////////////////////////
//! Smart pointer for GPU memory with reference counting. Its interface is mostly similar to cv::Mat.
class CV_EXPORTS GpuMat
{
@@ -75,7 +165,7 @@ namespace cv { namespace gpu
//! creates a matrix header for a part of the bigger matrix
GpuMat(const GpuMat& m, Range rowRange, Range colRange);
GpuMat(const GpuMat& m, Rect roi);
//! builds GpuMat from Mat. Performs a blocking upload to the device.
explicit GpuMat(const Mat& m);
@@ -84,7 +174,7 @@ namespace cv { namespace gpu
//! assignment operators
GpuMat& operator = (const GpuMat& m);
//! performs a blocking upload of data to GpuMat.
void upload(const Mat& m);
@@ -225,26 +315,26 @@ namespace cv { namespace gpu
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
inline GpuMat::GpuMat()
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
}
inline GpuMat::GpuMat(int rows_, int cols_, int type_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
if (rows_ > 0 && cols_ > 0)
create(rows_, cols_, type_);
}
inline GpuMat::GpuMat(Size size_, int type_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
if (size_.height > 0 && size_.width > 0)
create(size_.height, size_.width, type_);
}
inline GpuMat::GpuMat(int rows_, int cols_, int type_, Scalar s_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
if (rows_ > 0 && cols_ > 0)
@@ -254,7 +344,7 @@ namespace cv { namespace gpu
}
}
inline GpuMat::GpuMat(Size size_, int type_, Scalar s_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
if (size_.height > 0 && size_.width > 0)
@@ -262,11 +352,11 @@ namespace cv { namespace gpu
create(size_.height, size_.width, type_);
setTo(s_);
}
}
inline GpuMat::~GpuMat()
{
release();
}
inline GpuMat GpuMat::clone() const
@@ -284,14 +374,14 @@ namespace cv { namespace gpu
convertTo(m, type);
}
inline size_t GpuMat::step1() const
{
return step / elemSize1();
}
inline bool GpuMat::empty() const
{
return data == 0;
}
template<typename _Tp> inline _Tp* GpuMat::ptr(int y)
@@ -304,89 +394,89 @@ namespace cv { namespace gpu
return (const _Tp*)ptr(y);
}
inline void swap(GpuMat& a, GpuMat& b)
{
a.swap(b);
}
inline GpuMat GpuMat::row(int y) const
{
return GpuMat(*this, Range(y, y+1), Range::all());
}
inline GpuMat GpuMat::col(int x) const
{
return GpuMat(*this, Range::all(), Range(x, x+1));
}
inline GpuMat GpuMat::rowRange(int startrow, int endrow) const
{
return GpuMat(*this, Range(startrow, endrow), Range::all());
}
inline GpuMat GpuMat::rowRange(Range r) const
{
return GpuMat(*this, r, Range::all());
}
inline GpuMat GpuMat::colRange(int startcol, int endcol) const
{
return GpuMat(*this, Range::all(), Range(startcol, endcol));
}
inline GpuMat GpuMat::colRange(Range r) const
{
return GpuMat(*this, Range::all(), r);
}
inline void GpuMat::create(Size size_, int type_)
{
create(size_.height, size_.width, type_);
}
inline GpuMat GpuMat::operator()(Range rowRange, Range colRange) const
{
return GpuMat(*this, rowRange, colRange);
}
inline GpuMat GpuMat::operator()(Rect roi) const
{
return GpuMat(*this, roi);
}
inline bool GpuMat::isContinuous() const
{
return (flags & Mat::CONTINUOUS_FLAG) != 0;
}
inline size_t GpuMat::elemSize() const
{
return CV_ELEM_SIZE(flags);
}
inline size_t GpuMat::elemSize1() const
{
return CV_ELEM_SIZE1(flags);
}
inline int GpuMat::type() const
{
return CV_MAT_TYPE(flags);
}
inline int GpuMat::depth() const
{
return CV_MAT_DEPTH(flags);
}
inline int GpuMat::channels() const
{
return CV_MAT_CN(flags);
}
inline Size GpuMat::size() const
{
return Size(cols, rows);
}
inline uchar* GpuMat::ptr(int y)
@@ -407,19 +497,19 @@ namespace cv { namespace gpu
return *this;
}
template <class T> inline GpuMat::operator DevMem2D_<T>() const
{
return DevMem2D_<T>(rows, cols, (T*)data, step);
}
template <class T> inline GpuMat::operator PtrStep_<T>() const
{
return PtrStep_<T>(static_cast< DevMem2D_<T> >(*this));
}
template <class T> inline GpuMat::operator PtrStep<T>() const
{
return PtrStep<T>((T*)data, step);
}
inline GpuMat createContinuous(int rows, int cols, int type)
......
This diff is collapsed.
@@ -54,94 +54,6 @@
namespace cv { namespace gpu {
//////////////////////////////// Initialization & Info ////////////////////////
//! This is the only function that does not throw exceptions if the library is compiled without CUDA.
CV_EXPORTS int getCudaEnabledDeviceCount();
//! The functions below throw cv::Exception if the library is compiled without CUDA.
CV_EXPORTS void setDevice(int device);
CV_EXPORTS int getDevice();
//! Explicitly destroys and cleans up all resources associated with the current device in the current process.
//! Any subsequent API call to this device will reinitialize the device.
CV_EXPORTS void resetDevice();
enum FeatureSet
{
FEATURE_SET_COMPUTE_10 = 10,
FEATURE_SET_COMPUTE_11 = 11,
FEATURE_SET_COMPUTE_12 = 12,
FEATURE_SET_COMPUTE_13 = 13,
FEATURE_SET_COMPUTE_20 = 20,
FEATURE_SET_COMPUTE_21 = 21,
GLOBAL_ATOMICS = FEATURE_SET_COMPUTE_11,
SHARED_ATOMICS = FEATURE_SET_COMPUTE_12,
NATIVE_DOUBLE = FEATURE_SET_COMPUTE_13
};
// Gives information about what GPU archs this OpenCV GPU module was
// compiled for
class CV_EXPORTS TargetArchs
{
public:
static bool builtWith(FeatureSet feature_set);
static bool has(int major, int minor);
static bool hasPtx(int major, int minor);
static bool hasBin(int major, int minor);
static bool hasEqualOrLessPtx(int major, int minor);
static bool hasEqualOrGreater(int major, int minor);
static bool hasEqualOrGreaterPtx(int major, int minor);
static bool hasEqualOrGreaterBin(int major, int minor);
private:
TargetArchs();
};
// Gives information about the given GPU
class CV_EXPORTS DeviceInfo
{
public:
// Creates DeviceInfo object for the current GPU
DeviceInfo() : device_id_(getDevice()) { query(); }
// Creates DeviceInfo object for the given GPU
DeviceInfo(int device_id) : device_id_(device_id) { query(); }
std::string name() const { return name_; }
// Return compute capability versions
int majorVersion() const { return majorVersion_; }
int minorVersion() const { return minorVersion_; }
int multiProcessorCount() const { return multi_processor_count_; }
size_t freeMemory() const;
size_t totalMemory() const;
// Checks whether device supports the given feature
bool supports(FeatureSet feature_set) const;
// Checks whether the GPU module can be run on the given device
bool isCompatible() const;
int deviceID() const { return device_id_; }
private:
void query();
void queryMemory(size_t& free_memory, size_t& total_memory) const;
int device_id_;
std::string name_;
int multi_processor_count_;
int majorVersion_;
int minorVersion_;
};
CV_EXPORTS void printCudaDeviceInfo(int device);
CV_EXPORTS void printShortCudaDeviceInfo(int device);
//////////////////////////////// CudaMem ////////////////////////////////
// CudaMem is a limited cv::Mat with page locked memory allocation.
// Page locked memory is only needed for async and faster copying to GPU.
......
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::gpu;
namespace
{
// Compares value to a set using the given comparator. Returns true if
// there is at least one element x in the set satisfying the predicate
// x cmp value.
template <typename Comparer>
bool compareToSet(const std::string& set_as_str, int value, Comparer cmp)
{
if (set_as_str.find_first_not_of(" ") == string::npos)
return false;
std::stringstream stream(set_as_str);
int cur_value;
while (!stream.eof())
{
stream >> cur_value;
if (cmp(cur_value, value))
return true;
}
return false;
}
}
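compareToSet treats the build-time arch list as a whitespace-separated set of integers. A small sketch of how the TargetArchs functions below use it (the string literal is illustrative only; the real CUDA_ARCH_* values are generated by CMake):

const std::string example_ptx = "11 13 20";                               // illustrative only
bool anyGE20 = compareToSet(example_ptx, 20, std::greater_equal<int>());  // true: 20 >= 20
bool exact21 = compareToSet(example_ptx, 21, std::equal_to<int>());       // false: 21 is not listed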
bool cv::gpu::TargetArchs::builtWith(cv::gpu::FeatureSet feature_set)
{
#if defined (HAVE_CUDA)
return ::compareToSet(CUDA_ARCH_FEATURES, feature_set, std::greater_equal<int>());
#else
(void)feature_set;
return false;
#endif
}
bool cv::gpu::TargetArchs::has(int major, int minor)
{
return hasPtx(major, minor) || hasBin(major, minor);
}
bool cv::gpu::TargetArchs::hasPtx(int major, int minor)
{
#if defined (HAVE_CUDA)
return ::compareToSet(CUDA_ARCH_PTX, major * 10 + minor, std::equal_to<int>());
#else
(void)major;
(void)minor;
return false;
#endif
}
bool cv::gpu::TargetArchs::hasBin(int major, int minor)
{
#if defined (HAVE_CUDA)
return ::compareToSet(CUDA_ARCH_BIN, major * 10 + minor, std::equal_to<int>());
#else
(void)major;
(void)minor;
return false;
#endif
}
bool cv::gpu::TargetArchs::hasEqualOrLessPtx(int major, int minor)
{
#if defined (HAVE_CUDA)
return ::compareToSet(CUDA_ARCH_PTX, major * 10 + minor,
std::less_equal<int>());
#else
(void)major;
(void)minor;
return false;
#endif
}
bool cv::gpu::TargetArchs::hasEqualOrGreater(int major, int minor)
{
return hasEqualOrGreaterPtx(major, minor) ||
hasEqualOrGreaterBin(major, minor);
}
bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int major, int minor)
{
#if defined (HAVE_CUDA)
return ::compareToSet(CUDA_ARCH_PTX, major * 10 + minor,
std::greater_equal<int>());
#else
(void)major;
(void)minor;
return false;
#endif
}
bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int major, int minor)
{
#if defined (HAVE_CUDA)
return ::compareToSet(CUDA_ARCH_BIN, major * 10 + minor,
std::greater_equal<int>());
#else
(void)major;
(void)minor;
return false;
#endif
}
#if !defined (HAVE_CUDA)
int cv::gpu::getCudaEnabledDeviceCount() { return 0; }
void cv::gpu::setDevice(int) { throw_nogpu(); }
int cv::gpu::getDevice() { throw_nogpu(); return 0; }
void cv::gpu::resetDevice() { throw_nogpu(); }
size_t cv::gpu::DeviceInfo::freeMemory() const { throw_nogpu(); return 0; }
size_t cv::gpu::DeviceInfo::totalMemory() const { throw_nogpu(); return 0; }
bool cv::gpu::DeviceInfo::supports(cv::gpu::FeatureSet) const { throw_nogpu(); return false; }
bool cv::gpu::DeviceInfo::isCompatible() const { throw_nogpu(); return false; }
void cv::gpu::DeviceInfo::query() { throw_nogpu(); }
void cv::gpu::DeviceInfo::queryMemory(size_t&, size_t&) const { throw_nogpu(); }
void cv::gpu::printCudaDeviceInfo(int) { throw_nogpu(); }
void cv::gpu::printShortCudaDeviceInfo(int) { throw_nogpu(); }
#else /* !defined (HAVE_CUDA) */
int cv::gpu::getCudaEnabledDeviceCount()
{
int count;
cudaError_t error = cudaGetDeviceCount( &count );
if (error == cudaErrorInsufficientDriver)
return -1;
if (error == cudaErrorNoDevice)
return 0;
cudaSafeCall(error);
return count;
}
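The three possible outcomes of getCudaEnabledDeviceCount are worth spelling out (a usage sketch, not part of the diff):

int n = cv::gpu::getCudaEnabledDeviceCount();
// n > 0  : usable devices found, cv::gpu::setDevice() may be called
// n == 0 : no CUDA device, or the library was built without CUDA support
// n == -1: cudaErrorInsufficientDriver - the installed driver is too old
if (n > 0)
    cv::gpu::setDevice(0);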
void cv::gpu::setDevice(int device)
{
cudaSafeCall( cudaSetDevice( device ) );
}
int cv::gpu::getDevice()
{
int device;
cudaSafeCall( cudaGetDevice( &device ) );
return device;
}
void cv::gpu::resetDevice()
{
cudaSafeCall( cudaDeviceReset() );
}
size_t cv::gpu::DeviceInfo::freeMemory() const
{
size_t free_memory, total_memory;
queryMemory(free_memory, total_memory);
return free_memory;
}
size_t cv::gpu::DeviceInfo::totalMemory() const
{
size_t free_memory, total_memory;
queryMemory(free_memory, total_memory);
return total_memory;
}
bool cv::gpu::DeviceInfo::supports(cv::gpu::FeatureSet feature_set) const
{
int version = majorVersion() * 10 + minorVersion();
return version >= feature_set;
}
bool cv::gpu::DeviceInfo::isCompatible() const
{
// Check PTX compatibility
if (TargetArchs::hasEqualOrLessPtx(majorVersion(), minorVersion()))
return true;
// Check BIN compatibility
for (int i = minorVersion(); i >= 0; --i)
if (TargetArchs::hasBin(majorVersion(), i))
return true;
return false;
}
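supports() is a plain version comparison against the current device, while isCompatible() also consults the build-time arch list; a sketch of the intended call site (illustrative only):

cv::gpu::DeviceInfo dev;  // describes the currently selected device

if (!dev.isCompatible())
    CV_Error(CV_StsError, "this OpenCV GPU build contains no code the device can run");

// Feature checks are monotone in compute capability: a CC 1.3 device reports
// GLOBAL_ATOMICS (>= 1.1), SHARED_ATOMICS (>= 1.2) and NATIVE_DOUBLE (>= 1.3).
bool canUseDouble = dev.supports(cv::gpu::NATIVE_DOUBLE);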
void cv::gpu::DeviceInfo::query()
{
cudaDeviceProp prop;
cudaSafeCall(cudaGetDeviceProperties(&prop, device_id_));
name_ = prop.name;
multi_processor_count_ = prop.multiProcessorCount;
majorVersion_ = prop.major;
minorVersion_ = prop.minor;
}
void cv::gpu::DeviceInfo::queryMemory(size_t& free_memory, size_t& total_memory) const
{
int prev_device_id = getDevice();
if (prev_device_id != device_id_)
setDevice(device_id_);
cudaSafeCall(cudaMemGetInfo(&free_memory, &total_memory));
if (prev_device_id != device_id_)
setDevice(prev_device_id);
}
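Because queryMemory() temporarily switches the active device and then restores it, per-device memory can be listed without disturbing the caller's selection (sketch only):

for (int id = 0; id < cv::gpu::getCudaEnabledDeviceCount(); ++id)
{
    cv::gpu::DeviceInfo info(id);
    printf("device %d: %u of %u MB free\n", id,
           (unsigned)(info.freeMemory() >> 20), (unsigned)(info.totalMemory() >> 20));
}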
namespace
{
template <class T> void getCudaAttribute(T *attribute, CUdevice_attribute device_attribute, int device)
{
*attribute = T();
CUresult error = CUDA_SUCCESS; // = cuDeviceGetAttribute( attribute, device_attribute, device ); why link errors under ubuntu??
if( CUDA_SUCCESS == error )
return;
printf("Driver API error = %04d\n", error);
cv::gpu::error("driver API error", __FILE__, __LINE__);
}
int convertSMVer2Cores(int major, int minor)
{
// Defines for GPU Architecture types (using the SM version to determine the # of cores per SM)
typedef struct {
int SM; // 0xMm (hexadecimal notation), M = SM Major version, and m = SM minor version
int Cores;
} SMtoCores;
SMtoCores gpuArchCoresPerSM[] = { { 0x10, 8 }, { 0x11, 8 }, { 0x12, 8 }, { 0x13, 8 }, { 0x20, 32 }, { 0x21, 48 }, { -1, -1 } };
int index = 0;
while (gpuArchCoresPerSM[index].SM != -1)
{
if (gpuArchCoresPerSM[index].SM == ((major << 4) + minor) )
return gpuArchCoresPerSM[index].Cores;
index++;
}
printf("MapSMtoCores undefined SMversion %d.%d!\n", major, minor);
return -1;
}
}
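convertSMVer2Cores packs the version as 0xMm and looks it up in the table above; for example:

int perMP = convertSMVer2Cores(2, 1);   // 0x21 -> 48 cores per multiprocessor (Fermi GF10x class)
int bad   = convertSMVer2Cores(3, 0);   // 0x30 is not in the table -> prints a warning, returns -1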
void cv::gpu::printCudaDeviceInfo(int device)
{
int count = getCudaEnabledDeviceCount();
bool valid = (device >= 0) && (device < count);
int beg = valid ? device : 0;
int end = valid ? device+1 : count;
printf("*** CUDA Device Query (Runtime API) version (CUDART static linking) *** \n\n");
printf("Device count: %d\n", count);
int driverVersion = 0, runtimeVersion = 0;
cudaSafeCall( cudaDriverGetVersion(&driverVersion) );
cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
const char *computeMode[] = {
"Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)",
"Exclusive (only one host thread in one process is able to use ::cudaSetDevice() with this device)",
"Prohibited (no host thread can use ::cudaSetDevice() with this device)",
"Exclusive Process (many threads in one process is able to use ::cudaSetDevice() with this device)",
"Unknown",
NULL
};
for(int dev = beg; dev < end; ++dev)
{
cudaDeviceProp prop;
cudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
printf("\nDevice %d: \"%s\"\n", dev, prop.name);
printf(" CUDA Driver Version / Runtime Version %d.%d / %d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100);
printf(" CUDA Capability Major/Minor version number: %d.%d\n", prop.major, prop.minor);
printf(" Total amount of global memory: %.0f MBytes (%llu bytes)\n", (float)prop.totalGlobalMem/1048576.0f, (unsigned long long) prop.totalGlobalMem);
printf(" (%2d) Multiprocessors x (%2d) CUDA Cores/MP: %d CUDA Cores\n",
prop.multiProcessorCount, convertSMVer2Cores(prop.major, prop.minor),
convertSMVer2Cores(prop.major, prop.minor) * prop.multiProcessorCount);
printf(" GPU Clock Speed: %.2f GHz\n", prop.clockRate * 1e-6f);
// This is not available in the CUDA Runtime API, so we make the necessary calls to the driver API to obtain it for output
int memoryClock, memBusWidth, L2CacheSize;
getCudaAttribute<int>( &memoryClock, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, dev );
getCudaAttribute<int>( &memBusWidth, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, dev );
getCudaAttribute<int>( &L2CacheSize, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, dev );
printf(" Memory Clock rate: %.2f Mhz\n", memoryClock * 1e-3f);
printf(" Memory Bus Width: %d-bit\n", memBusWidth);
if (L2CacheSize)
printf(" L2 Cache Size: %d bytes\n", L2CacheSize);
printf(" Max Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d,%d), 3D=(%d,%d,%d)\n",
prop.maxTexture1D, prop.maxTexture2D[0], prop.maxTexture2D[1],
prop.maxTexture3D[0], prop.maxTexture3D[1], prop.maxTexture3D[2]);
printf(" Max Layered Texture Size (dim) x layers 1D=(%d) x %d, 2D=(%d,%d) x %d\n",
prop.maxTexture1DLayered[0], prop.maxTexture1DLayered[1],
prop.maxTexture2DLayered[0], prop.maxTexture2DLayered[1], prop.maxTexture2DLayered[2]);
printf(" Total amount of constant memory: %u bytes\n", (int)prop.totalConstMem);
printf(" Total amount of shared memory per block: %u bytes\n", (int)prop.sharedMemPerBlock);
printf(" Total number of registers available per block: %d\n", prop.regsPerBlock);
printf(" Warp size: %d\n", prop.warpSize);
printf(" Maximum number of threads per block: %d\n", prop.maxThreadsPerBlock);
printf(" Maximum sizes of each dimension of a block: %d x %d x %d\n", prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2]);
printf(" Maximum sizes of each dimension of a grid: %d x %d x %d\n", prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2]);
printf(" Maximum memory pitch: %u bytes\n", (int)prop.memPitch);
printf(" Texture alignment: %u bytes\n", (int)prop.textureAlignment);
printf(" Concurrent copy and execution: %s with %d copy engine(s)\n", (prop.deviceOverlap ? "Yes" : "No"), prop.asyncEngineCount);
printf(" Run time limit on kernels: %s\n", prop.kernelExecTimeoutEnabled ? "Yes" : "No");
printf(" Integrated GPU sharing Host Memory: %s\n", prop.integrated ? "Yes" : "No");
printf(" Support host page-locked memory mapping: %s\n", prop.canMapHostMemory ? "Yes" : "No");
printf(" Concurrent kernel execution: %s\n", prop.concurrentKernels ? "Yes" : "No");
printf(" Alignment requirement for Surfaces: %s\n", prop.surfaceAlignment ? "Yes" : "No");
printf(" Device has ECC support enabled: %s\n", prop.ECCEnabled ? "Yes" : "No");
printf(" Device is using TCC driver mode: %s\n", prop.tccDriver ? "Yes" : "No");
printf(" Device supports Unified Addressing (UVA): %s\n", prop.unifiedAddressing ? "Yes" : "No");
printf(" Device PCI Bus ID / PCI location ID: %d / %d\n", prop.pciBusID, prop.pciDeviceID );
printf(" Compute Mode:\n");
printf(" %s \n", computeMode[prop.computeMode]);
}
printf("\n");
printf("deviceQuery, CUDA Driver = CUDART");
printf(", CUDA Driver Version = %d.%d", driverVersion / 1000, driverVersion % 100);
printf(", CUDA Runtime Version = %d.%d", runtimeVersion/1000, runtimeVersion%100);
printf(", NumDevs = %d\n\n", count);
fflush(stdout);
}
void cv::gpu::printShortCudaDeviceInfo(int device)
{
int count = getCudaEnabledDeviceCount();
bool valid = (device >= 0) && (device < count);
int beg = valid ? device : 0;
int end = valid ? device+1 : count;
int driverVersion = 0, runtimeVersion = 0;
cudaSafeCall( cudaDriverGetVersion(&driverVersion) );
cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
for(int dev = beg; dev < end; ++dev)
{
cudaDeviceProp prop;
cudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
const char *arch_str = prop.major < 2 ? " (not Fermi)" : "";
printf("Device %d: \"%s\" %.0fMb", dev, prop.name, (float)prop.totalGlobalMem/1048576.0f);
printf(", sm_%d%d%s, %d cores", prop.major, prop.minor, arch_str, convertSMVer2Cores(prop.major, prop.minor) * prop.multiProcessorCount);
printf(", Driver/Runtime ver.%d.%d/%d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100);
}
fflush(stdout);
}
#endif
@@ -118,6 +118,9 @@ void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev, GpuMat
{
CV_Assert(src.type() == CV_8UC1);
if (!TargetArchs::builtWith(FEATURE_SET_COMPUTE_13) || !DeviceInfo().supports(FEATURE_SET_COMPUTE_13))
CV_Error(CV_StsNotImplemented, "Not sufficient compute capability");
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
......
@@ -55,10 +55,10 @@ void cv::gpu::split(const GpuMat& /*src*/, vector<GpuMat>& /*dst*/, Stream& /*st
#else /* !defined (HAVE_CUDA) */
namespace cv { namespace gpu { namespace device
{
namespace split_merge
{
void merge_caller(const DevMem2Db* src, DevMem2Db& dst, int total_channels, size_t elem_size, const cudaStream_t& stream);
void split_caller(const DevMem2Db& src, DevMem2Db* dst, int num_channels, size_t elem_size1, const cudaStream_t& stream);
}
@@ -66,7 +66,7 @@ namespace cv { namespace gpu { namespace device
namespace
{
void merge(const GpuMat* src, size_t n, GpuMat& dst, const cudaStream_t& stream)
{
using namespace ::cv::gpu::device::split_merge;
@@ -76,6 +76,12 @@ namespace
int depth = src[0].depth();
Size size = src[0].size();
if (depth == CV_64F)
{
if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
}
bool single_channel_only = true;
int total_channels = 0;
@@ -90,9 +96,9 @@ namespace
CV_Assert(single_channel_only);
CV_Assert(total_channels <= 4);
if (total_channels == 1)
src[0].copyTo(dst);
else
{
dst.create(size, CV_MAKETYPE(depth, total_channels));
@@ -102,10 +108,10 @@ namespace
DevMem2Db dst_as_devmem(dst);
merge_caller(src_as_devmem, dst_as_devmem, total_channels, CV_ELEM_SIZE(depth), stream);
}
}
void split(const GpuMat& src, GpuMat* dst, const cudaStream_t& stream)
{
using namespace ::cv::gpu::device::split_merge;
@@ -115,6 +121,12 @@ namespace
int num_channels = src.channels();
Size size = src.size();
if (depth == CV_64F)
{
if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
}
if (num_channels == 1)
{
src.copyTo(dst[0]);
@@ -135,23 +147,23 @@ namespace
}
}
void cv::gpu::merge(const GpuMat* src, size_t n, GpuMat& dst, Stream& stream)
{
::merge(src, n, dst, StreamAccessor::getStream(stream));
}
void cv::gpu::merge(const vector<GpuMat>& src, GpuMat& dst, Stream& stream)
{
::merge(&src[0], src.size(), dst, StreamAccessor::getStream(stream));
}
void cv::gpu::split(const GpuMat& src, GpuMat* dst, Stream& stream)
{
::split(src, dst, StreamAccessor::getStream(stream));
}
void cv::gpu::split(const GpuMat& src, vector<GpuMat>& dst, Stream& stream)
{
dst.resize(src.channels());
if(src.channels() > 0)
......
@@ -43,6 +43,138 @@
namespace {
////////////////////////////////////////////////////////////////////////////////
// Merge
PARAM_TEST_CASE(Merge, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, UseRoi)
{
cv::gpu::DeviceInfo devInfo;
cv::Size size;
int depth;
int channels;
bool useRoi;
virtual void SetUp()
{
devInfo = GET_PARAM(0);
size = GET_PARAM(1);
depth = GET_PARAM(2);
channels = GET_PARAM(3);
useRoi = GET_PARAM(4);
cv::gpu::setDevice(devInfo.deviceID());
}
};
TEST_P(Merge, Accuracy)
{
std::vector<cv::Mat> src;
src.reserve(channels);
for (int i = 0; i < channels; ++i)
src.push_back(cv::Mat(size, depth, cv::Scalar::all(i)));
std::vector<cv::gpu::GpuMat> d_src;
for (int i = 0; i < channels; ++i)
d_src.push_back(loadMat(src[i], useRoi));
if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
{
try
{
cv::gpu::GpuMat dst;
cv::gpu::merge(d_src, dst);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else
{
cv::gpu::GpuMat dst;
cv::gpu::merge(d_src, dst);
cv::Mat dst_gold;
cv::merge(src, dst_gold);
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
}
}
INSTANTIATE_TEST_CASE_P(GPU_Core, Merge, testing::Combine(
ALL_DEVICES,
DIFFERENT_SIZES,
ALL_DEPTH,
testing::Values(1, 2, 3, 4),
WHOLE_SUBMAT));
////////////////////////////////////////////////////////////////////////////////
// Split
PARAM_TEST_CASE(Split, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, UseRoi)
{
cv::gpu::DeviceInfo devInfo;
cv::Size size;
int depth;
int channels;
bool useRoi;
int type;
virtual void SetUp()
{
devInfo = GET_PARAM(0);
size = GET_PARAM(1);
depth = GET_PARAM(2);
channels = GET_PARAM(3);
useRoi = GET_PARAM(4);
cv::gpu::setDevice(devInfo.deviceID());
type = CV_MAKE_TYPE(depth, channels);
}
};
TEST_P(Split, Accuracy)
{
cv::Mat src = randomMat(size, type);
if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
{
try
{
std::vector<cv::gpu::GpuMat> dst;
cv::gpu::split(loadMat(src), dst);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else
{
std::vector<cv::gpu::GpuMat> dst;
cv::gpu::split(loadMat(src, useRoi), dst);
std::vector<cv::Mat> dst_gold;
cv::split(src, dst_gold);
ASSERT_EQ(dst_gold.size(), dst.size());
for (size_t i = 0; i < dst_gold.size(); ++i)
{
EXPECT_MAT_NEAR(dst_gold[i], dst[i], 0.0);
}
}
}
INSTANTIATE_TEST_CASE_P(GPU_Core, Split, testing::Combine(
ALL_DEVICES,
DIFFERENT_SIZES,
ALL_DEPTH,
testing::Values(1, 2, 3, 4),
WHOLE_SUBMAT));
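The tests above gate the CV_64F cases with a supportFeature helper from the GPU test utilities. Its expected behaviour is the same two-sided check used in the library code; a sketch of what such a helper can look like (the real one lives in the test support code and may differ):

bool supportFeature(const cv::gpu::DeviceInfo& info, cv::gpu::FeatureSet feature)
{
    // built for the feature AND the selected device exposes it
    return cv::gpu::TargetArchs::builtWith(feature) && info.supports(feature);
}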
////////////////////////////////////////////////////////////////////////////////
// Add_Array
@@ -1974,7 +2106,7 @@ TEST_P(AddWeighted, Accuracy)
cv::Mat dst_gold;
cv::addWeighted(src1, alpha, src2, beta, gamma, dst_gold, dst_depth);
EXPECT_MAT_NEAR(dst_gold, dst, dst_depth < CV_32F ? 1.0 : 1e-12); EXPECT_MAT_NEAR(dst_gold, dst, dst_depth < CV_32F ? 1.0 : 1e-3);
}
}
@@ -2487,16 +2619,32 @@ TEST_P(MeanStdDev, Accuracy)
{
cv::Mat src = randomMat(size, CV_8UC1);
cv::Scalar mean; if (!supportFeature(devInfo, cv::gpu::FEATURE_SET_COMPUTE_13))
cv::Scalar stddev; {
cv::gpu::meanStdDev(loadMat(src, useRoi), mean, stddev); try
{
cv::Scalar mean;
cv::Scalar stddev;
cv::gpu::meanStdDev(loadMat(src, useRoi), mean, stddev);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsNotImplemented, e.code);
}
}
else
{
cv::Scalar mean;
cv::Scalar stddev;
cv::gpu::meanStdDev(loadMat(src, useRoi), mean, stddev);
cv::Scalar mean_gold;
cv::Scalar stddev_gold;
cv::meanStdDev(src, mean_gold, stddev_gold);
EXPECT_SCALAR_NEAR(mean_gold, mean, 1e-5);
EXPECT_SCALAR_NEAR(stddev_gold, stddev, 1e-5);
}
}
INSTANTIATE_TEST_CASE_P(GPU_Core, MeanStdDev, testing::Combine(
......
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
namespace {
////////////////////////////////////////////////////////////////////////////////
// SetTo
PARAM_TEST_CASE(SetTo, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
{
cv::gpu::DeviceInfo devInfo;
cv::Size size;
int type;
bool useRoi;
virtual void SetUp()
{
devInfo = GET_PARAM(0);
size = GET_PARAM(1);
type = GET_PARAM(2);
useRoi = GET_PARAM(3);
cv::gpu::setDevice(devInfo.deviceID());
}
};
TEST_P(SetTo, Zero)
{
cv::Scalar zero = cv::Scalar::all(0);
cv::gpu::GpuMat mat = createMat(size, type, useRoi);
mat.setTo(zero);
EXPECT_MAT_NEAR(cv::Mat::zeros(size, type), mat, 0.0);
}
TEST_P(SetTo, SameVal)
{
cv::Scalar val = cv::Scalar::all(randomDouble(0.0, 255.0));
if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
{
try
{
cv::gpu::GpuMat mat = createMat(size, type, useRoi);
mat.setTo(val);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else
{
cv::gpu::GpuMat mat = createMat(size, type, useRoi);
mat.setTo(val);
EXPECT_MAT_NEAR(cv::Mat(size, type, val), mat, 0.0);
}
}
TEST_P(SetTo, DifferentVal)
{
cv::Scalar val = randomScalar(0.0, 255.0);
if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
{
try
{
cv::gpu::GpuMat mat = createMat(size, type, useRoi);
mat.setTo(val);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else
{
cv::gpu::GpuMat mat = createMat(size, type, useRoi);
mat.setTo(val);
EXPECT_MAT_NEAR(cv::Mat(size, type, val), mat, 0.0);
}
}
TEST_P(SetTo, Masked)
{
cv::Scalar val = randomScalar(0.0, 255.0);
cv::Mat mat_gold = randomMat(size, type);
cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0);
if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
{
try
{
cv::gpu::GpuMat mat = createMat(size, type, useRoi);
mat.setTo(val, loadMat(mask));
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else
{
cv::gpu::GpuMat mat = loadMat(mat_gold, useRoi);
mat.setTo(val, loadMat(mask, useRoi));
mat_gold.setTo(val, mask);
EXPECT_MAT_NEAR(mat_gold, mat, 0.0);
}
}
INSTANTIATE_TEST_CASE_P(GPU_GpuMat, SetTo, testing::Combine(
ALL_DEVICES,
DIFFERENT_SIZES,
ALL_TYPES,
WHOLE_SUBMAT));
////////////////////////////////////////////////////////////////////////////////
// CopyTo
PARAM_TEST_CASE(CopyTo, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
{
cv::gpu::DeviceInfo devInfo;
cv::Size size;
int type;
bool useRoi;
virtual void SetUp()
{
devInfo = GET_PARAM(0);
size = GET_PARAM(1);
type = GET_PARAM(2);
useRoi = GET_PARAM(3);
cv::gpu::setDevice(devInfo.deviceID());
}
};
TEST_P(CopyTo, WithOutMask)
{
cv::Mat src = randomMat(size, type);
cv::gpu::GpuMat d_src = loadMat(src, useRoi);
cv::gpu::GpuMat dst = createMat(size, type, useRoi);
d_src.copyTo(dst);
EXPECT_MAT_NEAR(src, dst, 0.0);
}
TEST_P(CopyTo, Masked)
{
cv::Mat src = randomMat(size, type);
cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0);
if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
{
try
{
cv::gpu::GpuMat d_src = loadMat(src);
cv::gpu::GpuMat dst;
d_src.copyTo(dst, loadMat(mask, useRoi));
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else
{
cv::gpu::GpuMat d_src = loadMat(src, useRoi);
cv::gpu::GpuMat dst = loadMat(cv::Mat::zeros(size, type), useRoi);
d_src.copyTo(dst, loadMat(mask, useRoi));
cv::Mat dst_gold = cv::Mat::zeros(size, type);
src.copyTo(dst_gold, mask);
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
}
}
INSTANTIATE_TEST_CASE_P(GPU_GpuMat, CopyTo, testing::Combine(
ALL_DEVICES,
DIFFERENT_SIZES,
ALL_TYPES,
WHOLE_SUBMAT));
////////////////////////////////////////////////////////////////////////////////
// ConvertTo
PARAM_TEST_CASE(ConvertTo, cv::gpu::DeviceInfo, cv::Size, MatDepth, MatDepth, UseRoi)
{
cv::gpu::DeviceInfo devInfo;
cv::Size size;
int depth1;
int depth2;
bool useRoi;
virtual void SetUp()
{
devInfo = GET_PARAM(0);
size = GET_PARAM(1);
depth1 = GET_PARAM(2);
depth2 = GET_PARAM(3);
useRoi = GET_PARAM(4);
cv::gpu::setDevice(devInfo.deviceID());
}
};
TEST_P(ConvertTo, WithOutScaling)
{
cv::Mat src = randomMat(size, depth1);
if ((depth1 == CV_64F || depth2 == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
{
try
{
cv::gpu::GpuMat d_src = loadMat(src);
cv::gpu::GpuMat dst;
d_src.convertTo(dst, depth2);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else
{
cv::gpu::GpuMat d_src = loadMat(src, useRoi);
cv::gpu::GpuMat dst = createMat(size, depth2, useRoi);
d_src.convertTo(dst, depth2);
cv::Mat dst_gold;
src.convertTo(dst_gold, depth2);
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
}
}
TEST_P(ConvertTo, WithScaling)
{
cv::Mat src = randomMat(size, depth1);
double a = randomDouble(0.0, 1.0);
double b = randomDouble(-10.0, 10.0);
if ((depth1 == CV_64F || depth2 == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
{
try
{
cv::gpu::GpuMat d_src = loadMat(src);
cv::gpu::GpuMat dst;
d_src.convertTo(dst, depth2, a, b);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else
{
cv::gpu::GpuMat d_src = loadMat(src, useRoi);
cv::gpu::GpuMat dst = createMat(size, depth2, useRoi);
d_src.convertTo(dst, depth2, a, b);
cv::Mat dst_gold;
src.convertTo(dst_gold, depth2, a, b);
EXPECT_MAT_NEAR(dst_gold, dst, depth2 < CV_32F ? 0.0 : 1e-4);
}
}
INSTANTIATE_TEST_CASE_P(GPU_GpuMat, ConvertTo, testing::Combine(
ALL_DEVICES,
DIFFERENT_SIZES,
ALL_DEPTH,
ALL_DEPTH,
WHOLE_SUBMAT));
} // namespace
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#ifdef HAVE_CUDA
using namespace cvtest;
using namespace testing;
////////////////////////////////////////////////////////////////////////////////
// merge
PARAM_TEST_CASE(Merge, cv::gpu::DeviceInfo, MatType, UseRoi)
{
cv::gpu::DeviceInfo devInfo;
int type;
bool useRoi;
cv::Size size;
std::vector<cv::Mat> src;
cv::Mat dst_gold;
virtual void SetUp()
{
devInfo = GET_PARAM(0);
type = GET_PARAM(1);
useRoi = GET_PARAM(2);
cv::gpu::setDevice(devInfo.deviceID());
cv::RNG& rng = TS::ptr()->get_rng();
size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150));
int depth = CV_MAT_DEPTH(type);
int num_channels = CV_MAT_CN(type);
src.reserve(num_channels);
for (int i = 0; i < num_channels; ++i)
src.push_back(cv::Mat(size, depth, cv::Scalar::all(i)));
cv::merge(src, dst_gold);
}
};
TEST_P(Merge, Accuracy)
{
if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
return;
cv::Mat dst;
std::vector<cv::gpu::GpuMat> dev_src;
cv::gpu::GpuMat dev_dst;
for (size_t i = 0; i < src.size(); ++i)
dev_src.push_back(loadMat(src[i], useRoi));
cv::gpu::merge(dev_src, dev_dst);
dev_dst.download(dst);
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
}
INSTANTIATE_TEST_CASE_P(MatOp, Merge, Combine(
ALL_DEVICES,
ALL_TYPES,
WHOLE_SUBMAT));
////////////////////////////////////////////////////////////////////////////////
// split
PARAM_TEST_CASE(Split, cv::gpu::DeviceInfo, MatType, UseRoi)
{
cv::gpu::DeviceInfo devInfo;
int type;
bool useRoi;
cv::Size size;
cv::Mat src;
std::vector<cv::Mat> dst_gold;
virtual void SetUp()
{
devInfo = GET_PARAM(0);
type = GET_PARAM(1);
useRoi = GET_PARAM(2);
cv::gpu::setDevice(devInfo.deviceID());
cv::RNG& rng = TS::ptr()->get_rng();
size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150));
src.create(size, type);
src.setTo(cv::Scalar(1.0, 2.0, 3.0, 4.0));
cv::split(src, dst_gold);
}
};
TEST_P(Split, Accuracy)
{
if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
return;
std::vector<cv::Mat> dst;
std::vector<cv::gpu::GpuMat> dev_dst;
cv::gpu::split(loadMat(src, useRoi), dev_dst);
dst.resize(dev_dst.size());
for (size_t i = 0; i < dev_dst.size(); ++i)
dev_dst[i].download(dst[i]);
ASSERT_EQ(dst_gold.size(), dst.size());
for (size_t i = 0; i < dst_gold.size(); ++i)
{
EXPECT_MAT_NEAR(dst_gold[i], dst[i], 0.0);
}
}
INSTANTIATE_TEST_CASE_P(MatOp, Split, Combine(
ALL_DEVICES,
ALL_TYPES,
WHOLE_SUBMAT));
////////////////////////////////////////////////////////////////////////////////
// split_merge_consistency
PARAM_TEST_CASE(SplitMerge, cv::gpu::DeviceInfo, MatType)
{
cv::gpu::DeviceInfo devInfo;
int type;
cv::Size size;
cv::Mat orig;
virtual void SetUp()
{
devInfo = GET_PARAM(0);
type = GET_PARAM(1);
cv::gpu::setDevice(devInfo.deviceID());
cv::RNG& rng = TS::ptr()->get_rng();
size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150));
orig.create(size, type);
orig.setTo(cv::Scalar(1.0, 2.0, 3.0, 4.0));
}
};
TEST_P(SplitMerge, Consistency)
{
if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
return;
cv::Mat final;
std::vector<cv::gpu::GpuMat> dev_vec;
cv::gpu::GpuMat dev_final;
cv::gpu::split(loadMat(orig), dev_vec);
cv::gpu::merge(dev_vec, dev_final);
dev_final.download(final);
EXPECT_MAT_NEAR(orig, final, 0.0);
}
INSTANTIATE_TEST_CASE_P(MatOp, SplitMerge, Combine(
ALL_DEVICES,
ALL_TYPES));
////////////////////////////////////////////////////////////////////////////////
// setTo
PARAM_TEST_CASE(SetTo, cv::gpu::DeviceInfo, MatType, UseRoi)
{
cv::gpu::DeviceInfo devInfo;
int type;
bool useRoi;
cv::Size size;
cv::Mat mat_gold;
virtual void SetUp()
{
devInfo = GET_PARAM(0);
type = GET_PARAM(1);
useRoi = GET_PARAM(2);
cv::gpu::setDevice(devInfo.deviceID());
cv::RNG& rng = TS::ptr()->get_rng();
size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150));
mat_gold.create(size, type);
}
};
TEST_P(SetTo, Zero)
{
if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
return;
cv::Scalar zero = cv::Scalar::all(0);
cv::Mat mat;
cv::gpu::GpuMat dev_mat = loadMat(mat_gold, useRoi);
mat_gold.setTo(zero);
dev_mat.setTo(zero);
dev_mat.download(mat);
EXPECT_MAT_NEAR(mat_gold, mat, 0.0);
}
TEST_P(SetTo, SameVal)
{
if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
return;
cv::Scalar s = cv::Scalar::all(1);
cv::Mat mat;
cv::gpu::GpuMat dev_mat(mat_gold);
mat_gold.setTo(s);
dev_mat.setTo(s);
dev_mat.download(mat);
EXPECT_MAT_NEAR(mat_gold, mat, 0.0);
}
TEST_P(SetTo, DifferentVal)
{
if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
return;
cv::Scalar s = cv::Scalar(1, 2, 3, 4);
cv::Mat mat;
cv::gpu::GpuMat dev_mat = loadMat(mat_gold, useRoi);
mat_gold.setTo(s);
dev_mat.setTo(s);
dev_mat.download(mat);
EXPECT_MAT_NEAR(mat_gold, mat, 0.0);
}
TEST_P(SetTo, Masked)
{
if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
return;
cv::Scalar s = cv::Scalar(1, 2, 3, 4);
cv::RNG& rng = TS::ptr()->get_rng();
cv::Mat mask = randomMat(rng, mat_gold.size(), CV_8UC1, 0.0, 1.5, false);
cv::Mat mat;
cv::gpu::GpuMat dev_mat = loadMat(mat_gold, useRoi);
mat_gold.setTo(s, mask);
dev_mat.setTo(s, loadMat(mask, useRoi));
dev_mat.download(mat);
EXPECT_MAT_NEAR(mat_gold, mat, 0.0);
}
INSTANTIATE_TEST_CASE_P(MatOp, SetTo, Combine(
ALL_DEVICES,
ALL_TYPES,
WHOLE_SUBMAT));
////////////////////////////////////////////////////////////////////////////////
// copyTo
PARAM_TEST_CASE(CopyTo, cv::gpu::DeviceInfo, MatType, UseRoi)
{
cv::gpu::DeviceInfo devInfo;
int type;
bool useRoi;
cv::Size size;
cv::Mat src;
virtual void SetUp()
{
devInfo = GET_PARAM(0);
type = GET_PARAM(1);
useRoi = GET_PARAM(2);
cv::gpu::setDevice(devInfo.deviceID());
cv::RNG& rng = TS::ptr()->get_rng();
size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150));
src = randomMat(rng, size, type, 0.0, 127.0, false);
}
};
TEST_P(CopyTo, WithoutMask)
{
if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
return;
cv::Mat dst_gold;
src.copyTo(dst_gold);
cv::Mat dst;
cv::gpu::GpuMat dev_src = loadMat(src, useRoi);
cv::gpu::GpuMat dev_dst = loadMat(src, useRoi);
dev_src.copyTo(dev_dst);
dev_dst.download(dst);
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
}
TEST_P(CopyTo, Masked)
{
if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
return;
cv::RNG& rng = TS::ptr()->get_rng();
cv::Mat mask = randomMat(rng, src.size(), CV_8UC1, 0.0, 2.0, false);
cv::Mat zeroMat(src.size(), src.type(), cv::Scalar::all(0));
cv::Mat dst_gold = zeroMat.clone();
src.copyTo(dst_gold, mask);
cv::Mat dst;
cv::gpu::GpuMat dev_src = loadMat(src, useRoi);
cv::gpu::GpuMat dev_dst = loadMat(zeroMat, useRoi);
dev_src.copyTo(dev_dst, loadMat(mask, useRoi));
dev_dst.download(dst);
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
}
INSTANTIATE_TEST_CASE_P(MatOp, CopyTo, Combine(
ALL_DEVICES,
ALL_TYPES,
WHOLE_SUBMAT));
////////////////////////////////////////////////////////////////////////////////
// convertTo
PARAM_TEST_CASE(ConvertTo, cv::gpu::DeviceInfo, MatType, MatType, UseRoi)
{
cv::gpu::DeviceInfo devInfo;
int depth1;
int depth2;
bool useRoi;
cv::Size size;
cv::Mat src;
virtual void SetUp()
{
devInfo = GET_PARAM(0);
depth1 = GET_PARAM(1);
depth2 = GET_PARAM(2);
useRoi = GET_PARAM(3);
cv::gpu::setDevice(devInfo.deviceID());
cv::RNG& rng = TS::ptr()->get_rng();
size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150));
src = randomMat(rng, size, depth1, 0.0, 127.0, false);
}
};
TEST_P(ConvertTo, WithoutScaling)
{
if ((depth1 == CV_64F || depth2 == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
return;
cv::Mat dst_gold;
src.convertTo(dst_gold, depth2);
cv::Mat dst;
cv::gpu::GpuMat dev_src = loadMat(src, useRoi);
cv::gpu::GpuMat dev_dst;
dev_src.convertTo(dev_dst, depth2);
dev_dst.download(dst);
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
}
TEST_P(ConvertTo, WithScaling)
{
if ((depth1 == CV_64F || depth2 == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
return;
cv::RNG& rng = TS::ptr()->get_rng();
const double a = rng.uniform(0.0, 1.0);
const double b = rng.uniform(-10.0, 10.0);
cv::Mat dst_gold;
src.convertTo(dst_gold, depth2, a, b);
cv::Mat dst;
cv::gpu::GpuMat dev_src = loadMat(src, useRoi);
cv::gpu::GpuMat dev_dst;
dev_src.convertTo(dev_dst, depth2, a, b);
dev_dst.download(dst);
const double eps = depth2 < CV_32F ? 1 : 1e-4;
EXPECT_MAT_NEAR(dst_gold, dst, eps);
}
INSTANTIATE_TEST_CASE_P(MatOp, ConvertTo, Combine(
ALL_DEVICES,
TYPES(CV_8U, CV_64F, 1, 1),
TYPES(CV_8U, CV_64F, 1, 1),
WHOLE_SUBMAT));
////////////////////////////////////////////////////////////////////////////////
// async
struct Async : TestWithParam<cv::gpu::DeviceInfo>
{
cv::gpu::DeviceInfo devInfo;
cv::gpu::CudaMem src;
cv::Mat dst_gold0;
cv::Mat dst_gold1;
virtual void SetUp()
{
devInfo = GetParam();
cv::gpu::setDevice(devInfo.deviceID());
cv::RNG& rng = TS::ptr()->get_rng();
int rows = rng.uniform(100, 200);
int cols = rng.uniform(100, 200);
src = cv::gpu::CudaMem(cv::Mat::zeros(rows, cols, CV_8UC1));
dst_gold0 = cv::Mat(rows, cols, CV_8UC1, cv::Scalar::all(255));
dst_gold1 = cv::Mat(rows, cols, CV_8UC1, cv::Scalar::all(128));
}
};
TEST_P(Async, Accuracy)
{
cv::Mat dst0, dst1;
cv::gpu::CudaMem cpudst0;
cv::gpu::CudaMem cpudst1;
cv::gpu::GpuMat gpusrc;
cv::gpu::GpuMat gpudst0;
cv::gpu::GpuMat gpudst1(src.rows, src.cols, CV_8UC1);
cv::gpu::Stream stream0;
cv::gpu::Stream stream1;
stream0.enqueueUpload(src, gpusrc);
cv::gpu::bitwise_not(gpusrc, gpudst0, cv::gpu::GpuMat(), stream0);
stream0.enqueueDownload(gpudst0, cpudst0);
stream1.enqueueMemSet(gpudst1, cv::Scalar::all(128));
stream1.enqueueDownload(gpudst1, cpudst1);
stream0.waitForCompletion();
stream1.waitForCompletion();
dst0 = cpudst0.createMatHeader();
dst1 = cpudst1.createMatHeader();
EXPECT_MAT_NEAR(dst_gold0, dst0, 0.0);
EXPECT_MAT_NEAR(dst_gold1, dst1, 0.0);
}
INSTANTIATE_TEST_CASE_P(MatOp, Async, ALL_DEVICES);
#endif // HAVE_CUDA
@@ -41,45 +41,39 @@
 #include "precomp.hpp"
-#ifdef HAVE_CUDA
-using namespace cvtest;
-using namespace testing;
+namespace {
 //#define DUMP
-struct CV_GpuHogDetectTestRunner : cv::gpu::HOGDescriptor
+struct HOG : testing::TestWithParam<cv::gpu::DeviceInfo>, cv::gpu::HOGDescriptor
 {
-void run()
-{
-cv::Mat img_rgb = readImage("hog/road.png");
-ASSERT_FALSE(img_rgb.empty());
+cv::gpu::DeviceInfo devInfo;
 #ifdef DUMP
-f.open((std::string(cvtest::TS::ptr()->get_data_path()) + "hog/expected_output.bin").c_str(), std::ios_base::binary);
-ASSERT_TRUE(f.is_open());
+std::ofstream f;
 #else
-f.open((std::string(cvtest::TS::ptr()->get_data_path()) + "hog/expected_output.bin").c_str(), std::ios_base::binary);
-ASSERT_TRUE(f.is_open());
+std::ifstream f;
 #endif
-// Test on color image
-cv::Mat img;
-cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
-test(img);
-// Test on gray image
-cv::cvtColor(img_rgb, img, CV_BGR2GRAY);
-test(img);
-f.close();
-}
+int wins_per_img_x;
+int wins_per_img_y;
+int blocks_per_win_x;
+int blocks_per_win_y;
+int block_hist_size;
+virtual void SetUp()
+{
+devInfo = GetParam();
+cv::gpu::setDevice(devInfo.deviceID());
+}
 #ifdef DUMP
 void dump(const cv::Mat& block_hists, const std::vector<cv::Point>& locations)
 {
 f.write((char*)&block_hists.rows, sizeof(block_hists.rows));
 f.write((char*)&block_hists.cols, sizeof(block_hists.cols));
 for (int i = 0; i < block_hists.rows; ++i)
 {
 for (int j = 0; j < block_hists.cols; ++j)
@@ -88,21 +82,22 @@ struct CV_GpuHogDetectTestRunner : cv::gpu::HOGDescriptor
 f.write((char*)&val, sizeof(val));
 }
 }
 int nlocations = locations.size();
 f.write((char*)&nlocations, sizeof(nlocations));
 for (int i = 0; i < locations.size(); ++i)
 f.write((char*)&locations[i], sizeof(locations[i]));
 }
 #else
 void compare(const cv::Mat& block_hists, const std::vector<cv::Point>& locations)
 {
 int rows, cols;
-int nlocations;
 f.read((char*)&rows, sizeof(rows));
 f.read((char*)&cols, sizeof(cols));
 ASSERT_EQ(rows, block_hists.rows);
 ASSERT_EQ(cols, block_hists.cols);
 for (int i = 0; i < block_hists.rows; ++i)
 {
 for (int j = 0; j < block_hists.cols; ++j)
@@ -112,8 +107,11 @@ struct CV_GpuHogDetectTestRunner : cv::gpu::HOGDescriptor
 ASSERT_NEAR(val, block_hists.at<float>(i, j), 1e-3);
 }
 }
+int nlocations;
 f.read((char*)&nlocations, sizeof(nlocations));
 ASSERT_EQ(nlocations, static_cast<int>(locations.size()));
 for (int i = 0; i < nlocations; ++i)
 {
 cv::Point location;
@@ -123,163 +121,44 @@ struct CV_GpuHogDetectTestRunner : cv::gpu::HOGDescriptor
 }
 #endif
-void test(const cv::Mat& img)
+void testDetect(const cv::Mat& img)
 {
-cv::gpu::GpuMat d_img(img);
 gamma_correction = false;
 setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector());
-//cpu detector may be updated soon
-//hog.setSVMDetector(cv::HOGDescriptor::getDefaultPeopleDetector());
 std::vector<cv::Point> locations;
 // Test detect
-detect(d_img, locations, 0);
+detect(loadMat(img), locations, 0);
 #ifdef DUMP
-dump(block_hists, locations);
+dump(cv::Mat(block_hists), locations);
 #else
 compare(cv::Mat(block_hists), locations);
 #endif
 // Test detect on smaller image
 cv::Mat img2;
 cv::resize(img, img2, cv::Size(img.cols / 2, img.rows / 2));
-detect(cv::gpu::GpuMat(img2), locations, 0);
+detect(loadMat(img2), locations, 0);
 #ifdef DUMP
-dump(block_hists, locations);
+dump(cv::Mat(block_hists), locations);
 #else
 compare(cv::Mat(block_hists), locations);
 #endif
 // Test detect on greater image
 cv::resize(img, img2, cv::Size(img.cols * 2, img.rows * 2));
-detect(cv::gpu::GpuMat(img2), locations, 0);
+detect(loadMat(img2), locations, 0);
 #ifdef DUMP
-dump(block_hists, locations);
+dump(cv::Mat(block_hists), locations);
 #else
 compare(cv::Mat(block_hists), locations);
 #endif
 }
#ifdef DUMP
std::ofstream f;
#else
std::ifstream f;
#endif
};
struct Detect : TestWithParam<cv::gpu::DeviceInfo>
{
cv::gpu::DeviceInfo devInfo;
virtual void SetUp()
{
devInfo = GetParam();
cv::gpu::setDevice(devInfo.deviceID());
}
};
TEST_P(Detect, Accuracy)
{
CV_GpuHogDetectTestRunner runner;
runner.run();
}
INSTANTIATE_TEST_CASE_P(HOG, Detect, ALL_DEVICES);
struct CV_GpuHogGetDescriptorsTestRunner : cv::gpu::HOGDescriptor
{
CV_GpuHogGetDescriptorsTestRunner(): cv::gpu::HOGDescriptor(cv::Size(64, 128)) {}
void run()
{
// Load image (e.g. train data, composed from windows)
cv::Mat img_rgb = readImage("hog/train_data.png");
ASSERT_FALSE(img_rgb.empty());
// Convert to C4
cv::Mat img;
cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
cv::gpu::GpuMat d_img(img);
// Convert train images into feature vectors (train table)
cv::gpu::GpuMat descriptors, descriptors_by_cols;
getDescriptors(d_img, win_size, descriptors, DESCR_FORMAT_ROW_BY_ROW);
getDescriptors(d_img, win_size, descriptors_by_cols, DESCR_FORMAT_COL_BY_COL);
// Check size of the result train table
wins_per_img_x = 3;
wins_per_img_y = 2;
blocks_per_win_x = 7;
blocks_per_win_y = 15;
block_hist_size = 36;
cv::Size descr_size_expected = cv::Size(blocks_per_win_x * blocks_per_win_y * block_hist_size,
wins_per_img_x * wins_per_img_y);
ASSERT_EQ(descr_size_expected, descriptors.size());
// Check both formats of output descriptors are handled correctly
cv::Mat dr(descriptors);
cv::Mat dc(descriptors_by_cols);
for (int i = 0; i < wins_per_img_x * wins_per_img_y; ++i)
{
const float* l = dr.rowRange(i, i + 1).ptr<float>();
const float* r = dc.rowRange(i, i + 1).ptr<float>();
for (int y = 0; y < blocks_per_win_y; ++y)
for (int x = 0; x < blocks_per_win_x; ++x)
for (int k = 0; k < block_hist_size; ++k)
ASSERT_EQ(l[(y * blocks_per_win_x + x) * block_hist_size + k],
r[(x * blocks_per_win_y + y) * block_hist_size + k]);
}
/* Now we want to extract the same feature vectors, but from single images. NOTE: the results will
be different due to border value interpolation. Using many small images is slower, but we
won't call getDescriptors and will use computeBlockHistograms instead. computeBlockHistograms
works well, as can be checked in the gpu_hog sample */
img_rgb = readImage("hog/positive1.png");
ASSERT_TRUE(!img_rgb.empty());
cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
computeBlockHistograms(cv::gpu::GpuMat(img));
// Everything is fine with interpolation for left top subimage
ASSERT_EQ(0.0, cv::norm((cv::Mat)block_hists, (cv::Mat)descriptors.rowRange(0, 1)));
img_rgb = readImage("hog/positive2.png");
ASSERT_TRUE(!img_rgb.empty());
cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
computeBlockHistograms(cv::gpu::GpuMat(img));
compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(1, 2)));
img_rgb = readImage("hog/negative1.png");
ASSERT_TRUE(!img_rgb.empty());
cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
computeBlockHistograms(cv::gpu::GpuMat(img));
compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(2, 3)));
img_rgb = readImage("hog/negative2.png");
ASSERT_TRUE(!img_rgb.empty());
cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
computeBlockHistograms(cv::gpu::GpuMat(img));
compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(3, 4)));
img_rgb = readImage("hog/positive3.png");
ASSERT_TRUE(!img_rgb.empty());
cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
computeBlockHistograms(cv::gpu::GpuMat(img));
compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(4, 5)));
img_rgb = readImage("hog/negative3.png");
ASSERT_TRUE(!img_rgb.empty());
cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
computeBlockHistograms(cv::gpu::GpuMat(img));
compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(5, 6)));
}
 // Does not compare border value, as interpolation leads to delta
 void compare_inner_parts(cv::Mat d1, cv::Mat d2)
 {
@@ -292,32 +171,117 @@ struct CV_GpuHogGetDescriptorsTestRunner : cv::gpu::HOGDescriptor
 ASSERT_FLOAT_EQ(a, b);
 }
 }
-int wins_per_img_x;
-int wins_per_img_y;
-int blocks_per_win_x;
-int blocks_per_win_y;
-int block_hist_size;
 };
-struct GetDescriptors : TestWithParam<cv::gpu::DeviceInfo>
+TEST_P(HOG, Detect)
 {
-cv::gpu::DeviceInfo devInfo;
-virtual void SetUp()
-{
-devInfo = GetParam();
-cv::gpu::setDevice(devInfo.deviceID());
-}
-};
+cv::Mat img_rgb = readImage("hog/road.png");
+ASSERT_FALSE(img_rgb.empty());
+#ifdef DUMP
+f.open((std::string(cvtest::TS::ptr()->get_data_path()) + "hog/expected_output.bin").c_str(), std::ios_base::binary);
+ASSERT_TRUE(f.is_open());
+#else
+f.open((std::string(cvtest::TS::ptr()->get_data_path()) + "hog/expected_output.bin").c_str(), std::ios_base::binary);
+ASSERT_TRUE(f.is_open());
+#endif
+// Test on color image
+cv::Mat img;
+cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
+testDetect(img);
+// Test on gray image
+cv::cvtColor(img_rgb, img, CV_BGR2GRAY);
+testDetect(img);
+f.close();
+}
-TEST_P(GetDescriptors, Accuracy)
+TEST_P(HOG, GetDescriptors)
 {
-CV_GpuHogGetDescriptorsTestRunner runner;
-runner.run();
+// Load image (e.g. train data, composed from windows)
+cv::Mat img_rgb = readImage("hog/train_data.png");
ASSERT_FALSE(img_rgb.empty());
// Convert to C4
cv::Mat img;
cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
cv::gpu::GpuMat d_img(img);
// Convert train images into feature vectors (train table)
cv::gpu::GpuMat descriptors, descriptors_by_cols;
getDescriptors(d_img, win_size, descriptors, DESCR_FORMAT_ROW_BY_ROW);
getDescriptors(d_img, win_size, descriptors_by_cols, DESCR_FORMAT_COL_BY_COL);
// Check size of the result train table
wins_per_img_x = 3;
wins_per_img_y = 2;
blocks_per_win_x = 7;
blocks_per_win_y = 15;
block_hist_size = 36;
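// expected layout: one row per window (3*2 windows), each row holding 7*15 blocks * 36 bins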
cv::Size descr_size_expected = cv::Size(blocks_per_win_x * blocks_per_win_y * block_hist_size,
wins_per_img_x * wins_per_img_y);
ASSERT_EQ(descr_size_expected, descriptors.size());
// Check both formats of output descriptors are handled correctly
cv::Mat dr(descriptors);
cv::Mat dc(descriptors_by_cols);
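// both formats must contain the same histograms, with the block index transposed (row-major vs column-major)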
for (int i = 0; i < wins_per_img_x * wins_per_img_y; ++i)
{
const float* l = dr.rowRange(i, i + 1).ptr<float>();
const float* r = dc.rowRange(i, i + 1).ptr<float>();
for (int y = 0; y < blocks_per_win_y; ++y)
for (int x = 0; x < blocks_per_win_x; ++x)
for (int k = 0; k < block_hist_size; ++k)
ASSERT_EQ(l[(y * blocks_per_win_x + x) * block_hist_size + k],
r[(x * blocks_per_win_y + y) * block_hist_size + k]);
}
/* Now we want to extract the same feature vectors, but from single images. NOTE: the results will
be different due to border value interpolation. Using many small images is slower, but we
won't call getDescriptors and will use computeBlockHistograms instead. computeBlockHistograms
works well, as can be checked in the gpu_hog sample */
img_rgb = readImage("hog/positive1.png");
ASSERT_TRUE(!img_rgb.empty());
cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
computeBlockHistograms(cv::gpu::GpuMat(img));
// Everything is fine with interpolation for left top subimage
ASSERT_EQ(0.0, cv::norm((cv::Mat)block_hists, (cv::Mat)descriptors.rowRange(0, 1)));
img_rgb = readImage("hog/positive2.png");
ASSERT_TRUE(!img_rgb.empty());
cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
computeBlockHistograms(cv::gpu::GpuMat(img));
compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(1, 2)));
img_rgb = readImage("hog/negative1.png");
ASSERT_TRUE(!img_rgb.empty());
cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
computeBlockHistograms(cv::gpu::GpuMat(img));
compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(2, 3)));
img_rgb = readImage("hog/negative2.png");
ASSERT_TRUE(!img_rgb.empty());
cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
computeBlockHistograms(cv::gpu::GpuMat(img));
compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(3, 4)));
img_rgb = readImage("hog/positive3.png");
ASSERT_TRUE(!img_rgb.empty());
cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
computeBlockHistograms(cv::gpu::GpuMat(img));
compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(4, 5)));
img_rgb = readImage("hog/negative3.png");
ASSERT_TRUE(!img_rgb.empty());
cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
computeBlockHistograms(cv::gpu::GpuMat(img));
compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(5, 6)));
} }
-INSTANTIATE_TEST_CASE_P(HOG, GetDescriptors, ALL_DEVICES);
+INSTANTIATE_TEST_CASE_P(GPU_ObjDetect, HOG, ALL_DEVICES);
-#endif // HAVE_CUDA
+} // namespace