提交 464a010f 编写于 作者: R Roman Donchenko

Merge remote-tracking branch 'origin/2.4' into merge-2.4

Conflicts:
	modules/core/include/opencv2/core/operations.hpp
	modules/core/include/opencv2/core/version.hpp
	modules/core/src/gpumat.cpp
	modules/cudaimgproc/src/color.cpp
	modules/features2d/src/orb.cpp
	modules/imgproc/src/samplers.cpp
	modules/ocl/include/opencv2/ocl/matrix_operations.hpp
	modules/ocl/include/opencv2/ocl/ocl.hpp
	samples/ocl/facedetect.cpp
......@@ -454,7 +454,7 @@ typedef unsigned int cl_GLenum;
/* Define alignment keys */
#if defined( __GNUC__ )
#define CL_ALIGNED(_x) __attribute__ ((aligned(_x)))
#elif defined( _WIN32) && (_MSC_VER)
#elif defined( _WIN32) && defined(_MSC_VER)
/* Alignment keys neutered on windows because MSVC can't swallow function arguments with alignment requirements */
/* http://msdn.microsoft.com/en-us/library/373ak2y1%28VS.71%29.aspx */
/* #include <crtdefs.h> */
......
......@@ -70,7 +70,7 @@ bool CvCascadeImageReader::NegReader::nextImg()
_offset.x = std::min( (int)round % winSize.width, src.cols - winSize.width );
_offset.y = std::min( (int)round / winSize.width, src.rows - winSize.height );
if( !src.empty() && src.type() == CV_8UC1
&& offset.x >= 0 && offset.y >= 0 )
&& _offset.x >= 0 && _offset.y >= 0 )
break;
}
......
......@@ -105,7 +105,7 @@ Building OpenCV
Enable hardware optimizations
-----------------------------
Depending on target platfrom architecture different instruction sets can be used. By default
Depending on target platform architecture different instruction sets can be used. By default
compiler generates code for armv5l without VFPv3 and NEON extensions. Add ``-DUSE_VFPV3=ON``
to cmake command line to enable code generation for VFPv3 and ``-DUSE_NEON=ON`` for using
NEON SIMD extensions.
......
......@@ -2110,6 +2110,8 @@ void cv::cuda::cvtColor(InputArray src, OutputArray dst, int code, int dcn, Stre
void cv::cuda::demosaicing(InputArray _src, OutputArray _dst, int code, int dcn, Stream& stream)
{
CV_Assert( !_src.empty() );
switch (code)
{
case cv::COLOR_BayerBG2GRAY: case cv::COLOR_BayerGB2GRAY: case cv::COLOR_BayerRG2GRAY: case cv::COLOR_BayerGR2GRAY:
......
......@@ -2357,6 +2357,7 @@ struct Demosaicing : testing::TestWithParam<cv::cuda::DeviceInfo>
CUDA_TEST_P(Demosaicing, BayerBG2BGR)
{
cv::Mat img = readImage("stereobm/aloe-L.png");
ASSERT_FALSE(img.empty()) << "Can't load input image";
cv::Mat_<uchar> src;
mosaic(img, src, cv::Point(1, 1));
......@@ -2370,6 +2371,7 @@ CUDA_TEST_P(Demosaicing, BayerBG2BGR)
CUDA_TEST_P(Demosaicing, BayerGB2BGR)
{
cv::Mat img = readImage("stereobm/aloe-L.png");
ASSERT_FALSE(img.empty()) << "Can't load input image";
cv::Mat_<uchar> src;
mosaic(img, src, cv::Point(0, 1));
......@@ -2383,6 +2385,7 @@ CUDA_TEST_P(Demosaicing, BayerGB2BGR)
CUDA_TEST_P(Demosaicing, BayerRG2BGR)
{
cv::Mat img = readImage("stereobm/aloe-L.png");
ASSERT_FALSE(img.empty()) << "Can't load input image";
cv::Mat_<uchar> src;
mosaic(img, src, cv::Point(0, 0));
......@@ -2396,6 +2399,7 @@ CUDA_TEST_P(Demosaicing, BayerRG2BGR)
CUDA_TEST_P(Demosaicing, BayerGR2BGR)
{
cv::Mat img = readImage("stereobm/aloe-L.png");
ASSERT_FALSE(img.empty()) << "Can't load input image";
cv::Mat_<uchar> src;
mosaic(img, src, cv::Point(1, 0));
......@@ -2409,6 +2413,7 @@ CUDA_TEST_P(Demosaicing, BayerGR2BGR)
CUDA_TEST_P(Demosaicing, BayerBG2BGR_MHT)
{
cv::Mat img = readImage("stereobm/aloe-L.png");
ASSERT_FALSE(img.empty()) << "Can't load input image";
cv::Mat_<uchar> src;
mosaic(img, src, cv::Point(1, 1));
......@@ -2422,6 +2427,7 @@ CUDA_TEST_P(Demosaicing, BayerBG2BGR_MHT)
CUDA_TEST_P(Demosaicing, BayerGB2BGR_MHT)
{
cv::Mat img = readImage("stereobm/aloe-L.png");
ASSERT_FALSE(img.empty()) << "Can't load input image";
cv::Mat_<uchar> src;
mosaic(img, src, cv::Point(0, 1));
......@@ -2435,6 +2441,7 @@ CUDA_TEST_P(Demosaicing, BayerGB2BGR_MHT)
CUDA_TEST_P(Demosaicing, BayerRG2BGR_MHT)
{
cv::Mat img = readImage("stereobm/aloe-L.png");
ASSERT_FALSE(img.empty()) << "Can't load input image";
cv::Mat_<uchar> src;
mosaic(img, src, cv::Point(0, 0));
......@@ -2448,6 +2455,7 @@ CUDA_TEST_P(Demosaicing, BayerRG2BGR_MHT)
CUDA_TEST_P(Demosaicing, BayerGR2BGR_MHT)
{
cv::Mat img = readImage("stereobm/aloe-L.png");
ASSERT_FALSE(img.empty()) << "Can't load input image";
cv::Mat_<uchar> src;
mosaic(img, src, cv::Point(1, 0));
......
......@@ -141,12 +141,12 @@ static void computeOrbDescriptor(const KeyPoint& kpt,
float x, y;
int ix, iy;
#if 1
#define GET_VALUE(idx) \
(x = pattern[idx].x*a - pattern[idx].y*b, \
y = pattern[idx].x*b + pattern[idx].y*a, \
ix = cvRound(x), \
iy = cvRound(y), \
*(center + iy*step + ix) )
#define GET_VALUE(idx) \
(x = pattern[idx].x*a - pattern[idx].y*b, \
y = pattern[idx].x*b + pattern[idx].y*a, \
ix = cvRound(x), \
iy = cvRound(y), \
*(center + iy*step + ix) )
#else
#define GET_VALUE(idx) \
(x = pattern[idx].x*a - pattern[idx].y*b, \
......
......@@ -1552,9 +1552,9 @@ static gboolean icvOnMouse( GtkWidget *widget, GdkEvent *event, gpointer user_da
// image origin is not necessarily at (0,0)
int x0 = (widget->allocation.width - image_widget->scaled_image->cols)/2;
int y0 = (widget->allocation.height - image_widget->scaled_image->rows)/2;
pt.x = cvRound( ((pt32f.x-x0)*image_widget->original_image->cols)/
pt.x = cvFloor( ((pt32f.x-x0)*image_widget->original_image->cols)/
image_widget->scaled_image->cols );
pt.y = cvRound( ((pt32f.y-y0)*image_widget->original_image->rows)/
pt.y = cvFloor( ((pt32f.y-y0)*image_widget->original_image->rows)/
image_widget->scaled_image->rows );
}
else{
......
......@@ -64,7 +64,7 @@ adjustRect( const uchar* src, size_t src_step, int pix_size,
rect.x = win_size.width;
}
if( ip.x + win_size.width < src_size.width )
if( ip.x < src_size.width - win_size.width )
rect.width = win_size.width;
else
{
......@@ -85,7 +85,7 @@ adjustRect( const uchar* src, size_t src_step, int pix_size,
else
rect.y = -ip.y;
if( ip.y + win_size.height < src_size.height )
if( ip.y < src_size.height - win_size.height )
rect.height = win_size.height;
else
{
......@@ -155,8 +155,8 @@ void getRectSubPix_Cn_(const _Tp* src, size_t src_step, Size src_size,
src_step /= sizeof(src[0]);
dst_step /= sizeof(dst[0]);
if( 0 <= ip.x && ip.x + win_size.width < src_size.width &&
0 <= ip.y && ip.y + win_size.height < src_size.height )
if( 0 <= ip.x && ip.x < src_size.width - win_size.width &&
0 <= ip.y && ip.y < src_size.height - win_size.height)
{
// extracted rectangle is totally inside the image
src += ip.y * src_step + ip.x*cn;
......
......@@ -144,14 +144,6 @@ OpenCV C++ 1-D or 2-D dense array class ::
//! returns true if oclMatrix data is NULL
bool empty() const;
//! returns pointer to y-th row
uchar* ptr(int y = 0);
const uchar *ptr(int y = 0) const;
//! template version of the above method
template<typename _Tp> _Tp *ptr(int y = 0);
template<typename _Tp> const _Tp *ptr(int y = 0) const;
//! matrix transposition
oclMat t() const;
......
......@@ -378,14 +378,6 @@ namespace cv
//! returns true if oclMatrix data is NULL
bool empty() const;
//! returns pointer to y-th row
uchar* ptr(int y = 0);
const uchar *ptr(int y = 0) const;
//! template version of the above method
template<typename _Tp> _Tp *ptr(int y = 0);
template<typename _Tp> const _Tp *ptr(int y = 0) const;
//! matrix transposition
oclMat t() const;
......
......@@ -456,36 +456,6 @@ namespace cv
return data == 0;
}
inline uchar *oclMat::ptr(int y)
{
CV_DbgAssert( (unsigned)y < (unsigned)rows );
CV_Error(Error::GpuNotSupported, "This function hasn't been supported yet.\n");
return data + step * y;
}
inline const uchar *oclMat::ptr(int y) const
{
CV_DbgAssert( (unsigned)y < (unsigned)rows );
CV_Error(Error::GpuNotSupported, "This function hasn't been supported yet.\n");
return data + step * y;
}
template<typename _Tp> inline _Tp *oclMat::ptr(int y)
{
CV_DbgAssert( (unsigned)y < (unsigned)rows );
CV_Error(Error::GpuNotSupported, "This function hasn't been supported yet.\n");
return (_Tp *)(data + step * y);
}
template<typename _Tp> inline const _Tp *oclMat::ptr(int y) const
{
CV_DbgAssert( (unsigned)y < (unsigned)rows );
CV_Error(Error::GpuNotSupported, "This function hasn't been supported yet.\n");
return (const _Tp *)(data + step * y);
}
inline oclMat oclMat::t() const
{
oclMat tmp;
......
......@@ -73,10 +73,10 @@ PERF_TEST_P(MomentsFixture, Moments,
Mat src(srcSize, type), dst(7, 1, CV_64F);
randu(src, 0, 255);
oclMat src_d(src);
cv::Moments mom;
if (RUN_OCL_IMPL)
{
oclMat src_d(src);
OCL_TEST_CYCLE() mom = cv::ocl::ocl_moments(src_d, binaryImage);
}
else if (RUN_PLAIN_IMPL)
......
......@@ -676,7 +676,7 @@ void cv::ocl::BruteForceMatcher_OCL_base::matchCollection(const oclMat &query, c
ensureSizeIsEnough(1, nQuery, CV_32S, imgIdx);
ensureSizeIsEnough(1, nQuery, CV_32F, distance);
matchDispatcher(query, (const oclMat *)trainCollection.ptr(), trainCollection.cols, masks, trainIdx, imgIdx, distance, distType);
matchDispatcher(query, &trainCollection, trainCollection.cols, masks, trainIdx, imgIdx, distance, distType);
return;
}
......
......@@ -290,8 +290,9 @@ void openCLFree(void *devPtr)
}
#else
// TODO FIXIT Attach clReleaseMemObject call to event completion callback
Context* ctx = Context::getContext();
clFinish(getClCommandQueue(ctx));
// TODO 2013/12/04 Disable workaround
// Context* ctx = Context::getContext();
// clFinish(getClCommandQueue(ctx));
#endif
openCLSafeCall(clReleaseMemObject(data.mainBuffer));
}
......
......@@ -62,13 +62,13 @@ typedef struct __attribute__((aligned (128) )) GpuHidHaarTreeNode
GpuHidHaarTreeNode;
typedef struct __attribute__((aligned (32))) GpuHidHaarClassifier
{
int count __attribute__((aligned (4)));
GpuHidHaarTreeNode* node __attribute__((aligned (8)));
float* alpha __attribute__((aligned (8)));
}
GpuHidHaarClassifier;
//typedef struct __attribute__((aligned (32))) GpuHidHaarClassifier
//{
// int count __attribute__((aligned (4)));
// GpuHidHaarTreeNode* node __attribute__((aligned (8)));
// float* alpha __attribute__((aligned (8)));
//}
//GpuHidHaarClassifier;
typedef struct __attribute__((aligned (64))) GpuHidHaarStageClassifier
......@@ -84,22 +84,22 @@ typedef struct __attribute__((aligned (64))) GpuHidHaarStageClassifier
GpuHidHaarStageClassifier;
typedef struct __attribute__((aligned (64))) GpuHidHaarClassifierCascade
{
int count __attribute__((aligned (4)));
int is_stump_based __attribute__((aligned (4)));
int has_tilted_features __attribute__((aligned (4)));
int is_tree __attribute__((aligned (4)));
int pq0 __attribute__((aligned (4)));
int pq1 __attribute__((aligned (4)));
int pq2 __attribute__((aligned (4)));
int pq3 __attribute__((aligned (4)));
int p0 __attribute__((aligned (4)));
int p1 __attribute__((aligned (4)));
int p2 __attribute__((aligned (4)));
int p3 __attribute__((aligned (4)));
float inv_window_area __attribute__((aligned (4)));
} GpuHidHaarClassifierCascade;
//typedef struct __attribute__((aligned (64))) GpuHidHaarClassifierCascade
//{
// int count __attribute__((aligned (4)));
// int is_stump_based __attribute__((aligned (4)));
// int has_tilted_features __attribute__((aligned (4)));
// int is_tree __attribute__((aligned (4)));
// int pq0 __attribute__((aligned (4)));
// int pq1 __attribute__((aligned (4)));
// int pq2 __attribute__((aligned (4)));
// int pq3 __attribute__((aligned (4)));
// int p0 __attribute__((aligned (4)));
// int p1 __attribute__((aligned (4)));
// int p2 __attribute__((aligned (4)));
// int p3 __attribute__((aligned (4)));
// float inv_window_area __attribute__((aligned (4)));
//} GpuHidHaarClassifierCascade;
#ifdef PACKED_CLASSIFIER
......@@ -196,10 +196,12 @@ __kernel void gpuRunHaarClassifierCascadePacked(
for(int stageloop = start_stage; (stageloop < end_stage) && result; stageloop++ )
{// iterate until candidate is exist
float stage_sum = 0.0f;
int2 stageinfo = *(global int2*)(stagecascadeptr+stageloop);
float stagethreshold = as_float(stageinfo.y);
__global GpuHidHaarStageClassifier* stageinfo = (__global GpuHidHaarStageClassifier*)
((__global uchar*)stagecascadeptr+stageloop*sizeof(GpuHidHaarStageClassifier));
int stagecount = stageinfo->count;
float stagethreshold = stageinfo->threshold;
int lcl_off = (lid_y*DATA_SIZE_X)+(lid_x);
for(int nodeloop = 0; nodeloop < stageinfo.x; nodecounter++,nodeloop++ )
for(int nodeloop = 0; nodeloop < stagecount; nodecounter++,nodeloop++ )
{
// simple macro to extract shorts from int
#define M0(_t) ((_t)&0xFFFF)
......@@ -355,14 +357,17 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
variance_norm_factor = variance_norm_factor * correction - mean * mean;
variance_norm_factor = variance_norm_factor >=0.f ? sqrt(variance_norm_factor) : 1.f;
for(int stageloop = start_stage; (stageloop < split_stage) && result; stageloop++ )
for(int stageloop = start_stage; (stageloop < split_stage) && result; stageloop++ )
{
float stage_sum = 0.f;
int2 stageinfo = *(global int2*)(stagecascadeptr+stageloop);
float stagethreshold = as_float(stageinfo.y);
for(int nodeloop = 0; nodeloop < stageinfo.x; )
__global GpuHidHaarStageClassifier* stageinfo = (__global GpuHidHaarStageClassifier*)
((__global uchar*)stagecascadeptr+stageloop*sizeof(GpuHidHaarStageClassifier));
int stagecount = stageinfo->count;
float stagethreshold = stageinfo->threshold;
for(int nodeloop = 0; nodeloop < stagecount; )
{
__global GpuHidHaarTreeNode* currentnodeptr = (nodeptr + nodecounter);
__global GpuHidHaarTreeNode* currentnodeptr = (__global GpuHidHaarTreeNode*)
(((__global uchar*)nodeptr) + nodecounter * sizeof(GpuHidHaarTreeNode));
int4 info1 = *(__global int4*)(&(currentnodeptr->p[0][0]));
int4 info2 = *(__global int4*)(&(currentnodeptr->p[1][0]));
......@@ -418,7 +423,7 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
#endif
}
result = (stage_sum >= stagethreshold);
result = (stage_sum >= stagethreshold) ? 1 : 0;
}
if(factor < 2)
{
......@@ -447,14 +452,17 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
lclcount[0]=0;
barrier(CLK_LOCAL_MEM_FENCE);
int2 stageinfo = *(global int2*)(stagecascadeptr+stageloop);
float stagethreshold = as_float(stageinfo.y);
//int2 stageinfo = *(global int2*)(stagecascadeptr+stageloop);
__global GpuHidHaarStageClassifier* stageinfo = (__global GpuHidHaarStageClassifier*)
((__global uchar*)stagecascadeptr+stageloop*sizeof(GpuHidHaarStageClassifier));
int stagecount = stageinfo->count;
float stagethreshold = stageinfo->threshold;
int perfscale = queuecount > 4 ? 3 : 2;
int queuecount_loop = (queuecount + (1<<perfscale)-1) >> perfscale;
int lcl_compute_win = lcl_sz >> perfscale;
int lcl_compute_win_id = (lcl_id >>(6-perfscale));
int lcl_loops = (stageinfo.x + lcl_compute_win -1) >> (6-perfscale);
int lcl_loops = (stagecount + lcl_compute_win -1) >> (6-perfscale);
int lcl_compute_id = lcl_id - (lcl_compute_win_id << (6-perfscale));
for(int queueloop=0; queueloop<queuecount_loop; queueloop++)
{
......@@ -469,10 +477,10 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
float part_sum = 0.f;
const int stump_factor = STUMP_BASED ? 1 : 2;
int root_offset = 0;
for(int lcl_loop=0; lcl_loop<lcl_loops && tempnodecounter<stageinfo.x;)
for(int lcl_loop=0; lcl_loop<lcl_loops && tempnodecounter<stagecount;)
{
__global GpuHidHaarTreeNode* currentnodeptr =
nodeptr + (nodecounter + tempnodecounter) * stump_factor + root_offset;
__global GpuHidHaarTreeNode* currentnodeptr = (__global GpuHidHaarTreeNode*)
(((__global uchar*)nodeptr) + sizeof(GpuHidHaarTreeNode) * ((nodecounter + tempnodecounter) * stump_factor + root_offset));
int4 info1 = *(__global int4*)(&(currentnodeptr->p[0][0]));
int4 info2 = *(__global int4*)(&(currentnodeptr->p[1][0]));
......@@ -549,7 +557,7 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
queuecount = lclcount[0];
barrier(CLK_LOCAL_MEM_FENCE);
nodecounter += stageinfo.x;
nodecounter += stagecount;
}//end for(int stageloop = splitstage; stageloop< endstage && queuecount>0;stageloop++)
if(lcl_id<queuecount)
......
......@@ -59,13 +59,13 @@ typedef struct __attribute__((aligned(128))) GpuHidHaarTreeNode
int right __attribute__((aligned(4)));
}
GpuHidHaarTreeNode;
typedef struct __attribute__((aligned(32))) GpuHidHaarClassifier
{
int count __attribute__((aligned(4)));
GpuHidHaarTreeNode *node __attribute__((aligned(8)));
float *alpha __attribute__((aligned(8)));
}
GpuHidHaarClassifier;
//typedef struct __attribute__((aligned(32))) GpuHidHaarClassifier
//{
// int count __attribute__((aligned(4)));
// GpuHidHaarTreeNode *node __attribute__((aligned(8)));
// float *alpha __attribute__((aligned(8)));
//}
//GpuHidHaarClassifier;
typedef struct __attribute__((aligned(64))) GpuHidHaarStageClassifier
{
int count __attribute__((aligned(4)));
......@@ -77,29 +77,29 @@ typedef struct __attribute__((aligned(64))) GpuHidHaarStageClassifier
int reserved3 __attribute__((aligned(8)));
}
GpuHidHaarStageClassifier;
typedef struct __attribute__((aligned(64))) GpuHidHaarClassifierCascade
{
int count __attribute__((aligned(4)));
int is_stump_based __attribute__((aligned(4)));
int has_tilted_features __attribute__((aligned(4)));
int is_tree __attribute__((aligned(4)));
int pq0 __attribute__((aligned(4)));
int pq1 __attribute__((aligned(4)));
int pq2 __attribute__((aligned(4)));
int pq3 __attribute__((aligned(4)));
int p0 __attribute__((aligned(4)));
int p1 __attribute__((aligned(4)));
int p2 __attribute__((aligned(4)));
int p3 __attribute__((aligned(4)));
float inv_window_area __attribute__((aligned(4)));
} GpuHidHaarClassifierCascade;
//typedef struct __attribute__((aligned(64))) GpuHidHaarClassifierCascade
//{
// int count __attribute__((aligned(4)));
// int is_stump_based __attribute__((aligned(4)));
// int has_tilted_features __attribute__((aligned(4)));
// int is_tree __attribute__((aligned(4)));
// int pq0 __attribute__((aligned(4)));
// int pq1 __attribute__((aligned(4)));
// int pq2 __attribute__((aligned(4)));
// int pq3 __attribute__((aligned(4)));
// int p0 __attribute__((aligned(4)));
// int p1 __attribute__((aligned(4)));
// int p2 __attribute__((aligned(4)));
// int p3 __attribute__((aligned(4)));
// float inv_window_area __attribute__((aligned(4)));
//} GpuHidHaarClassifierCascade;
__kernel void gpuRunHaarClassifierCascade_scaled2(
global GpuHidHaarStageClassifier *stagecascadeptr,
global GpuHidHaarStageClassifier *stagecascadeptr_,
global int4 *info,
global GpuHidHaarTreeNode *nodeptr,
global GpuHidHaarTreeNode *nodeptr_,
global const int *restrict sum,
global const float *restrict sqsum,
global const float *restrict sqsum,
global int4 *candidate,
const int rows,
const int cols,
......@@ -132,8 +132,7 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
int max_idx = rows * cols - 1;
for (int scalei = 0; scalei < loopcount; scalei++)
{
int4 scaleinfo1;
scaleinfo1 = info[scalei];
int4 scaleinfo1 = info[scalei];
int grpnumperline = (scaleinfo1.y & 0xffff0000) >> 16;
int totalgrp = scaleinfo1.y & 0xffff;
float factor = as_float(scaleinfo1.w);
......@@ -174,15 +173,18 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
for (int stageloop = start_stage; (stageloop < end_stage) && result; stageloop++)
{
float stage_sum = 0.f;
int stagecount = stagecascadeptr[stageloop].count;
__global GpuHidHaarStageClassifier* stageinfo = (__global GpuHidHaarStageClassifier*)
(((__global uchar*)stagecascadeptr_)+stageloop*sizeof(GpuHidHaarStageClassifier));
int stagecount = stageinfo->count;
for (int nodeloop = 0; nodeloop < stagecount;)
{
__global GpuHidHaarTreeNode *currentnodeptr = (nodeptr + nodecounter);
__global GpuHidHaarTreeNode* currentnodeptr = (__global GpuHidHaarTreeNode*)
(((__global uchar*)nodeptr_) + nodecounter * sizeof(GpuHidHaarTreeNode));
int4 info1 = *(__global int4 *)(&(currentnodeptr->p[0][0]));
int4 info2 = *(__global int4 *)(&(currentnodeptr->p[1][0]));
int4 info3 = *(__global int4 *)(&(currentnodeptr->p[2][0]));
float4 w = *(__global float4 *)(&(currentnodeptr->weight[0]));
float3 alpha3 = *(__global float3 *)(&(currentnodeptr->alpha[0]));
float3 alpha3 = *(__global float3*)(&(currentnodeptr->alpha[0]));
float nodethreshold = w.w * variance_norm_factor;
info1.x += p_offset;
......@@ -204,7 +206,7 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
sum[clamp(mad24(info3.w, step, info3.x), 0, max_idx)]
+ sum[clamp(mad24(info3.w, step, info3.z), 0, max_idx)]) * w.z;
bool passThres = classsum >= nodethreshold;
bool passThres = (classsum >= nodethreshold) ? 1 : 0;
#if STUMP_BASED
stage_sum += passThres ? alpha3.y : alpha3.x;
......@@ -234,7 +236,8 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
}
#endif
}
result = (int)(stage_sum >= stagecascadeptr[stageloop].threshold);
result = (stage_sum >= stageinfo->threshold) ? 1 : 0;
}
barrier(CLK_LOCAL_MEM_FENCE);
......@@ -281,11 +284,14 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
}
}
}
__kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuHidHaarTreeNode *newnode, float scale, float weight_scale, int nodenum)
__kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuHidHaarTreeNode *newnode, float scale, float weight_scale, const int nodenum)
{
int counter = get_global_id(0);
const int counter = get_global_id(0);
int tr_x[3], tr_y[3], tr_h[3], tr_w[3], i = 0;
GpuHidHaarTreeNode t1 = *(orinode + counter);
GpuHidHaarTreeNode t1 = *(__global GpuHidHaarTreeNode*)
(((__global uchar*)orinode) + counter * sizeof(GpuHidHaarTreeNode));
__global GpuHidHaarTreeNode* pNew = (__global GpuHidHaarTreeNode*)
(((__global uchar*)newnode) + (counter + nodenum) * sizeof(GpuHidHaarTreeNode));
#pragma unroll
for (i = 0; i < 3; i++)
......@@ -297,22 +303,21 @@ __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuH
}
t1.weight[0] = -(t1.weight[1] * tr_h[1] * tr_w[1] + t1.weight[2] * tr_h[2] * tr_w[2]) / (tr_h[0] * tr_w[0]);
counter += nodenum;
#pragma unroll
for (i = 0; i < 3; i++)
{
newnode[counter].p[i][0] = tr_x[i];
newnode[counter].p[i][1] = tr_y[i];
newnode[counter].p[i][2] = tr_x[i] + tr_w[i];
newnode[counter].p[i][3] = tr_y[i] + tr_h[i];
newnode[counter].weight[i] = t1.weight[i] * weight_scale;
pNew->p[i][0] = tr_x[i];
pNew->p[i][1] = tr_y[i];
pNew->p[i][2] = tr_x[i] + tr_w[i];
pNew->p[i][3] = tr_y[i] + tr_h[i];
pNew->weight[i] = t1.weight[i] * weight_scale;
}
newnode[counter].left = t1.left;
newnode[counter].right = t1.right;
newnode[counter].threshold = t1.threshold;
newnode[counter].alpha[0] = t1.alpha[0];
newnode[counter].alpha[1] = t1.alpha[1];
newnode[counter].alpha[2] = t1.alpha[2];
pNew->left = t1.left;
pNew->right = t1.right;
pNew->threshold = t1.threshold;
pNew->alpha[0] = t1.alpha[0];
pNew->alpha[1] = t1.alpha[1];
pNew->alpha[2] = t1.alpha[2];
}
......@@ -74,11 +74,11 @@ __kernel void threshold(__global const T * restrict src, int src_offset, int src
VT vthresh = (VT)(thresh);
#ifdef THRESH_BINARY
VT vecValue = sdata > vthresh ? max_val : (VT)(0);
VT vecValue = sdata > vthresh ? (VT)max_val : (VT)(0);
#elif defined THRESH_BINARY_INV
VT vecValue = sdata > vthresh ? (VT)(0) : max_val;
VT vecValue = sdata > vthresh ? (VT)(0) : (VT)max_val;
#elif defined THRESH_TRUNC
VT vecValue = sdata > vthresh ? thresh : sdata;
VT vecValue = sdata > vthresh ? (VT)thresh : sdata;
#elif defined THRESH_TOZERO
VT vecValue = sdata > vthresh ? sdata : (VT)(0);
#elif defined THRESH_TOZERO_INV
......
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
package="org.opencv.engine"
android:versionCode="214@ANDROID_PLATFORM_VERSION_CODE@"
android:versionName="2.14" >
android:versionCode="216@ANDROID_PLATFORM_VERSION_CODE@"
android:versionName="2.16" >
<uses-sdk android:minSdkVersion="@ANDROID_NATIVE_API_LEVEL@" />
<uses-feature android:name="android.hardware.touchscreen" android:required="false"/>
......
......@@ -170,7 +170,7 @@ inline string JoinPlatform(int platform)
return result;
}
inline int SplitPlatfrom(const vector<string>& features)
inline int SplitPlatform(const vector<string>& features)
{
int result = 0;
......@@ -419,7 +419,7 @@ InstallPath(install_path)
return;
}
Platform = SplitPlatfrom(features);
Platform = SplitPlatform(features);
if (PLATFORM_UNKNOWN != Platform)
{
switch (Platform)
......
......@@ -170,7 +170,7 @@ TEST(CpuID, CheckVFPv3)
EXPECT_TRUE(cpu_id & FEATURES_HAS_VFPv3);
}
TEST(PlatfromDetector, CheckTegra)
TEST(PlatformDetector, CheckTegra)
{
EXPECT_NE(PLATFORM_UNKNOWN, DetectKnownPlatforms());
}
......
......@@ -90,28 +90,28 @@ public class ManagerActivity extends Activity
mInstalledPackageView.setAdapter(mInstalledPacksAdapter);
TextView HardwarePlatformView = (TextView)findViewById(R.id.HardwareValue);
int Platfrom = HardwareDetector.DetectKnownPlatforms();
int Platform = HardwareDetector.DetectKnownPlatforms();
int CpuId = HardwareDetector.GetCpuID();
if (HardwareDetector.PLATFORM_UNKNOWN != Platfrom)
if (HardwareDetector.PLATFORM_UNKNOWN != Platform)
{
if (HardwareDetector.PLATFORM_TEGRA == Platfrom)
if (HardwareDetector.PLATFORM_TEGRA == Platform)
{
HardwarePlatformView.setText("Tegra");
}
else if (HardwareDetector.PLATFORM_TEGRA2 == Platfrom)
else if (HardwareDetector.PLATFORM_TEGRA2 == Platform)
{
HardwarePlatformView.setText("Tegra 2");
}
else if (HardwareDetector.PLATFORM_TEGRA3 == Platfrom)
else if (HardwareDetector.PLATFORM_TEGRA3 == Platform)
{
HardwarePlatformView.setText("Tegra 3");
}
else if (HardwareDetector.PLATFORM_TEGRA4i == Platfrom)
else if (HardwareDetector.PLATFORM_TEGRA4i == Platform)
{
HardwarePlatformView.setText("Tegra 4i");
}
else if (HardwareDetector.PLATFORM_TEGRA4 == Platfrom)
else if (HardwareDetector.PLATFORM_TEGRA4 == Platform)
{
HardwarePlatformView.setText("Tegra 4");
}
......
......@@ -14,20 +14,20 @@ manually using adb tool:
.. code-block:: sh
adb install OpenCV-2.4.7-android-sdk/apk/OpenCV_2.4.7_Manager_2.14_<platform>.apk
adb install OpenCV-2.4.7.1-android-sdk/apk/OpenCV_2.4.7.1_Manager_2.15_<platform>.apk
Use the table below to determine proper OpenCV Manager package for your device:
+------------------------------+--------------+----------------------------------------------------+
| Hardware Platform | Android ver. | Package name |
+==============================+==============+====================================================+
| armeabi-v7a (ARMv7-A + NEON) | >= 2.3 | OpenCV_2.4.7_Manager_2.14_armv7a-neon.apk |
+------------------------------+--------------+----------------------------------------------------+
| armeabi-v7a (ARMv7-A + NEON) | = 2.2 | OpenCV_2.4.7_Manager_2.14_armv7a-neon-android8.apk |
+------------------------------+--------------+----------------------------------------------------+
| armeabi (ARMv5, ARMv6) | >= 2.3 | OpenCV_2.4.7_Manager_2.14_armeabi.apk |
+------------------------------+--------------+----------------------------------------------------+
| Intel x86 | >= 2.3 | OpenCV_2.4.7_Manager_2.14_x86.apk |
+------------------------------+--------------+----------------------------------------------------+
| MIPS | >= 2.3 | OpenCV_2.4.7_Manager_2.14_mips.apk |
+------------------------------+--------------+----------------------------------------------------+
+------------------------------+--------------+------------------------------------------------------+
| Hardware Platform | Android ver. | Package name |
+==============================+==============+======================================================+
| armeabi-v7a (ARMv7-A + NEON) | >= 2.3 | OpenCV_2.4.7.1_Manager_2.15_armv7a-neon.apk |
+------------------------------+--------------+------------------------------------------------------+
| armeabi-v7a (ARMv7-A + NEON) | = 2.2 | OpenCV_2.4.7.1_Manager_2.15_armv7a-neon-android8.apk |
+------------------------------+--------------+------------------------------------------------------+
| armeabi (ARMv5, ARMv6) | >= 2.3 | OpenCV_2.4.7.1_Manager_2.15_armeabi.apk |
+------------------------------+--------------+------------------------------------------------------+
| Intel x86 | >= 2.3 | OpenCV_2.4.7.1_Manager_2.15_x86.apk |
+------------------------------+--------------+------------------------------------------------------+
| MIPS | >= 2.3 | OpenCV_2.4.7.1_Manager_2.15_mips.apk |
+------------------------------+--------------+------------------------------------------------------+
......@@ -29,6 +29,10 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND)
ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/cudafilters/include")
endif()
if(HAVE_opencv_ocl)
ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/ocl/include")
endif()
if(CMAKE_COMPILER_IS_GNUCXX AND NOT ENABLE_NOISY_WARNINGS)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-function")
endif()
......@@ -56,6 +60,10 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND)
target_link_libraries(${the_target} opencv_cudaarithm opencv_cudafilters)
endif()
if(HAVE_opencv_ocl)
target_link_libraries(${the_target} opencv_ocl)
endif()
set_target_properties(${the_target} PROPERTIES
OUTPUT_NAME "cpp-${sample_kind}-${name}"
PROJECT_LABEL "(${sample_KIND}) ${name}")
......
#include "opencv2/opencv_modules.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/features2d/features2d.hpp"
#include "opencv2/nonfree/nonfree.hpp"
#include "opencv2/ml/ml.hpp"
#ifdef HAVE_OPENCV_OCL
#define _OCL_SVM_ 1 //select whether using ocl::svm method or not, default is using
#include "opencv2/ocl/ocl.hpp"
#endif
#include <fstream>
#include <iostream>
......@@ -2373,9 +2378,15 @@ static void setSVMTrainAutoParams( CvParamGrid& c_grid, CvParamGrid& gamma_grid,
degree_grid.step = 0;
}
#if defined HAVE_OPENCV_OCL && _OCL_SVM_
static void trainSVMClassifier( cv::ocl::CvSVM_OCL& svm, const SVMTrainParamsExt& svmParamsExt, const string& objClassName, VocData& vocData,
Ptr<BOWImgDescriptorExtractor>& bowExtractor, const Ptr<FeatureDetector>& fdetector,
const string& resPath )
#else
static void trainSVMClassifier( CvSVM& svm, const SVMTrainParamsExt& svmParamsExt, const string& objClassName, VocData& vocData,
Ptr<BOWImgDescriptorExtractor>& bowExtractor, const Ptr<FeatureDetector>& fdetector,
const string& resPath )
#endif
{
/* first check if a previously trained svm for the current class has been saved to file */
string svmFilename = resPath + svmsDir + "/" + objClassName + ".xml.gz";
......@@ -2448,9 +2459,15 @@ static void trainSVMClassifier( CvSVM& svm, const SVMTrainParamsExt& svmParamsEx
}
}
#if defined HAVE_OPENCV_OCL && _OCL_SVM_
static void computeConfidences( cv::ocl::CvSVM_OCL& svm, const string& objClassName, VocData& vocData,
Ptr<BOWImgDescriptorExtractor>& bowExtractor, const Ptr<FeatureDetector>& fdetector,
const string& resPath )
#else
static void computeConfidences( CvSVM& svm, const string& objClassName, VocData& vocData,
Ptr<BOWImgDescriptorExtractor>& bowExtractor, const Ptr<FeatureDetector>& fdetector,
const string& resPath )
#endif
{
cout << "*** CALCULATING CONFIDENCES FOR CLASS " << objClassName << " ***" << endl;
cout << "CALCULATING BOW VECTORS FOR TEST SET OF " << objClassName << "..." << endl;
......@@ -2589,7 +2606,11 @@ int main(int argc, char** argv)
for( size_t classIdx = 0; classIdx < objClasses.size(); ++classIdx )
{
// Train a classifier on train dataset
#if defined HAVE_OPENCV_OCL && _OCL_SVM_
cv::ocl::CvSVM_OCL svm;
#else
CvSVM svm;
#endif
trainSVMClassifier( svm, svmTrainParamsExt, objClasses[classIdx], vocData,
bowExtractor, featureDetector, resPath );
......
#include "opencv2/opencv_modules.hpp"
#include "opencv2/core/core.hpp"
#include "opencv2/ml/ml.hpp"
#include "opencv2/highgui/highgui.hpp"
#ifdef HAVE_OPENCV_OCL
#define _OCL_KNN_ 1 // select whether using ocl::KNN method or not, default is using
#define _OCL_SVM_ 1 // select whether using ocl::svm method or not, default is using
#include "opencv2/ocl/ocl.hpp"
#endif
#include <stdio.h>
......@@ -133,7 +139,14 @@ static void find_decision_boundary_KNN( int K )
prepare_train_data( trainSamples, trainClasses );
// learn classifier
#if defined HAVE_OPENCV_OCL && _OCL_KNN_
cv::ocl::KNearestNeighbour knnClassifier;
Mat temp, result;
knnClassifier.train(trainSamples, trainClasses, temp, false, K);
cv::ocl::oclMat testSample_ocl, reslut_ocl;
#else
CvKNearest knnClassifier( trainSamples, trainClasses, Mat(), false, K );
#endif
Mat testSample( 1, 2, CV_32FC1 );
for( int y = 0; y < img.rows; y += testStep )
......@@ -142,9 +155,19 @@ static void find_decision_boundary_KNN( int K )
{
testSample.at<float>(0) = (float)x;
testSample.at<float>(1) = (float)y;
#if defined HAVE_OPENCV_OCL && _OCL_KNN_
testSample_ocl.upload(testSample);
knnClassifier.find_nearest(testSample_ocl, K, reslut_ocl);
reslut_ocl.download(result);
int response = saturate_cast<int>(result.at<float>(0));
circle(imgDst, Point(x, y), 1, classColors[response]);
#else
int response = (int)knnClassifier.find_nearest( testSample, K );
circle( imgDst, Point(x,y), 1, classColors[response] );
#endif
}
}
}
......@@ -159,7 +182,11 @@ static void find_decision_boundary_SVM( CvSVMParams params )
prepare_train_data( trainSamples, trainClasses );
// learn classifier
#if defined HAVE_OPENCV_OCL && _OCL_SVM_
cv::ocl::CvSVM_OCL svmClassifier(trainSamples, trainClasses, Mat(), Mat(), params);
#else
CvSVM svmClassifier( trainSamples, trainClasses, Mat(), Mat(), params );
#endif
Mat testSample( 1, 2, CV_32FC1 );
for( int y = 0; y < img.rows; y += testStep )
......@@ -178,7 +205,7 @@ static void find_decision_boundary_SVM( CvSVMParams params )
for( int i = 0; i < svmClassifier.get_support_vector_count(); i++ )
{
const float* supportVector = svmClassifier.get_support_vector(i);
circle( imgDst, Point(supportVector[0],supportVector[1]), 5, Scalar(255,255,255), -1 );
circle( imgDst, Point(saturate_cast<int>(supportVector[0]),saturate_cast<int>(supportVector[1])), 5, CV_RGB(255,255,255), -1 );
}
}
......
......@@ -8,11 +8,16 @@
#include <iostream>
#include <stdio.h>
#if defined(_MSC_VER) && (_MSC_VER >= 1700)
# include <thread>
#endif
using namespace std;
using namespace cv;
#define LOOP_NUM 1
///////////////////////////single-threading faces detecting///////////////////////////////
const static Scalar colors[] = { CV_RGB(0,0,255),
CV_RGB(0,128,255),
CV_RGB(0,255,255),
......@@ -26,7 +31,7 @@ const static Scalar colors[] = { CV_RGB(0,0,255),
int64 work_begin = 0;
int64 work_end = 0;
string outputName;
string inputName, outputName, cascadeName;
static void workBegin()
{
......@@ -61,41 +66,17 @@ static void Draw(Mat& img, vector<Rect>& faces, double scale);
// Else if will return (total diff of each cpu and gpu rects covered pixels)/(total cpu rects covered pixels)
double checkRectSimilarity(Size sz, vector<Rect>& cpu_rst, vector<Rect>& gpu_rst);
int main( int argc, const char** argv )
static int facedetect_one_thread(bool useCPU, double scale )
{
const char* keys =
"{ h help | false | print help message }"
"{ i input | | specify input image }"
"{ t template | haarcascade_frontalface_alt.xml |"
" specify template file path }"
"{ c scale | 1.0 | scale image }"
"{ s use_cpu | false | use cpu or gpu to process the image }"
"{ o output | facedetect_output.jpg |"
" specify output image save path(only works when input is images) }";
CommandLineParser cmd(argc, argv, keys);
if (cmd.get<bool>("help"))
{
cout << "Usage : facedetect [options]" << endl;
cout << "Available options:" << endl;
cmd.printMessage();
return EXIT_SUCCESS;
}
CvCapture* capture = 0;
Mat frame, frameCopy0, frameCopy, image;
bool useCPU = cmd.get<bool>("s");
string inputName = cmd.get<string>("i");
outputName = cmd.get<string>("o");
string cascadeName = cmd.get<string>("t");
double scale = cmd.get<double>("c");
ocl::OclCascadeClassifier cascade;
CascadeClassifier cpu_cascade;
if( !cascade.load( cascadeName ) || !cpu_cascade.load(cascadeName) )
{
cout << "ERROR: Could not load classifier cascade" << endl;
cout << "ERROR: Could not load classifier cascade: " << cascadeName << endl;
return EXIT_FAILURE;
}
......@@ -186,9 +167,114 @@ int main( int argc, const char** argv )
}
cvDestroyWindow("result");
std::cout<< "single-threaded sample has finished" <<std::endl;
return 0;
}
///////////////////////////////////////detectfaces with multithreading////////////////////////////////////////////
#if defined(_MSC_VER) && (_MSC_VER >= 1700)
#define MAX_THREADS 10
static void detectFaces(std::string fileName)
{
ocl::OclCascadeClassifier cascade;
if(!cascade.load(cascadeName))
{
std::cout << "ERROR: Could not load classifier cascade: " << cascadeName << std::endl;
return;
}
Mat img = imread(fileName, CV_LOAD_IMAGE_COLOR);
if (img.empty())
{
std::cout << "cann't open file " + fileName <<std::endl;
return;
}
ocl::oclMat d_img;
d_img.upload(img);
std::vector<Rect> oclfaces;
cascade.detectMultiScale(d_img, oclfaces, 1.1, 3, 0|CV_HAAR_SCALE_IMAGE, Size(30, 30), Size(0, 0));
for(unsigned int i = 0; i<oclfaces.size(); i++)
rectangle(img, Point(oclfaces[i].x, oclfaces[i].y), Point(oclfaces[i].x + oclfaces[i].width, oclfaces[i].y + oclfaces[i].height), colors[i%8], 3);
std::string::size_type pos = outputName.rfind('.');
std::string outputNameTid = outputName + '-' + std::to_string(_threadid);
if(pos == std::string::npos)
{
std::cout << "Invalid output file name: " << outputName << std::endl;
}
else
{
outputNameTid = outputName.substr(0, pos) + "_" + std::to_string(_threadid) + outputName.substr(pos);
imwrite(outputNameTid, img);
}
imshow(outputNameTid, img);
waitKey(0);
}
static void facedetect_multithreading(int nthreads)
{
int thread_number = MAX_THREADS < nthreads ? MAX_THREADS : nthreads;
std::vector<std::thread> threads;
for(int i = 0; i<thread_number; i++)
threads.push_back(std::thread(detectFaces, inputName));
for(int i = 0; i<thread_number; i++)
threads[i].join();
}
#endif
int main( int argc, const char** argv )
{
const char* keys =
"{ h help | false | print help message }"
"{ i input | | specify input image }"
"{ t template | haarcascade_frontalface_alt.xml |"
" specify template file path }"
"{ c scale | 1.0 | scale image }"
"{ s use_cpu | false | use cpu or gpu to process the image }"
"{ o output | facedetect_output.jpg |"
" specify output image save path(only works when input is images) }"
"{ n thread_num | 1 | set number of threads >= 1 }";
CommandLineParser cmd(argc, argv, keys);
if (cmd.has("help"))
{
cout << "Usage : facedetect [options]" << endl;
cout << "Available options:" << endl;
cmd.printMessage();
return EXIT_SUCCESS;
}
bool useCPU = cmd.get<bool>("s");
inputName = cmd.get<string>("i");
outputName = cmd.get<string>("o");
cascadeName = cmd.get<string>("t");
double scale = cmd.get<double>("c");
int n = cmd.get<int>("n");
if(n > 1)
{
#if defined(_MSC_VER) && (_MSC_VER >= 1700)
std::cout<<"multi-threaded sample is running" <<std::endl;
facedetect_multithreading(n);
std::cout<<"multi-threaded sample has finished" <<std::endl;
return 0;
#else
std::cout << "std::thread is not supported, running a single-threaded version" << std::endl;
#endif
}
if (n<0)
std::cout<<"incorrect number of threads:" << n << ", running a single-threaded version" <<std::endl;
else
std::cout<<"single-threaded sample is running" <<std::endl;
return facedetect_one_thread(useCPU, scale);
}
void detect( Mat& img, vector<Rect>& faces,
ocl::OclCascadeClassifier& cascade,
double scale)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册