diff --git a/3rdparty/include/opencl/1.2/CL/cl_platform.h b/3rdparty/include/opencl/1.2/CL/cl_platform.h
index 46b3d9dcdc33379f79a72ee208025c6351e29e4b..e94949a31cbab68f0c761438fd33b79ccc0366f4 100644
--- a/3rdparty/include/opencl/1.2/CL/cl_platform.h
+++ b/3rdparty/include/opencl/1.2/CL/cl_platform.h
@@ -454,7 +454,7 @@ typedef unsigned int cl_GLenum;
 /* Define alignment keys */
 #if defined( __GNUC__ )
     #define CL_ALIGNED(_x) __attribute__ ((aligned(_x)))
-#elif defined( _WIN32) && (_MSC_VER)
+#elif defined( _WIN32) && defined(_MSC_VER)
     /* Alignment keys neutered on windows because MSVC can't swallow function arguments with alignment requirements */
     /* http://msdn.microsoft.com/en-us/library/373ak2y1%28VS.71%29.aspx */
     /* #include */
diff --git a/apps/traincascade/imagestorage.cpp b/apps/traincascade/imagestorage.cpp
index 528f9c167d3e820636f3ac9b9648a7842c772b17..e69a7df1ad485f2dfcf390e5afe556889e465db1 100644
--- a/apps/traincascade/imagestorage.cpp
+++ b/apps/traincascade/imagestorage.cpp
@@ -70,7 +70,7 @@ bool CvCascadeImageReader::NegReader::nextImg()
         _offset.x = std::min( (int)round % winSize.width, src.cols - winSize.width );
         _offset.y = std::min( (int)round / winSize.width, src.rows - winSize.height );
         if( !src.empty() && src.type() == CV_8UC1
-            && offset.x >= 0 && offset.y >= 0 )
+            && _offset.x >= 0 && _offset.y >= 0 )
             break;
     }

diff --git a/doc/tutorials/introduction/crosscompilation/arm_crosscompile_with_cmake.rst b/doc/tutorials/introduction/crosscompilation/arm_crosscompile_with_cmake.rst
index c40b86c9740d75c6dcdb9cbd8eb9f547286ca251..0b2253aceac6e88f2de0768834159602fd267574 100644
--- a/doc/tutorials/introduction/crosscompilation/arm_crosscompile_with_cmake.rst
+++ b/doc/tutorials/introduction/crosscompilation/arm_crosscompile_with_cmake.rst
@@ -105,7 +105,7 @@ Building OpenCV
 Enable hardware optimizations
 -----------------------------

-Depending on target platfrom architecture different instruction sets can be used. By default
+Depending on target platform architecture different instruction sets can be used. By default
 compiler generates code for armv5l without VFPv3 and NEON extensions. Add ``-DUSE_VFPV3=ON``
 to cmake command line to enable code generation for VFPv3 and ``-DUSE_NEON=ON`` for using
 NEON SIMD extensions.
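The cl_platform.h change above swaps a bare `(_MSC_VER)` test for `defined(_MSC_VER)`. A minimal standalone sketch of why that matters (not part of the patch; the DEMO_* names are illustrative only): an identifier that is not defined evaluates to 0 inside `#if`/`#elif`, which is semantically harmless here but triggers `-Wundef` warnings on non-MSVC Windows toolchains such as MinGW-w64 g++, whereas wrapping the token in `defined()` is always well-formed.

// Standalone sketch, assuming a GCC-compatible or MSVC compiler.
// Mirrors the guard pattern from the cl_platform.h hunk above.
#include <iostream>

#if defined(__GNUC__)
#  define DEMO_ALIGNED(x) __attribute__((aligned(x)))
#elif defined(_WIN32) && defined(_MSC_VER)
#  define DEMO_ALIGNED(x) /* neutered on MSVC, as in the real header */
#else
#  define DEMO_ALIGNED(x)
#endif

struct DemoFloat4
{
    // With GCC/Clang the attribute raises the member's alignment to 16 bytes;
    // with MSVC the macro expands to nothing, matching the patched header.
    float v[4] DEMO_ALIGNED(16);
};

int main()
{
    std::cout << "alignof(DemoFloat4) = " << alignof(DemoFloat4) << std::endl;
    return 0;
}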
diff --git a/modules/cudaimgproc/src/color.cpp b/modules/cudaimgproc/src/color.cpp index 955c5ef374820f2f6db4faedef8fe85ae70ffe84..a06b746a7f61c6b48b61f0a44dcbef595f7f4cac 100644 --- a/modules/cudaimgproc/src/color.cpp +++ b/modules/cudaimgproc/src/color.cpp @@ -2110,6 +2110,8 @@ void cv::cuda::cvtColor(InputArray src, OutputArray dst, int code, int dcn, Stre void cv::cuda::demosaicing(InputArray _src, OutputArray _dst, int code, int dcn, Stream& stream) { + CV_Assert( !_src.empty() ); + switch (code) { case cv::COLOR_BayerBG2GRAY: case cv::COLOR_BayerGB2GRAY: case cv::COLOR_BayerRG2GRAY: case cv::COLOR_BayerGR2GRAY: diff --git a/modules/cudaimgproc/test/test_color.cpp b/modules/cudaimgproc/test/test_color.cpp index 6816cc8fd86c76c93c965ed5341248f347ff2058..9188725020a85a9065774493de459a93c4f7a0e5 100644 --- a/modules/cudaimgproc/test/test_color.cpp +++ b/modules/cudaimgproc/test/test_color.cpp @@ -2357,6 +2357,7 @@ struct Demosaicing : testing::TestWithParam CUDA_TEST_P(Demosaicing, BayerBG2BGR) { cv::Mat img = readImage("stereobm/aloe-L.png"); + ASSERT_FALSE(img.empty()) << "Can't load input image"; cv::Mat_ src; mosaic(img, src, cv::Point(1, 1)); @@ -2370,6 +2371,7 @@ CUDA_TEST_P(Demosaicing, BayerBG2BGR) CUDA_TEST_P(Demosaicing, BayerGB2BGR) { cv::Mat img = readImage("stereobm/aloe-L.png"); + ASSERT_FALSE(img.empty()) << "Can't load input image"; cv::Mat_ src; mosaic(img, src, cv::Point(0, 1)); @@ -2383,6 +2385,7 @@ CUDA_TEST_P(Demosaicing, BayerGB2BGR) CUDA_TEST_P(Demosaicing, BayerRG2BGR) { cv::Mat img = readImage("stereobm/aloe-L.png"); + ASSERT_FALSE(img.empty()) << "Can't load input image"; cv::Mat_ src; mosaic(img, src, cv::Point(0, 0)); @@ -2396,6 +2399,7 @@ CUDA_TEST_P(Demosaicing, BayerRG2BGR) CUDA_TEST_P(Demosaicing, BayerGR2BGR) { cv::Mat img = readImage("stereobm/aloe-L.png"); + ASSERT_FALSE(img.empty()) << "Can't load input image"; cv::Mat_ src; mosaic(img, src, cv::Point(1, 0)); @@ -2409,6 +2413,7 @@ CUDA_TEST_P(Demosaicing, BayerGR2BGR) CUDA_TEST_P(Demosaicing, BayerBG2BGR_MHT) { cv::Mat img = readImage("stereobm/aloe-L.png"); + ASSERT_FALSE(img.empty()) << "Can't load input image"; cv::Mat_ src; mosaic(img, src, cv::Point(1, 1)); @@ -2422,6 +2427,7 @@ CUDA_TEST_P(Demosaicing, BayerBG2BGR_MHT) CUDA_TEST_P(Demosaicing, BayerGB2BGR_MHT) { cv::Mat img = readImage("stereobm/aloe-L.png"); + ASSERT_FALSE(img.empty()) << "Can't load input image"; cv::Mat_ src; mosaic(img, src, cv::Point(0, 1)); @@ -2435,6 +2441,7 @@ CUDA_TEST_P(Demosaicing, BayerGB2BGR_MHT) CUDA_TEST_P(Demosaicing, BayerRG2BGR_MHT) { cv::Mat img = readImage("stereobm/aloe-L.png"); + ASSERT_FALSE(img.empty()) << "Can't load input image"; cv::Mat_ src; mosaic(img, src, cv::Point(0, 0)); @@ -2448,6 +2455,7 @@ CUDA_TEST_P(Demosaicing, BayerRG2BGR_MHT) CUDA_TEST_P(Demosaicing, BayerGR2BGR_MHT) { cv::Mat img = readImage("stereobm/aloe-L.png"); + ASSERT_FALSE(img.empty()) << "Can't load input image"; cv::Mat_ src; mosaic(img, src, cv::Point(1, 0)); diff --git a/modules/features2d/src/orb.cpp b/modules/features2d/src/orb.cpp index a6e8ffab3c8143334e8c1a14397d26b1332e2fc5..263c27350b5ba6ac47724939821b1410855e83cf 100644 --- a/modules/features2d/src/orb.cpp +++ b/modules/features2d/src/orb.cpp @@ -141,12 +141,12 @@ static void computeOrbDescriptor(const KeyPoint& kpt, float x, y; int ix, iy; #if 1 -#define GET_VALUE(idx) \ - (x = pattern[idx].x*a - pattern[idx].y*b, \ - y = pattern[idx].x*b + pattern[idx].y*a, \ - ix = cvRound(x), \ - iy = cvRound(y), \ - *(center + iy*step + ix) ) + #define GET_VALUE(idx) \ + (x = 
pattern[idx].x*a - pattern[idx].y*b, \ + y = pattern[idx].x*b + pattern[idx].y*a, \ + ix = cvRound(x), \ + iy = cvRound(y), \ + *(center + iy*step + ix) ) #else #define GET_VALUE(idx) \ (x = pattern[idx].x*a - pattern[idx].y*b, \ diff --git a/modules/highgui/src/window_gtk.cpp b/modules/highgui/src/window_gtk.cpp index 79ae638cfc4f7f60f1a18a749e04395d82593fc4..02c25644465c12ad8a9f1b769c04df5cd13874d5 100644 --- a/modules/highgui/src/window_gtk.cpp +++ b/modules/highgui/src/window_gtk.cpp @@ -1552,9 +1552,9 @@ static gboolean icvOnMouse( GtkWidget *widget, GdkEvent *event, gpointer user_da // image origin is not necessarily at (0,0) int x0 = (widget->allocation.width - image_widget->scaled_image->cols)/2; int y0 = (widget->allocation.height - image_widget->scaled_image->rows)/2; - pt.x = cvRound( ((pt32f.x-x0)*image_widget->original_image->cols)/ + pt.x = cvFloor( ((pt32f.x-x0)*image_widget->original_image->cols)/ image_widget->scaled_image->cols ); - pt.y = cvRound( ((pt32f.y-y0)*image_widget->original_image->rows)/ + pt.y = cvFloor( ((pt32f.y-y0)*image_widget->original_image->rows)/ image_widget->scaled_image->rows ); } else{ diff --git a/modules/imgproc/src/samplers.cpp b/modules/imgproc/src/samplers.cpp index 81cd23afc0e1ac689dc6627e512ebe0da4a0d65e..d6cc8a56fea5b6a32e643c1a74196d5b1cc47d1f 100644 --- a/modules/imgproc/src/samplers.cpp +++ b/modules/imgproc/src/samplers.cpp @@ -64,7 +64,7 @@ adjustRect( const uchar* src, size_t src_step, int pix_size, rect.x = win_size.width; } - if( ip.x + win_size.width < src_size.width ) + if( ip.x < src_size.width - win_size.width ) rect.width = win_size.width; else { @@ -85,7 +85,7 @@ adjustRect( const uchar* src, size_t src_step, int pix_size, else rect.y = -ip.y; - if( ip.y + win_size.height < src_size.height ) + if( ip.y < src_size.height - win_size.height ) rect.height = win_size.height; else { @@ -155,8 +155,8 @@ void getRectSubPix_Cn_(const _Tp* src, size_t src_step, Size src_size, src_step /= sizeof(src[0]); dst_step /= sizeof(dst[0]); - if( 0 <= ip.x && ip.x + win_size.width < src_size.width && - 0 <= ip.y && ip.y + win_size.height < src_size.height ) + if( 0 <= ip.x && ip.x < src_size.width - win_size.width && + 0 <= ip.y && ip.y < src_size.height - win_size.height) { // extracted rectangle is totally inside the image src += ip.y * src_step + ip.x*cn; diff --git a/modules/ocl/doc/data_structures.rst b/modules/ocl/doc/data_structures.rst index 01a16739bafd022d927038bcd56a67492708b229..bde3d14af47b81089a21a5c58da0a3045a58fb11 100644 --- a/modules/ocl/doc/data_structures.rst +++ b/modules/ocl/doc/data_structures.rst @@ -144,14 +144,6 @@ OpenCV C++ 1-D or 2-D dense array class :: //! returns true if oclMatrix data is NULL bool empty() const; - //! returns pointer to y-th row - uchar* ptr(int y = 0); - const uchar *ptr(int y = 0) const; - - //! template version of the above method - template _Tp *ptr(int y = 0); - template const _Tp *ptr(int y = 0) const; - //! matrix transposition oclMat t() const; diff --git a/modules/ocl/include/opencv2/ocl.hpp b/modules/ocl/include/opencv2/ocl.hpp index 19af00bef4fa43164c412e3f389a524c7fe81cbc..1f03170e5f8e7480b328f12b03f7bff9b65514db 100644 --- a/modules/ocl/include/opencv2/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl.hpp @@ -378,14 +378,6 @@ namespace cv //! returns true if oclMatrix data is NULL bool empty() const; - //! returns pointer to y-th row - uchar* ptr(int y = 0); - const uchar *ptr(int y = 0) const; - - //! 
template version of the above method - template _Tp *ptr(int y = 0); - template const _Tp *ptr(int y = 0) const; - //! matrix transposition oclMat t() const; diff --git a/modules/ocl/include/opencv2/ocl/matrix_operations.hpp b/modules/ocl/include/opencv2/ocl/matrix_operations.hpp index 76db479035a0803d726b2b1d6ca6fcb38e625339..ab1fe4fb7036f38e391af063fd11ae319f01ac9d 100644 --- a/modules/ocl/include/opencv2/ocl/matrix_operations.hpp +++ b/modules/ocl/include/opencv2/ocl/matrix_operations.hpp @@ -456,36 +456,6 @@ namespace cv return data == 0; } - - - inline uchar *oclMat::ptr(int y) - { - CV_DbgAssert( (unsigned)y < (unsigned)rows ); - CV_Error(Error::GpuNotSupported, "This function hasn't been supported yet.\n"); - return data + step * y; - } - - inline const uchar *oclMat::ptr(int y) const - { - CV_DbgAssert( (unsigned)y < (unsigned)rows ); - CV_Error(Error::GpuNotSupported, "This function hasn't been supported yet.\n"); - return data + step * y; - } - - template inline _Tp *oclMat::ptr(int y) - { - CV_DbgAssert( (unsigned)y < (unsigned)rows ); - CV_Error(Error::GpuNotSupported, "This function hasn't been supported yet.\n"); - return (_Tp *)(data + step * y); - } - - template inline const _Tp *oclMat::ptr(int y) const - { - CV_DbgAssert( (unsigned)y < (unsigned)rows ); - CV_Error(Error::GpuNotSupported, "This function hasn't been supported yet.\n"); - return (const _Tp *)(data + step * y); - } - inline oclMat oclMat::t() const { oclMat tmp; diff --git a/modules/ocl/perf/perf_moments.cpp b/modules/ocl/perf/perf_moments.cpp index c5d616f83dd81afa848c3a9f2e9a4e3645c656e9..631031ecb49991f49240bbfca044569b617a3353 100644 --- a/modules/ocl/perf/perf_moments.cpp +++ b/modules/ocl/perf/perf_moments.cpp @@ -73,10 +73,10 @@ PERF_TEST_P(MomentsFixture, Moments, Mat src(srcSize, type), dst(7, 1, CV_64F); randu(src, 0, 255); - oclMat src_d(src); cv::Moments mom; if (RUN_OCL_IMPL) { + oclMat src_d(src); OCL_TEST_CYCLE() mom = cv::ocl::ocl_moments(src_d, binaryImage); } else if (RUN_PLAIN_IMPL) diff --git a/modules/ocl/src/brute_force_matcher.cpp b/modules/ocl/src/brute_force_matcher.cpp index d0e09320de675c83d1eb847938a503bdba228388..ca16f43462759d8952f15ede91ca34060c6b2c7e 100644 --- a/modules/ocl/src/brute_force_matcher.cpp +++ b/modules/ocl/src/brute_force_matcher.cpp @@ -676,7 +676,7 @@ void cv::ocl::BruteForceMatcher_OCL_base::matchCollection(const oclMat &query, c ensureSizeIsEnough(1, nQuery, CV_32S, imgIdx); ensureSizeIsEnough(1, nQuery, CV_32F, distance); - matchDispatcher(query, (const oclMat *)trainCollection.ptr(), trainCollection.cols, masks, trainIdx, imgIdx, distance, distType); + matchDispatcher(query, &trainCollection, trainCollection.cols, masks, trainIdx, imgIdx, distance, distType); return; } diff --git a/modules/ocl/src/cl_operations.cpp b/modules/ocl/src/cl_operations.cpp index 1e415772355132832397b5d5cb8fb7dc139c90c4..aa44c48749da89716b2cb2e572e8d4614d6387a8 100644 --- a/modules/ocl/src/cl_operations.cpp +++ b/modules/ocl/src/cl_operations.cpp @@ -290,8 +290,9 @@ void openCLFree(void *devPtr) } #else // TODO FIXIT Attach clReleaseMemObject call to event completion callback - Context* ctx = Context::getContext(); - clFinish(getClCommandQueue(ctx)); + // TODO 2013/12/04 Disable workaround + // Context* ctx = Context::getContext(); + // clFinish(getClCommandQueue(ctx)); #endif openCLSafeCall(clReleaseMemObject(data.mainBuffer)); } diff --git a/modules/ocl/src/opencl/haarobjectdetect.cl b/modules/ocl/src/opencl/haarobjectdetect.cl index 
a62b3af8cbf2eb0adc979e7b480d69fecd40f795..980e85dd27cba7ac7264800dc1fcbf9b8d2746f5 100644 --- a/modules/ocl/src/opencl/haarobjectdetect.cl +++ b/modules/ocl/src/opencl/haarobjectdetect.cl @@ -62,13 +62,13 @@ typedef struct __attribute__((aligned (128) )) GpuHidHaarTreeNode GpuHidHaarTreeNode; -typedef struct __attribute__((aligned (32))) GpuHidHaarClassifier -{ - int count __attribute__((aligned (4))); - GpuHidHaarTreeNode* node __attribute__((aligned (8))); - float* alpha __attribute__((aligned (8))); -} -GpuHidHaarClassifier; +//typedef struct __attribute__((aligned (32))) GpuHidHaarClassifier +//{ +// int count __attribute__((aligned (4))); +// GpuHidHaarTreeNode* node __attribute__((aligned (8))); +// float* alpha __attribute__((aligned (8))); +//} +//GpuHidHaarClassifier; typedef struct __attribute__((aligned (64))) GpuHidHaarStageClassifier @@ -84,22 +84,22 @@ typedef struct __attribute__((aligned (64))) GpuHidHaarStageClassifier GpuHidHaarStageClassifier; -typedef struct __attribute__((aligned (64))) GpuHidHaarClassifierCascade -{ - int count __attribute__((aligned (4))); - int is_stump_based __attribute__((aligned (4))); - int has_tilted_features __attribute__((aligned (4))); - int is_tree __attribute__((aligned (4))); - int pq0 __attribute__((aligned (4))); - int pq1 __attribute__((aligned (4))); - int pq2 __attribute__((aligned (4))); - int pq3 __attribute__((aligned (4))); - int p0 __attribute__((aligned (4))); - int p1 __attribute__((aligned (4))); - int p2 __attribute__((aligned (4))); - int p3 __attribute__((aligned (4))); - float inv_window_area __attribute__((aligned (4))); -} GpuHidHaarClassifierCascade; +//typedef struct __attribute__((aligned (64))) GpuHidHaarClassifierCascade +//{ +// int count __attribute__((aligned (4))); +// int is_stump_based __attribute__((aligned (4))); +// int has_tilted_features __attribute__((aligned (4))); +// int is_tree __attribute__((aligned (4))); +// int pq0 __attribute__((aligned (4))); +// int pq1 __attribute__((aligned (4))); +// int pq2 __attribute__((aligned (4))); +// int pq3 __attribute__((aligned (4))); +// int p0 __attribute__((aligned (4))); +// int p1 __attribute__((aligned (4))); +// int p2 __attribute__((aligned (4))); +// int p3 __attribute__((aligned (4))); +// float inv_window_area __attribute__((aligned (4))); +//} GpuHidHaarClassifierCascade; #ifdef PACKED_CLASSIFIER @@ -196,10 +196,12 @@ __kernel void gpuRunHaarClassifierCascadePacked( for(int stageloop = start_stage; (stageloop < end_stage) && result; stageloop++ ) {// iterate until candidate is exist float stage_sum = 0.0f; - int2 stageinfo = *(global int2*)(stagecascadeptr+stageloop); - float stagethreshold = as_float(stageinfo.y); + __global GpuHidHaarStageClassifier* stageinfo = (__global GpuHidHaarStageClassifier*) + ((__global uchar*)stagecascadeptr+stageloop*sizeof(GpuHidHaarStageClassifier)); + int stagecount = stageinfo->count; + float stagethreshold = stageinfo->threshold; int lcl_off = (lid_y*DATA_SIZE_X)+(lid_x); - for(int nodeloop = 0; nodeloop < stageinfo.x; nodecounter++,nodeloop++ ) + for(int nodeloop = 0; nodeloop < stagecount; nodecounter++,nodeloop++ ) { // simple macro to extract shorts from int #define M0(_t) ((_t)&0xFFFF) @@ -355,14 +357,17 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa variance_norm_factor = variance_norm_factor * correction - mean * mean; variance_norm_factor = variance_norm_factor >=0.f ? 
sqrt(variance_norm_factor) : 1.f; - for(int stageloop = start_stage; (stageloop < split_stage) && result; stageloop++ ) + for(int stageloop = start_stage; (stageloop < split_stage) && result; stageloop++ ) { float stage_sum = 0.f; - int2 stageinfo = *(global int2*)(stagecascadeptr+stageloop); - float stagethreshold = as_float(stageinfo.y); - for(int nodeloop = 0; nodeloop < stageinfo.x; ) + __global GpuHidHaarStageClassifier* stageinfo = (__global GpuHidHaarStageClassifier*) + ((__global uchar*)stagecascadeptr+stageloop*sizeof(GpuHidHaarStageClassifier)); + int stagecount = stageinfo->count; + float stagethreshold = stageinfo->threshold; + for(int nodeloop = 0; nodeloop < stagecount; ) { - __global GpuHidHaarTreeNode* currentnodeptr = (nodeptr + nodecounter); + __global GpuHidHaarTreeNode* currentnodeptr = (__global GpuHidHaarTreeNode*) + (((__global uchar*)nodeptr) + nodecounter * sizeof(GpuHidHaarTreeNode)); int4 info1 = *(__global int4*)(&(currentnodeptr->p[0][0])); int4 info2 = *(__global int4*)(&(currentnodeptr->p[1][0])); @@ -418,7 +423,7 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa #endif } - result = (stage_sum >= stagethreshold); + result = (stage_sum >= stagethreshold) ? 1 : 0; } if(factor < 2) { @@ -447,14 +452,17 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa lclcount[0]=0; barrier(CLK_LOCAL_MEM_FENCE); - int2 stageinfo = *(global int2*)(stagecascadeptr+stageloop); - float stagethreshold = as_float(stageinfo.y); + //int2 stageinfo = *(global int2*)(stagecascadeptr+stageloop); + __global GpuHidHaarStageClassifier* stageinfo = (__global GpuHidHaarStageClassifier*) + ((__global uchar*)stagecascadeptr+stageloop*sizeof(GpuHidHaarStageClassifier)); + int stagecount = stageinfo->count; + float stagethreshold = stageinfo->threshold; int perfscale = queuecount > 4 ? 
3 : 2; int queuecount_loop = (queuecount + (1<> perfscale; int lcl_compute_win = lcl_sz >> perfscale; int lcl_compute_win_id = (lcl_id >>(6-perfscale)); - int lcl_loops = (stageinfo.x + lcl_compute_win -1) >> (6-perfscale); + int lcl_loops = (stagecount + lcl_compute_win -1) >> (6-perfscale); int lcl_compute_id = lcl_id - (lcl_compute_win_id << (6-perfscale)); for(int queueloop=0; queueloopp[0][0])); int4 info2 = *(__global int4*)(&(currentnodeptr->p[1][0])); @@ -549,7 +557,7 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa queuecount = lclcount[0]; barrier(CLK_LOCAL_MEM_FENCE); - nodecounter += stageinfo.x; + nodecounter += stagecount; }//end for(int stageloop = splitstage; stageloop< endstage && queuecount>0;stageloop++) if(lcl_id> 16; int totalgrp = scaleinfo1.y & 0xffff; float factor = as_float(scaleinfo1.w); @@ -174,15 +173,18 @@ __kernel void gpuRunHaarClassifierCascade_scaled2( for (int stageloop = start_stage; (stageloop < end_stage) && result; stageloop++) { float stage_sum = 0.f; - int stagecount = stagecascadeptr[stageloop].count; + __global GpuHidHaarStageClassifier* stageinfo = (__global GpuHidHaarStageClassifier*) + (((__global uchar*)stagecascadeptr_)+stageloop*sizeof(GpuHidHaarStageClassifier)); + int stagecount = stageinfo->count; for (int nodeloop = 0; nodeloop < stagecount;) { - __global GpuHidHaarTreeNode *currentnodeptr = (nodeptr + nodecounter); + __global GpuHidHaarTreeNode* currentnodeptr = (__global GpuHidHaarTreeNode*) + (((__global uchar*)nodeptr_) + nodecounter * sizeof(GpuHidHaarTreeNode)); int4 info1 = *(__global int4 *)(&(currentnodeptr->p[0][0])); int4 info2 = *(__global int4 *)(&(currentnodeptr->p[1][0])); int4 info3 = *(__global int4 *)(&(currentnodeptr->p[2][0])); float4 w = *(__global float4 *)(&(currentnodeptr->weight[0])); - float3 alpha3 = *(__global float3 *)(&(currentnodeptr->alpha[0])); + float3 alpha3 = *(__global float3*)(&(currentnodeptr->alpha[0])); float nodethreshold = w.w * variance_norm_factor; info1.x += p_offset; @@ -204,7 +206,7 @@ __kernel void gpuRunHaarClassifierCascade_scaled2( sum[clamp(mad24(info3.w, step, info3.x), 0, max_idx)] + sum[clamp(mad24(info3.w, step, info3.z), 0, max_idx)]) * w.z; - bool passThres = classsum >= nodethreshold; + bool passThres = (classsum >= nodethreshold) ? 1 : 0; #if STUMP_BASED stage_sum += passThres ? alpha3.y : alpha3.x; @@ -234,7 +236,8 @@ __kernel void gpuRunHaarClassifierCascade_scaled2( } #endif } - result = (int)(stage_sum >= stagecascadeptr[stageloop].threshold); + + result = (stage_sum >= stageinfo->threshold) ? 
1 : 0; } barrier(CLK_LOCAL_MEM_FENCE); @@ -281,11 +284,14 @@ __kernel void gpuRunHaarClassifierCascade_scaled2( } } } -__kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuHidHaarTreeNode *newnode, float scale, float weight_scale, int nodenum) +__kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuHidHaarTreeNode *newnode, float scale, float weight_scale, const int nodenum) { - int counter = get_global_id(0); + const int counter = get_global_id(0); int tr_x[3], tr_y[3], tr_h[3], tr_w[3], i = 0; - GpuHidHaarTreeNode t1 = *(orinode + counter); + GpuHidHaarTreeNode t1 = *(__global GpuHidHaarTreeNode*) + (((__global uchar*)orinode) + counter * sizeof(GpuHidHaarTreeNode)); + __global GpuHidHaarTreeNode* pNew = (__global GpuHidHaarTreeNode*) + (((__global uchar*)newnode) + (counter + nodenum) * sizeof(GpuHidHaarTreeNode)); #pragma unroll for (i = 0; i < 3; i++) @@ -297,22 +303,21 @@ __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuH } t1.weight[0] = -(t1.weight[1] * tr_h[1] * tr_w[1] + t1.weight[2] * tr_h[2] * tr_w[2]) / (tr_h[0] * tr_w[0]); - counter += nodenum; #pragma unroll for (i = 0; i < 3; i++) { - newnode[counter].p[i][0] = tr_x[i]; - newnode[counter].p[i][1] = tr_y[i]; - newnode[counter].p[i][2] = tr_x[i] + tr_w[i]; - newnode[counter].p[i][3] = tr_y[i] + tr_h[i]; - newnode[counter].weight[i] = t1.weight[i] * weight_scale; + pNew->p[i][0] = tr_x[i]; + pNew->p[i][1] = tr_y[i]; + pNew->p[i][2] = tr_x[i] + tr_w[i]; + pNew->p[i][3] = tr_y[i] + tr_h[i]; + pNew->weight[i] = t1.weight[i] * weight_scale; } - newnode[counter].left = t1.left; - newnode[counter].right = t1.right; - newnode[counter].threshold = t1.threshold; - newnode[counter].alpha[0] = t1.alpha[0]; - newnode[counter].alpha[1] = t1.alpha[1]; - newnode[counter].alpha[2] = t1.alpha[2]; + pNew->left = t1.left; + pNew->right = t1.right; + pNew->threshold = t1.threshold; + pNew->alpha[0] = t1.alpha[0]; + pNew->alpha[1] = t1.alpha[1]; + pNew->alpha[2] = t1.alpha[2]; } diff --git a/modules/ocl/src/opencl/imgproc_threshold.cl b/modules/ocl/src/opencl/imgproc_threshold.cl index 63e410297b372b5a89829a8876030c00c3b98f86..85631be368b6780ef86c7e35ba947f2782bc9365 100644 --- a/modules/ocl/src/opencl/imgproc_threshold.cl +++ b/modules/ocl/src/opencl/imgproc_threshold.cl @@ -74,11 +74,11 @@ __kernel void threshold(__global const T * restrict src, int src_offset, int src VT vthresh = (VT)(thresh); #ifdef THRESH_BINARY - VT vecValue = sdata > vthresh ? max_val : (VT)(0); + VT vecValue = sdata > vthresh ? (VT)max_val : (VT)(0); #elif defined THRESH_BINARY_INV - VT vecValue = sdata > vthresh ? (VT)(0) : max_val; + VT vecValue = sdata > vthresh ? (VT)(0) : (VT)max_val; #elif defined THRESH_TRUNC - VT vecValue = sdata > vthresh ? thresh : sdata; + VT vecValue = sdata > vthresh ? (VT)thresh : sdata; #elif defined THRESH_TOZERO VT vecValue = sdata > vthresh ? 
sdata : (VT)(0); #elif defined THRESH_TOZERO_INV diff --git a/platforms/android/service/engine/AndroidManifest.xml b/platforms/android/service/engine/AndroidManifest.xml index 162d31eb02866200337da3efb46b961418bc87da..7cae6ce8a027204ff2d1d48bb24fbcba56fc58a6 100644 --- a/platforms/android/service/engine/AndroidManifest.xml +++ b/platforms/android/service/engine/AndroidManifest.xml @@ -1,8 +1,8 @@ + android:versionCode="216@ANDROID_PLATFORM_VERSION_CODE@" + android:versionName="2.16" > diff --git a/platforms/android/service/engine/jni/NativeService/PackageInfo.cpp b/platforms/android/service/engine/jni/NativeService/PackageInfo.cpp index 64ea70dae84d792dd200ec36032379ab59d7f90b..98ea828747f13435dfcb099cb263448d25e90c47 100644 --- a/platforms/android/service/engine/jni/NativeService/PackageInfo.cpp +++ b/platforms/android/service/engine/jni/NativeService/PackageInfo.cpp @@ -170,7 +170,7 @@ inline string JoinPlatform(int platform) return result; } -inline int SplitPlatfrom(const vector& features) +inline int SplitPlatform(const vector& features) { int result = 0; @@ -419,7 +419,7 @@ InstallPath(install_path) return; } - Platform = SplitPlatfrom(features); + Platform = SplitPlatform(features); if (PLATFORM_UNKNOWN != Platform) { switch (Platform) diff --git a/platforms/android/service/engine/jni/Tests/HardwareDetectionTest.cpp b/platforms/android/service/engine/jni/Tests/HardwareDetectionTest.cpp index 8637dfee30672fc868bee62fd62a262221813a42..83dd9c27e1ea57b76694a926958d4bc37fbff425 100644 --- a/platforms/android/service/engine/jni/Tests/HardwareDetectionTest.cpp +++ b/platforms/android/service/engine/jni/Tests/HardwareDetectionTest.cpp @@ -170,7 +170,7 @@ TEST(CpuID, CheckVFPv3) EXPECT_TRUE(cpu_id & FEATURES_HAS_VFPv3); } -TEST(PlatfromDetector, CheckTegra) +TEST(PlatformDetector, CheckTegra) { EXPECT_NE(PLATFORM_UNKNOWN, DetectKnownPlatforms()); } diff --git a/platforms/android/service/engine/src/org/opencv/engine/manager/ManagerActivity.java b/platforms/android/service/engine/src/org/opencv/engine/manager/ManagerActivity.java index 4e9050fa4dc12210b1d012ecdad3ba8dbb11a765..8e8389dcc7a756c1340a47f4004f910b465bea5f 100644 --- a/platforms/android/service/engine/src/org/opencv/engine/manager/ManagerActivity.java +++ b/platforms/android/service/engine/src/org/opencv/engine/manager/ManagerActivity.java @@ -90,28 +90,28 @@ public class ManagerActivity extends Activity mInstalledPackageView.setAdapter(mInstalledPacksAdapter); TextView HardwarePlatformView = (TextView)findViewById(R.id.HardwareValue); - int Platfrom = HardwareDetector.DetectKnownPlatforms(); + int Platform = HardwareDetector.DetectKnownPlatforms(); int CpuId = HardwareDetector.GetCpuID(); - if (HardwareDetector.PLATFORM_UNKNOWN != Platfrom) + if (HardwareDetector.PLATFORM_UNKNOWN != Platform) { - if (HardwareDetector.PLATFORM_TEGRA == Platfrom) + if (HardwareDetector.PLATFORM_TEGRA == Platform) { HardwarePlatformView.setText("Tegra"); } - else if (HardwareDetector.PLATFORM_TEGRA2 == Platfrom) + else if (HardwareDetector.PLATFORM_TEGRA2 == Platform) { HardwarePlatformView.setText("Tegra 2"); } - else if (HardwareDetector.PLATFORM_TEGRA3 == Platfrom) + else if (HardwareDetector.PLATFORM_TEGRA3 == Platform) { HardwarePlatformView.setText("Tegra 3"); } - else if (HardwareDetector.PLATFORM_TEGRA4i == Platfrom) + else if (HardwareDetector.PLATFORM_TEGRA4i == Platform) { HardwarePlatformView.setText("Tegra 4i"); } - else if (HardwareDetector.PLATFORM_TEGRA4 == Platfrom) + else if (HardwareDetector.PLATFORM_TEGRA4 == Platform) { 
HardwarePlatformView.setText("Tegra 4"); } diff --git a/platforms/android/service/readme.txt b/platforms/android/service/readme.txt index 1e757a0e5f91e5ebb6fa1a030969c9866cd78f15..a280b506f01ed6d9f68634c96bfd39ab444e0c57 100644 --- a/platforms/android/service/readme.txt +++ b/platforms/android/service/readme.txt @@ -14,20 +14,20 @@ manually using adb tool: .. code-block:: sh - adb install OpenCV-2.4.7-android-sdk/apk/OpenCV_2.4.7_Manager_2.14_.apk + adb install OpenCV-2.4.7.1-android-sdk/apk/OpenCV_2.4.7.1_Manager_2.15_.apk Use the table below to determine proper OpenCV Manager package for your device: -+------------------------------+--------------+----------------------------------------------------+ -| Hardware Platform | Android ver. | Package name | -+==============================+==============+====================================================+ -| armeabi-v7a (ARMv7-A + NEON) | >= 2.3 | OpenCV_2.4.7_Manager_2.14_armv7a-neon.apk | -+------------------------------+--------------+----------------------------------------------------+ -| armeabi-v7a (ARMv7-A + NEON) | = 2.2 | OpenCV_2.4.7_Manager_2.14_armv7a-neon-android8.apk | -+------------------------------+--------------+----------------------------------------------------+ -| armeabi (ARMv5, ARMv6) | >= 2.3 | OpenCV_2.4.7_Manager_2.14_armeabi.apk | -+------------------------------+--------------+----------------------------------------------------+ -| Intel x86 | >= 2.3 | OpenCV_2.4.7_Manager_2.14_x86.apk | -+------------------------------+--------------+----------------------------------------------------+ -| MIPS | >= 2.3 | OpenCV_2.4.7_Manager_2.14_mips.apk | -+------------------------------+--------------+----------------------------------------------------+ ++------------------------------+--------------+------------------------------------------------------+ +| Hardware Platform | Android ver. 
| Package name | ++==============================+==============+======================================================+ +| armeabi-v7a (ARMv7-A + NEON) | >= 2.3 | OpenCV_2.4.7.1_Manager_2.15_armv7a-neon.apk | ++------------------------------+--------------+------------------------------------------------------+ +| armeabi-v7a (ARMv7-A + NEON) | = 2.2 | OpenCV_2.4.7.1_Manager_2.15_armv7a-neon-android8.apk | ++------------------------------+--------------+------------------------------------------------------+ +| armeabi (ARMv5, ARMv6) | >= 2.3 | OpenCV_2.4.7.1_Manager_2.15_armeabi.apk | ++------------------------------+--------------+------------------------------------------------------+ +| Intel x86 | >= 2.3 | OpenCV_2.4.7.1_Manager_2.15_x86.apk | ++------------------------------+--------------+------------------------------------------------------+ +| MIPS | >= 2.3 | OpenCV_2.4.7.1_Manager_2.15_mips.apk | ++------------------------------+--------------+------------------------------------------------------+ diff --git a/samples/cpp/CMakeLists.txt b/samples/cpp/CMakeLists.txt index 2aa71f047c4373c7129789329f308bf0c3037552..4b0bf011d9f8f5c5945dc7252ab730377b1942fb 100644 --- a/samples/cpp/CMakeLists.txt +++ b/samples/cpp/CMakeLists.txt @@ -29,6 +29,10 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND) ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/cudafilters/include") endif() + if(HAVE_opencv_ocl) + ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/ocl/include") + endif() + if(CMAKE_COMPILER_IS_GNUCXX AND NOT ENABLE_NOISY_WARNINGS) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-function") endif() @@ -56,6 +60,10 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND) target_link_libraries(${the_target} opencv_cudaarithm opencv_cudafilters) endif() + if(HAVE_opencv_ocl) + target_link_libraries(${the_target} opencv_ocl) + endif() + set_target_properties(${the_target} PROPERTIES OUTPUT_NAME "cpp-${sample_kind}-${name}" PROJECT_LABEL "(${sample_KIND}) ${name}") diff --git a/samples/cpp/bagofwords_classification.cpp b/samples/cpp/bagofwords_classification.cpp index b841b63f8b8f8d469d71874e70b9790f0f6a78f4..20ff1dcfbb8b9458b62e02f8e9866d75bdfbe793 100644 --- a/samples/cpp/bagofwords_classification.cpp +++ b/samples/cpp/bagofwords_classification.cpp @@ -1,8 +1,13 @@ +#include "opencv2/opencv_modules.hpp" #include "opencv2/highgui/highgui.hpp" #include "opencv2/imgproc/imgproc.hpp" #include "opencv2/features2d/features2d.hpp" #include "opencv2/nonfree/nonfree.hpp" #include "opencv2/ml/ml.hpp" +#ifdef HAVE_OPENCV_OCL +#define _OCL_SVM_ 1 //select whether using ocl::svm method or not, default is using +#include "opencv2/ocl/ocl.hpp" +#endif #include #include @@ -2373,9 +2378,15 @@ static void setSVMTrainAutoParams( CvParamGrid& c_grid, CvParamGrid& gamma_grid, degree_grid.step = 0; } +#if defined HAVE_OPENCV_OCL && _OCL_SVM_ +static void trainSVMClassifier( cv::ocl::CvSVM_OCL& svm, const SVMTrainParamsExt& svmParamsExt, const string& objClassName, VocData& vocData, + Ptr& bowExtractor, const Ptr& fdetector, + const string& resPath ) +#else static void trainSVMClassifier( CvSVM& svm, const SVMTrainParamsExt& svmParamsExt, const string& objClassName, VocData& vocData, Ptr& bowExtractor, const Ptr& fdetector, const string& resPath ) +#endif { /* first check if a previously trained svm for the current class has been saved to file */ string svmFilename = resPath + svmsDir + "/" + objClassName + ".xml.gz"; @@ -2448,9 +2459,15 @@ static void trainSVMClassifier( CvSVM& svm, const SVMTrainParamsExt& 
svmParamsEx } } +#if defined HAVE_OPENCV_OCL && _OCL_SVM_ +static void computeConfidences( cv::ocl::CvSVM_OCL& svm, const string& objClassName, VocData& vocData, + Ptr& bowExtractor, const Ptr& fdetector, + const string& resPath ) +#else static void computeConfidences( CvSVM& svm, const string& objClassName, VocData& vocData, Ptr& bowExtractor, const Ptr& fdetector, const string& resPath ) +#endif { cout << "*** CALCULATING CONFIDENCES FOR CLASS " << objClassName << " ***" << endl; cout << "CALCULATING BOW VECTORS FOR TEST SET OF " << objClassName << "..." << endl; @@ -2589,7 +2606,11 @@ int main(int argc, char** argv) for( size_t classIdx = 0; classIdx < objClasses.size(); ++classIdx ) { // Train a classifier on train dataset +#if defined HAVE_OPENCV_OCL && _OCL_SVM_ + cv::ocl::CvSVM_OCL svm; +#else CvSVM svm; +#endif trainSVMClassifier( svm, svmTrainParamsExt, objClasses[classIdx], vocData, bowExtractor, featureDetector, resPath ); diff --git a/samples/cpp/points_classifier.cpp b/samples/cpp/points_classifier.cpp index ecf8ae8a30e2f93784afa0cc42d2e6253e27994a..26858da886e1c9c04b6268fb219484803d1c1f8d 100644 --- a/samples/cpp/points_classifier.cpp +++ b/samples/cpp/points_classifier.cpp @@ -1,6 +1,12 @@ +#include "opencv2/opencv_modules.hpp" #include "opencv2/core/core.hpp" #include "opencv2/ml/ml.hpp" #include "opencv2/highgui/highgui.hpp" +#ifdef HAVE_OPENCV_OCL +#define _OCL_KNN_ 1 // select whether using ocl::KNN method or not, default is using +#define _OCL_SVM_ 1 // select whether using ocl::svm method or not, default is using +#include "opencv2/ocl/ocl.hpp" +#endif #include @@ -133,7 +139,14 @@ static void find_decision_boundary_KNN( int K ) prepare_train_data( trainSamples, trainClasses ); // learn classifier +#if defined HAVE_OPENCV_OCL && _OCL_KNN_ + cv::ocl::KNearestNeighbour knnClassifier; + Mat temp, result; + knnClassifier.train(trainSamples, trainClasses, temp, false, K); + cv::ocl::oclMat testSample_ocl, reslut_ocl; +#else CvKNearest knnClassifier( trainSamples, trainClasses, Mat(), false, K ); +#endif Mat testSample( 1, 2, CV_32FC1 ); for( int y = 0; y < img.rows; y += testStep ) @@ -142,9 +155,19 @@ static void find_decision_boundary_KNN( int K ) { testSample.at(0) = (float)x; testSample.at(1) = (float)y; +#if defined HAVE_OPENCV_OCL && _OCL_KNN_ + testSample_ocl.upload(testSample); + + knnClassifier.find_nearest(testSample_ocl, K, reslut_ocl); + + reslut_ocl.download(result); + int response = saturate_cast(result.at(0)); + circle(imgDst, Point(x, y), 1, classColors[response]); +#else int response = (int)knnClassifier.find_nearest( testSample, K ); circle( imgDst, Point(x,y), 1, classColors[response] ); +#endif } } } @@ -159,7 +182,11 @@ static void find_decision_boundary_SVM( CvSVMParams params ) prepare_train_data( trainSamples, trainClasses ); // learn classifier +#if defined HAVE_OPENCV_OCL && _OCL_SVM_ + cv::ocl::CvSVM_OCL svmClassifier(trainSamples, trainClasses, Mat(), Mat(), params); +#else CvSVM svmClassifier( trainSamples, trainClasses, Mat(), Mat(), params ); +#endif Mat testSample( 1, 2, CV_32FC1 ); for( int y = 0; y < img.rows; y += testStep ) @@ -178,7 +205,7 @@ static void find_decision_boundary_SVM( CvSVMParams params ) for( int i = 0; i < svmClassifier.get_support_vector_count(); i++ ) { const float* supportVector = svmClassifier.get_support_vector(i); - circle( imgDst, Point(supportVector[0],supportVector[1]), 5, Scalar(255,255,255), -1 ); + circle( imgDst, Point(saturate_cast(supportVector[0]),saturate_cast(supportVector[1])), 5, CV_RGB(255,255,255), 
-1 ); } } diff --git a/samples/ocl/facedetect.cpp b/samples/ocl/facedetect.cpp index 781efa066f526a47e2ac0ed8a5d739e3fb3c9346..b452ab8d8993fa0da16f4a9a225b8136b0f08a92 100644 --- a/samples/ocl/facedetect.cpp +++ b/samples/ocl/facedetect.cpp @@ -8,11 +8,16 @@ #include #include +#if defined(_MSC_VER) && (_MSC_VER >= 1700) + # include +#endif using namespace std; using namespace cv; #define LOOP_NUM 1 +///////////////////////////single-threading faces detecting/////////////////////////////// + const static Scalar colors[] = { CV_RGB(0,0,255), CV_RGB(0,128,255), CV_RGB(0,255,255), @@ -26,7 +31,7 @@ const static Scalar colors[] = { CV_RGB(0,0,255), int64 work_begin = 0; int64 work_end = 0; -string outputName; +string inputName, outputName, cascadeName; static void workBegin() { @@ -61,41 +66,17 @@ static void Draw(Mat& img, vector& faces, double scale); // Else if will return (total diff of each cpu and gpu rects covered pixels)/(total cpu rects covered pixels) double checkRectSimilarity(Size sz, vector& cpu_rst, vector& gpu_rst); -int main( int argc, const char** argv ) +static int facedetect_one_thread(bool useCPU, double scale ) { - const char* keys = - "{ h help | false | print help message }" - "{ i input | | specify input image }" - "{ t template | haarcascade_frontalface_alt.xml |" - " specify template file path }" - "{ c scale | 1.0 | scale image }" - "{ s use_cpu | false | use cpu or gpu to process the image }" - "{ o output | facedetect_output.jpg |" - " specify output image save path(only works when input is images) }"; - - CommandLineParser cmd(argc, argv, keys); - if (cmd.get("help")) - { - cout << "Usage : facedetect [options]" << endl; - cout << "Available options:" << endl; - cmd.printMessage(); - return EXIT_SUCCESS; - } - CvCapture* capture = 0; Mat frame, frameCopy0, frameCopy, image; - bool useCPU = cmd.get("s"); - string inputName = cmd.get("i"); - outputName = cmd.get("o"); - string cascadeName = cmd.get("t"); - double scale = cmd.get("c"); ocl::OclCascadeClassifier cascade; CascadeClassifier cpu_cascade; if( !cascade.load( cascadeName ) || !cpu_cascade.load(cascadeName) ) { - cout << "ERROR: Could not load classifier cascade" << endl; + cout << "ERROR: Could not load classifier cascade: " << cascadeName << endl; return EXIT_FAILURE; } @@ -186,9 +167,114 @@ int main( int argc, const char** argv ) } cvDestroyWindow("result"); + std::cout<< "single-threaded sample has finished" <= 1700) + +#define MAX_THREADS 10 + +static void detectFaces(std::string fileName) +{ + ocl::OclCascadeClassifier cascade; + if(!cascade.load(cascadeName)) + { + std::cout << "ERROR: Could not load classifier cascade: " << cascadeName << std::endl; + return; + } + + Mat img = imread(fileName, CV_LOAD_IMAGE_COLOR); + if (img.empty()) + { + std::cout << "cann't open file " + fileName < oclfaces; + cascade.detectMultiScale(d_img, oclfaces, 1.1, 3, 0|CV_HAAR_SCALE_IMAGE, Size(30, 30), Size(0, 0)); + + for(unsigned int i = 0; i threads; + for(int i = 0; i= 1 }"; + + CommandLineParser cmd(argc, argv, keys); + if (cmd.has("help")) + { + cout << "Usage : facedetect [options]" << endl; + cout << "Available options:" << endl; + cmd.printMessage(); + return EXIT_SUCCESS; + } + bool useCPU = cmd.get("s"); + inputName = cmd.get("i"); + outputName = cmd.get("o"); + cascadeName = cmd.get("t"); + double scale = cmd.get("c"); + int n = cmd.get("n"); + + if(n > 1) + { +#if defined(_MSC_VER) && (_MSC_VER >= 1700) + std::cout<<"multi-threaded sample is running" <& faces, ocl::OclCascadeClassifier& cascade, double scale)
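The multi-threaded path added to facedetect.cpp follows a simple pattern: spawn one std::thread per input image (capped by MAX_THREADS), let each worker run detectFaces() independently, then join them all before exiting. A minimal standalone sketch of that pattern, assuming only the C++11 standard library; the worker body is a placeholder print statement rather than the real ocl::OclCascadeClassifier::detectMultiScale call, and the file names are made up for illustration.

// Standalone sketch of the sample's thread-per-input structure.
#include <iostream>
#include <sstream>
#include <string>
#include <thread>
#include <vector>

static void detectFacesStub(const std::string& fileName)
{
    // The real sample loads the cascade, uploads the image to an oclMat and
    // runs detectMultiScale; here we only report which work item ran.
    std::cout << "processing " << fileName << std::endl;
}

int main()
{
    const int numThreads = 4; // the sample caps this with MAX_THREADS = 10

    std::vector<std::thread> workers;
    for (int i = 0; i < numThreads; ++i)
    {
        std::ostringstream name;
        name << "input_" << i << ".jpg"; // hypothetical input names
        workers.push_back(std::thread(detectFacesStub, name.str()));
    }

    // Join every worker, as the sample does before printing its final message.
    for (size_t i = 0; i < workers.size(); ++i)
        workers[i].join();

    return 0;
}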