From 94de7d3f3f6dbfda5c84458429b47765d8eee846 Mon Sep 17 00:00:00 2001 From: Vadim Pisarevsky Date: Tue, 16 Aug 2011 13:34:56 +0000 Subject: [PATCH] fixed crashes in parallel HOG detector; avoid use of new POPCNT instruction in ORB (to make it compatible with older CPUs). --- .../core/include/opencv2/core/internal.hpp | 13 +++++--- modules/features2d/src/brief.cpp | 33 +++---------------- modules/objdetect/src/hog.cpp | 18 ++++++---- 3 files changed, 24 insertions(+), 40 deletions(-) diff --git a/modules/core/include/opencv2/core/internal.hpp b/modules/core/include/opencv2/core/internal.hpp index 9826bf7e70..1cbc7b820a 100644 --- a/modules/core/include/opencv2/core/internal.hpp +++ b/modules/core/include/opencv2/core/internal.hpp @@ -180,6 +180,7 @@ CV_INLINE IppiSize ippiSize(int width, int height) } typedef tbb::concurrent_vector ConcurrentRectVector; + typedef tbb::concurrent_vector ConcurrentDoubleVector; } #else namespace cv @@ -201,11 +202,12 @@ CV_INLINE IppiSize ippiSize(int width, int height) #ifdef HAVE_THREADING_FRAMEWORK #include "threading_framework.hpp" - template - static void parallel_for( const BlockedRange& range, const Body& body ) - { - tf::parallel_for(range, body); - } + template + static void parallel_for( const BlockedRange& range, const Body& body ) + { + tf::parallel_for(range, body); + } + typedef tf::ConcurrentVector ConcurrentRectVector; #else template static inline @@ -214,6 +216,7 @@ CV_INLINE IppiSize ippiSize(int width, int height) body(range); } typedef std::vector ConcurrentRectVector; + typedef std::vector ConcurrentDoubleVector; #endif template static inline diff --git a/modules/features2d/src/brief.cpp b/modules/features2d/src/brief.cpp index 29b5148bf6..13d7ba8d39 100644 --- a/modules/features2d/src/brief.cpp +++ b/modules/features2d/src/brief.cpp @@ -108,11 +108,11 @@ HammingLUT::ResultType HammingLUT::operator()( const unsigned char* a, const uns Hamming::ResultType Hamming::operator()(const unsigned char* a, const unsigned char* b, int size) const { -#if __GNUC__ - ResultType result = 0; -#if CV_NEON + ResultType result; +#if defined __GNUC__ && CV_NEON if (CPU_HAS_NEON_FEATURE) { + result = 0; for (size_t i = 0; i < size; i += 16) { uint8x16_t A_vec = vld1q_u8 (a + i); @@ -131,32 +131,9 @@ Hamming::ResultType Hamming::operator()(const unsigned char* a, const unsigned c } } else -#endif - { - //for portability just use unsigned long -- and use the __builtin_popcountll (see docs for __builtin_popcountll) - typedef unsigned long long pop_t; - const size_t modulo = size % sizeof(pop_t); - const pop_t * a2 = reinterpret_cast (a); - const pop_t * b2 = reinterpret_cast (b); - const pop_t * a2_end = a2 + (size/sizeof(pop_t)); - - for (; a2 != a2_end; ++a2, ++b2) - result += __builtin_popcountll((*a2) ^ (*b2)); - - if (modulo) - { - //in the case where size is not divisible by sizeof(size_t) - //need to mask off the bits at the end - pop_t a_final=0,b_final=0; - memcpy(&a_final,a2,modulo); - memcpy(&b_final,b2,modulo); - result += __builtin_popcountll(a_final ^ b_final); - } - } - return result; -#else - return HammingLUT()(a,b,size); #endif + result = HammingLUT()(a,b,size); + return result; } BriefDescriptorExtractor::BriefDescriptorExtractor(int bytes) : diff --git a/modules/objdetect/src/hog.cpp b/modules/objdetect/src/hog.cpp index f2d32002ca..b1074ac6e4 100644 --- a/modules/objdetect/src/hog.cpp +++ b/modules/objdetect/src/hog.cpp @@ -942,7 +942,7 @@ struct HOGInvoker HOGInvoker( const HOGDescriptor* _hog, const Mat& _img, double _hitThreshold, Size _winStride, Size _padding, const double* _levelScale, ConcurrentRectVector* _vec, - vector* _weights=0, vector* _scales=0 ) + ConcurrentDoubleVector* _weights=0, ConcurrentDoubleVector* _scales=0 ) { hog = _hog; img = _img; @@ -1002,8 +1002,8 @@ struct HOGInvoker Size padding; const double* levelScale; ConcurrentRectVector* vec; - vector* weights; - vector* scales; + ConcurrentDoubleVector* weights; + ConcurrentDoubleVector* scales; }; @@ -1029,14 +1029,18 @@ void HOGDescriptor::detectMultiScale( levelScale.resize(levels); ConcurrentRectVector allCandidates; - + ConcurrentDoubleVector tempScales; + ConcurrentDoubleVector tempWeights; vector foundScales; parallel_for(BlockedRange(0, (int)levelScale.size()), - HOGInvoker(this, img, hitThreshold, winStride, padding, &levelScale[0], &allCandidates, &foundWeights, &foundScales)); + HOGInvoker(this, img, hitThreshold, winStride, padding, &levelScale[0], &allCandidates, &tempWeights, &tempScales)); - foundLocations.resize(allCandidates.size()); - std::copy(allCandidates.begin(), allCandidates.end(), foundLocations.begin()); + std::copy(tempScales.begin(), tempScales.end(), back_inserter(foundScales)); + foundLocations.clear(); + std::copy(allCandidates.begin(), allCandidates.end(), back_inserter(foundLocations)); + foundWeights.clear(); + std::copy(tempWeights.begin(), tempWeights.end(), back_inserter(foundWeights)); if ( useMeanshiftGrouping ) { -- GitLab