提交 1ecc7659 编写于 作者: P peng xiao

Merge branch '2.4' of https://github.com/Itseez/opencv into 2.4_pyrup_fix

...@@ -23,7 +23,8 @@ if(WIN32 AND NOT MINGW) ...@@ -23,7 +23,8 @@ if(WIN32 AND NOT MINGW)
add_definitions(-DJAS_WIN_MSVC_BUILD) add_definitions(-DJAS_WIN_MSVC_BUILD)
endif(WIN32 AND NOT MINGW) endif(WIN32 AND NOT MINGW)
ocv_warnings_disable(CMAKE_C_FLAGS -Wno-implicit-function-declaration -Wno-uninitialized -Wmissing-prototypes -Wmissing-declarations -Wunused -Wshadow -Wsign-compare) ocv_warnings_disable(CMAKE_C_FLAGS -Wno-implicit-function-declaration -Wno-uninitialized -Wmissing-prototypes
-Wno-unused-but-set-parameter -Wmissing-declarations -Wunused -Wshadow -Wsign-compare)
ocv_warnings_disable(CMAKE_C_FLAGS -Wunused-parameter) # clang ocv_warnings_disable(CMAKE_C_FLAGS -Wunused-parameter) # clang
ocv_warnings_disable(CMAKE_C_FLAGS /wd4013 /wd4018 /wd4101 /wd4244 /wd4267 /wd4715) # vs2005 ocv_warnings_disable(CMAKE_C_FLAGS /wd4013 /wd4018 /wd4101 /wd4244 /wd4267 /wd4715) # vs2005
......
...@@ -594,12 +594,15 @@ endif() ...@@ -594,12 +594,15 @@ endif()
status("") status("")
status(" GUI: ") status(" GUI: ")
if(HAVE_QT) if(HAVE_QT5)
status(" QT 5.x:" HAVE_QT THEN "YES (ver ${Qt5Core_VERSION_STRING})" ELSE NO)
status(" QT OpenGL support:" HAVE_QT_OPENGL THEN "YES (${Qt5OpenGL_LIBRARIES} ${Qt5OpenGL_VERSION_STRING})" ELSE NO)
elseif(HAVE_QT)
status(" QT 4.x:" HAVE_QT THEN "YES (ver ${QT_VERSION_MAJOR}.${QT_VERSION_MINOR}.${QT_VERSION_PATCH} ${QT_EDITION})" ELSE NO) status(" QT 4.x:" HAVE_QT THEN "YES (ver ${QT_VERSION_MAJOR}.${QT_VERSION_MINOR}.${QT_VERSION_PATCH} ${QT_EDITION})" ELSE NO)
status(" QT OpenGL support:" HAVE_QT_OPENGL THEN "YES (${QT_QTOPENGL_LIBRARY})" ELSE NO) status(" QT OpenGL support:" HAVE_QT_OPENGL THEN "YES (${QT_QTOPENGL_LIBRARY})" ELSE NO)
else() else()
if(DEFINED WITH_QT) if(DEFINED WITH_QT)
status(" QT 4.x:" NO) status(" QT:" NO)
endif() endif()
if(DEFINED WITH_WIN32UI) if(DEFINED WITH_WIN32UI)
status(" Win32 UI:" HAVE_WIN32UI THEN YES ELSE NO) status(" Win32 UI:" HAVE_WIN32UI THEN YES ELSE NO)
......
...@@ -176,7 +176,8 @@ macro(android_get_compatible_target VAR) ...@@ -176,7 +176,8 @@ macro(android_get_compatible_target VAR)
endmacro() endmacro()
unset(__android_project_chain CACHE) unset(__android_project_chain CACHE)
#add_android_project(target_name ${path} NATIVE_DEPS opencv_core LIBRARY_DEPS ${OpenCV_BINARY_DIR} SDK_TARGET 11)
# add_android_project(target_name ${path} NATIVE_DEPS opencv_core LIBRARY_DEPS ${OpenCV_BINARY_DIR} SDK_TARGET 11)
macro(add_android_project target path) macro(add_android_project target path)
# parse arguments # parse arguments
set(android_proj_arglist NATIVE_DEPS LIBRARY_DEPS SDK_TARGET IGNORE_JAVA IGNORE_MANIFEST) set(android_proj_arglist NATIVE_DEPS LIBRARY_DEPS SDK_TARGET IGNORE_JAVA IGNORE_MANIFEST)
...@@ -212,6 +213,16 @@ macro(add_android_project target path) ...@@ -212,6 +213,16 @@ macro(add_android_project target path)
ocv_check_dependencies(${android_proj_NATIVE_DEPS} opencv_java) ocv_check_dependencies(${android_proj_NATIVE_DEPS} opencv_java)
endif() endif()
if(EXISTS "${path}/jni/Android.mk" )
# find if native_app_glue is used
file(STRINGS "${path}/jni/Android.mk" NATIVE_APP_GLUE REGEX ".*(call import-module,android/native_app_glue)" )
if(NATIVE_APP_GLUE)
if(ANDROID_NATIVE_API_LEVEL LESS 9 OR NOT EXISTS "${ANDROID_NDK}/sources/android/native_app_glue")
set(OCV_DEPENDENCIES_FOUND FALSE)
endif()
endif()
endif()
if(OCV_DEPENDENCIES_FOUND AND android_proj_sdk_target AND ANDROID_EXECUTABLE AND ANT_EXECUTABLE AND ANDROID_TOOLS_Pkg_Revision GREATER 13 AND EXISTS "${path}/${ANDROID_MANIFEST_FILE}") if(OCV_DEPENDENCIES_FOUND AND android_proj_sdk_target AND ANDROID_EXECUTABLE AND ANT_EXECUTABLE AND ANDROID_TOOLS_Pkg_Revision GREATER 13 AND EXISTS "${path}/${ANDROID_MANIFEST_FILE}")
project(${target}) project(${target})
...@@ -268,9 +279,6 @@ macro(add_android_project target path) ...@@ -268,9 +279,6 @@ macro(add_android_project target path)
file(STRINGS "${path}/jni/Android.mk" JNI_LIB_NAME REGEX "LOCAL_MODULE[ ]*:=[ ]*.*" ) file(STRINGS "${path}/jni/Android.mk" JNI_LIB_NAME REGEX "LOCAL_MODULE[ ]*:=[ ]*.*" )
string(REGEX REPLACE "LOCAL_MODULE[ ]*:=[ ]*([a-zA-Z_][a-zA-Z_0-9]*)[ ]*" "\\1" JNI_LIB_NAME "${JNI_LIB_NAME}") string(REGEX REPLACE "LOCAL_MODULE[ ]*:=[ ]*([a-zA-Z_][a-zA-Z_0-9]*)[ ]*" "\\1" JNI_LIB_NAME "${JNI_LIB_NAME}")
# find using of native app glue to determine native activity
file(STRINGS "${path}/jni/Android.mk" NATIVE_APP_GLUE REGEX ".*(call import-module,android/native_app_glue)" )
if(JNI_LIB_NAME) if(JNI_LIB_NAME)
ocv_include_modules_recurse(${android_proj_NATIVE_DEPS}) ocv_include_modules_recurse(${android_proj_NATIVE_DEPS})
ocv_include_directories("${path}/jni") ocv_include_directories("${path}/jni")
...@@ -291,9 +299,9 @@ macro(add_android_project target path) ...@@ -291,9 +299,9 @@ macro(add_android_project target path)
) )
get_target_property(android_proj_jni_location "${JNI_LIB_NAME}" LOCATION) get_target_property(android_proj_jni_location "${JNI_LIB_NAME}" LOCATION)
if (NOT (CMAKE_BUILD_TYPE MATCHES "debug")) if (NOT (CMAKE_BUILD_TYPE MATCHES "debug"))
add_custom_command(TARGET ${JNI_LIB_NAME} POST_BUILD COMMAND ${CMAKE_STRIP} --strip-unneeded "${android_proj_jni_location}") add_custom_command(TARGET ${JNI_LIB_NAME} POST_BUILD COMMAND ${CMAKE_STRIP} --strip-unneeded "${android_proj_jni_location}")
endif() endif()
endif() endif()
endif() endif()
......
...@@ -101,7 +101,7 @@ endif() ...@@ -101,7 +101,7 @@ endif()
if(MSVC64 OR MINGW64) if(MSVC64 OR MINGW64)
set(X86_64 1) set(X86_64 1)
elseif(MSVC AND NOT CMAKE_CROSSCOMPILING) elseif(MINGW OR (MSVC AND NOT CMAKE_CROSSCOMPILING))
set(X86 1) set(X86 1)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
set(X86_64 1) set(X86_64 1)
......
...@@ -13,12 +13,31 @@ if(WITH_WIN32UI) ...@@ -13,12 +13,31 @@ if(WITH_WIN32UI)
endif(WITH_WIN32UI) endif(WITH_WIN32UI)
# --- QT4 --- # --- QT4 ---
ocv_clear_vars(HAVE_QT) ocv_clear_vars(HAVE_QT HAVE_QT5)
if(WITH_QT) if(WITH_QT)
find_package(Qt4) if(NOT CMAKE_VERSION VERSION_LESS 2.8.3 AND NOT WITH_QT EQUAL 4)
if(QT4_FOUND) find_package(Qt5Core)
set(HAVE_QT TRUE) find_package(Qt5Gui)
add_definitions(-DHAVE_QT) # We need to define the macro this way, using cvconfig.h does not work find_package(Qt5Widgets)
find_package(Qt5Test)
find_package(Qt5Concurrent)
if(Qt5Core_FOUND AND Qt5Gui_FOUND AND Qt5Widgets_FOUND AND Qt5Test_FOUND AND Qt5Concurrent_FOUND)
set(HAVE_QT5 ON)
set(HAVE_QT ON)
add_definitions(-DHAVE_QT)
find_package(Qt5OpenGL)
if(Qt5OpenGL_FOUND)
set(QT_QTOPENGL_FOUND ON)
endif()
endif()
endif()
if(NOT HAVE_QT)
find_package(Qt4)
if(QT4_FOUND)
set(HAVE_QT TRUE)
add_definitions(-DHAVE_QT) # We need to define the macro this way, using cvconfig.h does not work
endif()
endif() endif()
endif() endif()
......
...@@ -1477,6 +1477,6 @@ The function reconstructs 3-dimensional points (in homogeneous coordinates) by u ...@@ -1477,6 +1477,6 @@ The function reconstructs 3-dimensional points (in homogeneous coordinates) by u
.. [HH08] Hirschmuller, H. Stereo Processing by Semiglobal Matching and Mutual Information, PAMI(30), No. 2, February 2008, pp. 328-341. .. [HH08] Hirschmuller, H. Stereo Processing by Semiglobal Matching and Mutual Information, PAMI(30), No. 2, February 2008, pp. 328-341.
.. [Slabaugh] Slabaugh, G.G. Computing Euler angles from a rotation matrix. http://gregslabaugh.name/publications/euler.pdf .. [Slabaugh] Slabaugh, G.G. Computing Euler angles from a rotation matrix. http://www.soi.city.ac.uk/~sbbh653/publications/euler.pdf (verified: 2013-04-15)
.. [Zhang2000] Z. Zhang. A Flexible New Technique for Camera Calibration. IEEE Transactions on Pattern Analysis and Machine Intelligence, 22(11):1330-1334, 2000. .. [Zhang2000] Z. Zhang. A Flexible New Technique for Camera Calibration. IEEE Transactions on Pattern Analysis and Machine Intelligence, 22(11):1330-1334, 2000.
...@@ -2855,8 +2855,9 @@ PCA& PCA::operator()(InputArray _data, InputArray __mean, int flags, int maxComp ...@@ -2855,8 +2855,9 @@ PCA& PCA::operator()(InputArray _data, InputArray __mean, int flags, int maxComp
if( _mean.data ) if( _mean.data )
{ {
CV_Assert( _mean.size() == mean_sz ); CV_Assert( _mean.size() == mean_sz );
_mean.convertTo(mean, ctype); _mean.convertTo(mean, ctype);
covar_flags |= CV_COVAR_USE_AVG;
} }
calcCovarMatrix( data, covar, mean, covar_flags, ctype ); calcCovarMatrix( data, covar, mean, covar_flags, ctype );
......
...@@ -42,7 +42,6 @@ template <typename Distance> ...@@ -42,7 +42,6 @@ template <typename Distance>
void find_nearest(const Matrix<typename Distance::ElementType>& dataset, typename Distance::ElementType* query, int* matches, int nn, void find_nearest(const Matrix<typename Distance::ElementType>& dataset, typename Distance::ElementType* query, int* matches, int nn,
int skip = 0, Distance distance = Distance()) int skip = 0, Distance distance = Distance())
{ {
typedef typename Distance::ElementType ElementType;
typedef typename Distance::ResultType DistanceType; typedef typename Distance::ResultType DistanceType;
int n = nn + skip; int n = nn + skip;
......
...@@ -70,7 +70,26 @@ set(highgui_srcs ...@@ -70,7 +70,26 @@ set(highgui_srcs
file(GLOB highgui_ext_hdrs "include/opencv2/${name}/*.hpp" "include/opencv2/${name}/*.h") file(GLOB highgui_ext_hdrs "include/opencv2/${name}/*.hpp" "include/opencv2/${name}/*.h")
if(HAVE_QT) if(HAVE_QT5)
set(CMAKE_AUTOMOC ON)
set(CMAKE_INCLUDE_CURRENT_DIR ON)
QT5_ADD_RESOURCES(_RCC_OUTFILES src/window_QT.qrc)
list(APPEND highgui_srcs src/window_QT.cpp src/window_QT.h ${_RCC_OUTFILES})
foreach(dt5_dep Core Gui Widgets Test Concurrent)
add_definitions(${Qt5${dt5_dep}_DEFINITIONS})
include_directories(${Qt5${dt5_dep}_INCLUDE_DIRS})
list(APPEND HIGHGUI_LIBRARIES ${Qt5${dt5_dep}_LIBRARIES})
endforeach()
if(HAVE_QT_OPENGL)
add_definitions(${Qt5OpenGL_DEFINITIONS})
include_directories(${Qt5OpenGL_INCLUDE_DIRS})
list(APPEND HIGHGUI_LIBRARIES ${Qt5OpenGL_LIBRARIES})
endif()
elseif(HAVE_QT)
if (HAVE_QT_OPENGL) if (HAVE_QT_OPENGL)
set(QT_USE_QTOPENGL TRUE) set(QT_USE_QTOPENGL TRUE)
endif() endif()
......
...@@ -1665,6 +1665,17 @@ static int icvSetPropertyCAM_V4L(CvCaptureCAM_V4L* capture, int property_id, dou ...@@ -1665,6 +1665,17 @@ static int icvSetPropertyCAM_V4L(CvCaptureCAM_V4L* capture, int property_id, dou
width = height = 0; width = height = 0;
} }
break; break;
case CV_CAP_PROP_FPS:
struct v4l2_streamparm setfps;
memset (&setfps, 0, sizeof(struct v4l2_streamparm));
setfps.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
setfps.parm.capture.timeperframe.numerator = 1;
setfps.parm.capture.timeperframe.denominator = value;
if (xioctl (capture->deviceHandle, VIDIOC_S_PARM, &setfps) < 0){
fprintf(stderr, "HIGHGUI ERROR: V4L: Unable to set camera FPS\n");
retval=0;
}
break;
default: default:
retval = icvSetControl(capture, property_id, value); retval = icvSetControl(capture, property_id, value);
} }
......
...@@ -52,6 +52,11 @@ ...@@ -52,6 +52,11 @@
#include <stdio.h> #include <stdio.h>
#include <setjmp.h> #include <setjmp.h>
// the following defines are a hack to avoid multiple problems with frame pointer handling and setjmp
// see http://gcc.gnu.org/ml/gcc/2011-10/msg00324.html for some details
#define mingw_getsp(...) 0
#define __builtin_frame_address(...) 0
#ifdef WIN32 #ifdef WIN32
#define XMD_H // prevent redefinition of INT32 #define XMD_H // prevent redefinition of INT32
......
...@@ -73,6 +73,11 @@ ...@@ -73,6 +73,11 @@
#pragma warning( disable: 4611 ) #pragma warning( disable: 4611 )
#endif #endif
// the following defines are a hack to avoid multiple problems with frame pointer handling and setjmp
// see http://gcc.gnu.org/ml/gcc/2011-10/msg00324.html for some details
#define mingw_getsp(...) 0
#define __builtin_frame_address(...) 0
namespace cv namespace cv
{ {
......
...@@ -48,13 +48,13 @@ ...@@ -48,13 +48,13 @@
#endif #endif
#include <QAbstractEventDispatcher> #include <QAbstractEventDispatcher>
#include <QtGui/QApplication> #include <QApplication>
#include <QFile> #include <QFile>
#include <QPushButton> #include <QPushButton>
#include <QtGui/QGraphicsView> #include <QGraphicsView>
#include <QSizePolicy> #include <QSizePolicy>
#include <QInputDialog> #include <QInputDialog>
#include <QtGui/QBoxLayout> #include <QBoxLayout>
#include <QSettings> #include <QSettings>
#include <qtimer.h> #include <qtimer.h>
#include <QtConcurrentRun> #include <QtConcurrentRun>
...@@ -78,7 +78,7 @@ ...@@ -78,7 +78,7 @@
#include <QRadioButton> #include <QRadioButton>
#include <QButtonGroup> #include <QButtonGroup>
#include <QMenu> #include <QMenu>
#include <QtTest/QTest> #include <QTest>
//start private enum //start private enum
enum { CV_MODE_NORMAL = 0, CV_MODE_OPENGL = 1 }; enum { CV_MODE_NORMAL = 0, CV_MODE_OPENGL = 1 };
......
...@@ -406,7 +406,7 @@ Finds the convex hull of a point set. ...@@ -406,7 +406,7 @@ Finds the convex hull of a point set.
:param hull_storage: Output memory storage in the old API (``cvConvexHull2`` returns a sequence containing the convex hull points or their indices). :param hull_storage: Output memory storage in the old API (``cvConvexHull2`` returns a sequence containing the convex hull points or their indices).
:param clockwise: Orientation flag. If it is true, the output convex hull is oriented clockwise. Otherwise, it is oriented counter-clockwise. The usual screen coordinate system is assumed so that the origin is at the top-left corner, x axis is oriented to the right, and y axis is oriented downwards. :param clockwise: Orientation flag. If it is true, the output convex hull is oriented clockwise. Otherwise, it is oriented counter-clockwise. The assumed coordinate system has its X axis pointing to the right, and its Y axis pointing upwards.
:param orientation: Convex hull orientation parameter in the old API, ``CV_CLOCKWISE`` or ``CV_COUNTERCLOCKWISE``. :param orientation: Convex hull orientation parameter in the old API, ``CV_CLOCKWISE`` or ``CV_COUNTERCLOCKWISE``.
......
...@@ -93,7 +93,6 @@ icvFloodFill_CnIR( uchar* pImage, int step, CvSize roi, CvPoint seed, ...@@ -93,7 +93,6 @@ icvFloodFill_CnIR( uchar* pImage, int step, CvSize roi, CvPoint seed,
_Tp newVal, CvConnectedComp* region, int flags, _Tp newVal, CvConnectedComp* region, int flags,
std::vector<CvFFillSegment>* buffer ) std::vector<CvFFillSegment>* buffer )
{ {
typedef typename cv::DataType<_Tp>::channel_type _CTp;
_Tp* img = (_Tp*)(pImage + step * seed.y); _Tp* img = (_Tp*)(pImage + step * seed.y);
int i, L, R; int i, L, R;
int area = 0; int area = 0;
...@@ -252,7 +251,6 @@ icvFloodFillGrad_CnIR( uchar* pImage, int step, uchar* pMask, int maskStep, ...@@ -252,7 +251,6 @@ icvFloodFillGrad_CnIR( uchar* pImage, int step, uchar* pMask, int maskStep,
CvConnectedComp* region, int flags, CvConnectedComp* region, int flags,
std::vector<CvFFillSegment>* buffer ) std::vector<CvFFillSegment>* buffer )
{ {
typedef typename cv::DataType<_Tp>::channel_type _CTp;
_Tp* img = (_Tp*)(pImage + step*seed.y); _Tp* img = (_Tp*)(pImage + step*seed.y);
uchar* mask = (pMask += maskStep + 1) + maskStep*seed.y; uchar* mask = (pMask += maskStep + 1) + maskStep*seed.y;
int i, L, R; int i, L, R;
......
...@@ -1219,8 +1219,6 @@ static void resizeGeneric_( const Mat& src, Mat& dst, ...@@ -1219,8 +1219,6 @@ static void resizeGeneric_( const Mat& src, Mat& dst,
const int* yofs, const void* _beta, const int* yofs, const void* _beta,
int xmin, int xmax, int ksize ) int xmin, int xmax, int ksize )
{ {
typedef typename HResize::value_type T;
typedef typename HResize::buf_type WT;
typedef typename HResize::alpha_type AT; typedef typename HResize::alpha_type AT;
const AT* beta = (const AT*)_beta; const AT* beta = (const AT*)_beta;
......
...@@ -61,7 +61,7 @@ protected: ...@@ -61,7 +61,7 @@ protected:
{ {
int ObjNum = m_TrackList.GetBlobNum(); int ObjNum = m_TrackList.GetBlobNum();
int i; int i;
char video_name[1024]; char video_name[1024+1];
char* struct_name = NULL; char* struct_name = NULL;
CvFileStorage* storage = cvOpenFileStorage(m_pFileName,NULL,CV_STORAGE_WRITE_TEXT); CvFileStorage* storage = cvOpenFileStorage(m_pFileName,NULL,CV_STORAGE_WRITE_TEXT);
......
...@@ -117,10 +117,10 @@ class CvKDTreeWrap : public CvFeatureTree { ...@@ -117,10 +117,10 @@ class CvKDTreeWrap : public CvFeatureTree {
CvMat* results) { CvMat* results) {
int rn = results->rows * results->cols; int rn = results->rows * results->cols;
std::vector<int> inbounds; std::vector<int> inbounds;
dispatch_cvtype(mat, ((__treetype*)data)-> assert(CV_MAT_DEPTH(mat->type) == CV_32F || CV_MAT_DEPTH(mat->type) == CV_64F);
find_ortho_range((typename __treetype::scalar_type*)bounds_min->data.ptr, ((__treetype*)data)->find_ortho_range((typename __treetype::scalar_type*)bounds_min->data.ptr,
(typename __treetype::scalar_type*)bounds_max->data.ptr, (typename __treetype::scalar_type*)bounds_max->data.ptr,
inbounds)); inbounds);
std::copy(inbounds.begin(), std::copy(inbounds.begin(),
inbounds.begin() + std::min((int)inbounds.size(), rn), inbounds.begin() + std::min((int)inbounds.size(), rn),
(int*) results->data.ptr); (int*) results->data.ptr);
......
...@@ -802,6 +802,44 @@ namespace cv ...@@ -802,6 +802,44 @@ namespace cv
int minNeighbors, int flags, CvSize minSize = cvSize(0, 0), CvSize maxSize = cvSize(0, 0)); int minNeighbors, int flags, CvSize minSize = cvSize(0, 0), CvSize maxSize = cvSize(0, 0));
}; };
// Cascade classifier for oclMat input that derives from cv::CascadeClassifier
// and keeps per-instance state (sizes, scale tables, intermediate oclMat
// images and an opaque buffer pointer) as members.
// NOTE(review): the "Buf" suffix and the `initialized` flag suggest the state
// is cached between detectMultiScale() calls -- confirm against the
// implementation, which is not part of this declaration.
class CV_EXPORTS OclCascadeClassifierBuf : public cv::CascadeClassifier
{
public:
// Only m_flags, initialized, m_scaleFactor and buffers are set here; the
// remaining scalar members (m_rows, m_cols, m_loopcount, m_nodenum,
// findBiggestObject) are left uninitialized by this constructor.
OclCascadeClassifierBuf() :
m_flags(0), initialized(false), m_scaleFactor(0), buffers(NULL) {}
// The destructor body is empty: it does not call release().
// NOTE(review): presumably callers must invoke release() themselves to free
// whatever `buffers` points to -- verify in the .cpp.
~OclCascadeClassifierBuf() {}
// Runs detection on `image` and writes the resulting rectangles to `faces`.
// Defaults: scaleFactor 1.1, minNeighbors 3, flags 0, empty min/max sizes.
void detectMultiScale(oclMat &image, CV_OUT std::vector<cv::Rect>& faces,
double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0,
Size minSize = Size(), Size maxSize = Size());
// Explicit cleanup entry point (implementation not visible here).
void release();
private:
// Private helpers; their definitions live outside this declaration.
// NOTE(review): names suggest Init() sets up state for a given image size and
// scale factor, and the Create*Bufs() helpers allocate the storage behind
// `buffers` -- confirm against the implementation.
void Init(const int rows, const int cols, double scaleFactor, int flags,
const int outputsz, const size_t localThreads[],
CvSize minSize, CvSize maxSize);
void CreateBaseBufs(const int datasize, const int totalclassifier, const int flags, const int outputsz);
void CreateFactorRelatedBufs(const int rows, const int cols, const int flags,
const double scaleFactor, const size_t localThreads[],
CvSize minSize, CvSize maxSize);
// Fills `faces` from a candidate rectangle list and per-rectangle weights.
void GenResult(CV_OUT std::vector<cv::Rect>& faces, const std::vector<cv::Rect> &rectList, const std::vector<int> &rweights);
// Cached parameters.
// NOTE(review): presumably these mirror the arguments of the most recent
// Init()/detectMultiScale() call -- confirm.
int m_rows;
int m_cols;
int m_flags;
int m_loopcount;
int m_nodenum;
bool findBiggestObject;
bool initialized; // false after construction
double m_scaleFactor;
Size m_minSize;
Size m_maxSize;
// Per-scale window sizes and scale factors.
// NOTE(review): assumed to be parallel arrays indexed by scale level -- confirm.
vector<CvSize> sizev;
vector<float> scalev;
// Intermediate device images.
// NOTE(review): names suggest resized image, integral sum and squared sum.
oclMat gimg1, gsum, gsqsum;
// Opaque pointer to implementation-defined buffer state; NULL after construction.
void * buffers;
};
/////////////////////////////// Pyramid ///////////////////////////////////// /////////////////////////////// Pyramid /////////////////////////////////////
...@@ -1731,6 +1769,44 @@ namespace cv ...@@ -1731,6 +1769,44 @@ namespace cv
std::vector<oclMat> datas; std::vector<oclMat> datas;
oclMat out; oclMat out;
}; };
// Stereo matcher operating on oclMat images: operator() takes a left and a
// right image and writes a disparity map.
// NOTE(review): "BP" presumably stands for belief propagation (constant-space
// variant) -- confirm against the implementation and module documentation.
class CV_EXPORTS StereoConstantSpaceBP
{
public:
// Default values used by the first constructor's default arguments.
enum { DEFAULT_NDISP = 128 };
enum { DEFAULT_ITERS = 8 };
enum { DEFAULT_LEVELS = 4 };
enum { DEFAULT_NR_PLANE = 4 };
// Writes suggested ndisp/iters/levels/nr_plane values through the reference
// parameters for an image of the given width and height.
static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels, int &nr_plane);
// Constructor with all-default arguments; `explicit` prevents implicit
// conversion from int.
explicit StereoConstantSpaceBP(
int ndisp = DEFAULT_NDISP,
int iters = DEFAULT_ITERS,
int levels = DEFAULT_LEVELS,
int nr_plane = DEFAULT_NR_PLANE,
int msg_type = CV_32F);
// Constructor that additionally takes the cost-model parameters; only
// min_disp_th and msg_type have defaults.
StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane,
float max_data_term, float data_weight, float max_disc_term, float disc_single_jump,
int min_disp_th = 0,
int msg_type = CV_32F);
// Computes `disparity` from the `left`/`right` image pair.
void operator()(const oclMat &left, const oclMat &right, oclMat &disparity);
// Public tunables; the names match the constructor parameters.
// NOTE(review): exact semantics and units of the cost terms are not visible
// in this declaration -- consult the implementation before changing them.
int ndisp;
int iters;
int levels;
int nr_plane;
float max_data_term;
float data_weight;
float max_disc_term;
float disc_single_jump;
int min_disp_th;
int msg_type;
// Not settable through either constructor's parameter list.
bool use_local_init_data_cost;
private:
// Device-side matrices owned by the instance.
// NOTE(review): presumably working buffers reused across operator() calls
// (u/d/l/r look like per-direction message buffers) -- confirm.
oclMat u[2], d[2], l[2], r[2];
oclMat disp_selected_pyr[2];
oclMat data_cost;
oclMat data_cost_selected;
oclMat temp;
oclMat out;
};
} }
} }
#if defined _MSC_VER && _MSC_VER >= 1200 #if defined _MSC_VER && _MSC_VER >= 1200
......
...@@ -65,12 +65,12 @@ namespace cv ...@@ -65,12 +65,12 @@ namespace cv
static const int OPT_SIZE = 100; static const int OPT_SIZE = 100;
static const char * T_ARR [] = { static const char * T_ARR [] = {
"uchar", "uchar",
"char", "char",
"ushort", "ushort",
"short", "short",
"int", "int",
"float -D T_FLOAT", "float -D T_FLOAT",
"double"}; "double"};
template < int BLOCK_SIZE, int MAX_DESC_LEN/*, typename Mask*/ > template < int BLOCK_SIZE, int MAX_DESC_LEN/*, typename Mask*/ >
...@@ -86,8 +86,8 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat ...@@ -86,8 +86,8 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat
vector< pair<size_t, const void *> > args; vector< pair<size_t, const void *> > args;
char opt [OPT_SIZE] = ""; char opt [OPT_SIZE] = "";
sprintf(opt, sprintf(opt,
"-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d", "-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d",
T_ARR[query.depth()], distType, block_size, m_size); T_ARR[query.depth()], distType, block_size, m_size);
if(globalSize[0] != 0) if(globalSize[0] != 0)
...@@ -128,8 +128,8 @@ void match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/, ...@@ -128,8 +128,8 @@ void match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
vector< pair<size_t, const void *> > args; vector< pair<size_t, const void *> > args;
char opt [OPT_SIZE] = ""; char opt [OPT_SIZE] = "";
sprintf(opt, sprintf(opt,
"-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d", "-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d",
T_ARR[query.depth()], distType, block_size); T_ARR[query.depth()], distType, block_size);
if(globalSize[0] != 0) if(globalSize[0] != 0)
{ {
...@@ -171,8 +171,8 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, float maxDist ...@@ -171,8 +171,8 @@ void matchUnrolledCached(const oclMat &query, const oclMat &train, float maxDist
vector< pair<size_t, const void *> > args; vector< pair<size_t, const void *> > args;
char opt [OPT_SIZE] = ""; char opt [OPT_SIZE] = "";
sprintf(opt, sprintf(opt,
"-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d", "-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d",
T_ARR[query.depth()], distType, block_size, m_size); T_ARR[query.depth()], distType, block_size, m_size);
if(globalSize[0] != 0) if(globalSize[0] != 0)
...@@ -212,8 +212,8 @@ void radius_match(const oclMat &query, const oclMat &train, float maxDistance, c ...@@ -212,8 +212,8 @@ void radius_match(const oclMat &query, const oclMat &train, float maxDistance, c
vector< pair<size_t, const void *> > args; vector< pair<size_t, const void *> > args;
char opt [OPT_SIZE] = ""; char opt [OPT_SIZE] = "";
sprintf(opt, sprintf(opt,
"-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d", "-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d",
T_ARR[query.depth()], distType, block_size); T_ARR[query.depth()], distType, block_size);
if(globalSize[0] != 0) if(globalSize[0] != 0)
...@@ -312,8 +312,8 @@ void knn_matchUnrolledCached(const oclMat &query, const oclMat &train, const ocl ...@@ -312,8 +312,8 @@ void knn_matchUnrolledCached(const oclMat &query, const oclMat &train, const ocl
vector< pair<size_t, const void *> > args; vector< pair<size_t, const void *> > args;
char opt [OPT_SIZE] = ""; char opt [OPT_SIZE] = "";
sprintf(opt, sprintf(opt,
"-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d", "-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d",
T_ARR[query.depth()], distType, block_size, m_size); T_ARR[query.depth()], distType, block_size, m_size);
if(globalSize[0] != 0) if(globalSize[0] != 0)
...@@ -348,8 +348,8 @@ void knn_match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/, ...@@ -348,8 +348,8 @@ void knn_match(const oclMat &query, const oclMat &train, const oclMat &/*mask*/,
vector< pair<size_t, const void *> > args; vector< pair<size_t, const void *> > args;
char opt [OPT_SIZE] = ""; char opt [OPT_SIZE] = "";
sprintf(opt, sprintf(opt,
"-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d", "-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d",
T_ARR[query.depth()], distType, block_size); T_ARR[query.depth()], distType, block_size);
if(globalSize[0] != 0) if(globalSize[0] != 0)
...@@ -384,8 +384,8 @@ void calcDistanceUnrolled(const oclMat &query, const oclMat &train, const oclMat ...@@ -384,8 +384,8 @@ void calcDistanceUnrolled(const oclMat &query, const oclMat &train, const oclMat
vector< pair<size_t, const void *> > args; vector< pair<size_t, const void *> > args;
char opt [OPT_SIZE] = ""; char opt [OPT_SIZE] = "";
sprintf(opt, sprintf(opt,
"-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d", "-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d -D MAX_DESC_LEN=%d",
T_ARR[query.depth()], distType, block_size, m_size); T_ARR[query.depth()], distType, block_size, m_size);
if(globalSize[0] != 0) if(globalSize[0] != 0)
...@@ -420,8 +420,8 @@ void calcDistance(const oclMat &query, const oclMat &train, const oclMat &/*mask ...@@ -420,8 +420,8 @@ void calcDistance(const oclMat &query, const oclMat &train, const oclMat &/*mask
vector< pair<size_t, const void *> > args; vector< pair<size_t, const void *> > args;
char opt [OPT_SIZE] = ""; char opt [OPT_SIZE] = "";
sprintf(opt, sprintf(opt,
"-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d", "-D T=%s -D DIST_TYPE=%d -D BLOCK_SIZE=%d",
T_ARR[query.depth()], distType, block_size); T_ARR[query.depth()], distType, block_size);
if(globalSize[0] != 0) if(globalSize[0] != 0)
...@@ -561,7 +561,7 @@ void cv::ocl::BruteForceMatcher_OCL_base::matchSingle(const oclMat &query, const ...@@ -561,7 +561,7 @@ void cv::ocl::BruteForceMatcher_OCL_base::matchSingle(const oclMat &query, const
{ {
if (query.empty() || train.empty()) if (query.empty() || train.empty())
return; return;
CV_Assert(query.channels() == 1 && query.depth() < CV_64F); CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
CV_Assert(train.cols == query.cols && train.type() == query.type()); CV_Assert(train.cols == query.cols && train.type() == query.type());
...@@ -673,7 +673,7 @@ void cv::ocl::BruteForceMatcher_OCL_base::matchCollection(const oclMat &query, c ...@@ -673,7 +673,7 @@ void cv::ocl::BruteForceMatcher_OCL_base::matchCollection(const oclMat &query, c
return; return;
CV_Assert(query.channels() == 1 && query.depth() < CV_64F); CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
const int nQuery = query.rows; const int nQuery = query.rows;
ensureSizeIsEnough(1, nQuery, CV_32S, trainIdx); ensureSizeIsEnough(1, nQuery, CV_32S, trainIdx);
...@@ -845,8 +845,8 @@ void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Collection(const oclMat &quer ...@@ -845,8 +845,8 @@ void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Collection(const oclMat &quer
if (query.empty() || trainCollection.empty()) if (query.empty() || trainCollection.empty())
return; return;
typedef void (*caller_t)(const oclMat & query, const oclMat & trains, const oclMat & masks, // typedef void (*caller_t)(const oclMat & query, const oclMat & trains, const oclMat & masks,
const oclMat & trainIdx, const oclMat & imgIdx, const oclMat & distance); // const oclMat & trainIdx, const oclMat & imgIdx, const oclMat & distance);
CV_Assert(query.channels() == 1 && query.depth() < CV_64F); CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
...@@ -993,7 +993,7 @@ void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat &query, vector< ...@@ -993,7 +993,7 @@ void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat &query, vector<
// radiusMatchSingle // radiusMatchSingle
void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchSingle(const oclMat &query, const oclMat &train, void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchSingle(const oclMat &query, const oclMat &train,
oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance, const oclMat &mask) oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance, const oclMat &mask)
{ {
if (query.empty() || train.empty()) if (query.empty() || train.empty())
return; return;
...@@ -1095,9 +1095,9 @@ void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchCollection(const oclMat &qu ...@@ -1095,9 +1095,9 @@ void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchCollection(const oclMat &qu
if (query.empty() || empty()) if (query.empty() || empty())
return; return;
#if 0
typedef void (*caller_t)(const oclMat & query, const oclMat * trains, int n, float maxDistance, const oclMat * masks, typedef void (*caller_t)(const oclMat & query, const oclMat * trains, int n, float maxDistance, const oclMat * masks,
const oclMat & trainIdx, const oclMat & imgIdx, const oclMat & distance, const oclMat & nMatches); const oclMat & trainIdx, const oclMat & imgIdx, const oclMat & distance, const oclMat & nMatches);
#if 0
static const caller_t callers[3][6] = static const caller_t callers[3][6] =
{ {
{ {
......
...@@ -60,7 +60,7 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha, ...@@ -60,7 +60,7 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
const oclMat &src3, double beta, oclMat &dst, int flags) const oclMat &src3, double beta, oclMat &dst, int flags)
{ {
CV_Assert(src1.cols == src2.rows && CV_Assert(src1.cols == src2.rows &&
(src3.empty() || src1.rows == src3.rows && src2.cols == src3.cols)); (src3.empty() || (src1.rows == src3.rows && src2.cols == src3.cols)));
CV_Assert(!(cv::GEMM_3_T & flags)); // cv::GEMM_3_T is not supported CV_Assert(!(cv::GEMM_3_T & flags)); // cv::GEMM_3_T is not supported
if(!src3.empty()) if(!src3.empty())
{ {
......
此差异已折叠。
...@@ -43,6 +43,7 @@ ...@@ -43,6 +43,7 @@
// //
//M*/ //M*/
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
#include "precomp.hpp" #include "precomp.hpp"
using namespace std; using namespace std;
......
...@@ -327,7 +327,7 @@ static void ocl_cvMoments( const void* array, CvMoments* mom, int binary ) ...@@ -327,7 +327,7 @@ static void ocl_cvMoments( const void* array, CvMoments* mom, int binary )
mom->m12 = dstsum[8]; mom->m12 = dstsum[8];
mom->m03 = dstsum[9]; mom->m03 = dstsum[9];
delete [] dstsum; delete [] dstsum;
openCLSafeCall(clReleaseMemObject(sum));
icvCompleteMomentState( mom ); icvCompleteMomentState( mom );
} }
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
// //
// @Authors // @Authors
// Wu Xinglong, wxl370@126.com // Wu Xinglong, wxl370@126.com
// Sen Liu, swjtuls1987@126.com
// //
// Redistribution and use in source and binary forms, with or without modification, // Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met: // are permitted provided that the following conditions are met:
...@@ -52,11 +53,11 @@ typedef struct __attribute__((aligned(128))) GpuHidHaarFeature ...@@ -52,11 +53,11 @@ typedef struct __attribute__((aligned(128))) GpuHidHaarFeature
{ {
struct __attribute__((aligned(32))) struct __attribute__((aligned(32)))
{ {
int p0 __attribute__((aligned(4))); int p0 __attribute__((aligned(4)));
int p1 __attribute__((aligned(4))); int p1 __attribute__((aligned(4)));
int p2 __attribute__((aligned(4))); int p2 __attribute__((aligned(4)));
int p3 __attribute__((aligned(4))); int p3 __attribute__((aligned(4)));
float weight __attribute__((aligned(4))); float weight __attribute__((aligned(4)));
} }
rect[CV_HAAR_FEATURE_MAX] __attribute__((aligned(32))); rect[CV_HAAR_FEATURE_MAX] __attribute__((aligned(32)));
} }
...@@ -113,173 +114,168 @@ __kernel void gpuRunHaarClassifierCascade_scaled2( ...@@ -113,173 +114,168 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
global const int *restrict sum, global const int *restrict sum,
global const float *restrict sqsum, global const float *restrict sqsum,
global int4 *candidate, global int4 *candidate,
const int rows,
const int cols,
const int step, const int step,
const int loopcount, const int loopcount,
const int start_stage, const int start_stage,
const int split_stage, const int split_stage,
const int end_stage, const int end_stage,
const int startnode, const int startnode,
const int splitnode,
global int4 *p, global int4 *p,
//const int4 * pq,
global float *correction, global float *correction,
const int nodecount) const int nodecount)
{ {
int grpszx = get_local_size(0); int grpszx = get_local_size(0);
int grpszy = get_local_size(1); int grpszy = get_local_size(1);
int grpnumx = get_num_groups(0); int grpnumx = get_num_groups(0);
int grpidx = get_group_id(0); int grpidx = get_group_id(0);
int lclidx = get_local_id(0); int lclidx = get_local_id(0);
int lclidy = get_local_id(1); int lclidy = get_local_id(1);
int lcl_sz = mul24(grpszx, grpszy); int lcl_sz = mul24(grpszx, grpszy);
int lcl_id = mad24(lclidy, grpszx, lclidx); int lcl_id = mad24(lclidy, grpszx, lclidx);
__local int lclshare[1024]; __local int glboutindex[1];
__local int *glboutindex = lclshare + 0; __local int lclcount[1];
__local int *lclcount = glboutindex + 1; __local int lcloutindex[64];
__local int *lcloutindex = lclcount + 1; glboutindex[0] = 0;
__local float *partialsum = (__local float *)(lcloutindex + (lcl_sz << 1)); int outputoff = mul24(grpidx, 256);
glboutindex[0] = 0; candidate[outputoff + (lcl_id << 2)] = (int4)0;
int outputoff = mul24(grpidx, 256); candidate[outputoff + (lcl_id << 2) + 1] = (int4)0;
candidate[outputoff + (lcl_id << 2)] = (int4)0; candidate[outputoff + (lcl_id << 2) + 2] = (int4)0;
candidate[outputoff + (lcl_id << 2) + 1] = (int4)0; candidate[outputoff + (lcl_id << 2) + 3] = (int4)0;
candidate[outputoff + (lcl_id << 2) + 2] = (int4)0; int max_idx = rows * cols - 1;
candidate[outputoff + (lcl_id << 2) + 3] = (int4)0; for (int scalei = 0; scalei < loopcount; scalei++)
{
int4 scaleinfo1;
scaleinfo1 = info[scalei];
int width = (scaleinfo1.x & 0xffff0000) >> 16;
int height = scaleinfo1.x & 0xffff;
int grpnumperline = (scaleinfo1.y & 0xffff0000) >> 16;
int totalgrp = scaleinfo1.y & 0xffff;
float factor = as_float(scaleinfo1.w);
float correction_t = correction[scalei];
int ystep = (int)(max(2.0f, factor) + 0.5f);
for (int scalei = 0; scalei < loopcount; scalei++) for (int grploop = get_group_id(0); grploop < totalgrp; grploop += grpnumx)
{ {
int4 scaleinfo1; int4 cascadeinfo = p[scalei];
scaleinfo1 = info[scalei]; int grpidy = grploop / grpnumperline;
int width = (scaleinfo1.x & 0xffff0000) >> 16; int grpidx = grploop - mul24(grpidy, grpnumperline);
int height = scaleinfo1.x & 0xffff; int ix = mad24(grpidx, grpszx, lclidx);
int grpnumperline = (scaleinfo1.y & 0xffff0000) >> 16; int iy = mad24(grpidy, grpszy, lclidy);
int totalgrp = scaleinfo1.y & 0xffff; int x = ix * ystep;
float factor = as_float(scaleinfo1.w); int y = iy * ystep;
float correction_t = correction[scalei]; lcloutindex[lcl_id] = 0;
int ystep = (int)(max(2.0f, factor) + 0.5f); lclcount[0] = 0;
int nodecounter;
float mean, variance_norm_factor;
//if((ix < width) && (iy < height))
{
const int p_offset = mad24(y, step, x);
cascadeinfo.x += p_offset;
cascadeinfo.z += p_offset;
mean = (sum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.x), 0, max_idx)] - sum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.z), 0, max_idx)] -
sum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.x), 0, max_idx)] + sum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.z), 0, max_idx)])
* correction_t;
variance_norm_factor = sqsum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.x), 0, max_idx)] - sqsum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.z), 0, max_idx)] -
sqsum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.x), 0, max_idx)] + sqsum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.z), 0, max_idx)];
variance_norm_factor = variance_norm_factor * correction_t - mean * mean;
variance_norm_factor = variance_norm_factor >= 0.f ? sqrt(variance_norm_factor) : 1.f;
bool result = true;
nodecounter = startnode + nodecount * scalei;
for (int grploop = get_group_id(0); grploop < totalgrp; grploop += grpnumx) for (int stageloop = start_stage; (stageloop < end_stage) && result; stageloop++)
{ {
int4 cascadeinfo = p[scalei]; float stage_sum = 0.f;
int grpidy = grploop / grpnumperline; int stagecount = stagecascadeptr[stageloop].count;
int grpidx = grploop - mul24(grpidy, grpnumperline); for (int nodeloop = 0; nodeloop < stagecount; nodeloop++)
int ix = mad24(grpidx, grpszx, lclidx); {
int iy = mad24(grpidy, grpszy, lclidy); __global GpuHidHaarTreeNode *currentnodeptr = (nodeptr + nodecounter);
int x = ix * ystep; int4 info1 = *(__global int4 *)(&(currentnodeptr->p[0][0]));
int y = iy * ystep; int4 info2 = *(__global int4 *)(&(currentnodeptr->p[1][0]));
lcloutindex[lcl_id] = 0; int4 info3 = *(__global int4 *)(&(currentnodeptr->p[2][0]));
lclcount[0] = 0; float4 w = *(__global float4 *)(&(currentnodeptr->weight[0]));
int result = 1, nodecounter; float2 alpha2 = *(__global float2 *)(&(currentnodeptr->alpha[0]));
float mean, variance_norm_factor; float nodethreshold = w.w * variance_norm_factor;
//if((ix < width) && (iy < height)) info1.x += p_offset;
{ info1.z += p_offset;
const int p_offset = mad24(y, step, x); info2.x += p_offset;
cascadeinfo.x += p_offset; info2.z += p_offset;
cascadeinfo.z += p_offset; float classsum = (sum[clamp(mad24(info1.y, step, info1.x), 0, max_idx)] - sum[clamp(mad24(info1.y, step, info1.z), 0, max_idx)] -
mean = (sum[mad24(cascadeinfo.y, step, cascadeinfo.x)] - sum[mad24(cascadeinfo.y, step, cascadeinfo.z)] - sum[clamp(mad24(info1.w, step, info1.x), 0, max_idx)] + sum[clamp(mad24(info1.w, step, info1.z), 0, max_idx)]) * w.x;
sum[mad24(cascadeinfo.w, step, cascadeinfo.x)] + sum[mad24(cascadeinfo.w, step, cascadeinfo.z)]) classsum += (sum[clamp(mad24(info2.y, step, info2.x), 0, max_idx)] - sum[clamp(mad24(info2.y, step, info2.z), 0, max_idx)] -
* correction_t; sum[clamp(mad24(info2.w, step, info2.x), 0, max_idx)] + sum[clamp(mad24(info2.w, step, info2.z), 0, max_idx)]) * w.y;
variance_norm_factor = sqsum[mad24(cascadeinfo.y, step, cascadeinfo.x)] - sqsum[mad24(cascadeinfo.y, step, cascadeinfo.z)] - info3.x += p_offset;
sqsum[mad24(cascadeinfo.w, step, cascadeinfo.x)] + sqsum[mad24(cascadeinfo.w, step, cascadeinfo.z)]; info3.z += p_offset;
variance_norm_factor = variance_norm_factor * correction_t - mean * mean; classsum += (sum[clamp(mad24(info3.y, step, info3.x), 0, max_idx)] - sum[clamp(mad24(info3.y, step, info3.z), 0, max_idx)] -
variance_norm_factor = variance_norm_factor >= 0.f ? sqrt(variance_norm_factor) : 1.f; sum[clamp(mad24(info3.w, step, info3.x), 0, max_idx)] + sum[clamp(mad24(info3.w, step, info3.z), 0, max_idx)]) * w.z;
result = 1; stage_sum += classsum >= nodethreshold ? alpha2.y : alpha2.x;
nodecounter = startnode + nodecount * scalei; nodecounter++;
}
for (int stageloop = start_stage; stageloop < end_stage && result; stageloop++) result = (bool)(stage_sum >= stagecascadeptr[stageloop].threshold);
{ }
float stage_sum = 0.f;
int4 stageinfo = *(global int4 *)(stagecascadeptr + stageloop);
float stagethreshold = as_float(stageinfo.y);
for (int nodeloop = 0; nodeloop < stageinfo.x; nodeloop++)
{
__global GpuHidHaarTreeNode *currentnodeptr = (nodeptr + nodecounter);
int4 info1 = *(__global int4 *)(&(currentnodeptr->p[0][0]));
int4 info2 = *(__global int4 *)(&(currentnodeptr->p[1][0]));
int4 info3 = *(__global int4 *)(&(currentnodeptr->p[2][0]));
float4 w = *(__global float4 *)(&(currentnodeptr->weight[0]));
float2 alpha2 = *(__global float2 *)(&(currentnodeptr->alpha[0]));
float nodethreshold = w.w * variance_norm_factor;
info1.x += p_offset;
info1.z += p_offset;
info2.x += p_offset;
info2.z += p_offset;
float classsum = (sum[mad24(info1.y, step, info1.x)] - sum[mad24(info1.y, step, info1.z)] -
sum[mad24(info1.w, step, info1.x)] + sum[mad24(info1.w, step, info1.z)]) * w.x;
classsum += (sum[mad24(info2.y, step, info2.x)] - sum[mad24(info2.y, step, info2.z)] -
sum[mad24(info2.w, step, info2.x)] + sum[mad24(info2.w, step, info2.z)]) * w.y;
info3.x += p_offset;
info3.z += p_offset;
classsum += (sum[mad24(info3.y, step, info3.x)] - sum[mad24(info3.y, step, info3.z)] -
sum[mad24(info3.w, step, info3.x)] + sum[mad24(info3.w, step, info3.z)]) * w.z;
stage_sum += classsum >= nodethreshold ? alpha2.y : alpha2.x;
nodecounter++;
}
result = (stage_sum >= stagethreshold);
}
if (result && (ix < width) && (iy < height)) barrier(CLK_LOCAL_MEM_FENCE);
{
int queueindex = atomic_inc(lclcount);
lcloutindex[queueindex << 1] = (y << 16) | x;
lcloutindex[(queueindex << 1) + 1] = as_int(variance_norm_factor);
}
barrier(CLK_LOCAL_MEM_FENCE); if (result && (ix < width) && (iy < height))
int queuecount = lclcount[0]; {
nodecounter = splitnode + nodecount * scalei; int queueindex = atomic_inc(lclcount);
lcloutindex[queueindex] = (y << 16) | x;
}
if (lcl_id < queuecount) barrier(CLK_LOCAL_MEM_FENCE);
{ int queuecount = lclcount[0];
int temp = lcloutindex[lcl_id << 1];
int x = temp & 0xffff;
int y = (temp & (int)0xffff0000) >> 16;
temp = glboutindex[0];
int4 candidate_result;
candidate_result.zw = (int2)convert_int_rtn(factor * 20.f);
candidate_result.x = x;
candidate_result.y = y;
atomic_inc(glboutindex);
candidate[outputoff + temp + lcl_id] = candidate_result;
}
barrier(CLK_LOCAL_MEM_FENCE); if (lcl_id < queuecount)
} {
int temp = lcloutindex[lcl_id];
int x = temp & 0xffff;
int y = (temp & (int)0xffff0000) >> 16;
temp = atomic_inc(glboutindex);
int4 candidate_result;
candidate_result.zw = (int2)convert_int_rtn(factor * 20.f);
candidate_result.x = x;
candidate_result.y = y;
candidate[outputoff + temp + lcl_id] = candidate_result;
} }
barrier(CLK_LOCAL_MEM_FENCE);
}
} }
}
} }
__kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuHidHaarTreeNode *newnode, float scale, float weight_scale, int nodenum) __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuHidHaarTreeNode *newnode, float scale, float weight_scale, int nodenum)
{ {
int counter = get_global_id(0); int counter = get_global_id(0);
int tr_x[3], tr_y[3], tr_h[3], tr_w[3], i = 0; int tr_x[3], tr_y[3], tr_h[3], tr_w[3], i = 0;
GpuHidHaarTreeNode t1 = *(orinode + counter); GpuHidHaarTreeNode t1 = *(orinode + counter);
#pragma unroll #pragma unroll
for (i = 0; i < 3; i++) for (i = 0; i < 3; i++)
{ {
tr_x[i] = (int)(t1.p[i][0] * scale + 0.5f); tr_x[i] = (int)(t1.p[i][0] * scale + 0.5f);
tr_y[i] = (int)(t1.p[i][1] * scale + 0.5f); tr_y[i] = (int)(t1.p[i][1] * scale + 0.5f);
tr_w[i] = (int)(t1.p[i][2] * scale + 0.5f); tr_w[i] = (int)(t1.p[i][2] * scale + 0.5f);
tr_h[i] = (int)(t1.p[i][3] * scale + 0.5f); tr_h[i] = (int)(t1.p[i][3] * scale + 0.5f);
} }
t1.weight[0] = t1.p[2][0] ? -(t1.weight[1] * tr_h[1] * tr_w[1] + t1.weight[2] * tr_h[2] * tr_w[2]) / (tr_h[0] * tr_w[0]) : -t1.weight[1] * tr_h[1] * tr_w[1] / (tr_h[0] * tr_w[0]); t1.weight[0] = t1.p[2][0] ? -(t1.weight[1] * tr_h[1] * tr_w[1] + t1.weight[2] * tr_h[2] * tr_w[2]) / (tr_h[0] * tr_w[0]) : -t1.weight[1] * tr_h[1] * tr_w[1] / (tr_h[0] * tr_w[0]);
counter += nodenum; counter += nodenum;
#pragma unroll #pragma unroll
for (i = 0; i < 3; i++) for (i = 0; i < 3; i++)
{ {
newnode[counter].p[i][0] = tr_x[i]; newnode[counter].p[i][0] = tr_x[i];
newnode[counter].p[i][1] = tr_y[i]; newnode[counter].p[i][1] = tr_y[i];
newnode[counter].p[i][2] = tr_x[i] + tr_w[i]; newnode[counter].p[i][2] = tr_x[i] + tr_w[i];
newnode[counter].p[i][3] = tr_y[i] + tr_h[i]; newnode[counter].p[i][3] = tr_y[i] + tr_h[i];
newnode[counter].weight[i] = t1.weight[i] * weight_scale; newnode[counter].weight[i] = t1.weight[i] * weight_scale;
} }
newnode[counter].left = t1.left; newnode[counter].left = t1.left;
newnode[counter].right = t1.right; newnode[counter].right = t1.right;
newnode[counter].threshold = t1.threshold; newnode[counter].threshold = t1.threshold;
newnode[counter].alpha[0] = t1.alpha[0]; newnode[counter].alpha[0] = t1.alpha[0];
newnode[counter].alpha[1] = t1.alpha[1]; newnode[counter].alpha[1] = t1.alpha[1];
} }
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Sen Liu, swjtuls1987@126.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if defined (DOUBLE_SUPPORT) #if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64 #ifdef cl_khr_fp64
...@@ -609,22 +654,33 @@ __kernel void CvMoments_D5( __global float* src_data, int src_rows, int src_cols ...@@ -609,22 +654,33 @@ __kernel void CvMoments_D5( __global float* src_data, int src_rows, int src_cols
int y = wgidy*TILE_SIZE; // real Y index of pixel int y = wgidy*TILE_SIZE; // real Y index of pixel
int x = wgidx*TILE_SIZE; // real X index of pixel int x = wgidx*TILE_SIZE; // real X index of pixel
int kcn = (cn==2)?2:4; int kcn = (cn==2)?2:4;
int rstep = min(src_step/4, TILE_SIZE); src_step /= sizeof(*src_data);
int rstep = min(src_step, TILE_SIZE);
tileSize_height = min(TILE_SIZE, src_rows - y); tileSize_height = min(TILE_SIZE, src_rows - y);
tileSize_width = min(TILE_SIZE, src_cols -x); tileSize_width = min(TILE_SIZE, src_cols -x);
if(tileSize_width < TILE_SIZE) int maxIdx = mul24(src_rows, src_cols);
for(int i = tileSize_width; i < rstep; i++ ) int yOff = (y+lidy)*src_step;
*((__global float*)src_data+(y+lidy)*src_step/4+x+i) = 0; int index;
if(tileSize_width < TILE_SIZE && yOff < src_rows)
for(int i = tileSize_width; i < rstep && (yOff+x+i) < maxIdx; i++ )
*(src_data+yOff+x+i) = 0;
if( coi > 0 ) if( coi > 0 )
for(int i=0; i < tileSize_width; i+=VLEN_F) for(int i=0; i < tileSize_width; i+=VLEN_F)
{ {
#pragma unroll
for(int j=0; j<4; j++) for(int j=0; j<4; j++)
tmp_coi[j] = *(src_data+(y+lidy)*src_step/4+(x+i+j)*kcn+coi-1); {
index = yOff+(x+i+j)*kcn+coi-1;
if (index < maxIdx)
tmp_coi[j] = *(src_data+index);
else
tmp_coi[j] = 0;
}
tmp[i/VLEN_F] = (float4)(tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3]); tmp[i/VLEN_F] = (float4)(tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3]);
} }
else else
for(int i=0; i < tileSize_width; i+=VLEN_F) for(int i=0; i < tileSize_width && (yOff+x+i) < maxIdx; i+=VLEN_F)
tmp[i/VLEN_F] = (float4)(*(src_data+(y+lidy)*src_step/4+x+i),*(src_data+(y+lidy)*src_step/4+x+i+1),*(src_data+(y+lidy)*src_step/4+x+i+2),*(src_data+(y+lidy)*src_step/4+x+i+3)); tmp[i/VLEN_F] = (*(__global float4 *)(src_data+yOff+x+i));
float4 zero = (float4)(0); float4 zero = (float4)(0);
float4 full = (float4)(255); float4 full = (float4)(255);
if( binary ) if( binary )
...@@ -714,35 +770,59 @@ __kernel void CvMoments_D5( __global float* src_data, int src_rows, int src_cols ...@@ -714,35 +770,59 @@ __kernel void CvMoments_D5( __global float* src_data, int src_rows, int src_cols
// accumulate moments computed in each tile // accumulate moments computed in each tile
dst_step /= sizeof(F); dst_step /= sizeof(F);
int dst_x_off = mad24(wgidy, dst_cols, wgidx);
int dst_off = 0;
int max_dst_index = 10 * blocky * get_global_size(1);
// + m00 ( = m00' ) // + m00 ( = m00' )
*(dst_m + mad24(DST_ROW_00 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[0]; dst_off = mad24(DST_ROW_00 * blocky, dst_step, dst_x_off);
if (dst_off < max_dst_index)
*(dst_m + dst_off) = mom[0];
// + m10 ( = m10' + x*m00' ) // + m10 ( = m10' + x*m00' )
*(dst_m + mad24(DST_ROW_10 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[1] + xm; dst_off = mad24(DST_ROW_10 * blocky, dst_step, dst_x_off);
if (dst_off < max_dst_index)
*(dst_m + dst_off) = mom[1] + xm;
// + m01 ( = m01' + y*m00' ) // + m01 ( = m01' + y*m00' )
*(dst_m + mad24(DST_ROW_01 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[2] + ym; dst_off = mad24(DST_ROW_01 * blocky, dst_step, dst_x_off);
if (dst_off < max_dst_index)
*(dst_m + dst_off) = mom[2] + ym;
// + m20 ( = m20' + 2*x*m10' + x*x*m00' ) // + m20 ( = m20' + 2*x*m10' + x*x*m00' )
*(dst_m + mad24(DST_ROW_20 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[3] + x * (mom[1] * 2 + xm); dst_off = mad24(DST_ROW_20 * blocky, dst_step, dst_x_off);
if (dst_off < max_dst_index)
*(dst_m + dst_off) = mom[3] + x * (mom[1] * 2 + xm);
// + m11 ( = m11' + x*m01' + y*m10' + x*y*m00' ) // + m11 ( = m11' + x*m01' + y*m10' + x*y*m00' )
*(dst_m + mad24(DST_ROW_11 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[4] + x * (mom[2] + ym) + y * mom[1]; dst_off = mad24(DST_ROW_11 * blocky, dst_step, dst_x_off);
if (dst_off < max_dst_index)
*(dst_m + dst_off) = mom[4] + x * (mom[2] + ym) + y * mom[1];
// + m02 ( = m02' + 2*y*m01' + y*y*m00' ) // + m02 ( = m02' + 2*y*m01' + y*y*m00' )
*(dst_m + mad24(DST_ROW_02 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[5] + y * (mom[2] * 2 + ym); dst_off = mad24(DST_ROW_02 * blocky, dst_step, dst_x_off);
if (dst_off < max_dst_index)
*(dst_m + dst_off) = mom[5] + y * (mom[2] * 2 + ym);
// + m30 ( = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00' ) // + m30 ( = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00' )
*(dst_m + mad24(DST_ROW_30 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm)); dst_off = mad24(DST_ROW_30 * blocky, dst_step, dst_x_off);
if (dst_off < max_dst_index)
*(dst_m + dst_off) = mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm));
// + m21 ( = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20') // + m21 ( = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20')
*(dst_m + mad24(DST_ROW_21 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3]; dst_off = mad24(DST_ROW_21 * blocky, dst_step, dst_x_off);
if (dst_off < max_dst_index)
*(dst_m + dst_off) = mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3];
// + m12 ( = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02') // + m12 ( = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02')
*(dst_m + mad24(DST_ROW_12 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5]; dst_off = mad24(DST_ROW_12 * blocky, dst_step, dst_x_off);
if (dst_off < max_dst_index)
*(dst_m + dst_off) = mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5];
// + m03 ( = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00' ) // + m03 ( = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00' )
*(dst_m + mad24(DST_ROW_03 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym)); dst_off = mad24(DST_ROW_03 * blocky, dst_step, dst_x_off);
if (dst_off < max_dst_index)
*(dst_m + dst_off) = mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym));
} }
} }
......
...@@ -16,6 +16,8 @@ ...@@ -16,6 +16,8 @@
// //
// @Authors // @Authors
// Jia Haipeng, jiahaipeng95@gmail.com // Jia Haipeng, jiahaipeng95@gmail.com
// Sen Liu, swjtuls1987@126.com
// Peng Xiao, pengxiao@outlook.com
// //
// Redistribution and use in source and binary forms, with or without modification, // Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met: // are permitted provided that the following conditions are met:
...@@ -50,59 +52,40 @@ ...@@ -50,59 +52,40 @@
#define STEREO_MIND 0 // The minimum d range to check #define STEREO_MIND 0 // The minimum d range to check
#define STEREO_DISP_STEP N_DISPARITIES // the d step, must be <= 1 to avoid aliasing #define STEREO_DISP_STEP N_DISPARITIES // the d step, must be <= 1 to avoid aliasing
int SQ(int a) #ifndef radius
{ #define radius 64
return a * a; #endif
}
unsigned int CalcSSD(volatile __local unsigned int *col_ssd_cache, unsigned int CalcSSD(__local unsigned int *col_ssd)
volatile __local unsigned int *col_ssd, int radius)
{ {
unsigned int cache = 0; unsigned int cache = col_ssd[0];
unsigned int cache2 = 0;
for(int i = 1; i <= radius; i++) #pragma unroll
for(int i = 1; i <= (radius << 1); i++)
cache += col_ssd[i]; cache += col_ssd[i];
col_ssd_cache[0] = cache; return cache;
barrier(CLK_LOCAL_MEM_FENCE);
if (get_local_id(0) < BLOCK_W - radius)
cache2 = col_ssd_cache[radius];
else
for(int i = radius + 1; i < (2 * radius + 1); i++)
cache2 += col_ssd[i];
return col_ssd[0] + cache + cache2;
} }
uint2 MinSSD(volatile __local unsigned int *col_ssd_cache, uint2 MinSSD(__local unsigned int *col_ssd)
volatile __local unsigned int *col_ssd, int radius)
{ {
unsigned int ssd[N_DISPARITIES]; unsigned int ssd[N_DISPARITIES];
const int win_size = (radius << 1);
//See above: #define COL_SSD_SIZE (BLOCK_W + 2 * radius)
ssd[0] = CalcSSD(col_ssd_cache, col_ssd + 0 * (BLOCK_W + 2 * radius), radius); //See above: #define COL_SSD_SIZE (BLOCK_W + WIN_SIZE)
barrier(CLK_LOCAL_MEM_FENCE); ssd[0] = CalcSSD(col_ssd + 0 * (BLOCK_W + win_size));
ssd[1] = CalcSSD(col_ssd_cache, col_ssd + 1 * (BLOCK_W + 2 * radius), radius); ssd[1] = CalcSSD(col_ssd + 1 * (BLOCK_W + win_size));
barrier(CLK_LOCAL_MEM_FENCE); ssd[2] = CalcSSD(col_ssd + 2 * (BLOCK_W + win_size));
ssd[2] = CalcSSD(col_ssd_cache, col_ssd + 2 * (BLOCK_W + 2 * radius), radius); ssd[3] = CalcSSD(col_ssd + 3 * (BLOCK_W + win_size));
barrier(CLK_LOCAL_MEM_FENCE); ssd[4] = CalcSSD(col_ssd + 4 * (BLOCK_W + win_size));
ssd[3] = CalcSSD(col_ssd_cache, col_ssd + 3 * (BLOCK_W + 2 * radius), radius); ssd[5] = CalcSSD(col_ssd + 5 * (BLOCK_W + win_size));
barrier(CLK_LOCAL_MEM_FENCE); ssd[6] = CalcSSD(col_ssd + 6 * (BLOCK_W + win_size));
ssd[4] = CalcSSD(col_ssd_cache, col_ssd + 4 * (BLOCK_W + 2 * radius), radius); ssd[7] = CalcSSD(col_ssd + 7 * (BLOCK_W + win_size));
barrier(CLK_LOCAL_MEM_FENCE);
ssd[5] = CalcSSD(col_ssd_cache, col_ssd + 5 * (BLOCK_W + 2 * radius), radius);
barrier(CLK_LOCAL_MEM_FENCE);
ssd[6] = CalcSSD(col_ssd_cache, col_ssd + 6 * (BLOCK_W + 2 * radius), radius);
barrier(CLK_LOCAL_MEM_FENCE);
ssd[7] = CalcSSD(col_ssd_cache, col_ssd + 7 * (BLOCK_W + 2 * radius), radius);
barrier(CLK_LOCAL_MEM_FENCE);
unsigned int mssd = min(min(min(ssd[0], ssd[1]), min(ssd[4], ssd[5])), min(min(ssd[2], ssd[3]), min(ssd[6], ssd[7]))); unsigned int mssd = min(min(min(ssd[0], ssd[1]), min(ssd[4], ssd[5])), min(min(ssd[2], ssd[3]), min(ssd[6], ssd[7])));
int bestIdx = 0; int bestIdx = 0;
for (int i = 0; i < N_DISPARITIES; i++) for (int i = 0; i < N_DISPARITIES; i++)
{ {
if (mssd == ssd[i]) if (mssd == ssd[i])
...@@ -113,124 +96,66 @@ uint2 MinSSD(volatile __local unsigned int *col_ssd_cache, ...@@ -113,124 +96,66 @@ uint2 MinSSD(volatile __local unsigned int *col_ssd_cache,
} }
void StepDown(int idx1, int idx2, __global unsigned char* imageL, void StepDown(int idx1, int idx2, __global unsigned char* imageL,
__global unsigned char* imageR, int d, volatile __local unsigned int *col_ssd, int radius) __global unsigned char* imageR, int d, __local unsigned int *col_ssd)
{ {
unsigned char leftPixel1; uint8 imgR1 = convert_uint8(vload8(0, imageR + (idx1 - d - 7)));
unsigned char leftPixel2; uint8 imgR2 = convert_uint8(vload8(0, imageR + (idx2 - d - 7)));
unsigned char rightPixel1[8]; uint8 diff1 = (uint8)(imageL[idx1]) - imgR1;
unsigned char rightPixel2[8]; uint8 diff2 = (uint8)(imageL[idx2]) - imgR2;
unsigned int diff1, diff2; uint8 res = diff2 * diff2 - diff1 * diff1;
const int win_size = (radius << 1);
leftPixel1 = imageL[idx1]; col_ssd[0 * (BLOCK_W + win_size)] += res.s7;
leftPixel2 = imageL[idx2]; col_ssd[1 * (BLOCK_W + win_size)] += res.s6;
col_ssd[2 * (BLOCK_W + win_size)] += res.s5;
idx1 = idx1 - d; col_ssd[3 * (BLOCK_W + win_size)] += res.s4;
idx2 = idx2 - d; col_ssd[4 * (BLOCK_W + win_size)] += res.s3;
col_ssd[5 * (BLOCK_W + win_size)] += res.s2;
rightPixel1[7] = imageR[idx1 - 7]; col_ssd[6 * (BLOCK_W + win_size)] += res.s1;
rightPixel1[0] = imageR[idx1 - 0]; col_ssd[7 * (BLOCK_W + win_size)] += res.s0;
rightPixel1[1] = imageR[idx1 - 1];
rightPixel1[2] = imageR[idx1 - 2];
rightPixel1[3] = imageR[idx1 - 3];
rightPixel1[4] = imageR[idx1 - 4];
rightPixel1[5] = imageR[idx1 - 5];
rightPixel1[6] = imageR[idx1 - 6];
rightPixel2[7] = imageR[idx2 - 7];
rightPixel2[0] = imageR[idx2 - 0];
rightPixel2[1] = imageR[idx2 - 1];
rightPixel2[2] = imageR[idx2 - 2];
rightPixel2[3] = imageR[idx2 - 3];
rightPixel2[4] = imageR[idx2 - 4];
rightPixel2[5] = imageR[idx2 - 5];
rightPixel2[6] = imageR[idx2 - 6];
//See above: #define COL_SSD_SIZE (BLOCK_W + 2 * radius)
diff1 = leftPixel1 - rightPixel1[0];
diff2 = leftPixel2 - rightPixel2[0];
col_ssd[0 * (BLOCK_W + 2 * radius)] += SQ(diff2) - SQ(diff1);
diff1 = leftPixel1 - rightPixel1[1];
diff2 = leftPixel2 - rightPixel2[1];
col_ssd[1 * (BLOCK_W + 2 * radius)] += SQ(diff2) - SQ(diff1);
diff1 = leftPixel1 - rightPixel1[2];
diff2 = leftPixel2 - rightPixel2[2];
col_ssd[2 * (BLOCK_W + 2 * radius)] += SQ(diff2) - SQ(diff1);
diff1 = leftPixel1 - rightPixel1[3];
diff2 = leftPixel2 - rightPixel2[3];
col_ssd[3 * (BLOCK_W + 2 * radius)] += SQ(diff2) - SQ(diff1);
diff1 = leftPixel1 - rightPixel1[4];
diff2 = leftPixel2 - rightPixel2[4];
col_ssd[4 * (BLOCK_W + 2 * radius)] += SQ(diff2) - SQ(diff1);
diff1 = leftPixel1 - rightPixel1[5];
diff2 = leftPixel2 - rightPixel2[5];
col_ssd[5 * (BLOCK_W + 2 * radius)] += SQ(diff2) - SQ(diff1);
diff1 = leftPixel1 - rightPixel1[6];
diff2 = leftPixel2 - rightPixel2[6];
col_ssd[6 * (BLOCK_W + 2 * radius)] += SQ(diff2) - SQ(diff1);
diff1 = leftPixel1 - rightPixel1[7];
diff2 = leftPixel2 - rightPixel2[7];
col_ssd[7 * (BLOCK_W + 2 * radius)] += SQ(diff2) - SQ(diff1);
} }
void InitColSSD(int x_tex, int y_tex, int im_pitch, __global unsigned char* imageL, void InitColSSD(int x_tex, int y_tex, int im_pitch, __global unsigned char* imageL,
__global unsigned char* imageR, int d, __global unsigned char* imageR, int d,
volatile __local unsigned int *col_ssd, int radius) __local unsigned int *col_ssd)
{ {
unsigned char leftPixel1; uint8 leftPixel1;
int idx; uint8 diffa = 0;
unsigned int diffa[] = {0, 0, 0, 0, 0, 0, 0, 0}; int idx = y_tex * im_pitch + x_tex;
const int win_size = (radius << 1);
for(int i = 0; i < (2 * radius + 1); i++) for(int i = 0; i < (win_size + 1); i++)
{ {
idx = y_tex * im_pitch + x_tex; leftPixel1 = (uint8)(imageL[idx]);
leftPixel1 = imageL[idx]; uint8 imgR = convert_uint8(vload8(0, imageR + (idx - d - 7)));
idx = idx - d; uint8 res = leftPixel1 - imgR;
diffa += res * res;
diffa[0] += SQ(leftPixel1 - imageR[idx - 0]);
diffa[1] += SQ(leftPixel1 - imageR[idx - 1]); idx += im_pitch;
diffa[2] += SQ(leftPixel1 - imageR[idx - 2]);
diffa[3] += SQ(leftPixel1 - imageR[idx - 3]);
diffa[4] += SQ(leftPixel1 - imageR[idx - 4]);
diffa[5] += SQ(leftPixel1 - imageR[idx - 5]);
diffa[6] += SQ(leftPixel1 - imageR[idx - 6]);
diffa[7] += SQ(leftPixel1 - imageR[idx - 7]);
y_tex += 1;
} }
//See above: #define COL_SSD_SIZE (BLOCK_W + 2 * radius) //See above: #define COL_SSD_SIZE (BLOCK_W + WIN_SIZE)
col_ssd[0 * (BLOCK_W + 2 * radius)] = diffa[0]; col_ssd[0 * (BLOCK_W + win_size)] = diffa.s7;
col_ssd[1 * (BLOCK_W + 2 * radius)] = diffa[1]; col_ssd[1 * (BLOCK_W + win_size)] = diffa.s6;
col_ssd[2 * (BLOCK_W + 2 * radius)] = diffa[2]; col_ssd[2 * (BLOCK_W + win_size)] = diffa.s5;
col_ssd[3 * (BLOCK_W + 2 * radius)] = diffa[3]; col_ssd[3 * (BLOCK_W + win_size)] = diffa.s4;
col_ssd[4 * (BLOCK_W + 2 * radius)] = diffa[4]; col_ssd[4 * (BLOCK_W + win_size)] = diffa.s3;
col_ssd[5 * (BLOCK_W + 2 * radius)] = diffa[5]; col_ssd[5 * (BLOCK_W + win_size)] = diffa.s2;
col_ssd[6 * (BLOCK_W + 2 * radius)] = diffa[6]; col_ssd[6 * (BLOCK_W + win_size)] = diffa.s1;
col_ssd[7 * (BLOCK_W + 2 * radius)] = diffa[7]; col_ssd[7 * (BLOCK_W + win_size)] = diffa.s0;
} }
__kernel void stereoKernel(__global unsigned char *left, __global unsigned char *right, __kernel void stereoKernel(__global unsigned char *left, __global unsigned char *right,
__global unsigned int *cminSSDImage, int cminSSD_step, __global unsigned int *cminSSDImage, int cminSSD_step,
__global unsigned char *disp, int disp_step,int cwidth, int cheight, __global unsigned char *disp, int disp_step,int cwidth, int cheight,
int img_step, int maxdisp, int radius, int img_step, int maxdisp,
__local unsigned int *col_ssd_cache) __local unsigned int *col_ssd_cache)
{ {
__local unsigned int *col_ssd = col_ssd_cache + get_local_id(0);
volatile __local unsigned int *col_ssd = col_ssd_cache + BLOCK_W + get_local_id(0); __local unsigned int *col_ssd_extra = get_local_id(0) < (radius << 1) ? col_ssd + BLOCK_W : 0;
volatile __local unsigned int *col_ssd_extra = get_local_id(0) < (2 * radius) ? col_ssd + BLOCK_W : 0;
int X = get_group_id(0) * BLOCK_W + get_local_id(0) + maxdisp + radius; int X = get_group_id(0) * BLOCK_W + get_local_id(0) + maxdisp + radius;
// int Y = get_group_id(1) * ROWSperTHREAD + radius;
#define Y (get_group_id(1) * ROWSperTHREAD + radius) #define Y (get_group_id(1) * ROWSperTHREAD + radius)
volatile __global unsigned int* minSSDImage = cminSSDImage + X + Y * cminSSD_step; __global unsigned int* minSSDImage = cminSSDImage + X + Y * cminSSD_step;
__global unsigned char* disparImage = disp + X + Y * disp_step; __global unsigned char* disparImage = disp + X + Y * disp_step;
int end_row = ROWSperTHREAD < (cheight - Y) ? ROWSperTHREAD:(cheight - Y); int end_row = ROWSperTHREAD < (cheight - Y) ? ROWSperTHREAD:(cheight - Y);
...@@ -244,14 +169,14 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char ...@@ -244,14 +169,14 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char
{ {
y_tex = Y - radius; y_tex = Y - radius;
InitColSSD(x_tex, y_tex, img_step, left, right, d, col_ssd, radius); InitColSSD(x_tex, y_tex, img_step, left, right, d, col_ssd);
if (col_ssd_extra > 0) if (col_ssd_extra > 0)
if (x_tex + BLOCK_W < cwidth) if (x_tex + BLOCK_W < cwidth)
InitColSSD(x_tex + BLOCK_W, y_tex, img_step, left, right, d, col_ssd_extra, radius); InitColSSD(x_tex + BLOCK_W, y_tex, img_step, left, right, d, col_ssd_extra);
barrier(CLK_LOCAL_MEM_FENCE); //before MinSSD function barrier(CLK_LOCAL_MEM_FENCE); //before MinSSD function
uint2 minSSD = MinSSD(col_ssd_cache + get_local_id(0), col_ssd, radius); uint2 minSSD = MinSSD(col_ssd);
if (X < cwidth - radius && Y < cheight - radius) if (X < cwidth - radius && Y < cheight - radius)
{ {
if (minSSD.x < minSSDImage[0]) if (minSSD.x < minSSDImage[0])
...@@ -264,21 +189,18 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char ...@@ -264,21 +189,18 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char
for(int row = 1; row < end_row; row++) for(int row = 1; row < end_row; row++)
{ {
int idx1 = y_tex * img_step + x_tex; int idx1 = y_tex * img_step + x_tex;
int idx2 = min(y_tex + (2 * radius + 1), cheight - 1) * img_step + x_tex; int idx2 = min(y_tex + ((radius << 1) + 1), cheight - 1) * img_step + x_tex;
barrier(CLK_GLOBAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
StepDown(idx1, idx2, left, right, d, col_ssd, radius); StepDown(idx1, idx2, left, right, d, col_ssd);
if (col_ssd_extra > 0) if (col_ssd_extra > 0)
if (x_tex + BLOCK_W < cwidth) if (x_tex + BLOCK_W < cwidth)
StepDown(idx1, idx2, left + BLOCK_W, right + BLOCK_W, d, col_ssd_extra, radius); StepDown(idx1, idx2, left + BLOCK_W, right + BLOCK_W, d, col_ssd_extra);
y_tex += 1;
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
uint2 minSSD = MinSSD(col_ssd_cache + get_local_id(0), col_ssd, radius); uint2 minSSD = MinSSD(col_ssd);
if (X < cwidth - radius && row < cheight - radius - Y) if (X < cwidth - radius && row < cheight - radius - Y)
{ {
int idx = row * cminSSD_step; int idx = row * cminSSD_step;
...@@ -288,10 +210,11 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char ...@@ -288,10 +210,11 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char
minSSDImage[idx] = minSSD.x; minSSDImage[idx] = minSSD.x;
} }
} }
y_tex++;
} // for row loop } // for row loop
} // for d loop } // for d loop
} }
////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////// Sobel Prefiler (signal channel)////////////////////////////////////// //////////////////////////// Sobel Prefiler (signal channel)//////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////
......
此差异已折叠。
此差异已折叠。
...@@ -74,28 +74,21 @@ namespace stereoBM ...@@ -74,28 +74,21 @@ namespace stereoBM
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
static void prefilter_xsobel(const oclMat &input, oclMat &output, int prefilterCap) static void prefilter_xsobel(const oclMat &input, oclMat &output, int prefilterCap)
{ {
Context *clCxt = input.clCxt;
string kernelName = "prefilter_xsobel"; string kernelName = "prefilter_xsobel";
cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereobm, kernelName);
size_t blockSize = 1; size_t blockSize = 1;
size_t globalThreads[3] = { input.cols, input.rows, 1 }; size_t globalThreads[3] = { input.cols, input.rows, 1 };
size_t localThreads[3] = { blockSize, blockSize, 1 }; size_t localThreads[3] = { blockSize, blockSize, 1 };
openCLVerifyKernel(clCxt, kernel, localThreads); std::vector< std::pair<size_t, const void *> > args;
openCLSafeCall(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&input.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&input.data));
openCLSafeCall(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&output.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&output.data));
openCLSafeCall(clSetKernelArg(kernel, 2, sizeof(cl_int), (void *)&input.rows)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&input.rows));
openCLSafeCall(clSetKernelArg(kernel, 3, sizeof(cl_int), (void *)&input.cols)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&input.cols));
openCLSafeCall(clSetKernelArg(kernel, 4, sizeof(cl_int), (void *)&prefilterCap)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&prefilterCap));
openCLSafeCall(clEnqueueNDRangeKernel((cl_command_queue)clCxt->oclCommandQueue(), kernel, 3, NULL,
globalThreads, localThreads, 0, NULL, NULL));
clFinish((cl_command_queue)clCxt->oclCommandQueue());
openCLSafeCall(clReleaseKernel(kernel));
openCLExecuteKernel(Context::getContext(), &stereobm, kernelName,
globalThreads, localThreads, args, -1, -1);
} }
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
//////////////////////////////common//////////////////////////////////// //////////////////////////////common////////////////////////////////////
...@@ -115,19 +108,13 @@ static void stereo_bm(const oclMat &left, const oclMat &right, oclMat &disp, ...@@ -115,19 +108,13 @@ static void stereo_bm(const oclMat &left, const oclMat &right, oclMat &disp,
{ {
int winsz2 = winSize >> 1; int winsz2 = winSize >> 1;
//if(winsz2 == 0 || winsz2 >= calles_num)
//cv::ocl:error("Unsupported window size", __FILE__, __LINE__, __FUNCTION__);
Context *clCxt = left.clCxt;
string kernelName = "stereoKernel"; string kernelName = "stereoKernel";
cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereobm, kernelName);
disp.setTo(Scalar_<unsigned char>::all(0)); disp.setTo(Scalar_<unsigned char>::all(0));
minSSD_buf.setTo(Scalar_<unsigned int>::all(0xFFFFFFFF)); minSSD_buf.setTo(Scalar_<unsigned int>::all(0xFFFFFFFF));
size_t minssd_step = minSSD_buf.step / minSSD_buf.elemSize(); size_t minssd_step = minSSD_buf.step / minSSD_buf.elemSize();
size_t local_mem_size = (BLOCK_W + N_DISPARITIES * (BLOCK_W + 2 * winsz2)) * size_t local_mem_size = (N_DISPARITIES * (BLOCK_W + 2 * winsz2)) *
sizeof(cl_uint); sizeof(cl_uint);
//size_t blockSize = 1; //size_t blockSize = 1;
size_t localThreads[] = { BLOCK_W, 1,1}; size_t localThreads[] = { BLOCK_W, 1,1};
...@@ -136,26 +123,23 @@ static void stereo_bm(const oclMat &left, const oclMat &right, oclMat &disp, ...@@ -136,26 +123,23 @@ static void stereo_bm(const oclMat &left, const oclMat &right, oclMat &disp,
1 1
}; };
openCLVerifyKernel(clCxt, kernel, localThreads); std::vector< std::pair<size_t, const void *> > args;
openCLSafeCall(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&left.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&left.data));
openCLSafeCall(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&right.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&right.data));
openCLSafeCall(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&minSSD_buf.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&minSSD_buf.data));
openCLSafeCall(clSetKernelArg(kernel, 3, sizeof(cl_int), (void *)&minssd_step)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&minssd_step));
openCLSafeCall(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *)&disp.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&disp.data));
openCLSafeCall(clSetKernelArg(kernel, 5, sizeof(cl_int), (void *)&disp.step)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&disp.step));
openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_int), (void *)&left.cols)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&left.cols));
openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&left.rows)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&left.rows));
openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_int), (void *)&left.step)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&left.step));
openCLSafeCall(clSetKernelArg(kernel, 9, sizeof(cl_int), (void *)&maxdisp)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&maxdisp));
openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_int), (void *)&winsz2)); args.push_back(std::make_pair(local_mem_size, (void *)NULL));
openCLSafeCall(clSetKernelArg(kernel, 11, local_mem_size, (void *)NULL));
char opt [128];
openCLSafeCall(clEnqueueNDRangeKernel((cl_command_queue)clCxt->oclCommandQueue(), kernel, 2, NULL, sprintf(opt, "-D radius=%d", winsz2);
globalThreads, localThreads, 0, NULL, NULL)); openCLExecuteKernel(Context::getContext(), &stereobm, kernelName,
globalThreads, localThreads, args, -1, -1, opt);
clFinish((cl_command_queue)clCxt->oclCommandQueue());
openCLSafeCall(clReleaseKernel(kernel));
} }
//////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////
///////////////////////////////postfilter_textureness/////////////////////// ///////////////////////////////postfilter_textureness///////////////////////
...@@ -163,10 +147,7 @@ static void stereo_bm(const oclMat &left, const oclMat &right, oclMat &disp, ...@@ -163,10 +147,7 @@ static void stereo_bm(const oclMat &left, const oclMat &right, oclMat &disp,
static void postfilter_textureness(oclMat &left, int winSize, static void postfilter_textureness(oclMat &left, int winSize,
float avergeTexThreshold, oclMat &disparity) float avergeTexThreshold, oclMat &disparity)
{ {
Context *clCxt = left.clCxt;
string kernelName = "textureness_kernel"; string kernelName = "textureness_kernel";
cl_kernel kernel = openCLGetKernelFromSource(clCxt, &stereobm, kernelName);
size_t blockSize = 1; size_t blockSize = 1;
size_t localThreads[] = { BLOCK_W, blockSize ,1}; size_t localThreads[] = { BLOCK_W, blockSize ,1};
...@@ -177,22 +158,19 @@ static void postfilter_textureness(oclMat &left, int winSize, ...@@ -177,22 +158,19 @@ static void postfilter_textureness(oclMat &left, int winSize,
size_t local_mem_size = (localThreads[0] + localThreads[0] + (winSize / 2) * 2) * sizeof(float); size_t local_mem_size = (localThreads[0] + localThreads[0] + (winSize / 2) * 2) * sizeof(float);
openCLVerifyKernel(clCxt, kernel, localThreads); std::vector< std::pair<size_t, const void *> > args;
openCLSafeCall(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&disparity.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&disparity.data));
openCLSafeCall(clSetKernelArg(kernel, 1, sizeof(cl_int), (void *)&disparity.rows)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&disparity.rows));
openCLSafeCall(clSetKernelArg(kernel, 2, sizeof(cl_int), (void *)&disparity.cols)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&disparity.cols));
openCLSafeCall(clSetKernelArg(kernel, 3, sizeof(cl_int), (void *)&disparity.step)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&disparity.step));
openCLSafeCall(clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *)&left.data)); args.push_back(std::make_pair(sizeof(cl_mem), (void *)&left.data));
openCLSafeCall(clSetKernelArg(kernel, 5, sizeof(cl_int), (void *)&left.rows)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&left.rows));
openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_int), (void *)&left.cols)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&left.cols));
openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&winSize)); args.push_back(std::make_pair(sizeof(cl_int), (void *)&winSize));
openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_float), (void *)&avergeTexThreshold)); args.push_back(std::make_pair(sizeof(cl_float), (void *)&avergeTexThreshold));
openCLSafeCall(clSetKernelArg(kernel, 9, local_mem_size, NULL)); args.push_back(std::make_pair(local_mem_size, (void*)NULL));
openCLSafeCall(clEnqueueNDRangeKernel((cl_command_queue)clCxt->oclCommandQueue(), kernel, 2, NULL, openCLExecuteKernel(Context::getContext(), &stereobm, kernelName,
globalThreads, localThreads, 0, NULL, NULL)); globalThreads, localThreads, args, -1, -1);
clFinish((cl_command_queue)clCxt->oclCommandQueue());
openCLSafeCall(clReleaseKernel(kernel));
} }
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////operator///////////////////////////////// /////////////////////////////////////operator/////////////////////////////////
......
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_TEST_INTERPOLATION_HPP__
#define __OPENCV_TEST_INTERPOLATION_HPP__
// Reads channel `c` of the element at row `y`, column `x` of `src`.
//
// Coordinates may lie outside the matrix; how they are resolved depends on
// `border_type`:
//   * cv::BORDER_CONSTANT - in-range coordinates are read directly, anything
//     else yields `borderVal.val[c]` saturated to T.
//   * any other mode      - both coordinates are first remapped with
//     cv::borderInterpolate and the remapped element is read.
//
// Channels are addressed as interleaved elements of type T, i.e. the element
// index within a row is `x * src.channels() + c`.
template <typename T> T readVal(const cv::Mat &src, int y, int x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
{
    if (border_type != cv::BORDER_CONSTANT)
    {
        const int row = cv::borderInterpolate(y, src.rows, border_type);
        const int col = cv::borderInterpolate(x, src.cols, border_type);
        return src.at<T>(row, col * src.channels() + c);
    }

    const bool rowInRange = y >= 0 && y < src.rows;
    const bool colInRange = x >= 0 && x < src.cols;
    if (rowInRange && colInRange)
        return src.at<T>(y, x * src.channels() + c);

    // Outside the image with a constant border: use the supplied fill value.
    return cv::saturate_cast<T>(borderVal.val[c]);
}
// Sampler that picks a single source element without blending: the
// fractional coordinates are floored to integers and handed to readVal,
// which applies the requested border handling.
template <typename T> struct NearestInterpolator
{
    // Returns channel `c` at (floor(y), floor(x)) of `src`.
    static T getValue(const cv::Mat &src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
    {
        const int row = cvFloor(y);
        const int col = cvFloor(x);
        return readVal<T>(src, row, col, c, border_type, borderVal);
    }
};
// Bilinear sampler: blends the four elements surrounding the requested
// position, each fetched through readVal so border handling is applied
// per sample.
template <typename T> struct LinearInterpolator
{
    // Returns the bilinearly weighted value of channel `c` at (y, x),
    // saturated to T. Both coordinates are shifted by -0.5 before the
    // surrounding integer cell is located.
    static T getValue(const cv::Mat &src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
    {
        x -= 0.5f;
        y -= 0.5f;

        // Integer cell that encloses the shifted position.
        const int left = cvFloor(x);
        const int top = cvFloor(y);
        const int right = left + 1;
        const int bottom = top + 1;

        // Each weight is the distance to the opposite edge of the cell.
        const float wLeft = right - x;
        const float wRight = x - left;
        const float wTop = bottom - y;
        const float wBottom = y - top;

        float acc = 0;
        acc += readVal<T>(src, top, left, c, border_type, borderVal) * (wLeft * wTop);
        acc += readVal<T>(src, top, right, c, border_type, borderVal) * (wRight * wTop);
        acc += readVal<T>(src, bottom, left, c, border_type, borderVal) * (wLeft * wBottom);
        acc += readVal<T>(src, bottom, right, c, border_type, borderVal) * (wRight * wBottom);

        return cv::saturate_cast<T>(acc);
    }
};
// Cubic sampler built from three overloads of getValue:
//   1. a 1-D cubic polynomial through four samples,
//   2. a 2-D evaluation that applies (1) along each row and then across the
//      four row results,
//   3. the image-facing entry point that gathers a 4x4 neighbourhood via
//      readVal and evaluates (2) on it.
template <typename T> struct CubicInterpolator
{
    // Evaluates the cubic through the four samples `p` at parameter `x`.
    // The polynomial is accumulated in double precision (the literals are
    // doubles) and narrowed to float on return.
    static float getValue(float p[4], float x)
    {
        const double cubicCoef = 3.0 * (p[1] - p[2]) + p[3] - p[0];
        const double quadraticPart = 2.0 * p[0] - 5.0 * p[1] + 4.0 * p[2] - p[3] + x * cubicCoef;
        const double linearPart = p[2] - p[0] + x * quadraticPart;
        return p[1] + 0.5 * x * linearPart;
    }

    // Evaluates the 1-D cubic along each of the four rows of `p` at `x`,
    // then once more across those four results at `y`.
    static float getValue(float p[4][4], float x, float y)
    {
        float perRow[4];
        for (int r = 0; r < 4; ++r)
            perRow[r] = getValue(p[r], x);
        return getValue(perRow, y);
    }

    // Returns the cubic sample of channel `c` at (y, x), saturated to T.
    // The neighbourhood covers rows iy-2..iy+1 and columns ix-2..ix+1 around
    // the rounded position; the polynomial parameters are the offsets from
    // that rounded position, shifted by 2 and divided by 4.
    static T getValue(const cv::Mat &src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
    {
        const int cx = cvRound(x);
        const int cy = cvRound(y);

        float window[4][4];
        for (int dy = 0; dy < 4; ++dy)
            for (int dx = 0; dx < 4; ++dx)
                window[dy][dx] = readVal<T>(src, cy - 2 + dy, cx - 2 + dx, c, border_type, borderVal);

        return cv::saturate_cast<T>(getValue(window, (x - cx + 2.0) / 4.0, (y - cy + 2.0) / 4.0));
    }
};
#endif // __OPENCV_TEST_INTERPOLATION_HPP__
...@@ -71,7 +71,6 @@ ...@@ -71,7 +71,6 @@
#include "opencv2/ocl/ocl.hpp" #include "opencv2/ocl/ocl.hpp"
#include "utility.hpp" #include "utility.hpp"
#include "interpolation.hpp"
//#include "add_test_info.h" //#include "add_test_info.h"
#endif #endif
......
此差异已折叠。
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Nathan, liujun@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp" #include "precomp.hpp"
#include <iomanip> #include <iomanip>
...@@ -33,20 +77,14 @@ void blendLinearGold(const cv::Mat &img1, const cv::Mat &img2, const cv::Mat &we ...@@ -33,20 +77,14 @@ void blendLinearGold(const cv::Mat &img1, const cv::Mat &img2, const cv::Mat &we
PARAM_TEST_CASE(Blend, cv::Size, MatType/*, UseRoi*/) PARAM_TEST_CASE(Blend, cv::Size, MatType/*, UseRoi*/)
{ {
//std::vector<cv::ocl::Info> oclinfo;
cv::Size size; cv::Size size;
int type; int type;
bool useRoi; bool useRoi;
virtual void SetUp() virtual void SetUp()
{ {
//devInfo = GET_PARAM(0);
size = GET_PARAM(0); size = GET_PARAM(0);
type = GET_PARAM(1); type = GET_PARAM(1);
/*useRoi = GET_PARAM(3);*/
//int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
//CV_Assert(devnums > 0);
} }
}; };
...@@ -59,12 +97,9 @@ TEST_P(Blend, Accuracy) ...@@ -59,12 +97,9 @@ TEST_P(Blend, Accuracy)
cv::Mat weights1 = randomMat(size, CV_32F, 0, 1); cv::Mat weights1 = randomMat(size, CV_32F, 0, 1);
cv::Mat weights2 = randomMat(size, CV_32F, 0, 1); cv::Mat weights2 = randomMat(size, CV_32F, 0, 1);
cv::ocl::oclMat gimg1(size, type), gimg2(size, type), gweights1(size, CV_32F), gweights2(size, CV_32F); cv::ocl::oclMat gimg1(img1), gimg2(img2), gweights1(weights1), gweights2(weights2);
cv::ocl::oclMat dst(size, type); cv::ocl::oclMat dst;
gimg1.upload(img1);
gimg2.upload(img2);
gweights1.upload(weights1);
gweights2.upload(weights2);
cv::ocl::blendLinear(gimg1, gimg2, gweights1, gweights2, dst); cv::ocl::blendLinear(gimg1, gimg2, gweights1, gweights2, dst);
cv::Mat result; cv::Mat result;
cv::Mat result_gold; cv::Mat result_gold;
...@@ -74,10 +109,10 @@ TEST_P(Blend, Accuracy) ...@@ -74,10 +109,10 @@ TEST_P(Blend, Accuracy)
else else
blendLinearGold<float>(img1, img2, weights1, weights2, result_gold); blendLinearGold<float>(img1, img2, weights1, weights2, result_gold);
EXPECT_MAT_NEAR(result_gold, result, CV_MAT_DEPTH(type) == CV_8U ? 1.f : 1e-5f, 0); EXPECT_MAT_NEAR(result_gold, result, CV_MAT_DEPTH(type) == CV_8U ? 1.f : 1e-5f);
} }
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Blend, Combine( INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Blend, Combine(
DIFFERENT_SIZES, DIFFERENT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC4)) testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC4))
)); ));
......
...@@ -7,12 +7,16 @@ ...@@ -7,12 +7,16 @@
// copy or use the software. // copy or use the software.
// //
// //
// Intel License Agreement // License Agreement
// For Open Source Computer Vision Library // For Open Source Computer Vision Library
// //
// Copyright (C) 2010-2012, Multicoreware inc., all rights reserved. // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners. // Third party copyrights are property of their respective owners.
// //
// @Authors
// Nathan, liujun@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification, // Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met: // are permitted provided that the following conditions are met:
// //
...@@ -21,12 +25,12 @@ ...@@ -21,12 +25,12 @@
// //
// * Redistribution's in binary form must reproduce the above copyright notice, // * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation // this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution. // and/or other oclMaterials provided with the distribution.
// //
// * The name of Intel Corporation may not be used to endorse or promote products // * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission. // derived from this software without specific prior written permission.
// //
// This software is provided by the copyright holders and contributors "as is" and // This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied // any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed. // warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct, // In no event shall the Intel Corporation or contributors be liable for any direct,
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
...@@ -53,13 +53,12 @@ PARAM_TEST_CASE(Gemm, int, cv::Size, int) ...@@ -53,13 +53,12 @@ PARAM_TEST_CASE(Gemm, int, cv::Size, int)
int type; int type;
cv::Size mat_size; cv::Size mat_size;
int flags; int flags;
//vector<cv::ocl::Info> info;
virtual void SetUp() virtual void SetUp()
{ {
type = GET_PARAM(0); type = GET_PARAM(0);
mat_size = GET_PARAM(1); mat_size = GET_PARAM(1);
flags = GET_PARAM(2); flags = GET_PARAM(2);
//cv::ocl::getDevice(info);
} }
}; };
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册