First gpu code draft. Interface might be significantly changed in future.

Simple implementation of StereoBM_GPU. It is excluded from compilation now.

First gpu code draft. Interface might be significantly changed in future.
Simple implementation of StereoBM_GPU. It is excluded from compilation now.
7f6fb6ef · Anatoly Baksheev · fbc88e2d · 7f6fb6ef · 7f6fb6ef · 7f6fb6ef
15 changed file
--- a/OpenCVConfig.cmake.in
+++ b/OpenCVConfig.cmake.in
@@ -43,7 +43,7 @@ SET(OpenCV_LIB_DIR "@CMAKE_LIB_DIRS_CONFIGCMAKE@")
 # ====================================================================
 # Link libraries: e.g.   opencv_core220.so, opencv_imgproc220d.lib, etc...
 # ====================================================================
-set(OPENCV_LIB_COMPONENTS opencv_core opencv_imgproc opencv_features2d opencv_calib3d opencv_objdetect opencv_video opencv_highgui opencv_ml opencv_legacy opencv_contrib)
+set(OPENCV_LIB_COMPONENTS opencv_core opencv_imgproc opencv_features2d opencv_gpu opencv_calib3d opencv_objdetect opencv_video opencv_highgui opencv_ml opencv_legacy opencv_contrib)
 SET(OpenCV_LIBS "")
 foreach(__CVLIB ${OPENCV_LIB_COMPONENTS})
 	# CMake>=2.6 supports the notation "debug XXd optimized XX"

--- a/modules/CMakeLists.txt
+++ b/modules/CMakeLists.txt
@@ -22,3 +22,6 @@ endif()
 add_subdirectory(video)
 add_subdirectory(haartraining)
 add_subdirectory(traincascade)
+
+
+#add_subdirectory(gpu)
--- a/modules/gpu/CMakeLists.txt
+++ b/modules/gpu/CMakeLists.txt
+
+include(FindCUDA)
+
+if (CUDA_FOUND)
+    include_directories(${CUDA_INCLUDE_DIRS})
+    link_directories(${CUDA_LIBRARIES})
+
+	#CUDA_GENERATED_OUTPUT_DIR (Default CMAKE_CURRENT_BINARY_DIR)
+
+	#====================================================================================
+
+	
+	set(name "gpu")	
+	set(DEPS "opencv_core")
+		
+	project(opencv_${name})
+	add_definitions(-DCVAPI_EXPORTS)
+
+	include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include"
+						"${CMAKE_CURRENT_SOURCE_DIR}/cuda"
+						"${CMAKE_CURRENT_SOURCE_DIR}/src"
+						"${CMAKE_CURRENT_BINARY_DIR}")
+
+	foreach(d ${DEPS})
+		if(${d} MATCHES "opencv_")			
+			string(REPLACE "opencv_" "${CMAKE_CURRENT_SOURCE_DIR}/../" d_dir ${d})
+			include_directories("${d_dir}/include")			
+		endif()
+  	endforeach()		
+
+	file(GLOB lib_srcs "src/*.cpp")
+	file(GLOB lib_int_hdrs "src/*.h*")
+	file(GLOB lib_cuda "cuda/*.cu")
+	file(GLOB lib_cuda_hdrs "cuda/*.h*")	
+	source_group("Src" FILES ${lib_srcs} ${lib_int_hdrs})
+	source_group("Cuda" FILES ${lib_cuda} ${lib_cuda_hdrs})
+
+	file(GLOB lib_hdrs "include/opencv2/${name}/*.h*")		
+	source_group("Include" FILES ${lib_hdrs})
+
+	CUDA_COMPILE(cuda_objs ${lib_cuda})
+	#CUDA_BUILD_CLEAN_TARGET()
+		
+	set(the_target "opencv_${name}")
+	add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${cuda_objs})
+
+	if(PCHSupport_FOUND)
+		set(pch_header ${CMAKE_CURRENT_SOURCE_DIR}/src/precomp.hpp)
+		if(${CMAKE_GENERATOR} MATCHES "Visual*" OR ${CMAKE_GENERATOR} MATCHES "Xcode*")
+			if(${CMAKE_GENERATOR} MATCHES "Visual*")
+				set(${the_target}_pch "src/precomp.cpp")
+			endif()            
+			add_native_precompiled_header(${the_target} ${pch_header})
+		elseif(CMAKE_COMPILER_IS_GNUCXX AND ${CMAKE_GENERATOR} MATCHES ".*Makefiles")
+			add_precompiled_header(${the_target} ${pch_header})
+		endif()
+	endif()
+
+	# For dynamic link numbering convenions
+	set_target_properties(${the_target} PROPERTIES
+		VERSION ${OPENCV_VERSION}
+		SOVERSION ${OPENCV_SOVERSION}
+		OUTPUT_NAME "${the_target}${OPENCV_DLLVERSION}"
+		)
+
+	# Additional target properties
+	set_target_properties(${the_target} PROPERTIES
+		DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
+		ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib/"
+		RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/"
+		INSTALL_NAME_DIR "${CMAKE_INSTALL_PREFIX}/lib"
+		)
+
+	# Add the required libraries for linking:
+	target_link_libraries(${the_target} ${OPENCV_LINKER_LIBS} ${IPP_LIBS} ${DEPS} ${CUDA_LIBRARIES})
+
+	if(MSVC)
+		if(CMAKE_CROSSCOMPILING)
+			set_target_properties(${the_target} PROPERTIES
+				LINK_FLAGS "/NODEFAULTLIB:secchk"
+				)
+		endif()
+		set_target_properties(${the_target} PROPERTIES
+			LINK_FLAGS "/NODEFAULTLIB:libc"
+			)
+	endif()
+
+	# Dependencies of this target:
+	add_dependencies(${the_target} ${DEPS})
+
+	install(TARGETS ${the_target}
+		RUNTIME DESTINATION bin COMPONENT main
+		LIBRARY DESTINATION lib COMPONENT main
+		ARCHIVE DESTINATION lib COMPONENT main)
+
+	install(FILES ${lib_hdrs}
+		DESTINATION include/opencv2/${name}
+		COMPONENT main)
+
+endif()
+
--- a/modules/gpu/cuda/Stereo.cu
+++ b/modules/gpu/cuda/Stereo.cu
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "cuda_shared.hpp"
+
+using namespace cv::gpu;
+
+#define cudaSafeCall
+
+#define ROWSperTHREAD 21     // the number of rows a thread will process
+#define BLOCK_W 128          // the thread block width (464)
+#define N_DISPARITIES 8
+
+#define STEREO_MIND 0                    // The minimum d range to check 
+#define STEREO_DISP_STEP N_DISPARITIES   // the d step, must be <= 1 to avoid aliasing
+#define RADIUS 9                         // Kernel Radius 5V & 5H = 11x11 kernel
+
+#define WINSZ (2 * RADIUS + 1)
+#define N_DIRTY_PIXELS (2 * RADIUS)
+#define COL_SSD_SIZE (BLOCK_W + N_DIRTY_PIXELS)
+#define SHARED_MEM_SIZE (COL_SSD_SIZE) // amount of shared memory used
+
+__constant__ unsigned int* cminSSDImage;
+__constant__ size_t cminSSD_step;
+__constant__ int cwidth;
+__constant__ int cheight;
+
+namespace device_code 
+{
+
+__device__ int SQ(int a)
+{
+    return a * a;    
+}
+
+__device__ unsigned int CalcSSD(unsigned int *col_ssd_cache, unsigned int *col_ssd)
+{
+    unsigned int cache = 0;
+    unsigned int cache2 = 0;
+
+    for(int i = 1; i <= RADIUS; i++)
+        cache += col_ssd[i];
+    
+    col_ssd_cache[0] = cache;
+
+    __syncthreads();
+
+    if (threadIdx.x < BLOCK_W - RADIUS)
+        cache2 = col_ssd_cache[RADIUS];
+    else
+        for(int i = RADIUS + 1; i < WINSZ; i++)
+            cache2 += col_ssd[i];
+
+    return col_ssd[0] + cache + cache2;
+}
+
+__device__ uint2 MinSSD(unsigned int *col_ssd_cache, unsigned int *col_ssd)
+{
+    unsigned int ssd[N_DISPARITIES];
+
+    ssd[0] = CalcSSD(col_ssd_cache, col_ssd + 0 * SHARED_MEM_SIZE);
+    ssd[1] = CalcSSD(col_ssd_cache, col_ssd + 1 * SHARED_MEM_SIZE);
+    ssd[2] = CalcSSD(col_ssd_cache, col_ssd + 2 * SHARED_MEM_SIZE);
+    ssd[3] = CalcSSD(col_ssd_cache, col_ssd + 3 * SHARED_MEM_SIZE);
+    ssd[4] = CalcSSD(col_ssd_cache, col_ssd + 4 * SHARED_MEM_SIZE);
+    ssd[5] = CalcSSD(col_ssd_cache, col_ssd + 5 * SHARED_MEM_SIZE);
+    ssd[6] = CalcSSD(col_ssd_cache, col_ssd + 6 * SHARED_MEM_SIZE);
+    ssd[7] = CalcSSD(col_ssd_cache, col_ssd + 7 * SHARED_MEM_SIZE);
+
+    int mssd = min(min(min(ssd[0], ssd[1]), min(ssd[4], ssd[5])), min(min(ssd[2], ssd[3]), min(ssd[6], ssd[7])));
+
+    int bestIdx = 0;
+    for (int i = 0; i < N_DISPARITIES; i++)
+    {
+        if (mssd == ssd[i])
+            bestIdx = i;
+    }
+
+    return make_uint2(mssd, bestIdx);
+}
+
+__device__ void StepDown(int idx1, int idx2, unsigned char* imageL, unsigned char* imageR, int d, unsigned int *col_ssd)
+{
+    unsigned char leftPixel1;
+    unsigned char leftPixel2;
+    unsigned char rightPixel1[8];
+    unsigned char rightPixel2[8];
+    unsigned int diff1, diff2;
+
+    leftPixel1 = imageL[idx1];
+    leftPixel2 = imageL[idx2];
+
+    idx1 = idx1 - d;
+    idx2 = idx2 - d;
+
+    rightPixel1[7] = imageR[idx1 - 7];
+    rightPixel1[0] = imageR[idx1 - 0];
+    rightPixel1[1] = imageR[idx1 - 1];
+    rightPixel1[2] = imageR[idx1 - 2];
+    rightPixel1[3] = imageR[idx1 - 3];
+    rightPixel1[4] = imageR[idx1 - 4];
+    rightPixel1[5] = imageR[idx1 - 5];
+    rightPixel1[6] = imageR[idx1 - 6];
+    
+    rightPixel2[7] = imageR[idx2 - 7];
+    rightPixel2[0] = imageR[idx2 - 0];
+    rightPixel2[1] = imageR[idx2 - 1];
+    rightPixel2[2] = imageR[idx2 - 2];
+    rightPixel2[3] = imageR[idx2 - 3];
+    rightPixel2[4] = imageR[idx2 - 4];
+    rightPixel2[5] = imageR[idx2 - 5];
+    rightPixel2[6] = imageR[idx2 - 6];
+    
+
+    diff1 = leftPixel1 - rightPixel1[0];                
+    diff2 = leftPixel2 - rightPixel2[0];    
+    col_ssd[0 * SHARED_MEM_SIZE] += SQ(diff2) - SQ(diff1);
+
+    diff1 = leftPixel1 - rightPixel1[1];
+    diff2 = leftPixel2 - rightPixel2[1];
+    col_ssd[1 * SHARED_MEM_SIZE] += SQ(diff2) - SQ(diff1);
+     
+    diff1 = leftPixel1 - rightPixel1[2];
+    diff2 = leftPixel2 - rightPixel2[2];
+    col_ssd[2 * SHARED_MEM_SIZE] += SQ(diff2) - SQ(diff1);
+
+    diff1 = leftPixel1 - rightPixel1[3];
+    diff2 = leftPixel2 - rightPixel2[3];
+    col_ssd[3 * SHARED_MEM_SIZE] += SQ(diff2) - SQ(diff1);
+    
+    diff1 = leftPixel1 - rightPixel1[4]; 
+    diff2 = leftPixel2 - rightPixel2[4];               
+    col_ssd[4 * SHARED_MEM_SIZE] += SQ(diff2) - SQ(diff1);
+    
+    diff1 = leftPixel1 - rightPixel1[5];
+    diff2 = leftPixel2 - rightPixel2[5];
+    col_ssd[5 * SHARED_MEM_SIZE] += SQ(diff2) - SQ(diff1);
+    
+    diff1 = leftPixel1 - rightPixel1[6];
+    diff2 = leftPixel2 - rightPixel2[6];
+    col_ssd[6 * SHARED_MEM_SIZE] += SQ(diff2) - SQ(diff1);
+        
+    diff1 = leftPixel1 - rightPixel1[7];
+    diff2 = leftPixel2 - rightPixel2[7];
+    col_ssd[7 * SHARED_MEM_SIZE] += SQ(diff2) - SQ(diff1);
+}
+
+__device__ void InitColSSD(int x_tex, int y_tex, int im_pitch, unsigned char* imageL, unsigned char* imageR, int d, unsigned int *col_ssd)
+{
+    unsigned char leftPixel1;
+    int idx;
+    unsigned int diffa[] = {0, 0, 0, 0, 0, 0, 0, 0};
+
+    for(int i = 0; i < WINSZ; i++)
+    {
+        idx = y_tex * im_pitch + x_tex;
+        leftPixel1 = imageL[idx];
+        idx = idx - d;
+
+        diffa[0] += SQ(leftPixel1 - imageR[idx - 0]);        
+        diffa[1] += SQ(leftPixel1 - imageR[idx - 1]);
+        diffa[2] += SQ(leftPixel1 - imageR[idx - 2]);
+        diffa[3] += SQ(leftPixel1 - imageR[idx - 3]);
+        diffa[4] += SQ(leftPixel1 - imageR[idx - 4]);
+        diffa[5] += SQ(leftPixel1 - imageR[idx - 5]);
+        diffa[6] += SQ(leftPixel1 - imageR[idx - 6]);
+        diffa[7] += SQ(leftPixel1 - imageR[idx - 7]);
+
+        y_tex += 1;
+    }
+
+    col_ssd[0 * SHARED_MEM_SIZE] = diffa[0];
+    col_ssd[1 * SHARED_MEM_SIZE] = diffa[1];
+    col_ssd[2 * SHARED_MEM_SIZE] = diffa[2];
+    col_ssd[3 * SHARED_MEM_SIZE] = diffa[3];
+    col_ssd[4 * SHARED_MEM_SIZE] = diffa[4];
+    col_ssd[5 * SHARED_MEM_SIZE] = diffa[5];
+    col_ssd[6 * SHARED_MEM_SIZE] = diffa[6];
+    col_ssd[7 * SHARED_MEM_SIZE] = diffa[7];
+}
+
+extern "C" __global__ void stereoKernel(uchar *left, uchar *right, size_t img_step, uchar* disp, size_t disp_pitch, int maxdisp)
+{
+    extern __shared__ unsigned int col_ssd_cache[];
+    unsigned int *col_ssd = col_ssd_cache + BLOCK_W + threadIdx.x;    
+    unsigned int *col_ssd_extra = threadIdx.x < N_DIRTY_PIXELS ? col_ssd + BLOCK_W : 0;
+
+    //#define X (blockIdx.x * BLOCK_W + threadIdx.x + STEREO_MAXD)
+    int X = (blockIdx.x * BLOCK_W + threadIdx.x + maxdisp);
+    //#define Y (__mul24(blockIdx.y, ROWSperTHREAD) + RADIUS)
+    #define Y (blockIdx.y * ROWSperTHREAD + RADIUS)
+    //int Y = blockIdx.y * ROWSperTHREAD + RADIUS;
+
+    unsigned int* minSSDImage = cminSSDImage + X + Y * cminSSD_step;
+    uchar* disparImage = disp + X + Y * disp_pitch;
+ /*   if (X < cwidth)
+    {        
+        unsigned int *minSSDImage_end = minSSDImage + min(ROWSperTHREAD, cheight - Y) * minssd_step;
+        for(uint *ptr = minSSDImage; ptr != minSSDImage_end; ptr += minssd_step )
+            *ptr = 0xFFFFFFFF;        
+    }*/
+    int end_row = min(ROWSperTHREAD, cheight - Y);
+    int y_tex;    
+    int x_tex = X - RADIUS;
+    for(int d = STEREO_MIND; d < maxdisp; d += STEREO_DISP_STEP)
+    {
+        y_tex = Y - RADIUS;
+
+        InitColSSD(x_tex, y_tex, img_step, left, right, d, col_ssd); 
+
+        if (col_ssd_extra > 0)
+            InitColSSD(x_tex + BLOCK_W, y_tex, img_step, left, right, d, col_ssd_extra);
+
+        __syncthreads(); //before MinSSD function
+
+        if (X < cwidth - RADIUS && Y < cheight - RADIUS)
+        {
+            uint2 minSSD = MinSSD(col_ssd_cache + threadIdx.x, col_ssd);
+            if (minSSD.x < minSSDImage[0])
+            {
+                disparImage[0] = (unsigned char)(d + minSSD.y);
+                minSSDImage[0] = minSSD.x;
+            }
+        }
+
+        for(int row = 1; row < end_row; row++)
+        {
+            int idx1 = y_tex * img_step + x_tex;
+            int idx2 = (y_tex + WINSZ) * img_step + x_tex;
+
+            __syncthreads();
+
+            StepDown(idx1, idx2, left, right, d, col_ssd);
+
+            if (col_ssd_extra)
+                StepDown(idx1, idx2, left + BLOCK_W, right + BLOCK_W, d, col_ssd_extra);
+
+            y_tex += 1;
+ 
+            __syncthreads(); //before MinSSD function
+
+            if (X < cwidth - RADIUS && row < cheight - RADIUS - Y)
+            {       
+                int idx = row * cminSSD_step;         
+                uint2 minSSD = MinSSD(col_ssd_cache + threadIdx.x, col_ssd);  
+                if (minSSD.x < minSSDImage[idx])
+                {
+                    disparImage[disp_pitch * row] = (unsigned char)(d + minSSD.y);
+                    minSSDImage[idx] = minSSD.x;
+                }
+            }
+        } // for row loop
+    } // for d loop
+}
+
+}
+
+extern "C" void cv::gpu::impl::stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, DevMem2D& disp, int maxdisp, DevMem2D_<unsigned int>& minSSD_buf)
+{   
+    //cudaSafeCall( cudaFuncSetCacheConfig(&stereoKernel, cudaFuncCachePreferL1) );
+    //cudaSafeCall( cudaFuncSetCacheConfig(&stereoKernel, cudaFuncCachePreferShared) );
+    
+    size_t smem_size = (BLOCK_W + N_DISPARITIES * SHARED_MEM_SIZE) * sizeof(unsigned int);      
+
+    cudaSafeCall( cudaMemset2D(disp.ptr, disp.step, 0, disp.cols, disp. rows) );
+    cudaSafeCall( cudaMemset2D(minSSD_buf.ptr, minSSD_buf.step, 0xFF, minSSD_buf.cols * minSSD_buf.elemSize(), disp. rows) );        
+
+    dim3 grid(1,1,1);
+    dim3 threads(BLOCK_W, 1, 1);    
+    
+    grid.x = divUp(left.cols - maxdisp - 2 * RADIUS, BLOCK_W);
+    grid.y = divUp(left.rows - 2 * RADIUS, ROWSperTHREAD);
+    
+    cudaSafeCall( cudaMemcpyToSymbol(  cwidth, &left.cols, sizeof (left.cols) ) );
+    cudaSafeCall( cudaMemcpyToSymbol( cheight, &left.rows, sizeof (left.rows) ) );
+    cudaSafeCall( cudaMemcpyToSymbol( cminSSDImage,  &minSSD_buf.ptr, sizeof (minSSD_buf.ptr) ) );
+
+    size_t minssd_step = minSSD_buf.step/minSSD_buf.elemSize();
+    cudaSafeCall( cudaMemcpyToSymbol( cminSSD_step,  &minssd_step, sizeof (minssd_step) ) );
+         
+    device_code::stereoKernel<<<grid, threads, smem_size>>>(left.ptr, right.ptr, left.step, disp.ptr, disp.step, maxdisp);
+    cudaSafeCall( cudaThreadSynchronize() );
+}
\ No newline at end of file
--- a/modules/gpu/cuda/cuda_shared.hpp
+++ b/modules/gpu/cuda/cuda_shared.hpp
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_CUDA_SHARED_HPP__
+#define __OPENCV_CUDA_SHARED_HPP__
+
+#include "opencv2/gpu/devmem2d.hpp"
+
+namespace cv
+{
+    namespace gpu
+    {   
+        typedef unsigned char uchar;
+        typedef unsigned short ushort;
+        typedef unsigned int uint;
+
+        namespace impl
+        {   
+            static inline int divUp(int a, int b) { return (a % b == 0) ? a/b : a/b + 1; }
+
+            extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, DevMem2D& disp, int maxdisp, DevMem2D_<uint>& minSSD_buf);
+        }
+    }
+}
+
+#endif /* __OPENCV_CUDA_SHARED_HPP__ */
--- a/modules/gpu/include/opencv2/gpu/devmem2d.hpp
+++ b/modules/gpu/include/opencv2/gpu/devmem2d.hpp
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other GpuMaterials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_GPU_DEVMEM2D_HPP__
+#define __OPENCV_GPU_DEVMEM2D_HPP__
+
+namespace cv
+{    
+    namespace gpu
+    {
+        // Simple lightweight structure that encapsulates image ptr on device, its pitch and its sizes.
+        // It is intended to pass to nvcc-compiled code.
+
+        template<typename T = unsigned char>
+        struct DevMem2D_
+        {
+            enum { elem_size = sizeof(T) };
+
+            int cols;
+            int rows;
+            T* ptr;
+            size_t step;
+
+            DevMem2D_(int rows_, int cols_, T *ptr_, size_t step_)
+                : cols(cols_), rows(rows_), ptr(ptr_), step(step_) {}
+
+            size_t elemSize() const { return elem_size; }
+        };
+
+        typedef DevMem2D_<> DevMem2D;
+    }
+}
+
+#endif /* __OPENCV_GPU_DEVMEM2D_HPP__ */
\ No newline at end of file
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other GpuMaterials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_GPU_HPP__
+#define __OPENCV_GPU_HPP__
+
+#include "opencv2/core/core.hpp"
+#include "opencv2/gpu/devmem2d.hpp"
+
+namespace cv
+{
+    namespace gpu
+    {   
+        //////////////////////////////// Initialization ////////////////////////
+                 
+        CV_EXPORTS int getCudaEnabledDeviceCount();
+        CV_EXPORTS string getDeviceName(int device);
+        CV_EXPORTS void setDevice(int device);        
+
+        enum { CV_GPU_CC_10, CV_GPU_CC_11, CV_GPU_CC_12, CV_GPU_CC_13, CV_GPU_CC_20 };
+
+        CV_EXPORTS int getComputeCapability(int device);
+        CV_EXPORTS int getNumberOfSMs(int device);
+ 
+        //////////////////////////////// GpuMat ////////////////////////////////
+
+        class CV_EXPORTS GpuMat
+        {
+        public:
+            //! default constructor
+            GpuMat();
+            //! constructs GpuMatrix of the specified size and type
+            // (_type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.)
+            GpuMat(int _rows, int _cols, int _type);
+            GpuMat(Size _size, int _type);
+            //! constucts GpuMatrix and fills it with the specified value _s.
+            GpuMat(int _rows, int _cols, int _type, const Scalar& _s);
+            GpuMat(Size _size, int _type, const Scalar& _s);
+            //! copy constructor
+            GpuMat(const GpuMat& m);
+            
+            //! constructor for GpuMatrix headers pointing to user-allocated data
+            GpuMat(int _rows, int _cols, int _type, void* _data, size_t _step = Mat::AUTO_STEP);
+            GpuMat(Size _size, int _type, void* _data, size_t _step = Mat::AUTO_STEP);
+
+            //! creates a matrix header for a part of the bigger matrix
+            GpuMat(const GpuMat& m, const Range& rowRange, const Range& colRange);
+            GpuMat(const GpuMat& m, const Rect& roi);
+                                    
+            //! builds GpuMat from Mat. Perfom blocking upload to device.
+            GpuMat (const Mat& m);
+
+            //! destructor - calls release()
+            ~GpuMat();
+
+            //! assignment operators
+            GpuMat& operator = (const GpuMat& m);
+            //! assignment operator. Perfom blocking upload to device.
+            GpuMat& operator = (const Mat& m);   
+
+            //! returns lightweight DevMem2D_ structure for passing to nvcc-compiled code.
+            // Contains just image size, data ptr and step.
+            template <class T> operator DevMem2D_<T>() const;
+
+            //! pefroms blocking upload data to GpuMat. .
+            void upload(const cv::Mat& m);
+
+            //! Downloads data from device to host memory. Blocking calls.
+            operator Mat() const;
+            void download(cv::Mat& m) const;       
+
+            //! returns a new GpuMatrix header for the specified row
+            GpuMat row(int y) const;
+            //! returns a new GpuMatrix header for the specified column
+            GpuMat col(int x) const;
+            //! ... for the specified row span
+            GpuMat rowRange(int startrow, int endrow) const;
+            GpuMat rowRange(const Range& r) const;
+            //! ... for the specified column span
+            GpuMat colRange(int startcol, int endcol) const;
+            GpuMat colRange(const Range& r) const;
+
+            //! returns deep copy of the GpuMatrix, i.e. the data is copied
+            GpuMat clone() const;
+            //! copies the GpuMatrix content to "m".
+            // It calls m.create(this->size(), this->type()).
+            void copyTo( GpuMat& m ) const;
+            //! copies those GpuMatrix elements to "m" that are marked with non-zero mask elements.
+            void copyTo( GpuMat& m, const GpuMat& mask ) const;
+            //! converts GpuMatrix to another datatype with optional scalng. See cvConvertScale.
+            void convertTo( GpuMat& m, int rtype, double alpha=1, double beta=0 ) const;
+
+            void assignTo( GpuMat& m, int type=-1 ) const;
+
+            //! sets every GpuMatrix element to s
+            GpuMat& operator = (const Scalar& s);
+            //! sets some of the GpuMatrix elements to s, according to the mask
+            GpuMat& setTo(const Scalar& s, const GpuMat& mask=GpuMat());
+            //! creates alternative GpuMatrix header for the same data, with different
+            // number of channels and/or different number of rows. see cvReshape.
+            GpuMat reshape(int _cn, int _rows=0) const;
+
+            //! allocates new GpuMatrix data unless the GpuMatrix already has specified size and type.
+            // previous data is unreferenced if needed.
+            void create(int _rows, int _cols, int _type);
+            void create(Size _size, int _type);
+            //! decreases reference counter;
+            // deallocate the data when reference counter reaches 0.
+            void release();
+
+            //! swaps with other smart pointer
+            void swap(GpuMat& mat);
+
+            //! locates GpuMatrix header within a parent GpuMatrix. See below
+            void locateROI( Size& wholeSize, Point& ofs ) const;
+            //! moves/resizes the current GpuMatrix ROI inside the parent GpuMatrix.
+            GpuMat& adjustROI( int dtop, int dbottom, int dleft, int dright );
+            //! extracts a rectangular sub-GpuMatrix
+            // (this is a generalized form of row, rowRange etc.)
+            GpuMat operator()( Range rowRange, Range colRange ) const;
+            GpuMat operator()( const Rect& roi ) const;  
+
+            //! returns true iff the GpuMatrix data is continuous
+            // (i.e. when there are no gaps between successive rows).
+            // similar to CV_IS_GpuMat_CONT(cvGpuMat->type)
+            bool isContinuous() const;
+            //! returns element size in bytes,
+            // similar to CV_ELEM_SIZE(cvMat->type)
+            size_t elemSize() const;
+            //! returns the size of element channel in bytes.
+            size_t elemSize1() const;
+            //! returns element type, similar to CV_MAT_TYPE(cvMat->type)
+            int type() const;
+            //! returns element type, similar to CV_MAT_DEPTH(cvMat->type)
+            int depth() const;
+            //! returns element type, similar to CV_MAT_CN(cvMat->type)
+            int channels() const;
+            //! returns step/elemSize1()
+            size_t step1() const;
+            //! returns GpuMatrix size:
+            // width == number of columns, height == number of rows
+            Size size() const;
+            //! returns true if GpuMatrix data is NULL
+            bool empty() const;
+
+            //! returns pointer to y-th row
+            uchar* ptr(int y=0);
+            const uchar* ptr(int y=0) const;
+
+            //! template version of the above method
+            template<typename _Tp> _Tp* ptr(int y=0);
+            template<typename _Tp> const _Tp* ptr(int y=0) const;
+
+            /*! includes several bit-fields:
+            - the magic signature
+            - continuity flag
+            - depth
+            - number of channels
+            */
+            int flags;
+            //! the number of rows and columns
+            int rows, cols;
+            //! a distance between successive rows in bytes; includes the gap if any
+            size_t step;
+            //! pointer to the data
+            uchar* data;
+
+            //! pointer to the reference counter;
+            // when GpuMatrix points to user-allocated data, the pointer is NULL
+            int* refcount;
+
+            //! helper fields used in locateROI and adjustROI
+            uchar* datastart;
+            uchar* dataend;
+        };
+
+        //////////////////////////////// CudaStream ////////////////////////////////
+
+        class CudaStream
+        {
+        public:
+            CudaStream(); 
+            ~CudaStream();
+
+            bool queryIfComplete();
+            void waitForCompletion(); 
+
+            //calls cudaMemcpyAsync
+            void enqueueDownload(const GpuMat& src, Mat& dst);
+            void enqueueUpload(const Mat& src, GpuMat& dst);
+            void enqueueCopy(const GpuMat& src, GpuMat& dst);
+
+            // calls cudaMemset2D asynchronous for single channel. Invoke kernel for some multichannel.
+            void enqueueMemSet(const GpuMat& src, Scalar val);
+
+            // invoke kernel asynchronous because of mask
+            void enqueueMemSet(const GpuMat& src, Scalar val, const GpuMat& mask);
+
+            // converts matrix type, ex from float to uchar depending on type
+            void enqueueConvert(const GpuMat& src, GpuMat& dst, int type); 
+
+            //CUstream_st& getStream();
+        private:
+            void *impl;
+            
+            CudaStream(const CudaStream&); 
+            CudaStream& operator=(const CudaStream&);
+        };
+
+        //////////////////////////////// StereoBM_GPU ////////////////////////////////
+
+        class CV_EXPORTS StereoBM_GPU
+        {
+        public:
+            enum { BASIC_PRESET=0, PREFILTER_XSOBEL = 1 };
+
+            //! the default constructor
+            StereoBM_GPU();
+            //! the full constructor taking the camera-specific preset, number of disparities and the SAD window size
+            //! ndisparities should be multiple of 8. SSD WindowsSize is fixed to 19 now
+            StereoBM_GPU(int preset, int ndisparities=0);
+            //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair
+            //! Output disparity has CV_8U type.
+            void operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity) const;            
+        private:
+            mutable GpuMat minSSD;
+            int preset;
+            int ndisp;
+        };
+    }
+}
+
+
+
+#include "opencv2/gpu/gpumat.hpp"
+
+#endif /* __OPENCV_GPU_HPP__ */
\ No newline at end of file
--- a/modules/gpu/include/opencv2/gpu/gpumat.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpumat.hpp
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other GpuMaterials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_GPU_MATRIX_OPERATIONS_HPP__
+#define __OPENCV_GPU_MATRIX_OPERATIONS_HPP__
+
+
+namespace cv
+{
+
+namespace gpu
+{
+
+//////////////////////////////// GpuMat ////////////////////////////////
+
+inline GpuMat::GpuMat()
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) {}
+
+inline GpuMat::GpuMat(int _rows, int _cols, int _type)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
+{
+    if( _rows > 0 && _cols > 0 )
+        create( _rows, _cols, _type );
+}
+
+inline GpuMat::GpuMat(Size _size, int _type)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
+{
+    if( _size.height > 0 && _size.width > 0 )
+        create( _size.height, _size.width, _type );
+}
+
+inline GpuMat::GpuMat(int _rows, int _cols, int _type, const Scalar& _s)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
+{
+    if(_rows > 0 && _cols > 0)
+    {
+        create(_rows, _cols, _type);
+        *this = _s;
+    }
+}
+   
+inline GpuMat::GpuMat(Size _size, int _type, const Scalar& _s)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
+{
+    if( _size.height > 0 && _size.width > 0 )
+    {
+        create( _size.height, _size.width, _type );
+        *this = _s;
+    }
+}    
+
+inline GpuMat::GpuMat(const GpuMat& m)
+    : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend)
+{
+    if( refcount )
+        CV_XADD(refcount, 1);
+}
+
+inline GpuMat::GpuMat(int _rows, int _cols, int _type, void* _data, size_t _step)
+    : flags(Mat::MAGIC_VAL + (_type & TYPE_MASK)), rows(_rows), cols(_cols), step(_step), data((uchar*)_data), refcount(0),
+    datastart((uchar*)_data), dataend((uchar*)_data)
+{
+    size_t minstep = cols*elemSize();
+    if( step == Mat::AUTO_STEP )
+    {
+        step = minstep;
+        flags |= Mat::CONTINUOUS_FLAG;
+    }
+    else
+    {
+        if( rows == 1 ) step = minstep;
+        CV_DbgAssert( step >= minstep );
+        flags |= step == minstep ? Mat::CONTINUOUS_FLAG : 0;
+    }
+    dataend += step*(rows-1) + minstep;
+}
+
+inline GpuMat::GpuMat(Size _size, int _type, void* _data, size_t _step)
+    : flags(Mat::MAGIC_VAL + (_type & TYPE_MASK)), rows(_size.height), cols(_size.width),
+    step(_step), data((uchar*)_data), refcount(0),
+    datastart((uchar*)_data), dataend((uchar*)_data)
+{
+    size_t minstep = cols*elemSize();
+    if( step == Mat::AUTO_STEP )
+    {
+        step = minstep;
+        flags |= Mat::CONTINUOUS_FLAG;
+    }
+    else
+    {
+        if( rows == 1 ) step = minstep;
+        CV_DbgAssert( step >= minstep );
+        flags |= step == minstep ? Mat::CONTINUOUS_FLAG : 0;
+    }
+    dataend += step*(rows-1) + minstep;
+}
+
+
+inline GpuMat::GpuMat(const GpuMat& m, const Range& rowRange, const Range& colRange)
+{
+    flags = m.flags;
+    step = m.step; refcount = m.refcount;
+    data = m.data; datastart = m.datastart; dataend = m.dataend;
+
+    if( rowRange == Range::all() )
+        rows = m.rows;
+    else
+    {
+        CV_Assert( 0 <= rowRange.start && rowRange.start <= rowRange.end && rowRange.end <= m.rows );
+        rows = rowRange.size();
+        data += step*rowRange.start;
+    }
+
+    if( colRange == Range::all() )
+        cols = m.cols;
+    else
+    {
+        CV_Assert( 0 <= colRange.start && colRange.start <= colRange.end && colRange.end <= m.cols );
+        cols = colRange.size();
+        data += colRange.start*elemSize();
+        flags &= cols < m.cols ? ~Mat::CONTINUOUS_FLAG : -1;
+    }
+
+    if( rows == 1 )
+        flags |= Mat::CONTINUOUS_FLAG;
+
+    if( refcount )
+        CV_XADD(refcount, 1);
+    if( rows <= 0 || cols <= 0 )
+        rows = cols = 0;
+}
+
+inline GpuMat::GpuMat(const GpuMat& m, const Rect& roi)
+    : flags(m.flags), rows(roi.height), cols(roi.width),
+    step(m.step), data(m.data + roi.y*step), refcount(m.refcount),
+    datastart(m.datastart), dataend(m.dataend)
+{
+    flags &= roi.width < m.cols ? ~Mat::CONTINUOUS_FLAG : -1;
+    data += roi.x*elemSize();
+    CV_Assert( 0 <= roi.x && 0 <= roi.width && roi.x + roi.width <= m.cols &&
+        0 <= roi.y && 0 <= roi.height && roi.y + roi.height <= m.rows );
+    if( refcount )
+        CV_XADD(refcount, 1);
+    if( rows <= 0 || cols <= 0 )
+        rows = cols = 0;
+}
+    
+inline GpuMat::GpuMat(const Mat& m) 
+: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) { upload(m); }
+
+inline GpuMat::~GpuMat() { release(); }
+
+inline GpuMat& GpuMat::operator = (const GpuMat& m)
+{
+    if( this != &m )
+    {
+        if( m.refcount )
+            CV_XADD(m.refcount, 1);
+        release();
+        flags = m.flags;
+        rows = m.rows; cols = m.cols;
+        step = m.step; data = m.data;
+        datastart = m.datastart; dataend = m.dataend;
+        refcount = m.refcount;
+    }
+    return *this;
+}
+
+inline GpuMat& GpuMat::operator = (const Mat& m) { upload(m); return *this; }
+
+template <class T> inline GpuMat::operator DevMem2D_<T>() const { return DevMem2D_<T>(rows, cols, (T*)data, step); }
+
+//CPP: void GpuMat::upload(const Mat& m);
+
+ inline GpuMat::operator Mat() const
+ {
+     Mat m;
+     download(m);
+     return m;
+ }
+
+//CPP void GpuMat::download(cv::Mat& m) const; 
+
+inline GpuMat GpuMat::row(int y) const { return GpuMat(*this, Range(y, y+1), Range::all()); }
+inline GpuMat GpuMat::col(int x) const { return GpuMat(*this, Range::all(), Range(x, x+1)); }
+inline GpuMat GpuMat::rowRange(int startrow, int endrow) const { return GpuMat(*this, Range(startrow, endrow), Range::all()); }
+inline GpuMat GpuMat::rowRange(const Range& r) const { return GpuMat(*this, r, Range::all()); }
+inline GpuMat GpuMat::colRange(int startcol, int endcol) const { return GpuMat(*this, Range::all(), Range(startcol, endcol)); }
+inline GpuMat GpuMat::colRange(const Range& r) const { return GpuMat(*this, Range::all(), r); }
+
+inline GpuMat GpuMat::clone() const
+{
+    GpuMat m;
+    copyTo(m);
+    return m;
+}
+
+//CPP void GpuMat::copyTo( GpuMat& m ) const;
+//CPP void GpuMat::copyTo( GpuMat& m, const GpuMat& mask  ) const;
+//CPP void GpuMat::convertTo( GpuMat& m, int rtype, double alpha=1, double beta=0 ) const;
+
+inline void GpuMat::assignTo( GpuMat& m, int type ) const
+{
+    if( type < 0 )
+        m = *this;
+    else
+        convertTo(m, type);
+}
+
+//CPP GpuMat& GpuMat::operator = (const Scalar& s);
+//CPP GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask=GpuMat());
+
+//CPP GpuMat GpuMat::reshape(int _cn, int _rows=0) const;
+
+//CPP void GpuMat::create(int _rows, int _cols, int _type);
+inline void GpuMat::create(Size _size, int _type) { create(_size.height, _size.width, _type); }
+
+//CPP void GpuMat::release();
+
+inline void GpuMat::swap(GpuMat& b) 
+{
+    std::swap( flags, b.flags );
+    std::swap( rows, b.rows ); std::swap( cols, b.cols );
+    std::swap( step, b.step ); std::swap( data, b.data );
+    std::swap( datastart, b.datastart );
+    std::swap( dataend, b.dataend );
+    std::swap( refcount, b.refcount );
+}
+
+inline void GpuMat::locateROI( Size& wholeSize, Point& ofs ) const
+{
+    size_t esz = elemSize(), minstep;
+    ptrdiff_t delta1 = data - datastart, delta2 = dataend - datastart;
+    CV_DbgAssert( step > 0 );
+    if( delta1 == 0 )
+        ofs.x = ofs.y = 0;
+    else
+    {
+        ofs.y = (int)(delta1/step);
+        ofs.x = (int)((delta1 - step*ofs.y)/esz);
+        CV_DbgAssert( data == datastart + ofs.y*step + ofs.x*esz );
+    }
+    minstep = (ofs.x + cols)*esz;
+    wholeSize.height = (int)((delta2 - minstep)/step + 1);
+    wholeSize.height = std::max(wholeSize.height, ofs.y + rows);
+    wholeSize.width = (int)((delta2 - step*(wholeSize.height-1))/esz);
+    wholeSize.width = std::max(wholeSize.width, ofs.x + cols);
+}
+
+inline GpuMat& GpuMat::adjustROI( int dtop, int dbottom, int dleft, int dright )
+{
+    Size wholeSize; Point ofs;
+    size_t esz = elemSize();
+    locateROI( wholeSize, ofs );
+    int row1 = std::max(ofs.y - dtop, 0), row2 = std::min(ofs.y + rows + dbottom, wholeSize.height);
+    int col1 = std::max(ofs.x - dleft, 0), col2 = std::min(ofs.x + cols + dright, wholeSize.width);
+    data += (row1 - ofs.y)*step + (col1 - ofs.x)*esz;
+    rows = row2 - row1; cols = col2 - col1;
+    if( esz*cols == step || rows == 1 )
+        flags |= Mat::CONTINUOUS_FLAG;
+    else
+        flags &= ~Mat::CONTINUOUS_FLAG;
+    return *this;
+}
+
+inline GpuMat GpuMat::operator()( Range rowRange, Range colRange ) const { return GpuMat(*this, rowRange, colRange); }
+inline GpuMat GpuMat::operator()( const Rect& roi ) const { return GpuMat(*this, roi); }
+
+inline bool GpuMat::isContinuous() const { return (flags & Mat::CONTINUOUS_FLAG) != 0; }
+inline size_t GpuMat::elemSize() const { return CV_ELEM_SIZE(flags); }
+inline size_t GpuMat::elemSize1() const { return CV_ELEM_SIZE1(flags); }
+inline int GpuMat::type() const { return CV_MAT_TYPE(flags); }
+inline int GpuMat::depth() const { return CV_MAT_DEPTH(flags); }
+inline int GpuMat::channels() const { return CV_MAT_CN(flags); }
+inline size_t GpuMat::step1() const { return step/elemSize1(); }
+inline Size GpuMat::size() const { return Size(cols, rows); }
+inline bool GpuMat::empty() const { return data == 0; }
+
+inline uchar* GpuMat::ptr(int y)
+{
+    CV_DbgAssert( (unsigned)y < (unsigned)rows );
+    return data + step*y;
+}
+
+inline const uchar* GpuMat::ptr(int y) const
+{
+    CV_DbgAssert( (unsigned)y < (unsigned)rows );
+    return data + step*y;
+}
+
+template<typename _Tp> inline _Tp* GpuMat::ptr(int y)
+{
+    CV_DbgAssert( (unsigned)y < (unsigned)rows );
+    return (_Tp*)(data + step*y);
+}
+
+template<typename _Tp> inline const _Tp* GpuMat::ptr(int y) const
+{
+    CV_DbgAssert( (unsigned)y < (unsigned)rows );
+    return (const _Tp*)(data + step*y);
+}
+
+static inline void swap( GpuMat& a, GpuMat& b ) { a.swap(b); }
+
+
+} /* end of namespace gpu */
+
+} /* end of namespace cv */
+
+#endif /* __OPENCV_GPU_MATRIX_OPERATIONS_HPP__ */
\ No newline at end of file
--- a/modules/gpu/include/opencv2/gpu/matpl.hpp
+++ b/modules/gpu/include/opencv2/gpu/matpl.hpp
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other GpuMaterials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_GPU_MATPL_HPP__
+#define __OPENCV_GPU_MATPL_HPP__
+
+#include "opencv2/core/core.hpp"
+
+namespace cv
+{
+    namespace gpu
+    {   
+
+        //////////////////////////////// MatPL ////////////////////////////////
+
+        //class CV_EXPORTS MatPL : private Mat
+        //{
+        //public:            
+        //    MatPL() {}            
+        //    MatPL(int _rows, int _cols, int _type) : Mat(_rows, _cols, _type) {}
+        //    MatPL(Size _size, int _type) : Mat(_size, _type) {}
+        //    
+        //    Mat(int _rows, int _cols, int _type, const Scalar& _s) : Mat
+        //    MatPL(Size _size, int _type, const Scalar& _s);
+        //    //! copy constructor
+        //    MatPL(const Mat& m);
+        //    //! constructor for matrix headers pointing to user-allocated data
+        //    MatPL(int _rows, int _cols, int _type, void* _data, size_t _step=AUTO_STEP);
+        //    MatPL(Size _size, int _type, void* _data, size_t _step=AUTO_STEP);
+        //    //! creates a matrix header for a part of the bigger matrix
+        //    MatPL(const Mat& m, const Range& rowRange, const Range& colRange);
+        //    MatPL(const Mat& m, const Rect& roi);
+        //    //! converts old-style CvMat to the new matrix; the data is not copied by default
+        //    Mat(const CvMat* m, bool copyData=false);
+        //    MatPL converts old-style IplImage to the new matrix; the data is not copied by default
+        //    MatPL(const IplImage* img, bool copyData=false);
+        //    //! builds matrix from std::vector with or without copying the data
+        //    template<typename _Tp> explicit Mat(const vector<_Tp>& vec, bool copyData=false);
+        //    //! builds matrix from cv::Vec; the data is copied by default
+        //    template<typename _Tp, int n> explicit Mat(const Vec<_Tp, n>& vec,
+        //        bool copyData=true);
+        //    //! builds matrix from cv::Matx; the data is copied by default
+        //    template<typename _Tp, int m, int n> explicit Mat(const Matx<_Tp, m, n>& mtx,
+        //        bool copyData=true);
+        //    //! builds matrix from a 2D point
+        //    template<typename _Tp> explicit Mat(const Point_<_Tp>& pt);
+        //    //! builds matrix from a 3D point
+        //    template<typename _Tp> explicit Mat(const Point3_<_Tp>& pt);
+        //    //! builds matrix from comma initializer
+        //    template<typename _Tp> explicit Mat(const MatCommaInitializer_<_Tp>& commaInitializer);
+        //    //! helper constructor to compile matrix expressions
+        //    Mat(const MatExpr_Base& expr);
+        //    //! destructor - calls release()
+        //    ~Mat();
+        //    //! assignment operators
+        //    Mat& operator = (const Mat& m);
+        //    Mat& operator = (const MatExpr_Base& expr);
+
+        //    operator MatExpr_<Mat, Mat>() const;
+
+        //    //! returns a new matrix header for the specified row
+        //    Mat row(int y) const;
+        //    //! returns a new matrix header for the specified column
+        //    Mat col(int x) const;
+        //    //! ... for the specified row span
+        //    Mat rowRange(int startrow, int endrow) const;
+        //    Mat rowRange(const Range& r) const;
+        //    //! ... for the specified column span
+        //    Mat colRange(int startcol, int endcol) const;
+        //    Mat colRange(const Range& r) const;
+        //    //! ... for the specified diagonal
+        //    // (d=0 - the main diagonal,
+        //    //  >0 - a diagonal from the lower half,
+        //    //  <0 - a diagonal from the upper half)
+        //    Mat diag(int d=0) const;
+        //    //! constructs a square diagonal matrix which main diagonal is vector "d"
+        //    static Mat diag(const Mat& d);
+
+        //    //! returns deep copy of the matrix, i.e. the data is copied
+        //    Mat clone() const;
+        //    //! copies the matrix content to "m".
+        //    // It calls m.create(this->size(), this->type()).
+        //    void copyTo( Mat& m ) const;
+        //    //! copies those matrix elements to "m" that are marked with non-zero mask elements.
+        //    void copyTo( Mat& m, const Mat& mask ) const;
+        //    //! converts matrix to another datatype with optional scalng. See cvConvertScale.
+        //    void convertTo( Mat& m, int rtype, double alpha=1, double beta=0 ) const;
+
+        //    void assignTo( Mat& m, int type=-1 ) const;
+
+        //    //! sets every matrix element to s
+        //    Mat& operator = (const Scalar& s);
+        //    //! sets some of the matrix elements to s, according to the mask
+        //    Mat& setTo(const Scalar& s, const Mat& mask=Mat());
+        //    //! creates alternative matrix header for the same data, with different
+        //    // number of channels and/or different number of rows. see cvReshape.
+        //    Mat reshape(int _cn, int _rows=0) const;
+
+        //    //! matrix transposition by means of matrix expressions
+        //    MatExpr_<MatExpr_Op2_<Mat, double, Mat, MatOp_T_<Mat> >, Mat>
+        //        t() const;
+        //    //! matrix inversion by means of matrix expressions
+        //    MatExpr_<MatExpr_Op2_<Mat, int, Mat, MatOp_Inv_<Mat> >, Mat>
+        //        inv(int method=DECOMP_LU) const;
+        //    MatExpr_<MatExpr_Op4_<Mat, Mat, double, char, Mat, MatOp_MulDiv_<Mat> >, Mat>
+        //        //! per-element matrix multiplication by means of matrix expressions
+        //        mul(const Mat& m, double scale=1) const;
+        //    MatExpr_<MatExpr_Op4_<Mat, Mat, double, char, Mat, MatOp_MulDiv_<Mat> >, Mat>
+        //        mul(const MatExpr_<MatExpr_Op2_<Mat, double, Mat, MatOp_Scale_<Mat> >, Mat>& m, double scale=1) const;
+        //    MatExpr_<MatExpr_Op4_<Mat, Mat, double, char, Mat, MatOp_MulDiv_<Mat> >, Mat>    
+        //        mul(const MatExpr_<MatExpr_Op2_<Mat, double, Mat, MatOp_DivRS_<Mat> >, Mat>& m, double scale=1) const;
+
+        //    //! computes cross-product of 2 3D vectors
+        //    Mat cross(const Mat& m) const;
+        //    //! computes dot-product
+        //    double dot(const Mat& m) const;
+
+        //    //! Matlab-style matrix initialization
+        //    static MatExpr_Initializer zeros(int rows, int cols, int type);
+        //    static MatExpr_Initializer zeros(Size size, int type);
+        //    static MatExpr_Initializer ones(int rows, int cols, int type);
+        //    static MatExpr_Initializer ones(Size size, int type);
+        //    static MatExpr_Initializer eye(int rows, int cols, int type);
+        //    static MatExpr_Initializer eye(Size size, int type);
+
+        //    //! allocates new matrix data unless the matrix already has specified size and type.
+        //    // previous data is unreferenced if needed.
+        //    void create(int _rows, int _cols, int _type);
+        //    void create(Size _size, int _type);
+        //    //! increases the reference counter; use with care to avoid memleaks
+        //    void addref();
+        //    //! decreases reference counter;
+        //    // deallocate the data when reference counter reaches 0.
+        //    void release();
+
+        //    //! locates matrix header within a parent matrix. See below
+        //    void locateROI( Size& wholeSize, Point& ofs ) const;
+        //    //! moves/resizes the current matrix ROI inside the parent matrix.
+        //    Mat& adjustROI( int dtop, int dbottom, int dleft, int dright );
+        //    //! extracts a rectangular sub-matrix
+        //    // (this is a generalized form of row, rowRange etc.)
+        //    Mat operator()( Range rowRange, Range colRange ) const;
+        //    Mat operator()( const Rect& roi ) const;
+
+        //    //! converts header to CvMat; no data is copied
+        //    operator CvMat() const;
+        //    //! converts header to IplImage; no data is copied
+        //    operator IplImage() const;
+
+        //    //! returns true iff the matrix data is continuous
+        //    // (i.e. when there are no gaps between successive rows).
+        //    // similar to CV_IS_MAT_CONT(cvmat->type)
+        //    bool isContinuous() const;
+        //    //! returns element size in bytes,
+        //    // similar to CV_ELEM_SIZE(cvmat->type)
+        //    size_t elemSize() const;
+        //    //! returns the size of element channel in bytes.
+        //    size_t elemSize1() const;
+        //    //! returns element type, similar to CV_MAT_TYPE(cvmat->type)
+        //    int type() const;
+        //    //! returns element type, similar to CV_MAT_DEPTH(cvmat->type)
+        //    int depth() const;
+        //    //! returns element type, similar to CV_MAT_CN(cvmat->type)
+        //    int channels() const;
+        //    //! returns step/elemSize1()
+        //    size_t step1() const;
+        //    //! returns matrix size:
+        //    // width == number of columns, height == number of rows
+        //    Size size() const;
+        //    //! returns true if matrix data is NULL
+        //    bool empty() const;
+
+        //    //! returns pointer to y-th row
+        //    uchar* ptr(int y=0);
+        //    const uchar* ptr(int y=0) const;
+
+        //    //! template version of the above method
+        //    template<typename _Tp> _Tp* ptr(int y=0);
+        //    template<typename _Tp> const _Tp* ptr(int y=0) const;
+
+        //    //! template methods for read-write or read-only element access.
+        //    // note that _Tp must match the actual matrix type -
+        //    // the functions do not do any on-fly type conversion
+        //    template<typename _Tp> _Tp& at(int y, int x);
+        //    template<typename _Tp> _Tp& at(Point pt);
+        //    template<typename _Tp> const _Tp& at(int y, int x) const;
+        //    template<typename _Tp> const _Tp& at(Point pt) const;
+        //    template<typename _Tp> _Tp& at(int i);
+        //    template<typename _Tp> const _Tp& at(int i) const;
+
+        //    //! template methods for iteration over matrix elements.
+        //    // the iterators take care of skipping gaps in the end of rows (if any)
+        //    template<typename _Tp> MatIterator_<_Tp> begin();
+        //    template<typename _Tp> MatIterator_<_Tp> end();
+        //    template<typename _Tp> MatConstIterator_<_Tp> begin() const;
+        //    template<typename _Tp> MatConstIterator_<_Tp> end() const;
+
+        //    enum { MAGIC_VAL=0x42FF0000, AUTO_STEP=0, CONTINUOUS_FLAG=CV_MAT_CONT_FLAG };
+
+        //    /*! includes several bit-fields:
+        //    - the magic signature
+        //    - continuity flag
+        //    - depth
+        //    - number of channels
+        //    */
+        //    int flags;
+        //    //! the number of rows and columns
+        //    int rows, cols;
+        //    //! a distance between successive rows in bytes; includes the gap if any
+        //    size_t step;
+        //    //! pointer to the data
+        //    uchar* data;
+
+        //    //! pointer to the reference counter;
+        //    // when matrix points to user-allocated data, the pointer is NULL
+        //    int* refcount;
+
+        //    //! helper fields used in locateROI and adjustROI
+        //    uchar* datastart;
+        //    uchar* dataend;
+        //};
+    }
+}
+
+
+#endif /* __OPENCV_GPU_MATPL_HPP__ */
\ No newline at end of file
--- a/modules/gpu/src/cudastream.cpp
+++ b/modules/gpu/src/cudastream.cpp
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+using namespace cv;
+using namespace cv::gpu;
+
+
+cv::gpu::CudaStream::CudaStream() : impl( fastMalloc(sizeof(cudaStream_t)) )
+{
+    cudaSafeCall( cudaStreamCreate((cudaStream_t*)impl) );
+}
+cv::gpu::CudaStream::~CudaStream() 
+{ 
+    cudaSafeCall( cudaStreamDestroy( *(cudaStream_t*)impl ) );
+    cv::fastFree( impl );     
+}
+
+bool cv::gpu::CudaStream::queryIfComplete()
+{
+    cudaError_t err = cudaStreamQuery( *(cudaStream_t*)impl );
+
+    if (err == cudaSuccess)
+        return true;
+
+    if (err == cudaErrorNotReady)
+        return false;
+
+    //cudaErrorInvalidResourceHandle  
+    cudaSafeCall( err );    
+    return true;
+}
+void cv::gpu::CudaStream::waitForCompletion()
+{
+    cudaSafeCall( cudaStreamSynchronize(  *(cudaStream_t*)impl ) );
+}
+
+void cv::gpu::CudaStream::enqueueDownload(const GpuMat& src, Mat& dst)
+{
+//    cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToHost, 
+}
+void cv::gpu::CudaStream::enqueueUpload(const Mat& src, GpuMat& dst)
+{
+    CV_Assert(!"Not implemented");
+}
+void cv::gpu::CudaStream::enqueueCopy(const GpuMat& src, GpuMat& dst)
+{
+    CV_Assert(!"Not implemented");
+}
+
+void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& src, Scalar val)
+{
+    CV_Assert(!"Not implemented");
+}
+
+void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& src, Scalar val, const GpuMat& mask)
+{
+    CV_Assert(!"Not implemented");
+}
+
+void cv::gpu::CudaStream::enqueueConvert(const GpuMat& src, GpuMat& dst, int type)
+{
+    CV_Assert(!"Not implemented");
+}
+
+//struct cudaStream_t& cv::gpu::CudaStream::getStream() { return stream; }
+
+    
--- a/modules/gpu/src/gpumat.cpp
+++ b/modules/gpu/src/gpumat.cpp
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+using namespace cv;
+using namespace cv::gpu;
+
+//////////////////////////////// GpuMat ////////////////////////////////
+
+void GpuMat::upload(const Mat& m)
+{
+    CV_DbgAssert(!m.empty());
+    create(m.size(), m.type());
+    cudaSafeCall( cudaMemcpy2D(data, step, m.data, m.step, cols * elemSize(), rows, cudaMemcpyHostToDevice) );
+}
+
+void GpuMat::download(cv::Mat& m) const
+{
+    CV_DbgAssert(!this->empty());
+    m.create(size(), type());
+    cudaSafeCall( cudaMemcpy2D(m.data, m.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToHost) );
+}
+
+void GpuMat::copyTo( GpuMat& m ) const
+{
+    CV_DbgAssert(!this->empty());
+    m.create(size(), type());
+    cudaSafeCall( cudaMemcpy2D(m.data, m.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToDevice) );
+    cudaSafeCall( cudaThreadSynchronize() );
+}
+            
+void GpuMat::copyTo( GpuMat& /*m*/, const GpuMat&/* mask */) const
+{    
+    CV_Assert(!"Not implemented");
+}
+ 
+void GpuMat::convertTo( GpuMat& /*m*/, int /*rtype*/, double /*alpha*/, double /*beta*/ ) const
+{
+    CV_Assert(!"Not implemented");
+}
+
+GpuMat& GpuMat::operator = (const Scalar& /*s*/)
+{
+    CV_Assert(!"Not implemented"); 
+    return *this;
+}
+
+GpuMat& GpuMat::setTo(const Scalar& /*s*/, const GpuMat& /*mask*/)
+{
+    CV_Assert(!"Not implemented");    
+    return *this;
+}
+
+
+GpuMat GpuMat::reshape(int new_cn, int new_rows) const
+{
+    GpuMat hdr = *this;
+
+    int cn = channels();
+    if( new_cn == 0 )
+        new_cn = cn;
+
+    int total_width = cols * cn;
+
+    if( (new_cn > total_width || total_width % new_cn != 0) && new_rows == 0 )
+        new_rows = rows * total_width / new_cn;
+
+    if( new_rows != 0 && new_rows != rows )
+    {
+        int total_size = total_width * rows;
+        if( !isContinuous() )
+            CV_Error( CV_BadStep, "The matrix is not continuous, thus its number of rows can not be changed" );
+
+        if( (unsigned)new_rows > (unsigned)total_size )
+            CV_Error( CV_StsOutOfRange, "Bad new number of rows" );
+
+        total_width = total_size / new_rows;
+
+        if( total_width * new_rows != total_size )
+            CV_Error( CV_StsBadArg, "The total number of matrix elements is not divisible by the new number of rows" );
+
+        hdr.rows = new_rows;
+        hdr.step = total_width * elemSize1();
+    }
+
+    int new_width = total_width / new_cn;
+
+    if( new_width * new_cn != total_width )
+        CV_Error( CV_BadNumChannels, "The total width is not divisible by the new number of channels" );
+
+    hdr.cols = new_width;
+    hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn-1) << CV_CN_SHIFT);
+    return hdr;
+}
+
+void GpuMat::create(int _rows, int _cols, int _type)
+{
+    _type &= TYPE_MASK;
+    if( rows == _rows && cols == _cols && type() == _type && data )
+        return;
+    if( data )
+        release();
+    CV_DbgAssert( _rows >= 0 && _cols >= 0 );
+    if( _rows > 0 && _cols > 0 )
+    {
+        flags = Mat::MAGIC_VAL + _type;
+        rows = _rows;
+        cols = _cols;
+
+        size_t esz = elemSize();                
+        
+        void *dev_ptr;
+        cudaSafeCall( cudaMallocPitch(&dev_ptr, &step, esz * cols, rows) );
+
+        if (esz * cols == step)
+            flags |= Mat::CONTINUOUS_FLAG;
+
+        int64 _nettosize = (int64)step*rows;
+        size_t nettosize = (size_t)_nettosize;
+                
+        datastart = data = (uchar*)dev_ptr;
+        dataend = data + nettosize;            
+        
+        refcount = (int*)fastMalloc(sizeof(*refcount));
+        *refcount = 1;
+    }
+}
+
+void GpuMat::release()
+{
+    if( refcount && CV_XADD(refcount, -1) == 1 )
+    {
+        fastFree(refcount);
+        cudaSafeCall( cudaFree(datastart) );        
+    }
+    data = datastart = dataend = 0;
+    step = rows = cols = 0;
+    refcount = 0;
+}
+
+
+
+
+
+
--- a/modules/gpu/src/initialization.cpp
+++ b/modules/gpu/src/initialization.cpp
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+using namespace cv;
+using namespace cv::gpu;
+
+CV_EXPORTS int cv::gpu::getCudaEnabledDeviceCount()
+{
+    int count;
+    cudaSafeCall( cudaGetDeviceCount( &count ) );
+    return count;
+}
+
+CV_EXPORTS string cv::gpu::getDeviceName(int device)
+{
+    cudaDeviceProp prop;
+    cudaSafeCall( cudaGetDeviceProperties( &prop, device) );
+    return prop.name;
+}
+
+CV_EXPORTS void cv::gpu::setDevice(int device)
+{
+    cudaSafeCall( cudaSetDevice( device ) );
+}
+
+CV_EXPORTS int cv::gpu::getComputeCapability(int device)
+{    
+    cudaDeviceProp prop;
+    cudaSafeCall( cudaGetDeviceProperties( &prop, device) );
+
+    if (prop.major == 2)
+        return CV_GPU_CC_20;
+
+    if (prop.major == 1)
+        switch (prop.minor)
+        {
+        case 0: return CV_GPU_CC_10;
+        case 1: return CV_GPU_CC_11;
+        case 2: return CV_GPU_CC_12;
+        case 3: return CV_GPU_CC_13;        
+        }
+    
+    return -1;
+}
+
+
+CV_EXPORTS int cv::gpu::getNumberOfSMs(int device)
+{
+    cudaDeviceProp prop;
+    cudaSafeCall( cudaGetDeviceProperties( &prop, device ) );
+    return prop.multiProcessorCount;
+}
\ No newline at end of file
--- a/modules/gpu/src/precomp.cpp
+++ b/modules/gpu/src/precomp.cpp
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                        Intel License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of Intel Corporation may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+/* End of file. */
--- a/modules/gpu/src/precomp.hpp
+++ b/modules/gpu/src/precomp.hpp
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+#ifndef __OPENCV_PRECOMP_H__
+#define __OPENCV_PRECOMP_H__
+
+#if _MSC_VER >= 1200
+#pragma warning( disable: 4251 4710 4711 4514 4996 )
+#endif
+
+#ifdef HAVE_CONFIG_H 
+#include <cvconfig.h> 
+#endif
+
+#include "opencv2/gpu/gpu.hpp"
+#include "opencv2/gpu/gpumat.hpp"
+
+#include "cuda_shared.hpp"
+
+#include "cuda_runtime.h"
+
+
+#define cudaSafeCall(err)  __cudaSafeCall(err, __FILE__, __LINE__)
+
+//inline void __cudaSafeCall( cudaError err, const char *file, const int line )
+//{
+//    if( cudaSuccess != err) 
+//        CV_Error_(CV_StsAssert, ("%s(%i) : Runtime API error : %s.\n", cudaGetErrorString(err)));
+//}
+
+namespace cv
+{
+    namespace gpu
+    {
+        
+        inline void __cudaSafeCall( cudaError err, const char *file, const int line )
+        {
+            if( cudaSuccess != err) 
+            {
+		        fprintf(stderr, "%s(%i) : cudaSafeCall() Runtime API error : %s.\n", file, line, cudaGetErrorString(err) );
+                exit(-1);
+            }
+        }
+
+        template<class T>
+        inline DevMem2D_<T> getDevMem(const GpuMat& mat) { return DevMem2D_<T>(m.rows, m.cols, m.data, m.step); }
+    }
+}
+
+#endif
--- a/modules/gpu/src/stereobm_gpu.cpp
+++ b/modules/gpu/src/stereobm_gpu.cpp
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other GpuMaterials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+#include <limits>
+
+using namespace cv;
+using namespace cv::gpu;
+   
+StereoBM_GPU::StereoBM_GPU() : preset(BASIC_PRESET), ndisp(64)  {}
+StereoBM_GPU::StereoBM_GPU(int preset_, int ndisparities_) : preset(preset_), ndisp(ndisparities_) 
+{
+    CV_Assert(ndisp <= std::numeric_limits<unsigned char>::max());
+}
+  
+void StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity) const
+{
+    CV_DbgAssert(left.rows == right.rows && left.cols == right.cols);
+    CV_DbgAssert(left.type() == CV_8UC1);
+    CV_DbgAssert(right.type() == CV_8UC1);
+
+    disparity.create(left.size(), CV_8U);
+    minSSD.create(left.size(), CV_32S);
+
+    if (preset == PREFILTER_XSOBEL)
+    {
+         CV_Assert(!"Not implemented");
+    }   
+
+    DevMem2D disp = disparity;
+    DevMem2D_<uint> mssd = minSSD;    
+    impl::stereoBM_GPU(left, right, disp, ndisp, mssd);     
+}