提交 3be997cf 编写于 作者: V Vladislav Vinogradov

gpulegacy module fixes

上级 6d735c11
......@@ -4,6 +4,6 @@ endif()
set(the_description "GPU-accelerated Computer Vision (legacy)")
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations -Wuninitialized)
ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4130 /wd4324 /wd4512 -Wundef -Wmissing-declarations -Wuninitialized)
ocv_define_module(gpulegacy opencv_core OPTIONAL opencv_objdetect)
......@@ -52,8 +52,8 @@ namespace cv { namespace gpu { namespace cudev
{
namespace pyramid
{
template <typename T> void kernelDownsampleX2_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template <typename T> void kernelInterpolateFrom1_gpu(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
CV_EXPORTS void downsampleX2(PtrStepSzb src, PtrStepSzb dst, int depth, int cn, cudaStream_t stream);
CV_EXPORTS void interpolateFrom1(PtrStepSzb src, PtrStepSzb dst, int depth, int cn, cudaStream_t stream);
}
}}}
......
......@@ -66,6 +66,7 @@
#ifdef HAVE_OPENCV_OBJDETECT
# include "opencv2/objdetect.hpp"
# include "opencv2/objdetect/objdetect_c.h"
#endif
#include "opencv2/gpulegacy/NCV.hpp"
......@@ -2130,7 +2131,7 @@ static NCVStatus loadFromXML(const cv::String &filename,
haar.ClassifierSize.height = 0;
haar.bHasStumpsOnly = true;
haar.bNeedsTiltedII = false;
Ncv32u curMaxTreeDepth;
Ncv32u curMaxTreeDepth = 0;
std::vector<HaarClassifierNode128> h_TmpClassifierNotRootNodes;
haarStages.resize(0);
......
......@@ -223,17 +223,25 @@ namespace cv { namespace gpu { namespace cudev
cudaSafeCall( cudaDeviceSynchronize() );
}
template void kernelDownsampleX2_gpu<uchar1>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void kernelDownsampleX2_gpu<uchar3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void kernelDownsampleX2_gpu<uchar4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void kernelDownsampleX2_gpu<ushort1>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void kernelDownsampleX2_gpu<ushort3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void kernelDownsampleX2_gpu<ushort4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
void downsampleX2(PtrStepSzb src, PtrStepSzb dst, int depth, int cn, cudaStream_t stream)
{
typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void kernelDownsampleX2_gpu<float1>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void kernelDownsampleX2_gpu<float3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void kernelDownsampleX2_gpu<float4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
static const func_t funcs[6][4] =
{
{kernelDownsampleX2_gpu<uchar1> , 0 /*kernelDownsampleX2_gpu<uchar2>*/ , kernelDownsampleX2_gpu<uchar3> , kernelDownsampleX2_gpu<uchar4> },
{0 /*kernelDownsampleX2_gpu<char1>*/ , 0 /*kernelDownsampleX2_gpu<char2>*/ , 0 /*kernelDownsampleX2_gpu<char3>*/ , 0 /*kernelDownsampleX2_gpu<char4>*/ },
{kernelDownsampleX2_gpu<ushort1> , 0 /*kernelDownsampleX2_gpu<ushort2>*/, kernelDownsampleX2_gpu<ushort3> , kernelDownsampleX2_gpu<ushort4> },
{0 /*kernelDownsampleX2_gpu<short1>*/ , 0 /*kernelDownsampleX2_gpu<short2>*/ , 0 /*kernelDownsampleX2_gpu<short3>*/, 0 /*kernelDownsampleX2_gpu<short4>*/},
{0 /*kernelDownsampleX2_gpu<int1>*/ , 0 /*kernelDownsampleX2_gpu<int2>*/ , 0 /*kernelDownsampleX2_gpu<int3>*/ , 0 /*kernelDownsampleX2_gpu<int4>*/ },
{kernelDownsampleX2_gpu<float1> , 0 /*kernelDownsampleX2_gpu<float2>*/ , kernelDownsampleX2_gpu<float3> , kernelDownsampleX2_gpu<float4> }
};
const func_t func = funcs[depth][cn - 1];
CV_Assert(func != 0);
func(src, dst, stream);
}
}
}}}
......@@ -298,17 +306,25 @@ namespace cv { namespace gpu { namespace cudev
cudaSafeCall( cudaDeviceSynchronize() );
}
template void kernelInterpolateFrom1_gpu<uchar1>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void kernelInterpolateFrom1_gpu<uchar3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void kernelInterpolateFrom1_gpu<uchar4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void kernelInterpolateFrom1_gpu<ushort1>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void kernelInterpolateFrom1_gpu<ushort3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void kernelInterpolateFrom1_gpu<ushort4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
void interpolateFrom1(PtrStepSzb src, PtrStepSzb dst, int depth, int cn, cudaStream_t stream)
{
typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void kernelInterpolateFrom1_gpu<float1>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void kernelInterpolateFrom1_gpu<float3>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
template void kernelInterpolateFrom1_gpu<float4>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
static const func_t funcs[6][4] =
{
{kernelInterpolateFrom1_gpu<uchar1> , 0 /*kernelInterpolateFrom1_gpu<uchar2>*/ , kernelInterpolateFrom1_gpu<uchar3> , kernelInterpolateFrom1_gpu<uchar4> },
{0 /*kernelInterpolateFrom1_gpu<char1>*/ , 0 /*kernelInterpolateFrom1_gpu<char2>*/ , 0 /*kernelInterpolateFrom1_gpu<char3>*/ , 0 /*kernelInterpolateFrom1_gpu<char4>*/ },
{kernelInterpolateFrom1_gpu<ushort1> , 0 /*kernelInterpolateFrom1_gpu<ushort2>*/, kernelInterpolateFrom1_gpu<ushort3> , kernelInterpolateFrom1_gpu<ushort4> },
{0 /*kernelInterpolateFrom1_gpu<short1>*/, 0 /*kernelInterpolateFrom1_gpu<short2>*/ , 0 /*kernelInterpolateFrom1_gpu<short3>*/, 0 /*kernelInterpolateFrom1_gpu<short4>*/},
{0 /*kernelInterpolateFrom1_gpu<int1>*/ , 0 /*kernelInterpolateFrom1_gpu<int2>*/ , 0 /*kernelInterpolateFrom1_gpu<int3>*/ , 0 /*kernelInterpolateFrom1_gpu<int4>*/ },
{kernelInterpolateFrom1_gpu<float1> , 0 /*kernelInterpolateFrom1_gpu<float2>*/ , kernelInterpolateFrom1_gpu<float3> , kernelInterpolateFrom1_gpu<float4> }
};
const func_t func = funcs[depth][cn - 1];
CV_Assert(func != 0);
func(src, dst, stream);
}
}
}}}
......
......@@ -349,7 +349,7 @@ bool nvidia_NPPST_Resize(const std::string& test_data_path, OutputLevel outputLe
NCVAutoTestLister testListerResize("NPPST Resize", outputLevel);
NCVTestSourceProvider<Ncv32u> testSrcRandom_32u(2010, 0, 0xFFFFFFFF, 2048, 2048);
NCVTestSourceProvider<Ncv64u> testSrcRandom_64u(2010, 0, -1, 2048, 2048);
NCVTestSourceProvider<Ncv64u> testSrcRandom_64u(2010, 0, (Ncv64u) -1, 2048, 2048);
generateResizeTests(testListerResize, testSrcRandom_32u);
generateResizeTests(testListerResize, testSrcRandom_64u);
......@@ -379,7 +379,7 @@ bool nvidia_NPPST_Transpose(const std::string& test_data_path, OutputLevel outpu
NCVAutoTestLister testListerTranspose("NPPST Transpose", outputLevel);
NCVTestSourceProvider<Ncv32u> testSrcRandom_32u(2010, 0, 0xFFFFFFFF, 2048, 2048);
NCVTestSourceProvider<Ncv64u> testSrcRandom_64u(2010, 0, -1, 2048, 2048);
NCVTestSourceProvider<Ncv64u> testSrcRandom_64u(2010, 0, (Ncv64u) -1, 2048, 2048);
generateTransposeTests(testListerTranspose, testSrcRandom_32u);
generateTransposeTests(testListerTranspose, testSrcRandom_64u);
......
......@@ -140,25 +140,8 @@ void cv::gpu::ImagePyramid::build(const GpuMat& img, int numLayers, Stream& stre
(void) stream;
throw_no_cuda();
#else
using namespace cv::gpu::cudev::pyramid;
typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
static const func_t funcs[6][4] =
{
{kernelDownsampleX2_gpu<uchar1> , 0 /*kernelDownsampleX2_gpu<uchar2>*/ , kernelDownsampleX2_gpu<uchar3> , kernelDownsampleX2_gpu<uchar4> },
{0 /*kernelDownsampleX2_gpu<char1>*/ , 0 /*kernelDownsampleX2_gpu<char2>*/ , 0 /*kernelDownsampleX2_gpu<char3>*/ , 0 /*kernelDownsampleX2_gpu<char4>*/ },
{kernelDownsampleX2_gpu<ushort1> , 0 /*kernelDownsampleX2_gpu<ushort2>*/, kernelDownsampleX2_gpu<ushort3> , kernelDownsampleX2_gpu<ushort4> },
{0 /*kernelDownsampleX2_gpu<short1>*/ , 0 /*kernelDownsampleX2_gpu<short2>*/ , 0 /*kernelDownsampleX2_gpu<short3>*/, 0 /*kernelDownsampleX2_gpu<short4>*/},
{0 /*kernelDownsampleX2_gpu<int1>*/ , 0 /*kernelDownsampleX2_gpu<int2>*/ , 0 /*kernelDownsampleX2_gpu<int3>*/ , 0 /*kernelDownsampleX2_gpu<int4>*/ },
{kernelDownsampleX2_gpu<float1> , 0 /*kernelDownsampleX2_gpu<float2>*/ , kernelDownsampleX2_gpu<float3> , kernelDownsampleX2_gpu<float4> }
};
CV_Assert(img.depth() <= CV_32F && img.channels() <= 4);
const func_t func = funcs[img.depth()][img.channels() - 1];
CV_Assert(func != 0);
layer0_ = img;
Size szLastLayer = img.size();
nLayers_ = 1;
......@@ -180,7 +163,7 @@ void cv::gpu::ImagePyramid::build(const GpuMat& img, int numLayers, Stream& stre
const GpuMat& prevLayer = i == 0 ? layer0_ : pyramid_[i - 1];
func(prevLayer, pyramid_[i], StreamAccessor::getStream(stream));
cudev::pyramid::downsampleX2(prevLayer, pyramid_[i], img.depth(), img.channels(), StreamAccessor::getStream(stream));
szLastLayer = szCurLayer;
}
......@@ -195,27 +178,10 @@ void cv::gpu::ImagePyramid::getLayer(GpuMat& outImg, Size outRoi, Stream& stream
(void) stream;
throw_no_cuda();
#else
using namespace cv::gpu::cudev::pyramid;
typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
static const func_t funcs[6][4] =
{
{kernelInterpolateFrom1_gpu<uchar1> , 0 /*kernelInterpolateFrom1_gpu<uchar2>*/ , kernelInterpolateFrom1_gpu<uchar3> , kernelInterpolateFrom1_gpu<uchar4> },
{0 /*kernelInterpolateFrom1_gpu<char1>*/ , 0 /*kernelInterpolateFrom1_gpu<char2>*/ , 0 /*kernelInterpolateFrom1_gpu<char3>*/ , 0 /*kernelInterpolateFrom1_gpu<char4>*/ },
{kernelInterpolateFrom1_gpu<ushort1> , 0 /*kernelInterpolateFrom1_gpu<ushort2>*/, kernelInterpolateFrom1_gpu<ushort3> , kernelInterpolateFrom1_gpu<ushort4> },
{0 /*kernelInterpolateFrom1_gpu<short1>*/, 0 /*kernelInterpolateFrom1_gpu<short2>*/ , 0 /*kernelInterpolateFrom1_gpu<short3>*/, 0 /*kernelInterpolateFrom1_gpu<short4>*/},
{0 /*kernelInterpolateFrom1_gpu<int1>*/ , 0 /*kernelInterpolateFrom1_gpu<int2>*/ , 0 /*kernelInterpolateFrom1_gpu<int3>*/ , 0 /*kernelInterpolateFrom1_gpu<int4>*/ },
{kernelInterpolateFrom1_gpu<float1> , 0 /*kernelInterpolateFrom1_gpu<float2>*/ , kernelInterpolateFrom1_gpu<float3> , kernelInterpolateFrom1_gpu<float4> }
};
CV_Assert(outRoi.width <= layer0_.cols && outRoi.height <= layer0_.rows && outRoi.width > 0 && outRoi.height > 0);
ensureSizeIsEnough(outRoi, layer0_.type(), outImg);
const func_t func = funcs[outImg.depth()][outImg.channels() - 1];
CV_Assert(func != 0);
if (outRoi.width == layer0_.cols && outRoi.height == layer0_.rows)
{
if (stream)
......@@ -249,7 +215,7 @@ void cv::gpu::ImagePyramid::getLayer(GpuMat& outImg, Size outRoi, Stream& stream
lastLayer = curLayer;
}
func(lastLayer, outImg, StreamAccessor::getStream(stream));
cudev::pyramid::interpolateFrom1(lastLayer, outImg, outImg.depth(), outImg.channels(), StreamAccessor::getStream(stream));
#endif
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册