make the asynchronous call to NPP safe

* Stop calling nppSetStream

make the asynchronous call to NPP safe
* Stop calling nppSetStream
6c253510 · Tomoaki Teshima · 2dff9f4c · 6c253510 · 6c253510
Hide whitespace changes
Inline / Side-by-side

Showing 2 changed files with 4 additions and 4 deletions

modules/core/include/opencv2/core/private.cuda.hpp +4 -2

modules/cudaarithm/src/reductions.cpp +0 -2

File not found.
--- a/modules/core/include/opencv2/core/private.cuda.hpp
+++ b/modules/core/include/opencv2/core/private.cuda.hpp
@@ -108,6 +108,8 @@ static inline void throw_no_cuda() { CV_Error(cv::Error::GpuNotSupported, "The l

 #else // HAVE_CUDA

+#define nppSafeSetStream(oldStream, newStream) { if(oldStream != newStream) { cudaStreamSynchronize(oldStream); nppSetStream(newStream); } }
+
 static inline void throw_no_cuda() { CV_Error(cv::Error::StsNotImplemented, "The called functionality is disabled for current build or platform"); }

 namespace cv { namespace cuda
@@ -139,13 +141,13 @@ namespace cv { namespace cuda
        inline explicit NppStreamHandler(Stream& newStream)
        {
            oldStream = nppGetStream();
-            nppSetStream(StreamAccessor::getStream(newStream));
+            nppSafeSetStream(oldStream, StreamAccessor::getStream(newStream));
        }

        inline explicit NppStreamHandler(cudaStream_t newStream)
        {
            oldStream = nppGetStream();
-            nppSetStream(newStream);
+            nppSafeSetStream(oldStream, newStream);
        }

        inline ~NppStreamHandler()

--- a/modules/cudaarithm/src/reductions.cpp
+++ b/modules/cudaarithm/src/reductions.cpp
@@ -157,8 +157,6 @@ void cv::cuda::meanStdDev(InputArray _src, OutputArray _dst, Stream& stream)
    BufferPool pool(stream);
    GpuMat buf = pool.getBuffer(1, bufSize, CV_8UC1);

-    NppStreamHandler h(StreamAccessor::getStream(stream));
-
    nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), sz, buf.ptr<Npp8u>(), dst.ptr<Npp64f>(), dst.ptr<Npp64f>() + 1) );

    syncOutput(dst, _dst, stream);