implemented asynchronous call for GpuMat::upload() and GpuMat::download()....

Implemented asynchronous calls for GpuMat::upload() and GpuMat::download(). Added a test for the asynchronous calls.

implemented asynchronous call for GpuMat::upload() and GpuMat::download()....
Implemented asynchronous calls for GpuMat::upload() and GpuMat::download(). Added a test for the asynchronous calls.
fff2160d · Andrey Morozov · bb2fe87b · fff2160d · fff2160d · fff2160d
4 changed files
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -107,10 +107,12 @@ namespace cv

            //! performs blocking upload of data to GpuMat.
            void upload(const cv::Mat& m);
+            void upload(const cv::Mat& m, CudaStream & stream); //!< stream overload: enqueues the upload on the given CudaStream (via CudaStream::enqueueUpload)

            //! Downloads data from device to host memory. Blocking calls.
            operator Mat() const;
            void download(cv::Mat& m) const;
+            void download(cv::Mat& m, CudaStream & stream) const; //!< stream overload: enqueues the download on the given CudaStream (via CudaStream::enqueueDownload)

            //! returns a new GpuMatrix header for the specified row
            GpuMat row(int y) const;

--- a/modules/gpu/src/cudastream.cpp
+++ b/modules/gpu/src/cudastream.cpp
@@ -41,6 +41,7 @@
 //M*/

 #include "precomp.hpp"
+#include "cuda_shared.hpp"

 using namespace cv;
 using namespace cv::gpu;
@@ -158,17 +159,37 @@ void cv::gpu::CudaStream::enqueueCopy(const GpuMat& src, GpuMat& dst) { devcopy(

 void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& src, Scalar val) // Unmasked set: forwards src, its depth and channel count, and val's components to impl::set_to_without_mask on this stream's handle.
 {
-    CV_Assert(!"Not implemented");
+    cv::gpu::impl::set_to_without_mask(src, src.depth(), val.val, src.channels(), impl->stream); // NOTE(review): src is a const reference but is presumably the matrix being written - confirm against the helper's signature
 }

 void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& src, Scalar val, const GpuMat& mask) // Masked set: same as the unmasked overload but also forwards mask to impl::set_to_with_mask.
 {
-    CV_Assert(!"Not implemented");
+    cv::gpu::impl::set_to_with_mask(src, src.depth(), val.val, mask, src.channels(), impl->stream); // NOTE(review): presumably only elements selected by mask are written, and mask is expected to match src's size - confirm
 }

-void cv::gpu::CudaStream::enqueueConvert(const GpuMat& src, GpuMat& dst, int type, double a, double b)
+void cv::gpu::CudaStream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype, double alpha, double beta) // Converts src into dst with the depth taken from rtype (a negative rtype keeps src's type); alpha and beta are passed on to impl::convert_to.
 {
-    CV_Assert(!"Not implemented");
+    bool noScale = fabs(alpha-1) < std::numeric_limits<double>::epsilon() && fabs(beta) < std::numeric_limits<double>::epsilon(); // true when alpha is 1 and beta is 0 within machine epsilon
+
+    if( rtype < 0 )
+        rtype = src.type();
+    else
+        rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), src.channels()); // only the depth of rtype is honoured; the channel count always comes from src
+
+    int sdepth = src.depth(), ddepth = CV_MAT_DEPTH(rtype);
+    if( sdepth == ddepth && noScale )
+    {
+        enqueueCopy(src, dst); // same depth and no scaling: a plain copy, enqueued on this stream instead of GpuMat::copyTo, which takes no stream
+        return;
+    }
+
+    GpuMat temp;
+    const GpuMat* psrc = &src;
+    if( sdepth != ddepth && psrc == &dst )
+        psrc = &(temp = src); // in-place conversion to another depth: read through temp, since dst.create below presumably reallocates dst
+
+    dst.create( src.size(), rtype );
+    cv::gpu::impl::convert_to(*psrc, sdepth, dst, ddepth, psrc->cols * psrc->channels(), psrc->rows, alpha, beta, impl->stream);
 }



--- a/modules/gpu/src/matrix_operations.cpp
+++ b/modules/gpu/src/matrix_operations.cpp
@@ -84,6 +84,12 @@ void cv::gpu::GpuMat::upload(const Mat& m)
    cudaSafeCall( cudaMemcpy2D(data, step, m.data, m.step, cols * elemSize(), rows, cudaMemcpyHostToDevice) );
 }

+void cv::gpu::GpuMat::upload(const cv::Mat& m, CudaStream & stream) // Stream overload of upload(): hands the copy of host matrix m into *this to stream.enqueueUpload.
+{
+    CV_DbgAssert(!m.empty()); // the source host matrix must hold data
+    stream.enqueueUpload(m, *this); // NOTE(review): the data is presumably only guaranteed to be on the device after stream.waitForCompletion() - confirm
+}
+
 void cv::gpu::GpuMat::download(cv::Mat& m) const
 {
    CV_DbgAssert(!this->empty());
@@ -91,6 +97,12 @@ void cv::gpu::GpuMat::download(cv::Mat& m) const
    cudaSafeCall( cudaMemcpy2D(m.data, m.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToHost) );
 }

+void cv::gpu::GpuMat::download(cv::Mat& m, CudaStream & stream) const // Stream overload of download(): hands the copy of *this into host matrix m to stream.enqueueDownload.
+{
+    CV_DbgAssert(!this->empty()); // the source is *this, so it (not the destination m) must hold data - same check as the blocking download()
+    stream.enqueueDownload(*this, m); // NOTE(review): m is presumably only valid after stream.waitForCompletion() - confirm
+}
+
 void cv::gpu::GpuMat::copyTo( GpuMat& m ) const
 {
    CV_DbgAssert(!this->empty());

--- a/tests/gpu/src/operator_async_call.cpp
+++ b/tests/gpu/src/operator_async_call.cpp
+#include "gputest.hpp"
+#include "highgui.h"
+#include "cv.h"
+#include <string>
+#include <iostream>
+#include <fstream>
+#include <iterator>
+#include <limits>
+#include <numeric>
+#include <iomanip> // for  cout << setw()
+
+using namespace cv;
+using namespace std;
+using namespace gpu;
+
+class CV_GpuMatASyncCall : public CvTest // Test case that copies GpuMat data through a CudaStream and checks the results (see compare_matrix).
+{
+    public:
+        CV_GpuMatASyncCall();
+        ~CV_GpuMatASyncCall();
+    protected:
+
+        template <typename T>
+        void print_mat(const T & mat, const std::string & name) const; // debugging aid: passes mat to cv::imshow under the window name `name`
+
+        void run(int); // test body; the int argument is not used
+
+        bool compare_matrix(cv::Mat & cpumat, gpu::GpuMat & gpumat); // runs the stream copies and returns true when the results match the reference
+
+    private:
+        int rows; // row count of the test matrix built in run()
+        int cols; // column count of the test matrix built in run()
+};
+
+// Names the test "GPU-MatOperatorASyncCall" (second CvTest argument "async")
+// and fixes the size of the matrix that run() creates.
+CV_GpuMatASyncCall::CV_GpuMatASyncCall()
+    : CvTest( "GPU-MatOperatorASyncCall", "async" ),
+      rows( 234 ),
+      cols( 123 )
+{
+    // Enable the line below to display the matrices from compare_matrix():
+    //#define PRINT_MATRIX
+}
+
+CV_GpuMatASyncCall::~CV_GpuMatASyncCall() {} // nothing to release: the class only adds two int members
+
+template<typename T>
+void CV_GpuMatASyncCall::print_mat(const T & mat, const std::string & name) const // Debugging aid: shows mat in the window called name; only called when PRINT_MATRIX is defined.
+{
+    cv::imshow(name, mat); // NOTE(review): for a GpuMat argument this presumably relies on the implicit conversion to cv::Mat - confirm
+}
+
+// Copies gpumat into three device matrices through a CudaStream, copies each
+// of them back into a fourth device matrix with GpuMat::copyTo, and compares
+// every result with cpumat.
+//
+// cpumat - host reference data.
+// gpumat - device matrix; the caller is expected to have uploaded cpumat into it.
+//
+// Returns true when the summed norm of all differences is below 1.0;
+// otherwise prints the sum and returns false.
+bool CV_GpuMatASyncCall::compare_matrix(cv::Mat & cpumat, gpu::GpuMat & gpumat)
+{
+    // Use the caller's data as the reference. Comparing all-zero matrices
+    // could not tell a real copy from untouched, zero-filled device memory.
+    Mat cmat(cpumat);
+    GpuMat gmat0(cmat);
+    GpuMat gmat1;
+    GpuMat gmat2;
+    GpuMat gmat3;
+
+    //int64 time = getTickCount();
+
+    // Stream copies from the device matrix under test.
+    CudaStream stream;
+    stream.enqueueCopy(gpumat, gmat1);
+    stream.enqueueCopy(gpumat, gmat2);
+    stream.enqueueCopy(gpumat, gmat3);
+    stream.waitForCompletion();
+
+    //int64 time1 = getTickCount();
+
+    // The same amount of copying done with GpuMat::copyTo, which takes no stream.
+    gmat1.copyTo(gmat0);
+    gmat2.copyTo(gmat0);
+    gmat3.copyTo(gmat0);
+
+    //int64 time2 = getTickCount();
+
+    //std::cout << "\ntime async: " << std::fixed << std::setprecision(12) << double((time1 - time)  / (double)getTickFrequency());
+    //std::cout << "\ntime  sync: " << std::fixed << std::setprecision(12) << double((time2 - time1) / (double)getTickFrequency());
+    //std::cout << "\n";
+
+#ifdef PRINT_MATRIX
+    print_mat(cmat, "cpu mat");
+    print_mat(gmat0, "gpu mat 0");
+    print_mat(gmat1, "gpu mat 1");
+    print_mat(gmat2, "gpu mat 2");
+    print_mat(gmat3, "gpu mat 3");
+    cv::waitKey(0);
+#endif
+
+    // Every device matrix must now hold the reference data.
+    double ret = norm(cmat, gmat0) + norm(cmat, gmat1) + norm(cmat, gmat2) + norm(cmat, gmat3);
+
+    if (ret < 1.0)
+        return true;
+
+    std::cout << "return : " << ret << "\n";
+    return false;
+}
+
+// Test body: builds a rows x cols 8-bit matrix filled with 127, uploads it
+// to the device and reports the outcome of compare_matrix() to the test system.
+void CV_GpuMatASyncCall::run( int /* start_from */)
+{
+    Mat host(rows, cols, CV_8U);
+    host.setTo(Scalar::all(127));
+    GpuMat device(host);
+
+    const bool passed = compare_matrix(host, device);
+
+    ts->set_failed_test_info(passed ? CvTS::OK : CvTS::FAIL_GENERIC);
+}
+
+CV_GpuMatASyncCall CV_GpuMatASyncCall_test; // file-scope instance; presumably constructing it registers the test with the CvTS framework - confirm