Merge pull request #16835 from YashasSamaga:cuda4dnn-hotfix-memory-lock

dbb30134 · Alexander Alekhin · 8fe96743 · 034a43e7 · dbb30134 · dbb30134
隐藏空白更改
内联并排

Showing with 22 additions and 3 deletions

modules/dnn/src/cuda4dnn/csl/memory.hpp modules/dnn/src/cuda4dnn/csl/memory.hpp +11 -3

modules/dnn/src/op_cuda.hpp modules/dnn/src/op_cuda.hpp +11 -0

未找到文件。
--- a/modules/dnn/src/cuda4dnn/csl/memory.hpp
+++ b/modules/dnn/src/cuda4dnn/csl/memory.hpp
@@ -276,14 +276,22 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl {

        MemoryLockGuard& operator=(const MemoryLockGuard&) = delete;
        MemoryLockGuard& operator=(MemoryLockGuard&& other) noexcept {
-            ptr = other.ptr;
-            other.ptr = nullptr;
+            if (&other != this) {
+                if(ptr != nullptr) {
+                    /* cudaHostUnregister does not throw for a valid ptr */
+                    CUDA4DNN_CHECK_CUDA(cudaHostUnregister(ptr));
+                }
+                ptr = other.ptr;
+                other.ptr = nullptr;
+            }
            return *this;
        }

        ~MemoryLockGuard() {
-            if(ptr != nullptr)
+            if(ptr != nullptr) {
+                /* cudaHostUnregister does not throw for a valid ptr */
                CUDA4DNN_CHECK_CUDA(cudaHostUnregister(ptr));
+            }
        }

    private:

--- a/modules/dnn/src/op_cuda.hpp
+++ b/modules/dnn/src/op_cuda.hpp
@@ -308,7 +308,18 @@ namespace cv { namespace dnn {

            auto numel = total(shape_);
            if (numel > shared_block->device.size())
+            {
+                /* if the host memory was already page-locked, release it and register again with the new size */
+                shared_block->memGuard = cuda4dnn::csl::MemoryLockGuard();
+                try {
+                    CV_Assert(shared_block->host.type() == CV_32F);
+                    shared_block->memGuard = cuda4dnn::csl::MemoryLockGuard(shared_block->host.data, numel * sizeof(float));
+                } catch (...) {
+                    /* a common reason for failure is that the host system (for example, a Jetson device) does not support it */
+                    /* we ignore the failure as this is just an optimization and not a requirement */
+                }
                shared_block->device.reset(numel);
+            }
        }

        static Ptr<BackendWrapper> create(Mat& m) {