Unverified commit a2cb26fe, authored by Wilber, committed by GitHub

[Cherry-pick] [CUDA] [KERNEL] Fix GPU memory leak in DNN models. (#3599)

* Update CUDA demo.
Parent commit: c54c9f1e
@@ -33,6 +33,9 @@ bool BatchedGemm<float, float>::init(const bool trans_a,
   }
   cu_trans_a_ = trans_a ? CUBLAS_OP_T : CUBLAS_OP_N;
   cu_trans_b_ = trans_b ? CUBLAS_OP_T : CUBLAS_OP_N;
+  if (A_ != nullptr) {
+    cudaFree(A_);
+  }
   cudaMalloc(reinterpret_cast<void **>(&A_),
              3 * max_batch_size * sizeof(float *));
   return true;
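The hunk above fixes the leak at its source: init() unconditionally called cudaMalloc on A_, so each re-initialization orphaned the previous device allocation; the new guard frees any existing buffer first. Below is a minimal self-contained sketch of that pattern (the DeviceArray class and its members are hypothetical illustration names, not Paddle Lite code):

// Minimal sketch of the free-before-malloc guard, outside Paddle Lite.
#include <cuda_runtime.h>
#include <cstddef>

class DeviceArray {
 public:
  // Re-initialization path: without the cudaFree guard, every call
  // after the first would leak the previous device allocation.
  bool init(size_t n) {
    if (data_ != nullptr) {
      cudaFree(data_);  // release the buffer from the previous init()
      data_ = nullptr;
    }
    return cudaMalloc(reinterpret_cast<void **>(&data_),
                      n * sizeof(float)) == cudaSuccess;
  }
  ~DeviceArray() {
    if (data_ != nullptr) cudaFree(data_);  // release the final allocation
  }

 private:
  float *data_{nullptr};
};

The destructor pairs with the guard: the guard covers re-initialization, the destructor covers the last allocation.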
@@ -13,6 +13,7 @@
 // limitations under the License.
 #pragma once
+#include <limits>
 #include <memory>
 #include "lite/backends/cuda/math/batched_gemm.h"
 #include "lite/core/context.h"
@@ -32,6 +33,7 @@ class SearchAlignedMatMulCompute
   void PrepareForRun() override {
     batched_gemm_impl_.reset(new lite::cuda::math::BatchedGemm<float, float>);
+    last_seq_num_ = std::numeric_limits<int>::min();
   }
   void Run() override {
@@ -75,8 +77,11 @@ class SearchAlignedMatMulCompute
       A_[seq + seq_num * 2] = out_data + seq * out_stride;
     }
-    CHECK(
-        batched_gemm_impl_->init(x_transpose, y_transpose, seq_num, &cuda_ctx));
+    if (seq_num != last_seq_num_) {
+      CHECK(batched_gemm_impl_->init(
+          x_transpose, y_transpose, seq_num, &cuda_ctx));
+      last_seq_num_ = seq_num;
+    }
     batched_gemm_impl_->run(
         alpha, 0.0f, const_cast<const float**>(A_), M, N, K, seq_num);
   }
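Together with the guard in init(), this hunk removes the per-run churn: init() used to run on every Run() call, re-allocating the device pointer array on each inference, which is where the model-level leak accumulated. Caching last_seq_num_ (seeded with std::numeric_limits<int>::min() in PrepareForRun() so the first run always initializes) re-initializes only when the batch size actually changes. A hedged sketch of the caching pattern, with stand-in names rather than Paddle Lite's actual signatures:

// Sketch: re-initialize device buffers only when the batch size changes.
#include <limits>

class CachedGemmRunner {
 public:
  void PrepareForRun() {
    // Sentinel guarantees the first Run() performs initialization.
    last_seq_num_ = std::numeric_limits<int>::min();
  }
  void Run(int seq_num) {
    if (seq_num != last_seq_num_) {  // batch size changed, or first run
      InitBuffers(seq_num);          // allocates device memory once per size
      last_seq_num_ = seq_num;
    }
    Launch(seq_num);                 // cheap per-run work
  }

 private:
  void InitBuffers(int /*seq_num*/) {}  // placeholder for device allocation
  void Launch(int /*seq_num*/) {}      // placeholder for the batched GEMM
  int last_seq_num_{0};
};

The int last_seq_num_; member declared in the final hunk below is what backs this cache.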
@@ -86,6 +91,7 @@ class SearchAlignedMatMulCompute
  private:
   std::unique_ptr<lite::cuda::math::BatchedGemm<float, float>>
       batched_gemm_impl_;
+  int last_seq_num_;
 };
 }  // namespace cuda
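In sum, the fix pairs a free-before-malloc guard with re-init caching. An alternative design that removes the manual guard entirely is holding the device pointer in an RAII wrapper; a hedged sketch under the assumption that plain cudaMalloc/cudaFree is acceptable here (illustrative names, not a Paddle Lite API):

// Hypothetical RAII holder: the deleter frees device memory when the
// unique_ptr is reset or destroyed, so re-initialization cannot leak.
#include <cuda_runtime.h>
#include <cstddef>
#include <memory>

struct CudaFreeDeleter {
  void operator()(void *p) const { cudaFree(p); }
};
using DeviceBuffer = std::unique_ptr<float, CudaFreeDeleter>;

inline DeviceBuffer MakeDeviceBuffer(size_t n) {
  float *raw = nullptr;
  cudaMalloc(reinterpret_cast<void **>(&raw), n * sizeof(float));
  return DeviceBuffer(raw);
}

Resetting such a holder inside init() would free the prior allocation automatically, so a forgotten cudaFree cannot leak.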