fix cudnn workspace size problem during inference. (#26021)

test=develop

fix cudnn workspace size problem during inference. (#26021)
test=develop
50f149a4 · Zhaolong Xing · GitHub · 1f74b94d · 50f149a4
隐藏空白更改
内联并排

Showing 1 changed file with 6 additions and 0 deletions

paddle/fluid/operators/fused/conv_fusion_op.cu paddle/fluid/operators/fused/conv_fusion_op.cu +6 -0

未找到文件。
--- a/paddle/fluid/operators/fused/conv_fusion_op.cu
+++ b/paddle/fluid/operators/fused/conv_fusion_op.cu
@@ -216,6 +216,12 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
              perf_results.get()));
      algo = (perf_results.get())[best_algo_idx].algo;
      VLOG(3) << "cuDNN forward algo " << algo;
+      PADDLE_ENFORCE_CUDA_SUCCESS(
+          platform::dynload::cudnnGetConvolutionForwardWorkspaceSize(
+              handle, cudnn_input_desc, cudnn_filter_desc, cudnn_conv_desc,
+              cudnn_output_desc, algo, &workspace_size_in_bytes));
+      if (workspace_size_in_bytes > workspace_size_limit)
+        workspace_size_limit = workspace_size_in_bytes;
    } else {
      std::function<cudnnConvolutionFwdAlgo_t()> search_func =
          [&]() -> cudnnConvolutionFwdAlgo_t {