diff --git a/paddle/phi/kernels/gpu/interpolate_grad_kernel.cu b/paddle/phi/kernels/gpu/interpolate_grad_kernel.cu
index b38cae829680b2e6f7638ec97e13db9dd045bebc..cb1d959e30aa095ba44f6119f4e56ebf016f8f8b 100644
--- a/paddle/phi/kernels/gpu/interpolate_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/interpolate_grad_kernel.cu
@@ -25,6 +25,8 @@
 #include "paddle/phi/kernels/funcs/math_function.h"
 #include "paddle/phi/kernels/primitive/datamover_primitives.h"
 
+DECLARE_bool(cudnn_deterministic);
+
 namespace phi {
 
 template <typename T>
@@ -1034,6 +1036,12 @@ static void Interpolate2DCUDABwd(
 #endif
 
     if (optimize_flag & is_nchw) {
+      if (FLAGS_cudnn_deterministic) {
+        VLOG(2)
+            << "Run grad kernel of bilinear interpolate 2d with single thread.";
+        config.block_per_grid = 1;
+        config.thread_per_block = 1;
+      }
       KeBilinearInterpBwShareMemory<T><<<config.block_per_grid,
                                          config.thread_per_block,
                                          0,
@@ -1052,21 +1060,27 @@ static void Interpolate2DCUDABwd(
     } else if (!optimize_flag & is_nchw) {
       const int num_kernels = n * c * out_h * out_w;
       const int num_threads = std::min(dev_ctx.GetMaxThreadsPerBlock(), 1024);
+      int block_per_grid = backends::gpu::DivUp(num_kernels, num_threads);
+      int thread_per_block = num_threads;
+      if (FLAGS_cudnn_deterministic) {
+        VLOG(2)
+            << "Run grad kernel of bilinear interpolate 2d with single thread.";
+        block_per_grid = 1;
+        thread_per_block = 1;
+      }
       KeBilinearInterpNCHWBw<T>
-          <<<backends::gpu::DivUp(num_kernels, num_threads),
-             num_threads,
-             0,
-             dev_ctx.stream()>>>(input_grad_data,
-                                 in_h,
-                                 in_w,
-                                 out_h,
-                                 out_w,
-                                 n,
-                                 c,
-                                 ratio_h,
-                                 ratio_w,
-                                 output_grad_data,
-                                 align_type_value);
+          <<<block_per_grid, thread_per_block, 0, dev_ctx.stream()>>>(
+              input_grad_data,
+              in_h,
+              in_w,
+              out_h,
+              out_w,
+              n,
+              c,
+              ratio_h,
+              ratio_w,
+              output_grad_data,
+              align_type_value);
     } else {
       int64_t cw = c * out_w;
       auto interp_divmods = funcs::FastDivModForInterpolate(c, out_chw, cw);