Revert "make bilinear interpolate stable. (#48644)" (#49307)

This reverts commit e1e8bf72.

Revert "make bilinear interpolate stable. (#48644)" (#49307)
This reverts commit e1e8bf72.
17ec1620 · xiongkun · GitHub · a9533953 · 17ec1620
Hide whitespace changes
Inline / Side-by-side

Showing 1 changed file with 14 additions and 28 deletions

paddle/phi/kernels/gpu/interpolate_grad_kernel.cu paddle/phi/kernels/gpu/interpolate_grad_kernel.cu +14 -28

No files found.
--- a/paddle/phi/kernels/gpu/interpolate_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/interpolate_grad_kernel.cu
@@ -25,8 +25,6 @@
 #include "paddle/phi/kernels/funcs/math_function.h"
 #include "paddle/phi/kernels/primitive/datamover_primitives.h"

-DECLARE_bool(cudnn_deterministic);
-
 namespace phi {

 template <typename T>
@@ -1039,12 +1037,6 @@ static void Interpolate2DCUDABwd(
 #endif

    if (optimize_flag & is_nchw) {
-      if (FLAGS_cudnn_deterministic) {
-        VLOG(2)
-            << "Run grad kernel of bilinear interpolate 2d with single thread.";
-        config.block_per_grid = 1;
-        config.thread_per_block = 1;
-      }
      KeBilinearInterpBwShareMemory<T><<<config.block_per_grid,
                                         config.thread_per_block,
                                         0,
@@ -1063,27 +1055,21 @@ static void Interpolate2DCUDABwd(
    } else if (!optimize_flag & is_nchw) {
      const int num_kernels = n * c * out_h * out_w;
      const int num_threads = std::min(dev_ctx.GetMaxThreadsPerBlock(), 1024);
-      int block_per_grid = backends::gpu::DivUp(num_kernels, num_threads);
-      int thread_per_block = num_threads;
-      if (FLAGS_cudnn_deterministic) {
-        VLOG(2)
-            << "Run grad kernel of bilinear interpolate 2d with single thread.";
-        block_per_grid = 1;
-        thread_per_block = 1;
-      }
      KeBilinearInterpNCHWBw<T>
-          <<<block_per_grid, thread_per_block, 0, dev_ctx.stream()>>>(
-              input_grad_data,
-              in_h,
-              in_w,
-              out_h,
-              out_w,
-              n,
-              c,
-              ratio_h,
-              ratio_w,
-              output_grad_data,
-              align_type_value);
+          <<<backends::gpu::DivUp(num_kernels, num_threads),
+             num_threads,
+             0,
+             dev_ctx.stream()>>>(input_grad_data,
+                                 in_h,
+                                 in_w,
+                                 out_h,
+                                 out_w,
+                                 n,
+                                 c,
+                                 ratio_h,
+                                 ratio_w,
+                                 output_grad_data,
+                                 align_type_value);
    } else {
      int64_t cw = c * out_w;
      auto interp_divmods = funcs::FastDivModForInterpolate(c, out_chw, cw);