未验证 提交 e1e8bf72 编写于 作者: X xiongkun 提交者: GitHub

make bilinear interpolation stable. (#48644)

* make bilinear interpolation stable.

* fix code
上级 0a2dfa38
...@@ -25,6 +25,8 @@ ...@@ -25,6 +25,8 @@
#include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/primitive/datamover_primitives.h" #include "paddle/phi/kernels/primitive/datamover_primitives.h"
DECLARE_bool(cudnn_deterministic);
namespace phi { namespace phi {
template <typename T> template <typename T>
...@@ -1034,6 +1036,12 @@ static void Interpolate2DCUDABwd( ...@@ -1034,6 +1036,12 @@ static void Interpolate2DCUDABwd(
#endif #endif
if (optimize_flag & is_nchw) { if (optimize_flag & is_nchw) {
if (FLAGS_cudnn_deterministic) {
VLOG(2)
<< "Run grad kernel of bilinear interpolate 2d with single thread.";
config.block_per_grid = 1;
config.thread_per_block = 1;
}
KeBilinearInterpBwShareMemory<T><<<config.block_per_grid, KeBilinearInterpBwShareMemory<T><<<config.block_per_grid,
config.thread_per_block, config.thread_per_block,
0, 0,
...@@ -1052,21 +1060,27 @@ static void Interpolate2DCUDABwd( ...@@ -1052,21 +1060,27 @@ static void Interpolate2DCUDABwd(
} else if (!optimize_flag & is_nchw) { } else if (!optimize_flag & is_nchw) {
const int num_kernels = n * c * out_h * out_w; const int num_kernels = n * c * out_h * out_w;
const int num_threads = std::min(dev_ctx.GetMaxThreadsPerBlock(), 1024); const int num_threads = std::min(dev_ctx.GetMaxThreadsPerBlock(), 1024);
int block_per_grid = backends::gpu::DivUp(num_kernels, num_threads);
int thread_per_block = num_threads;
if (FLAGS_cudnn_deterministic) {
VLOG(2)
<< "Run grad kernel of bilinear interpolate 2d with single thread.";
block_per_grid = 1;
thread_per_block = 1;
}
KeBilinearInterpNCHWBw<T> KeBilinearInterpNCHWBw<T>
<<<backends::gpu::DivUp(num_kernels, num_threads), <<<block_per_grid, thread_per_block, 0, dev_ctx.stream()>>>(
num_threads, input_grad_data,
0, in_h,
dev_ctx.stream()>>>(input_grad_data, in_w,
in_h, out_h,
in_w, out_w,
out_h, n,
out_w, c,
n, ratio_h,
c, ratio_w,
ratio_h, output_grad_data,
ratio_w, align_type_value);
output_grad_data,
align_type_value);
} else { } else {
int64_t cw = c * out_w; int64_t cw = c * out_w;
auto interp_divmods = funcs::FastDivModForInterpolate(c, out_chw, cw); auto interp_divmods = funcs::FastDivModForInterpolate(c, out_chw, cw);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册