From 0294ab41540d84769727b48a58030faacc71ac28 Mon Sep 17 00:00:00 2001
From: zhangkaihuo
Date: Fri, 13 Jan 2023 10:31:06 +0800
Subject: [PATCH] Update threshold of bn1d (#49734)

---
 paddle/phi/kernels/funcs/norm_utils.h            | 4 ++++
 paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu | 5 +----
 paddle/phi/kernels/gpu/batch_norm_kernel.cu      | 7 ++-----
 3 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/paddle/phi/kernels/funcs/norm_utils.h b/paddle/phi/kernels/funcs/norm_utils.h
index 2d0a879e41..5c898549b3 100644
--- a/paddle/phi/kernels/funcs/norm_utils.h
+++ b/paddle/phi/kernels/funcs/norm_utils.h
@@ -18,6 +18,10 @@ limitations under the License. */
 namespace phi {
 namespace funcs {
 
+#define CUDNN_PER_ACTIVATION_THRESHOLD 10240
+#define CUDNN_SPATIAL_THRESHOLD_TRAIN 880801
+#define CUDNN_SPATIAL_THRESHOLD_EVAL 65535
+
 inline void ExtractNCWHD(const phi::DDim &dims,
                          const DataLayout &data_layout,
                          int *N,
diff --git a/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu b/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
index f65e22ec99..01a7aa0162 100644
--- a/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
@@ -907,15 +907,12 @@ void BatchNormGradRawKernel(const Context &ctx,
 #else
   }
   // CUDNN only support small batch size
-  // const size_t CUDNN_PER_ACTIVATION_THRESHOLD = 131070;
-  const size_t CUDNN_PER_ACTIVATION_THRESHOLD = 10240;
-  const size_t CUDNN_SPATIAL_THRESHOLD = 880801;
   bool use_native_nhwc =
       d_x ? (x_dims.size() == 4 && compute_format == DataLayout::kNHWC)
           : false;
   const bool use_native_kernel =
       ((x_dims.size() == 2 && N >= CUDNN_PER_ACTIVATION_THRESHOLD) ||
-       (x_dims.size() == 3 && N >= CUDNN_SPATIAL_THRESHOLD));
+       (x_dims.size() == 3 && N >= CUDNN_SPATIAL_THRESHOLD_TRAIN));
   if (use_native_nhwc || (d_x && d_scale && d_bias)) {
     if (use_native_kernel || use_native_nhwc) {
       if (x_dims.size() == 2 || use_native_nhwc) {
diff --git a/paddle/phi/kernels/gpu/batch_norm_kernel.cu b/paddle/phi/kernels/gpu/batch_norm_kernel.cu
index 01e4f08c29..60d0d1a01b 100644
--- a/paddle/phi/kernels/gpu/batch_norm_kernel.cu
+++ b/paddle/phi/kernels/gpu/batch_norm_kernel.cu
@@ -722,9 +722,6 @@ void BatchNormKernel(const Context &ctx,
 
   auto handle = ctx.cudnn_handle();
 
-  const size_t CUDNN_PER_ACTIVATION_THRESHOLD = 10240;
-  const size_t CUDNN_SPATIAL_THRESHOLD = 880801;
-
   // Now, depending on whether we are running test or not, we have two paths.
   // It is training mode when it's not reference AND not using pre-trained
   // model.
@@ -829,7 +826,7 @@ void BatchNormKernel(const Context &ctx,
 #else
     const bool use_native_kernel =
         (x_dims.size() == 2 ||
-         (x_dims.size() == 3 && N >= CUDNN_SPATIAL_THRESHOLD));
+         (x_dims.size() == 3 && N >= CUDNN_SPATIAL_THRESHOLD_EVAL));
     if (use_native_kernel) {
       const int block_size = 256;
       const int grid_size = (N * C * H * W * D + block_size - 1) / block_size;
@@ -1005,7 +1002,7 @@ void BatchNormKernel(const Context &ctx,
       // const size_t CUDNN_PER_ACTIVATION_THRESHOLD = 131070;
       const bool use_native_kernel =
           ((x_dims.size() == 2 && N >= CUDNN_PER_ACTIVATION_THRESHOLD) ||
-           (x_dims.size() == 3 && N >= CUDNN_SPATIAL_THRESHOLD));
+           (x_dims.size() == 3 && N >= CUDNN_SPATIAL_THRESHOLD_TRAIN));
       if (use_native_kernel) {
         dim3 block;
         dim3 grid;
--
GitLab
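
Note (not part of the patch above): the sketch below is a minimal, standalone restatement of the dispatch rule the new thresholds encode, so the intent of the three hunks is easier to read side by side. The helper names UseNativeKernelForTraining and UseNativeKernelForInference are illustrative only and do not exist in the Paddle source; x_dims.size() == 2 corresponds to BatchNorm1D over (N, C) input, x_dims.size() == 3 to (N, C, L) input, and N is the batch size as extracted by ExtractNCWHD.

#include <cstddef>

// Thresholds added to paddle/phi/kernels/funcs/norm_utils.h by this patch.
#define CUDNN_PER_ACTIVATION_THRESHOLD 10240
#define CUDNN_SPATIAL_THRESHOLD_TRAIN 880801
#define CUDNN_SPATIAL_THRESHOLD_EVAL 65535

// Training forward and backward (batch_norm_kernel.cu / batch_norm_grad_kernel.cu):
// fall back to the hand-written CUDA kernel when the batch size N exceeds the
// cuDNN limit for per-activation (2-D input) or spatial (3-D input) batch norm.
inline bool UseNativeKernelForTraining(int rank, size_t N) {
  return (rank == 2 && N >= CUDNN_PER_ACTIVATION_THRESHOLD) ||
         (rank == 3 && N >= CUDNN_SPATIAL_THRESHOLD_TRAIN);
}

// Inference (batch_norm_kernel.cu): 2-D input always takes the native kernel;
// 3-D input switches to it at the smaller eval threshold.
inline bool UseNativeKernelForInference(int rank, size_t N) {
  return rank == 2 || (rank == 3 && N >= CUDNN_SPATIAL_THRESHOLD_EVAL);
}

Whether the native path is taken thus depends on both the input rank and the batch size, with a lower 3-D cutoff in inference than in training.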