diff --git a/paddle/phi/kernels/funcs/norm_utils.h b/paddle/phi/kernels/funcs/norm_utils.h index 2d0a879e41c783a801021db38711d997f62011b4..5c898549b353ead9856624ff5de556b7e8440c10 100644 --- a/paddle/phi/kernels/funcs/norm_utils.h +++ b/paddle/phi/kernels/funcs/norm_utils.h @@ -18,6 +18,10 @@ limitations under the License. */ namespace phi { namespace funcs { +#define CUDNN_PER_ACTIVATION_THRESHOLD 10240 +#define CUDNN_SPATIAL_THRESHOLD_TRAIN 880801 +#define CUDNN_SPATIAL_THRESHOLD_EVAL 65535 + inline void ExtractNCWHD(const phi::DDim &dims, const DataLayout &data_layout, int *N, diff --git a/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu b/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu index f65e22ec997fa4d48070ed5ea988220191f73d13..01a7aa0162718882e85c6b6ead336fd1ba11f89b 100644 --- a/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu @@ -907,15 +907,12 @@ void BatchNormGradRawKernel(const Context &ctx, #else } // CUDNN only support small batch size - // const size_t CUDNN_PER_ACTIVATION_THRESHOLD = 131070; - const size_t CUDNN_PER_ACTIVATION_THRESHOLD = 10240; - const size_t CUDNN_SPATIAL_THRESHOLD = 880801; bool use_native_nhwc = d_x ? (x_dims.size() == 4 && compute_format == DataLayout::kNHWC) : false; const bool use_native_kernel = ((x_dims.size() == 2 && N >= CUDNN_PER_ACTIVATION_THRESHOLD) || - (x_dims.size() == 3 && N >= CUDNN_SPATIAL_THRESHOLD)); + (x_dims.size() == 3 && N >= CUDNN_SPATIAL_THRESHOLD_TRAIN)); if (use_native_nhwc || (d_x && d_scale && d_bias)) { if (use_native_kernel || use_native_nhwc) { if (x_dims.size() == 2 || use_native_nhwc) { diff --git a/paddle/phi/kernels/gpu/batch_norm_kernel.cu b/paddle/phi/kernels/gpu/batch_norm_kernel.cu index 01e4f08c29bdd5669c5d7094c90f551ce18368a4..60d0d1a01bb301e63f92ddff0157e67cfc186fbd 100644 --- a/paddle/phi/kernels/gpu/batch_norm_kernel.cu +++ b/paddle/phi/kernels/gpu/batch_norm_kernel.cu @@ -722,9 +722,6 @@ void BatchNormKernel(const Context &ctx, auto handle = ctx.cudnn_handle(); - const size_t CUDNN_PER_ACTIVATION_THRESHOLD = 10240; - const size_t CUDNN_SPATIAL_THRESHOLD = 880801; - // Now, depending on whether we are running test or not, we have two paths. // It is training mode when it's not reference AND not using pre-trained // model. @@ -829,7 +826,7 @@ void BatchNormKernel(const Context &ctx, #else const bool use_native_kernel = (x_dims.size() == 2 || - (x_dims.size() == 3 && N >= CUDNN_SPATIAL_THRESHOLD)); + (x_dims.size() == 3 && N >= CUDNN_SPATIAL_THRESHOLD_EVAL)); if (use_native_kernel) { const int block_size = 256; const int grid_size = (N * C * H * W * D + block_size - 1) / block_size; @@ -1005,7 +1002,7 @@ void BatchNormKernel(const Context &ctx, // const size_t CUDNN_PER_ACTIVATION_THRESHOLD = 131070; const bool use_native_kernel = ((x_dims.size() == 2 && N >= CUDNN_PER_ACTIVATION_THRESHOLD) || - (x_dims.size() == 3 && N >= CUDNN_SPATIAL_THRESHOLD)); + (x_dims.size() == 3 && N >= CUDNN_SPATIAL_THRESHOLD_TRAIN)); if (use_native_kernel) { dim3 block; dim3 grid;