diff --git a/paddle/fluid/operators/group_norm_op.cu b/paddle/fluid/operators/group_norm_op.cu index c93910bde5a2ca84b8fa2880aeb8ce17e37e10be..bb8031b0cc4e6cd97afe5f5584d64d58197d6f9d 100644 --- a/paddle/fluid/operators/group_norm_op.cu +++ b/paddle/fluid/operators/group_norm_op.cu @@ -605,8 +605,16 @@ class GroupNormGradKernel int flags = (scale_data != nullptr) * kHasScale + (bias_data != nullptr) * kHasBias; if (data_layout == DataLayout::kNCHW) { + const int max_num_threads = 1024; + int max_block_size = std::min(imsize, max_num_threads); + int block_size_nchw = 1; + while (block_size_nchw < max_block_size) { + block_size_nchw *= 2; + } + block_size_nchw = std::max(block_size_nchw, kps::details::kWarpSize); + dim3 blocks(block_size_nchw); ScalarGetDsDbCUDAKernel< - T><<>>( + T><<>>( imsize, x_data, dy_data, ds_data, db_data); if (d_scale || d_bias) {