Add launch_bounds (#47285)

13181fd9 · Shijie · GitHub · 493fbfd7 · 13181fd9
显示空白变更内容
内联并排

Showing 1 changed file with 13 additions and 11 deletions

paddle/fluid/operators/fused/fused_dropout_act_bias.h paddle/fluid/operators/fused/fused_dropout_act_bias.h +13 -11

未找到文件。
--- a/paddle/fluid/operators/fused/fused_dropout_act_bias.h
+++ b/paddle/fluid/operators/fused/fused_dropout_act_bias.h
@@ -256,8 +256,10 @@ template <typename T,
          int BlockSizeX,
          int BlockSizeY,
          int VecSize,
-          typename Functor>
-__global__ void FusedDropoutActBiasGrad(Functor act_grad,
+          typename Functor,
+          int THREADS_PER_CTA = BlockSizeX *BlockSizeY>
+__global__ __launch_bounds__(THREADS_PER_CTA) void FusedDropoutActBiasGrad(
+    Functor act_grad,
    const T *dout,
    const MaskType *mask,
    const T *src,