Add launch_bounds (#47285)

13181fd9 · Shijie · GitHub · 493fbfd7 · 13181fd9
隐藏空白更改
内联并排

Showing 1 changed file with 13 additions and 11 deletions

paddle/fluid/operators/fused/fused_dropout_act_bias.h paddle/fluid/operators/fused/fused_dropout_act_bias.h +13 -11

未找到文件。
--- a/paddle/fluid/operators/fused/fused_dropout_act_bias.h
+++ b/paddle/fluid/operators/fused/fused_dropout_act_bias.h
@@ -256,17 +256,19 @@ template <typename T,
          int BlockSizeX,
          int BlockSizeY,
          int VecSize,
-          typename Functor>
+          typename Functor,
-__global__ void FusedDropoutActBiasGrad(Functor act_grad,
+          int THREADS_PER_CTA = BlockSizeX *BlockSizeY>
-                                        const T *dout,
+__global__ __launch_bounds__(THREADS_PER_CTA) void FusedDropoutActBiasGrad(
-                                        const MaskType *mask,
+    Functor act_grad,
-                                        const T *src,
+    const T *dout,
-                                        const T *bias,
+    const MaskType *mask,
-                                        const T factor,
+    const T *src,
-                                        const int64_t rows,
+    const T *bias,
-                                        const int64_t cols,
+    const T factor,
-                                        T *dx,
+    const int64_t rows,
-                                        T *dbias) {
+    const int64_t cols,
+    T *dx,
+    T *dbias) {
  int64_t col_id = blockIdx.x * blockDim.x + threadIdx.x;
  using LoadT = phi::AlignedVector<T, VecSize>;