From 13181fd975bccb25d1d1126347872ef68279d567 Mon Sep 17 00:00:00 2001
From: Shijie <505749828@qq.com>
Date: Thu, 27 Oct 2022 14:54:41 +0800
Subject: [PATCH] Add launch_bounds (#47285)

---
 .../operators/fused/fused_dropout_act_bias.h | 24 ++++++++++---------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/paddle/fluid/operators/fused/fused_dropout_act_bias.h b/paddle/fluid/operators/fused/fused_dropout_act_bias.h
index 6b2cdfb6a8..e3e19d9ea6 100644
--- a/paddle/fluid/operators/fused/fused_dropout_act_bias.h
+++ b/paddle/fluid/operators/fused/fused_dropout_act_bias.h
@@ -256,17 +256,19 @@ template <typename T,
           int BlockSizeX,
           int BlockSizeY,
           int VecSize,
-          typename Functor>
-__global__ void FusedDropoutActBiasGrad(Functor act_grad,
-                                        const T *dout,
-                                        const MaskType *mask,
-                                        const T *src,
-                                        const T *bias,
-                                        const T factor,
-                                        const int64_t rows,
-                                        const int64_t cols,
-                                        T *dx,
-                                        T *dbias) {
+          typename Functor,
+          int THREADS_PER_CTA = BlockSizeX *BlockSizeY>
+__global__ __launch_bounds__(THREADS_PER_CTA) void FusedDropoutActBiasGrad(
+    Functor act_grad,
+    const T *dout,
+    const MaskType *mask,
+    const T *src,
+    const T *bias,
+    const T factor,
+    const int64_t rows,
+    const int64_t cols,
+    T *dx,
+    T *dbias) {
   int64_t col_id = blockIdx.x * blockDim.x + threadIdx.x;

   using LoadT = phi::AlignedVector<T, VecSize>;
--
GitLab
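
Note (not part of the patch above): the patch's change is to pass a compile-time block size to __launch_bounds__ via a defaulted template parameter THREADS_PER_CTA = BlockSizeX * BlockSizeY, which tells the compiler the kernel is never launched with more than that many threads per block, so it can budget registers for that block size. Below is a minimal, self-contained sketch of the same pattern, assuming an invented kernel (ScaleBiasKernel) and launch configuration; none of these names come from fused_dropout_act_bias.h.

// Illustrative sketch only -- compile with nvcc. The kernel, data, and math
// are made up; only the __launch_bounds__/default-template-parameter pattern
// mirrors the patch above.
#include <cstdio>
#include <cuda_runtime.h>

template <typename T, int BlockSizeX, int BlockSizeY,
          int THREADS_PER_CTA = BlockSizeX * BlockSizeY>
__global__ __launch_bounds__(THREADS_PER_CTA) void ScaleBiasKernel(
    const T *x, const T *bias, T scale, int64_t n, T *y) {
  // __launch_bounds__(THREADS_PER_CTA) promises the compiler this kernel is
  // never launched with more than THREADS_PER_CTA threads per block, letting
  // it allocate registers for that block size rather than the worst case.
  int64_t i = static_cast<int64_t>(blockIdx.x) * blockDim.x + threadIdx.x;
  if (i < n) {
    y[i] = scale * x[i] + bias[i % BlockSizeX];
  }
}

int main() {
  constexpr int kBlockX = 32;
  constexpr int kBlockY = 8;  // 32 * 8 = 256 threads per CTA
  const int64_t n = 1 << 20;

  float *x, *bias, *y;
  cudaMallocManaged(&x, n * sizeof(float));
  cudaMallocManaged(&bias, kBlockX * sizeof(float));
  cudaMallocManaged(&y, n * sizeof(float));
  for (int64_t i = 0; i < n; ++i) x[i] = 1.0f;
  for (int i = 0; i < kBlockX; ++i) bias[i] = 0.5f;

  // The launch must respect the bound declared above (<= 256 threads here).
  dim3 block(kBlockX * kBlockY);
  dim3 grid((n + block.x - 1) / block.x);
  ScaleBiasKernel<float, kBlockX, kBlockY><<<grid, block>>>(x, bias, 2.0f, n, y);
  cudaDeviceSynchronize();
  printf("y[0] = %f\n", y[0]);  // expect 2.5

  cudaFree(x);
  cudaFree(bias);
  cudaFree(y);
  return 0;
}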