diff --git a/paddle/fluid/operators/fused/fused_dropout_act_bias.h b/paddle/fluid/operators/fused/fused_dropout_act_bias.h index 6b2cdfb6a8d2f70bcaab2907e1c7ebae37e77ff4..e3e19d9ea6ebcbea48b83a54b0edb817cbec4f8c 100644 --- a/paddle/fluid/operators/fused/fused_dropout_act_bias.h +++ b/paddle/fluid/operators/fused/fused_dropout_act_bias.h @@ -256,17 +256,19 @@ template -__global__ void FusedDropoutActBiasGrad(Functor act_grad, - const T *dout, - const MaskType *mask, - const T *src, - const T *bias, - const T factor, - const int64_t rows, - const int64_t cols, - T *dx, - T *dbias) { + typename Functor, + int THREADS_PER_CTA = BlockSizeX *BlockSizeY> +__global__ __launch_bounds__(THREADS_PER_CTA) void FusedDropoutActBiasGrad( + Functor act_grad, + const T *dout, + const MaskType *mask, + const T *src, + const T *bias, + const T factor, + const int64_t rows, + const int64_t cols, + T *dx, + T *dbias) { int64_t col_id = blockIdx.x * blockDim.x + threadIdx.x; using LoadT = phi::AlignedVector;