未验证 提交 13181fd9 编写于 作者: S Shijie 提交者: GitHub

Add launch_bounds (#47285)

上级 493fbfd7
@@ -256,8 +256,10 @@ template <typename T,
 int BlockSizeX,
 int BlockSizeY,
 int VecSize,
-typename Functor>
-__global__ void FusedDropoutActBiasGrad(Functor act_grad,
+typename Functor,
+int THREADS_PER_CTA = BlockSizeX *BlockSizeY>
+__global__ __launch_bounds__(THREADS_PER_CTA) void FusedDropoutActBiasGrad(
+Functor act_grad,
 const T *dout,
 const MaskType *mask,
 const T *src,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册