未验证 提交 13181fd9 编写于 作者: S Shijie 提交者: GitHub

Add launch_bounds (#47285)

上级 493fbfd7
......@@ -256,17 +256,19 @@ template <typename T,
int BlockSizeX,
int BlockSizeY,
int VecSize,
typename Functor>
__global__ void FusedDropoutActBiasGrad(Functor act_grad,
const T *dout,
const MaskType *mask,
const T *src,
const T *bias,
const T factor,
const int64_t rows,
const int64_t cols,
T *dx,
T *dbias) {
typename Functor,
int THREADS_PER_CTA = BlockSizeX *BlockSizeY>
__global__ __launch_bounds__(THREADS_PER_CTA) void FusedDropoutActBiasGrad(
Functor act_grad,
const T *dout,
const MaskType *mask,
const T *src,
const T *bias,
const T factor,
const int64_t rows,
const int64_t cols,
T *dx,
T *dbias) {
int64_t col_id = blockIdx.x * blockDim.x + threadIdx.x;
using LoadT = phi::AlignedVector<T, VecSize>;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册