未验证 提交 13181fd9 编写于 作者: S Shijie 提交者: GitHub

Add __launch_bounds__ (#47285)

上级 493fbfd7
...@@ -256,17 +256,19 @@ template <typename T, ...@@ -256,17 +256,19 @@ template <typename T,
int BlockSizeX, int BlockSizeX,
int BlockSizeY, int BlockSizeY,
int VecSize, int VecSize,
typename Functor> typename Functor,
__global__ void FusedDropoutActBiasGrad(Functor act_grad, int THREADS_PER_CTA = BlockSizeX *BlockSizeY>
const T *dout, __global__ __launch_bounds__(THREADS_PER_CTA) void FusedDropoutActBiasGrad(
const MaskType *mask, Functor act_grad,
const T *src, const T *dout,
const T *bias, const MaskType *mask,
const T factor, const T *src,
const int64_t rows, const T *bias,
const int64_t cols, const T factor,
T *dx, const int64_t rows,
T *dbias) { const int64_t cols,
T *dx,
T *dbias) {
int64_t col_id = blockIdx.x * blockDim.x + threadIdx.x; int64_t col_id = blockIdx.x * blockDim.x + threadIdx.x;
using LoadT = phi::AlignedVector<T, VecSize>; using LoadT = phi::AlignedVector<T, VecSize>;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册