From 6e6eab07e80d287fb10f6033a01f15650b36fcdb Mon Sep 17 00:00:00 2001 From: zhaoyuchen2018 <45989343+zhaoyuchen2018@users.noreply.github.com> Date: Wed, 23 Oct 2019 04:30:23 -0500 Subject: [PATCH] Fix multihead op bug. (#20783) The op should handle k=1024 test=develop Signed-off-by: zhaoyuchen --- paddle/fluid/operators/multihead_matmul_op.cc | 2 +- paddle/fluid/operators/multihead_matmul_op.cu | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/operators/multihead_matmul_op.cc b/paddle/fluid/operators/multihead_matmul_op.cc index b612be02b4f..fbf372ba6e1 100644 --- a/paddle/fluid/operators/multihead_matmul_op.cc +++ b/paddle/fluid/operators/multihead_matmul_op.cc @@ -134,7 +134,7 @@ MultiHeadMatMul Operator. This op is used for optimize multi head calculation in ernie model. Not suggest to use in other case except has same structure as ernie. -Example of matrix multiplication with head_number of H +Example of matrix multiplication with head_number of B - X: [B, M, K], Y: [B, K, N] => Out: [B, M, N] Both the input `Q` and `K` can carry the LoD (Level of Details) information, diff --git a/paddle/fluid/operators/multihead_matmul_op.cu b/paddle/fluid/operators/multihead_matmul_op.cu index 6e8aa712fbf..b0b34712304 100644 --- a/paddle/fluid/operators/multihead_matmul_op.cu +++ b/paddle/fluid/operators/multihead_matmul_op.cu @@ -331,7 +331,7 @@ void MultiHeadGPUCompute(const platform::CUDADeviceContext &dev_ctx, auto stream = dev_ctx.stream(); int grid = m; - PADDLE_ENFORCE_LT(k, 1024, + PADDLE_ENFORCE_LE(k, 1024, "Input head_number * size_per_head should <= 1024"); int block = k <= 1024 ? k : 1024; add_QKV<<<grid, block, 0, stream>>>(Q, K, V, q_buf, k_buf, v_buf, bias_q, -- GitLab