From 65494051201a0c8e521e668b85688f5c1de9bcd9 Mon Sep 17 00:00:00 2001
From: liu zhengxi <380185688@qq.com>
Date: Fri, 10 Dec 2021 20:35:05 +0800
Subject: [PATCH] Transfer MultiHeadAttention's matmul to v2 op (#36222)

* promote to v2

* alter
---
 python/paddle/nn/layer/transformer.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/python/paddle/nn/layer/transformer.py b/python/paddle/nn/layer/transformer.py
index 36bc836479..b0b6e62a60 100644
--- a/python/paddle/nn/layer/transformer.py
+++ b/python/paddle/nn/layer/transformer.py
@@ -402,9 +402,8 @@ class MultiHeadAttention(Layer):
         q, k, v, cache = self._prepare_qkv(query, key, value, cache)
 
         # scale dot product attention
-        # TODO(guosheng): use tensor.matmul, however it doesn't support `alpha`
-        product = layers.matmul(
-            x=q, y=k, transpose_y=True, alpha=self.head_dim**-0.5)
+        product = paddle.matmul(
+            x=q * (self.head_dim**-0.5), y=k, transpose_y=True)
         if attn_mask is not None:
             # Support bool or int mask
             attn_mask = _convert_attention_mask(attn_mask, product.dtype)
-- 
GitLab
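
Note (not part of the patch): the legacy `layers.matmul` fused the attention scaling through its `alpha` argument, which the v2 `paddle.matmul` does not accept, so the patch pre-scales `q` by `head_dim**-0.5` before the matmul instead. The following is a minimal standalone sketch checking that the two forms agree numerically; the tensor shapes and `head_dim` value here are illustrative assumptions, not values from the patch.

    import paddle

    head_dim = 64
    # Assumed shape: [batch, num_heads, seq_len, head_dim]
    q = paddle.randn([2, 8, 16, head_dim])
    k = paddle.randn([2, 8, 16, head_dim])

    # New form from the patch: scale q first, then a plain v2 matmul.
    product_new = paddle.matmul(
        x=q * (head_dim**-0.5), y=k, transpose_y=True)

    # Reference form equivalent to the old alpha-scaled matmul:
    # compute q @ k^T, then scale the product.
    product_ref = paddle.matmul(
        x=q, y=k, transpose_y=True) * (head_dim**-0.5)

    # Both orderings compute (q @ k^T) / sqrt(head_dim), up to
    # floating-point rounding, so this should print True.
    print(paddle.allclose(product_new, product_ref, atol=1e-5))

Scaling `q` before the matmul rather than scaling the product afterwards is a common choice because it multiplies the smaller tensor when the sequence length exceeds `head_dim`.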