diff --git a/python/paddle/trainer_config_helpers/networks.py b/python/paddle/trainer_config_helpers/networks.py index c291a4ea1d478d36b769b158961a8455a59c1461..7afca8d77823e64337dfca59a3a6dad9a5baec90 100644 --- a/python/paddle/trainer_config_helpers/networks.py +++ b/python/paddle/trainer_config_helpers/networks.py @@ -1529,7 +1529,7 @@ def multi_head_attention(query, :param head_num: The number of attention heads. :type head_num: int :param attention_type: The type of the attention mechanism used in each attention - heads. Now, we only support scaled dot-product attention and ### + head. Now, we only support scaled dot-product attention and additive attention. :type attention_type: basestring :return: The context vector.