diff --git a/python/paddle/trainer_config_helpers/networks.py b/python/paddle/trainer_config_helpers/networks.py
index c291a4ea1d478d36b769b158961a8455a59c1461..7afca8d77823e64337dfca59a3a6dad9a5baec90 100644
--- a/python/paddle/trainer_config_helpers/networks.py
+++ b/python/paddle/trainer_config_helpers/networks.py
@@ -1529,7 +1529,7 @@ def multi_head_attention(query,
     :param head_num: The number of attention heads.
     :type head_num: int
     :param attention_type: The type of the attention mechanism used in each attention
-                           heads. Now, we only support scaled dot-product attention and ###
+                           head. Currently, we only support scaled dot-product attention and
                            additive attention.
     :type attention_type: basestring
     :return: The context vector.