diff --git a/demo/seqToseq/seqToseq_net.py b/demo/seqToseq/seqToseq_net.py
index 8b613de71ade4d05b36bd3a254ca14877d1bc876..08183e1f75d339ff70c0fb5130b94508d8b86344 100644
--- a/demo/seqToseq/seqToseq_net.py
+++ b/demo/seqToseq/seqToseq_net.py
@@ -158,12 +158,15 @@ def gru_encoder_decoder(data_conf,
                     is_seq=True),
         StaticInput(input=encoded_proj, is_seq=True),
     ]
-    # In generation, decoder predicts a next target word based on
+    # In generation, the decoder predicts the next target word based on
     # the encoded source sequence and the last generated target word.
+
     # The encoded source sequence (encoder's output) must be specified by
-    # StaticInput which is a read-only memory.
-    # Here, GeneratedInputs automatically fetchs the last generated word,
-    # which is initialized by a start mark, such as <s>.
+    # StaticInput, which is a read-only memory.
+    # The embedding of the last generated word is automatically fetched by
+    # GeneratedInput, which is initialized by a start mark, such as <s>,
+    # and must be included in generation.
+
     trg_embedding = GeneratedInput(
         size=target_dict_dim,
         embedding_name='_target_language_embedding',
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 1eaf26fdbf5eaeec85ddca7b0364f3ce3e3ec9f8..6e7964c12c171faab16524d9391b0dff0362a774 100644
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -2198,7 +2198,8 @@ def recurrent_group(step, input, reverse=False, name=None):
     :type input: LayerOutput|StaticInput|SubsequenceInput|list|tuple
-    :param reverse: Reverse is true, rnn will process sequence reversely.
+    :param reverse: If reverse is set to True, the recurrent unit will process
+                    the input sequence in reverse order.
     :type reverse: bool
     :return: Layer output object
     :rtype: LayerOutput
@@ -2372,6 +2373,84 @@ def beam_search(step, input, bos_id, eos_id, beam_size, result_file,
                 dict_file="", id_input=None, max_length=500, name=None,
                 num_results_per_sample=None):
+    """
+    Beam search is a heuristic search algorithm used in sequence generation.
+    It explores a graph by expanding the most promising nodes in a limited set
+    to maintain tractability.
+
+    The example usage is:
+
+    .. code-block:: python
+
+        def rnn_step(input):
+            last_time_step_output = memory(name='rnn', size=512)
+            with mixed_layer(size=512) as simple_rnn:
+                simple_rnn += full_matrix_projection(input)
+                simple_rnn += last_time_step_output
+            return simple_rnn
+
+        beam_gen = beam_search(name="decoder",
+                               step=rnn_step,
+                               input=[StaticInput("encoder_last")],
+                               bos_id=0,
+                               eos_id=1,
+                               beam_size=5,
+                               result_file="./generated_sequences.txt")
+
+    Please see the following demo for more details:
+
+    - machine translation : demo/seqToseq/translation/gen.conf \
+                            demo/seqToseq/seqToseq_net.py
+
+    :param name: Name of the recurrent unit that generates sequences.
+    :type name: basestring
+    :param step: A callable function that defines the calculation in a time
+                 step. It is applied to sequences of arbitrary length by
+                 sharing the same set of weights.
+
+                 You can refer to the first parameter of recurrent_group, or
+                 demo/seqToseq/seqToseq_net.py for more details.
+    :type step: callable
+    :param input: Input data for the recurrent unit.
+    :type input: StaticInput|GeneratedInput
+    :param bos_id: Index of the start symbol in the dictionary. The start
+                   symbol is a special token for NLP tasks, which indicates
+                   the beginning of a sequence. In the generation task, the
+                   start symbol is essential, since it is used to initialize
+                   the RNN internal state.
+    :type bos_id: int
+    :param eos_id: Index of the end symbol in the dictionary. The end symbol
+                   is a special token for NLP tasks, which indicates the end
+                   of a sequence. The generation process stops once the end
+                   symbol is generated, or a pre-defined maximum number of
+                   iterations is exceeded.
+    :type eos_id: int
+    :param beam_size: Beam search for sequence generation is an iterative
+                      search algorithm. To maintain tractability, every
+                      iteration only stores a predetermined number, called
+                      the beam_size, of the most promising next words. The
+                      greater the beam size, the fewer candidate words are
+                      pruned.
+    :type beam_size: int
+    :param result_file: Path of the file to store the generated results.
+    :type result_file: basestring
+    :param dict_file: Path of the dictionary. This is an optional parameter.
+                      Every line is a word in the dictionary with
+                      (line number - 1) as the word index.
+                      If this parameter is set to None, or to an empty string,
+                      only the word indices are printed in the generated
+                      results.
+    :type dict_file: basestring
+    :param num_results_per_sample: Number of the generated results per input
+                                   sequence. This number must not be greater
+                                   than beam_size.
+    :type num_results_per_sample: int
+    :param id_input: Index of the input sequence; the specified index will
+                     be printed in the generated results. This is an optional
+                     parameter.
+    :type id_input: LayerOutput
+    :return: The seq_text_printer that prints the generated sequence to a file.
+    :rtype: evaluator
+    """
+
     if num_results_per_sample is None:
         num_results_per_sample = beam_size
     if num_results_per_sample > beam_size:
diff --git a/python/paddle/trainer_config_helpers/networks.py b/python/paddle/trainer_config_helpers/networks.py
index b162304b91861acf626ee48a26ca045acf2d283b..1d0a1d52a9f943006713135c19af628beb43d74c 100644
--- a/python/paddle/trainer_config_helpers/networks.py
+++ b/python/paddle/trainer_config_helpers/networks.py
@@ -787,7 +787,7 @@ def simple_attention(encoded_sequence,
                      name=None):
     """
     Calculate and then return a context vector by attention machanism.
-    Size of the context vector equals to size of encoded_sequence.
+    The size of the context vector equals the size of the encoded_sequence.
 
     .. math::
 
@@ -795,7 +795,7 @@ def simple_attention(encoded_sequence,
 
         e_{i,j} & = a(s_{i-1}, h_{j})
 
-        a_{i,j} & = \\frac{exp(e_{i,i})}{\\sum_{k=1}^{T_{x}{exp(e_{i,k})}}}
+        a_{i,j} & = \\frac{exp(e_{i,j})}{\\sum_{k=1}^{T_{x}}{exp(e_{i,k})}}
 
         c_{i} & = \\sum_{j=1}^{T_{x}}a_{i,j}h_{j}
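
For orientation, the pieces documented above fit together as in the seqToseq demo: the encoder's outputs enter the decoder group as read-only StaticInputs, GeneratedInput feeds back the embedding of the last generated word, and beam_search drives the generation loop. The following is a minimal sketch assembled from the hunks above, not a complete config; `encoded_vector`, `encoded_proj`, `gru_decoder_with_attention`, `target_dict_dim`, and `word_vector_dim` are placeholders for the encoder, step function, and dimensions defined elsewhere in demo/seqToseq/seqToseq_net.py.

.. code-block:: python

    # Sketch only: encoder outputs, the decoder step function, and the
    # dimension variables are placeholders defined elsewhere in the demo.
    group_inputs = [
        StaticInput(input=encoded_vector, is_seq=True),  # read-only memory
        StaticInput(input=encoded_proj, is_seq=True),
    ]

    # GeneratedInput feeds back the embedding of the last generated word;
    # the first step is initialized with the start mark <s> (bos_id).
    trg_embedding = GeneratedInput(
        size=target_dict_dim,
        embedding_name='_target_language_embedding',
        embedding_size=word_vector_dim)
    group_inputs.append(trg_embedding)

    beam_gen = beam_search(name='decoder',
                           step=gru_decoder_with_attention,
                           input=group_inputs,
                           bos_id=0,    # index of <s> in the target dictionary
                           eos_id=1,    # generation stops at the end symbol
                           beam_size=5,
                           max_length=250,  # illustrative iteration cap
                           result_file='./translation_results.txt')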