diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index fdb6f83f2ba510232714fb8a9c7c1af837a753ff..21eba71527e60833e0c69b344ecc639626faa529 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -3173,11 +3173,11 @@ def memory(name,
 
 
 @wrap_bias_attr_default()
-@wrap_act_default(
-    param_names=['gate_act', 'state_act'], act=SigmoidActivation())
+@wrap_act_default(param_names=['gate_act'], act=SigmoidActivation())
+@wrap_act_default(param_names=['state_act'], act=TanhActivation())
 @wrap_act_default(act=TanhActivation())
 @wrap_name_default('lstm_step')
-@layer_support()
+@layer_support(ERROR_CLIPPING, DROPOUT)
 def lstm_step_layer(input,
                     state,
                     size=None,
@@ -3531,12 +3531,7 @@ def SubsequenceInput(input):
 
 
 @wrap_name_default("recurrent_group")
-def recurrent_group(step,
-                    input,
-                    reverse=False,
-                    name=None,
-                    targetInlink=None,
-                    is_generating=False):
+def recurrent_group(step, input, reverse=False, name=None, targetInlink=None):
     """
     Recurrent layer group is an extremely flexible recurrent unit in
     PaddlePaddle. As long as the user defines the calculation done within a
@@ -3602,21 +3597,12 @@ def recurrent_group(step,
 
     :type targetInlink: LayerOutput|SubsequenceInput
 
-    :param is_generating: If is generating, none of input type should be LayerOutput;
-                          else, for training or testing, one of the input type must
-                          be LayerOutput.
-
-    :type is_generating: bool
-
     :return: LayerOutput object.
     :rtype: LayerOutput
     """
     model_type('recurrent_nn')
 
-    def is_single_input(x):
-        return isinstance(x, LayerOutput) or isinstance(x, StaticInput)
-
-    if is_single_input(input):
+    if isinstance(input, LayerOutput) or isinstance(input, StaticInput):
         input = [input]
     assert isinstance(input, collections.Sequence)
 
@@ -3630,13 +3616,8 @@ def recurrent_group(step,
         in_links=map(lambda x: x.name, in_links),
         seq_reversed=reverse)
     in_args = []
-    has_LayerOutput = False
     for each_input in input:
-        assert is_single_input(each_input)
-        if isinstance(each_input, LayerOutput):
-            in_args.append(each_input)
-            has_LayerOutput = True
-        else:  # StaticInput
+        if isinstance(each_input, StaticInput):  # StaticInput
             mem_name = "__%s_memory__" % each_input.input.name
             mem = memory(
                 name=None,
@@ -3644,24 +3625,26 @@ def recurrent_group(step,
                 boot_layer=each_input.input)
             mem.set_input(mem)
             in_args.append(mem)
-
-    assert (is_generating != has_LayerOutput)
+        else:
+            in_args.append(each_input)
 
     layer_outs = step(*in_args)
 
     if isinstance(layer_outs, LayerOutput):
         layer_outs = [layer_outs]
 
-    for ot in layer_outs:
-        assert isinstance(ot, LayerOutput)
-        ot.reverse = reverse
-        RecurrentLayerGroupSetOutLink(ot.name)
+    for layer_out in layer_outs:
+        assert isinstance(
+            layer_out, LayerOutput
+        ), "Type of step function's return value must be LayerOutput."
+        layer_out.reverse = reverse
+        RecurrentLayerGroupSetOutLink(layer_out.name)
 
     RecurrentLayerGroupEnd(name=name)
 
     for layer_out in layer_outs:
-        # Thee previous full_name is the name is the rnn group
-        # We need a full_name outside the rnn group
+        # The previous full_name is the name inside the recurrent group.
+        # We need a full_name outside the recurrent group.
         layer_out.full_name = MakeLayerNameInSubmodel(layer_out.name)
 
     if len(layer_outs) == 1:
@@ -3684,7 +3667,20 @@ class BaseGeneratedInput(object):
 
 class GeneratedInput(BaseGeneratedInput):
     def after_real_step(self, input):
-        return maxid_layer(input=input, name='__beam_search_predict__')
+        if isinstance(input, LayerOutput):
+            input = [input]
+        elif isinstance(input, collections.Sequence):
+            input = list(input)
+            if len(input) > 1:
+                logger.info(
+                    ("More than one layers inside the recurrent_group "
+                     "are returned as outputs of the entire recurrent_group "
+                     "PLEASE garantee the first output is probability of "
+                     "the predicted next word."))
+
+        return [maxid_layer(
+            input=input[0], name='__beam_search_predict__')] + (
+                input[1:] if len(input) > 1 else [])
 
     def before_real_step(self):
         predict_id = memory(
@@ -3871,6 +3867,7 @@ def beam_search(step,
     :type step: callable
     :param input: Input data for the recurrent unit, which should include the
                   previously generated words as a GeneratedInput object.
+                  In beam_search, none of the inputs may be a LayerOutput.
     :type input: list
     :param bos_id: Index of the start symbol in the dictionary. The start symbol
                    is a special token for NLP task, which indicates the
@@ -3912,15 +3909,18 @@ def beam_search(step,
 
     real_input = []
     for i, each_input in enumerate(input):
-        assert isinstance(each_input, StaticInput) or isinstance(
-            each_input, BaseGeneratedInput)
+        assert not isinstance(each_input, LayerOutput), (
+            "in beam_search, "
+            "none of the input should has a type of LayerOutput.")
         if isinstance(each_input, BaseGeneratedInput):
-            assert generated_input_index == -1
+            assert generated_input_index == -1, ("recurrent_group accepts "
+                                                 "only one GeneratedInput.")
             generated_input_index = i
+
         else:
             real_input.append(each_input)
 
-    assert generated_input_index != -1
+    assert generated_input_index != -1, "No GeneratedInput is given."
 
     gipt = input[generated_input_index]
 
@@ -3941,17 +3941,11 @@ def beam_search(step,
 
         predict = gipt.after_real_step(step(*args))
 
-        eos_layer(input=predict, eos_id=eos_id, name=eos_name)
+        eos_layer(input=predict[0], eos_id=eos_id, name=eos_name)
         return predict
 
-    tmp = recurrent_group(
-        step=__real_step__,
-        input=real_input,
-        reverse=False,
-        name=name,
-        is_generating=True)
-
-    return tmp
+    return recurrent_group(
+        step=__real_step__, input=real_input, reverse=False, name=name)
 
 
 def __cost_input__(input, label, weight=None):
diff --git a/python/paddle/trainer_config_helpers/networks.py b/python/paddle/trainer_config_helpers/networks.py
index 810bea913ec79b2df0eb63ed5a4fd411549ff2e9..dcc4fec4f3313f2ad10073dcecbc015be4021abd 100755
--- a/python/paddle/trainer_config_helpers/networks.py
+++ b/python/paddle/trainer_config_helpers/networks.py
@@ -614,18 +614,17 @@ def simple_lstm(input,
 
 @wrap_name_default('lstm_unit')
 def lstmemory_unit(input,
-                   memory_boot=None,
+                   out_memory=None,
                    name=None,
                    size=None,
                    param_attr=None,
                    act=None,
                    gate_act=None,
                    state_act=None,
-                   mixed_bias_attr=None,
+                   input_proj_bias_attr=None,
+                   input_proj_layer_attr=None,
                    lstm_bias_attr=None,
-                   mixed_layer_attr=None,
-                   lstm_layer_attr=None,
-                   get_output_layer_attr=None):
+                   lstm_layer_attr=None):
     """
     Define calculations that a LSTM unit performs during a single time step.
     This function itself is not a recurrent layer, so it can not be
@@ -662,8 +661,8 @@ def lstmemory_unit(input,
 
     :param input: input layer name.
     :type input: LayerOutput
-    :param memory_boot: the initialization state of the LSTM cell.
-    :type memory_boot: LayerOutput | None
+    :param out_memory: previous step's output; a memory is created if None.
+    :type out_memory: LayerOutput | None
     :param name: lstmemory unit name.
     :type name: basestring
     :param size: lstmemory unit size.
@@ -676,33 +675,35 @@ def lstmemory_unit(input,
     :type gate_act: BaseActivation
     :param state_act: lstm state activiation type.
     :type state_act: BaseActivation
-    :param mixed_bias_attr: bias parameter attribute of mixed layer.
-                            False means no bias, None means default bias.
-    :type mixed_bias_attr: ParameterAttribute|False
+    :param input_proj_bias_attr: bias attribute for input-to-hidden projection.
+                False means no bias, None means default bias.
+    :type input_proj_bias_attr: ParameterAttribute|False|None
+    :param input_proj_layer_attr: extra layer attribute for input to hidden
+                projection of the LSTM unit, such as dropout, error clipping.
+    :type input_proj_layer_attr: ExtraLayerAttribute
     :param lstm_bias_attr: bias parameter attribute of lstm layer.
-                           False means no bias, None means default bias.
+                False means no bias, None means default bias.
     :type lstm_bias_attr: ParameterAttribute|False
-    :param mixed_layer_attr: mixed layer's extra attribute.
-    :type mixed_layer_attr: ExtraLayerAttribute
     :param lstm_layer_attr: lstm layer's extra attribute.
     :type lstm_layer_attr: ExtraLayerAttribute
-    :param get_output_layer_attr: get output layer's extra attribute.
-    :type get_output_layer_attr: ExtraLayerAttribute
     :return: lstmemory unit name.
     :rtype: LayerOutput
     """
     if size is None:
         assert input.size % 4 == 0
         size = input.size / 4
-    out_mem = memory(name=name, size=size)
-    state_mem = memory(
-        name="%s_state" % name, size=size, boot_layer=memory_boot)
+    if out_memory is None:
+        out_mem = memory(name=name, size=size)
+    else:
+        out_mem = out_memory
+
+    state_mem = memory(name="%s_state" % name, size=size)
 
     with mixed_layer(
             name="%s_input_recurrent" % name,
             size=size * 4,
-            bias_attr=mixed_bias_attr,
-            layer_attr=mixed_layer_attr,
+            bias_attr=input_proj_bias_attr,
+            layer_attr=input_proj_layer_attr,
             act=IdentityActivation()) as m:
         m += identity_projection(input=input)
         m += full_matrix_projection(input=out_mem, param_attr=param_attr)
@@ -717,11 +718,7 @@ def lstmemory_unit(input,
         gate_act=gate_act,
         state_act=state_act,
         layer_attr=lstm_layer_attr)
-    get_output_layer(
-        name='%s_state' % name,
-        input=lstm_out,
-        arg_name='state',
-        layer_attr=get_output_layer_attr)
+    get_output_layer(name='%s_state' % name, input=lstm_out, arg_name='state')
 
     return lstm_out
 
@@ -730,17 +727,16 @@ def lstmemory_unit(input,
 def lstmemory_group(input,
                     size=None,
                     name=None,
-                    memory_boot=None,
+                    out_memory=None,
                     reverse=False,
                     param_attr=None,
                     act=None,
                     gate_act=None,
                     state_act=None,
-                    mixed_bias_attr=None,
+                    input_proj_bias_attr=None,
+                    input_proj_layer_attr=None,
                     lstm_bias_attr=None,
-                    mixed_layer_attr=None,
-                    lstm_layer_attr=None,
-                    get_output_layer_attr=None):
+                    lstm_layer_attr=None):
     """
     lstm_group is a recurrent_group version of Long Short Term Memory. It
     does exactly the same calculation as the lstmemory layer (see lstmemory in
@@ -774,8 +770,8 @@ def lstmemory_group(input,
     :type size: int
     :param name: name of the lstmemory group.
     :type name: basestring
-    :param memory_boot: the initialization state of LSTM cell.
-    :type memory_boot: LayerOutput | None
+    :param out_memory: previous step's output; a memory is created if None.
+    :type out_memory: LayerOutput | None
     :param reverse: is lstm reversed
     :type reverse: bool
     :param param_attr: Parameter config, None if use default.
@@ -786,18 +782,17 @@ def lstmemory_group(input,
     :type gate_act: BaseActivation
     :param state_act: lstm state activiation type.
     :type state_act: BaseActivation
-    :param mixed_bias_attr: bias parameter attribute of mixed layer.
-                            False means no bias, None means default bias.
-    :type mixed_bias_attr: ParameterAttribute|False
     :param lstm_bias_attr: bias parameter attribute of lstm layer.
                            False means no bias, None means default bias.
     :type lstm_bias_attr: ParameterAttribute|False
-    :param mixed_layer_attr: mixed layer's extra attribute.
-    :type mixed_layer_attr: ExtraLayerAttribute
+    :param input_proj_bias_attr: bias attribute for input-to-hidden projection.
+                False means no bias, None means default bias.
+    :type input_proj_bias_attr: ParameterAttribute|False|None
+    :param input_proj_layer_attr: extra layer attribute for input to hidden
+                projection of the LSTM unit, such as dropout, error clipping.
+    :type input_proj_layer_attr: ExtraLayerAttribute
     :param lstm_layer_attr: lstm layer's extra attribute.
     :type lstm_layer_attr: ExtraLayerAttribute
-    :param get_output_layer_attr: get output layer's extra attribute.
-    :type get_output_layer_attr: ExtraLayerAttribute
     :return: the lstmemory group.
     :rtype: LayerOutput
     """
@@ -805,18 +800,17 @@ def lstmemory_group(input,
     def __lstm_step__(ipt):
         return lstmemory_unit(
             input=ipt,
-            memory_boot=memory_boot,
             name=name,
             size=size,
-            mixed_bias_attr=mixed_bias_attr,
-            mixed_layer_attr=mixed_layer_attr,
-            param_attr=param_attr,
-            lstm_bias_attr=lstm_bias_attr,
             act=act,
             gate_act=gate_act,
             state_act=state_act,
+            out_memory=out_memory,
+            input_proj_bias_attr=input_proj_bias_attr,
+            input_proj_layer_attr=input_proj_layer_attr,
+            param_attr=param_attr,
             lstm_layer_attr=lstm_layer_attr,
-            get_output_layer_attr=get_output_layer_attr)
+            lstm_bias_attr=lstm_bias_attr)
 
     return recurrent_group(
         name='%s_recurrent_group' % name,
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr
index 7f2aa5a0fea1f4628e4effca5ce9af896f6e6c2c..75cf2312032e187dafc66199e933d3ad0fa33050 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr
@@ -104,7 +104,7 @@ layers {
   }
   bias_parameter_name: "lstm_bias"
   active_gate_type: "sigmoid"
-  active_state_type: "sigmoid"
+  active_state_type: "tanh"
 }
 layers {
   name: "__lstm_group_0___state@__lstm_group_0___recurrent_group"
@@ -183,7 +183,7 @@ layers {
   }
   bias_parameter_name: "lstm_bias"
   active_gate_type: "sigmoid"
-  active_state_type: "sigmoid"
+  active_state_type: "tanh"
 }
 layers {
   name: "__lstm_group_1___state@__lstm_group_1___recurrent_group"
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
index af1b63c5dfbf0984a20eda02d608f76a454613c6..711785be37dbe7f2decc161d1b8e1ead62927b20 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
@@ -258,7 +258,7 @@ layers {
   }
   bias_parameter_name: "___lstm_group_0__@__lstm_group_0___recurrent_group.wbias"
   active_gate_type: "sigmoid"
-  active_state_type: "sigmoid"
+  active_state_type: "tanh"
 }
 layers {
   name: "__lstm_group_0___state@__lstm_group_0___recurrent_group"
diff --git a/python/paddle/trainer_config_helpers/tests/configs/shared_lstm.py b/python/paddle/trainer_config_helpers/tests/configs/shared_lstm.py
index 05810597b3154c3b287441465db16ee6e24b0ca2..565e281a6e1deff18aa48f97eb2f0e39ca79752f 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/shared_lstm.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/shared_lstm.py
@@ -20,12 +20,13 @@ lstm1 = lstmemory_group(
     input=m1,
     param_attr=lstm_param,
     lstm_bias_attr=lstm_bias,
-    mixed_bias_attr=False)
+    input_proj_bias_attr=False)
+
 lstm2 = lstmemory_group(
     input=m2,
     param_attr=lstm_param,
     lstm_bias_attr=lstm_bias,
-    mixed_bias_attr=False)
+    input_proj_bias_attr=False)
 
 softmax_param = ParamAttr(name='softmax_param')