diff --git a/paddle/operators/sequence_slice_op.cc b/paddle/operators/sequence_slice_op.cc
index a5928e4cfec0e64b485127896cee784b1de172c9..3374f04269bbb153ee4681ef551f371c0497d5a9 100755
--- a/paddle/operators/sequence_slice_op.cc
+++ b/paddle/operators/sequence_slice_op.cc
@@ -32,6 +32,14 @@ class SequenceSliceOp : public framework::OperatorWithKernel {
                    "Output(Out) of SequenceSliceOp should not be null.");
     auto input_dims = ctx->GetInputDim("X");
+    auto offset_dim = ctx->GetInputDim("Offset");
+    auto length_dim = ctx->GetInputDim("Length");
+
+    PADDLE_ENFORCE_EQ(offset_dim.size(), 2UL,
+                      "Only support one level sequence now.");
+    PADDLE_ENFORCE_EQ(length_dim.size(), 2UL,
+                      "Only support one level sequence now.");
+
     ctx->SetOutputDim("Out", input_dims);
   }
 
@@ -95,7 +103,7 @@ It only supports sequence (LoD Tensor with level number is 1).
         [d1, d2;
          e1, e2]]
     LoD(X) = {{0, 3, 5}}; Dims(X) = (5, 2)
-    Offset = [0, 1]; Length = [2, 1]
+    Offset = [[0], [1]]; Length = [[2], [1]]
 
     Out = [[a1, a2;
             b1, b2]
diff --git a/paddle/operators/sequence_slice_op.h b/paddle/operators/sequence_slice_op.h
index 0be50e680181f5a1d146ec0e43eb0d71b59d3bfe..4ca042b2ddad5612abb34f932037ebdcb3d2dd03 100755
--- a/paddle/operators/sequence_slice_op.h
+++ b/paddle/operators/sequence_slice_op.h
@@ -48,42 +48,42 @@ class SequenceSliceOpKernel : public framework::OpKernel<T> {
     auto* length = ctx.Input<framework::Tensor>("Length");
     auto* out = ctx.Output<framework::LoDTensor>("Out");
 
+    auto lod = in->lod();
+    auto n = lod[0].size() - 1;
+
+    PADDLE_ENFORCE_EQ(lod.size(), 1UL,
+                      "Only support one level sequence now.");
+    PADDLE_ENFORCE_EQ(
+        n, length->dims()[0],
+        "The size of input-sequence and length-array should be the same")
+    PADDLE_ENFORCE_EQ(
+        n, offset->dims()[0],
+        "The size of input-sequence and offset-array should be the same")
+
     const int64_t* offset_data = offset->data<int64_t>();
     const int64_t* length_data = length->data<int64_t>();
+    framework::Tensor offset_cpu;
+    framework::Tensor length_cpu;
 
     if (platform::is_gpu_place(ctx.GetPlace())) {
-      framework::Tensor offset_cpu;
       offset_cpu.mutable_data<int64_t>(offset->dims(), platform::CPUPlace());
       offset_cpu.CopyFrom(*offset, platform::CPUPlace(), ctx.device_context());
       offset_data = offset_cpu.data<int64_t>();
 
-      framework::Tensor length_cpu;
       length_cpu.mutable_data<int64_t>(length->dims(), platform::CPUPlace());
       length_cpu.CopyFrom(*length, platform::CPUPlace(), ctx.device_context());
       length_data = length_cpu.data<int64_t>();
     }
 
-    auto lod = in->lod();
-    auto n = lod[0].size() - 1;
-
-    PADDLE_ENFORCE_EQ(lod.size(), 1UL, "Only support one level sequence now.");
-    PADDLE_ENFORCE_EQ(offset->dims().size(), 1UL,
-                      "Only support one level sequence now.");
-    PADDLE_ENFORCE_EQ(length->dims().size(), 1UL,
-                      "Only support one level sequence now.");
-    PADDLE_ENFORCE_EQ(
-        n, length->dims()[0],
-        "The size of input-sequence and length-array should be the same")
-    PADDLE_ENFORCE_EQ(
-        n, offset->dims()[0],
-        "The size of input-sequence and offset-array should be the same")
-
     for (size_t i = 0; i < n; ++i) {
-      PADDLE_ENFORCE_LT(0, offset_data[i], "The offset must greater than zero")
-      PADDLE_ENFORCE_LT(0, length_data[i], "The length must greater than zero")
-      PADDLE_ENFORCE_LT(lod[0][i] + offset_data[i] + length_data[i],
-                        lod[0][i + 1], "The target tensor's length overflow")
-    }
+      PADDLE_ENFORCE_LT(0, offset_data[i],
+                        "The offset must be greater than zero")
+      PADDLE_ENFORCE_LT(0, length_data[i],
+                        "The length must be greater than zero")
+      PADDLE_ENFORCE_LT(
+          lod[0][i] + offset_data[i] + length_data[i],
+          lod[0][i + 1],
+          "The target tensor's length overflow")}
 
     out->mutable_data<T>(ctx.GetPlace());
     auto out_lod = SequenceSliceLoD(*in, offset_data, length_data);
@@ -100,7 +100,7 @@ class SequenceSliceOpKernel : public framework::OpKernel<T> {
       Tensor in_t = in->Slice(static_cast<size_t>(lod[0][i] + offset_data[i]),
                               static_cast<size_t>(lod[0][i] + offset_data[i] +
-                                                  length_data[i]));
+                                                      length_data[i]));
 
       StridedMemcpy<T>(ctx.device_context(), in_t.data<T>(), in_stride,
                        in_t.dims(), out_stride,
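For reference, here is a minimal NumPy sketch (not part of the patch) of the slicing rule the kernel above implements, using the two-dimensional Offset/Length layout that the new shape checks expect; the helper name sequence_slice_ref is made up for illustration:

    import numpy as np

    def sequence_slice_ref(x, lod, offset, length):
        """Slice each sequence of a one-level LoD tensor.

        x:      (N, ...) rows of all sequences stacked together
        lod:    one-level LoD, e.g. [[0, 3, 5]]
        offset: (num_seqs, 1) int64, start offset inside each sequence
        length: (num_seqs, 1) int64, number of rows kept per sequence
        """
        outs, out_lod = [], [0]
        for i in range(len(lod[0]) - 1):
            begin = lod[0][i] + offset[i, 0]
            end = begin + length[i, 0]
            assert end <= lod[0][i + 1], "slice exceeds sequence %d" % i
            outs.append(x[begin:end])
            out_lod.append(out_lod[-1] + (end - begin))
        return np.concatenate(outs, axis=0), [out_lod]

    # The example from the operator comment: LoD(X) = {{0, 3, 5}},
    # Offset = [[0], [1]], Length = [[2], [1]] keeps rows {a, b} and {e}.
    x = np.arange(10, dtype='float32').reshape(5, 2)   # rows a..e
    out, out_lod = sequence_slice_ref(
        x, [[0, 3, 5]],
        np.array([[0], [1]], dtype='int64'),
        np.array([[2], [1]], dtype='int64'))
    # out.shape == (3, 2); out_lod == [[0, 2, 3]]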
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 5de1c18950a3236faa91edabf0119b590b22c6d9..a02eba007ddf929ff92df995df253f5a386bac7b 100644
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -2987,8 +2987,10 @@ def img_cmrnorm_layer(input,
                       layer_attr=None):
     """
     Response normalization across feature maps.
-    The details please refer to
-    `Alex's paper `_.
+
+    Reference:
+        ImageNet Classification with Deep Convolutional Neural Networks
+        http://www.cs.toronto.edu/~fritz/absps/imagenet.pdf
 
     The example usage is:
 
@@ -2997,7 +2999,7 @@ def img_cmrnorm_layer(input,
         norm = img_cmrnorm_layer(input=net, size=5)
 
     :param name: The name of this layer. It is optional.
-    :type name: None | basestring
+    :type name: basestring
     :param input: The input of this layer.
     :type input: LayerOutput
     :param size: Normalize in number of :math:`size` feature maps.
@@ -3006,9 +3008,11 @@ def img_cmrnorm_layer(input,
     :type scale: float
     :param power: The hyper-parameter.
     :type power: float
-    :param num_channels: input layer's filers number or channels. If
-                         num_channels is None, it will be set automatically.
-    :param layer_attr: Extra Layer Attribute.
+    :param num_channels: The number of input channels. If the parameter is not
+                         set or set to None, its actual value will be
+                         automatically set to the channel number of the input.
+    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
+                       details.
     :type layer_attr: ExtraLayerAttribute
     :return: LayerOutput object.
     :rtype: LayerOutput
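The referenced AlexNet paper defines cross-map response normalization as b_i = a_i / (k + alpha * sum of a_j^2 over a window of neighbouring channels)^beta. Below is a rough NumPy sketch of that formula only; how img_cmrnorm_layer maps its size/scale/power arguments onto the window, alpha and beta (and the constant k) is an assumption here, not something stated by this patch.

    import numpy as np

    def cross_map_norm_ref(x, size=5, alpha=0.0128, beta=0.75, k=1.0):
        # x: (channels, height, width); each activation is divided by a power
        # of the sum of squares over `size` neighbouring channels.
        c = x.shape[0]
        out = np.empty_like(x)
        for i in range(c):
            lo = max(0, i - size // 2)
            hi = min(c, i + size // 2 + 1)
            denom = (k + alpha * np.sum(x[lo:hi] ** 2, axis=0)) ** beta
            out[i] = x[i] / denom
        return out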
@@ -3036,7 +3040,7 @@ def batch_norm_layer(input,
                      use_global_stats=None,
                      mean_var_names=None):
     """
-    Batch Normalization Layer. The notation of this layer as follow.
+    Batch Normalization Layer. The notation of this layer is as follows.
 
     :math:`x` is the input features over a mini-batch.
 
@@ -3050,8 +3054,10 @@ def batch_norm_layer(input,
            \\sigma_{\\beta}^{2} + \\epsilon}} \\qquad &//\ normalize  \\\\
        y_i &\\gets \\gamma \\hat{x_i} + \\beta \\qquad &//\ scale\ and\ shift
 
-    The details of batch normalization please refer to this
-    `paper `_.
+    Reference:
+        Batch Normalization: Accelerating Deep Network Training by Reducing
+        Internal Covariate Shift
+        http://arxiv.org/abs/1502.03167
 
     The example usage is:
 
@@ -3061,48 +3067,47 @@ def batch_norm_layer(input,
     :param name: The name of this layer. It is optional.
     :type name: basestring
-    :param input: batch normalization input. Better be linear activation.
-                  Because there is an activation inside batch_normalization.
+    :param input: The input of this layer, on which batch normalization will be performed.
     :type input: LayerOutput
     :param batch_norm_type: We have batch_norm, mkldnn_batch_norm and
                             cudnn_batch_norm. batch_norm supports CPU, MKLDNN
                             and GPU. cudnn_batch_norm requires cuDNN version
                             greater or equal to v4 (>=v4). But cudnn_batch_norm
                             is faster and needs less memory than batch_norm.
                             mkldnn_batch_norm requires
-                            enable use_mkldnn. By default (None), we will
-                            automaticly select cudnn_batch_norm for GPU,
+                            use_mkldnn to be enabled. By default (None), we will
+                            automatically select cudnn_batch_norm for GPU,
                             mkldnn_batch_norm for MKLDNN and batch_norm for
                             CPU.
-                            Otherwise, select batch norm type based on the
-                            specified type. If you use cudnn_batch_norm,
-                            we suggested you use latest version, such as v5.1.
+                            Users can also specify the batch norm type explicitly.
+                            If you use cudnn_batch_norm, we suggest you use the
+                            latest version, such as v5.1.
     :type batch_norm_type: None | string, None or "batch_norm" or
                            "cudnn_batch_norm" or "mkldnn_batch_norm"
-    :param act: Activation Type. Better be relu. Because batch
-                normalization will normalize input near zero.
+    :param act: Activation type. ReluActivation is the default activation.
     :type act: BaseActivation
-    :param num_channels: num of image channels or previous layer's number of
-                         filters. None will automatically get from layer's
-                         input.
+    :param num_channels: The number of input channels. If the parameter is not
+                         set or set to None, its actual value will be
+                         automatically set to the channel number of the input.
     :type num_channels: int
-    :param bias_attr: :math:`\\beta`, better be zero when initialize. So the
-                      initial_std=0, initial_mean=1 is best practice.
+    :param bias_attr: :math:`\\beta`. The bias attribute. If the parameter is set to
+                      False or an object whose type is not ParameterAttribute, no
+                      bias is defined. If the parameter is set to True, the bias is
+                      initialized to zero.
     :type bias_attr: ParameterAttribute | None | bool | Any
-    :param param_attr: :math:`\\gamma`, better be one when initialize. So the
-                       initial_std=0, initial_mean=1 is best practice.
+    :param param_attr: :math:`\\gamma`. The parameter attribute. See ParameterAttribute
+                       for details.
     :type param_attr: ParameterAttribute
-    :param layer_attr: Extra Layer Attribute.
+    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
+                       details.
     :type layer_attr: ExtraLayerAttribute
-    :param use_global_stats: whether use moving mean/variance statistics
-                             during testing peroid. If None or True,
-                             it will use moving mean/variance statistics during
-                             testing. If False, it will use the mean
-                             and variance of current batch of test data for
-                             testing.
+    :param use_global_stats: Whether to use moving mean/variance statistics during
+                             the testing period. If the parameter is set to None or
+                             True, it will use moving mean/variance statistics
+                             during testing. If the parameter is set to False, it
+                             will use the mean and variance of the current batch
+                             of test data.
     :type use_global_stats: bool | None.
-    :param moving_average_fraction: Factor used in the moving average
-                                    computation, referred to as facotr,
-                                    :math:`runningMean = newMean*(1-factor)
-                                    + runningMean*factor`
+    :param moving_average_fraction: Factor used in the moving average computation.
+                                    :math:`runningMean = newMean*(1-factor) + runningMean*factor`
     :type moving_average_fraction: float.
     :param mean_var_names: [mean name, variance name]
     :type mean_var_names: string list
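A small NumPy sketch of the four equations in the docstring above (mini-batch mean, variance, normalization, scale-and-shift), plus the moving-average update described for moving_average_fraction. This only illustrates the formulas; it is not the layer's implementation.

    import numpy as np

    def batch_norm_ref(x, gamma, beta, eps=1e-5):
        # x: (batch, features); gamma, beta: (features,)
        mu = x.mean(axis=0)                      # mini-batch mean
        var = ((x - mu) ** 2).mean(axis=0)       # mini-batch variance
        x_hat = (x - mu) / np.sqrt(var + eps)    # normalize
        return gamma * x_hat + beta, mu, var     # scale and shift

    def update_running_mean(running_mean, new_mean, factor):
        # runningMean = newMean*(1-factor) + runningMean*factor
        return new_mean * (1.0 - factor) + running_mean * factor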
@@ -3164,8 +3169,9 @@ def sum_to_one_norm_layer(input, name=None, layer_attr=None):
     :type input: LayerOutput
     :param name: The name of this layer. It is optional.
     :type name: basestring
-    :param layer_attr: extra layer attributes.
-    :type layer_attr: ExtraLayerAttribute.
+    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute
+                       for details.
+    :type layer_attr: ExtraLayerAttribute
     :return: LayerOutput object.
     :rtype: LayerOutput
     """
@@ -3200,7 +3206,8 @@ def row_l2_norm_layer(input, name=None, layer_attr=None):
     :type input: LayerOutput
     :param name: The name of this layer. It is optional.
     :type name: basestring
-    :param layer_attr: extra layer attributes.
+    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute
+                       for details.
     :type layer_attr: ExtraLayerAttribute.
     :return: LayerOutput object.
     :rtype: LayerOutput
@@ -3237,22 +3244,17 @@ def addto_layer(input, act=None, name=None, bias_attr=None, layer_attr=None):
                        act=ReluActivation(),
                        bias_attr=False)
 
-    This layer just simply add all input layers together, then activate the sum
-    inputs. Each input of this layer should be the same size, which is also the
-    output size of this layer.
+    This layer simply adds all input layers together, then activates the
+    sum. All inputs should share the same dimension, which is also the
+    dimension of this layer's output.
 
     There is no weight matrix for each input, because it just a simple add
     operation. If you want a complicated operation before add, please use
     mixed_layer.
 
-    It is a very good way to set dropout outside the layers. Since not all
-    PaddlePaddle layer support dropout, you can add an add_to layer, set
-    dropout here.
-    Please refer to dropout_layer for details.
-
     :param name: The name of this layer. It is optional.
     :type name: basestring
-    :param input: Input layers. It could be a LayerOutput or list/tuple of
+    :param input: The input layers. It could be a LayerOutput or list/tuple of
                   LayerOutput.
     :type input: LayerOutput | list | tuple
     :param act: Activation Type. LinearActivation is the default activation.
@@ -3261,7 +3263,8 @@ def addto_layer(input, act=None, name=None, bias_attr=None, layer_attr=None):
                       whose type is not ParameterAttribute, no bias is defined. If the
                       parameter is set to True, the bias is initialized to zero.
     :type bias_attr: ParameterAttribute | None | bool | Any
-    :param layer_attr: Extra Layer attribute.
+    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
+                       details.
     :type layer_attr: ExtraLayerAttribute
     :return: LayerOutput object.
     :rtype: LayerOutput
@@ -3300,8 +3303,8 @@ def addto_layer(input, act=None, name=None, bias_attr=None, layer_attr=None):
 @layer_support(DROPOUT, ERROR_CLIPPING)
 def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None):
     """
-    Concat all input vector into one huge vector.
-    Inputs can be list of LayerOutput or list of projection.
+    Concatenate all input vectors into one vector.
+    Inputs can be a list of LayerOutput or a list of projections.
 
     The example usage is:
 
@@ -3311,11 +3314,12 @@ def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None):
 
     :param name: The name of this layer. It is optional.
     :type name: basestring
-    :param input: input layers or projections
+    :param input: The input layers or projections
     :type input: list | tuple | collections.Sequence
     :param act: Activation type. IdentityActivation is the default activation.
     :type act: BaseActivation
-    :param layer_attr: Extra Layer Attribute.
+    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
+                       details.
     :type layer_attr: ExtraLayerAttribute
     :return: LayerOutput object.
     :rtype: LayerOutput
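To make the difference between the two layers documented above concrete, here is a NumPy sketch of their shape semantics (assuming dense 2-D inputs of shape (batch, dim), which is an illustration rather than the layers' real data layout): addto_layer sums equally sized inputs, so the output keeps the input dimension, while concat_layer joins inputs along the feature dimension, so the output dimension is the sum of the input dimensions.

    import numpy as np

    a = np.ones((4, 128))
    b = np.ones((4, 128))
    c = np.ones((4, 64))

    addto_out = a + b                                # addto_layer: shapes must match
    concat_out = np.concatenate([a, b, c], axis=1)   # concat_layer: widths add up

    assert addto_out.shape == (4, 128)
    assert concat_out.shape == (4, 320)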
@@ -3385,7 +3389,7 @@ def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None):
 def seq_concat_layer(a, b, act=None, name=None, layer_attr=None,
                      bias_attr=None):
     """
-    Concat sequence a with sequence b.
+    Concatenate sequence a and sequence b.
 
     Inputs:
         - a = [a1, a2, ..., am]
@@ -3404,13 +3408,14 @@ def seq_concat_layer(a, b, act=None, name=None, layer_attr=None,
 
     :param name: The name of this layer. It is optional.
     :type name: basestring
-    :param a: input sequence layer
+    :param a: The first input sequence layer
     :type a: LayerOutput
-    :param b: input sequence layer
+    :param b: The second input sequence layer
     :type b: LayerOutput
     :param act: Activation type. IdentityActivation is the default activation.
     :type act: BaseActivation
-    :param layer_attr: Extra Layer Attribute.
+    :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
+                       details.
     :type layer_attr: ExtraLayerAttribute
     :param bias_attr: The bias attribute. If the parameter is set to False or an object
                       whose type is not ParameterAttribute, no bias is defined. If the
@@ -3447,31 +3452,25 @@ def memory(name,
            boot_bias_active_type=None,
            boot_with_const_id=None):
     """
-    The memory layers is a layer cross each time step. Reference this output
-    as previous time step layer :code:`name` 's output.
+    The memory takes a layer's output at the previous time step as its own output.
 
-    The default memory is zero in first time step, previous time step's
-    output in the rest time steps.
+    If boot_bias is set, the activation of the bias is the initial value of the memory.
 
-    If boot_bias, the first time step value is this bias and
-    with activation.
+    If boot_with_const_id is set, then the memory's output at the first time step
+    is an IndexSlot, and Arguments.ids()[0] is this :code:`cost_id`.
 
-    If boot_with_const_id, then the first time stop is a IndexSlot, the
-    Arguments.ids()[0] is this :code:`cost_id`.
+    If boot_layer is specified, the memory's output at the first time step will
+    be the boot_layer's output.
 
-    If boot_layer is not null, the memory is just the boot_layer's output.
-    Set :code:`is_seq` is true boot layer is sequence.
-
-    The same name layer in recurrent group will set memory on each time
-    step.
+    In all other cases, the memory's output at the first time step is zero.
 
     .. code-block:: python
 
        mem = memory(size=256, name='state')
       state = fc_layer(input=mem, size=256, name='state')
 
-    If you do not want to specify the name, you can equivalently use set_input()
-    to specify the layer needs to be remembered as the following:
+    If you do not want to specify the name, you can also use set_input()
+    to specify the layer to be remembered as the following:
 
     .. code-block:: python
 
@@ -3479,26 +3478,31 @@ def memory(name,
       state = fc_layer(input=mem, size=256)
       mem.set_input(mem)
 
-    :param name: the name of the layer which this memory remembers.
+    :param name: The name of the layer which this memory remembers.
                  If name is None, user should call set_input() to specify the
                  name of the layer which this memory remembers.
     :type name: basestring
-    :param size: size of memory.
+    :param size: The dimensionality of memory.
     :type size: int
-    :param memory_name: the name of the memory.
-                        It is ignored when name is provided.
+    :param memory_name: The name of the memory. It is ignored when name is provided.
     :type memory_name: basestring
     :param is_seq: DEPRECATED. is sequence for boot_layer
     :type is_seq: bool
-    :param boot_layer: boot layer of memory.
+    :param boot_layer: This parameter specifies the memory's output at the first
+                       time step, which is the boot_layer's output.
     :type boot_layer: LayerOutput | None
-    :param boot_bias: boot layer's bias
+    :param boot_bias: The bias attribute of the memory's output at the first time step.
+                      If the parameter is set to False or an object whose type is not
+                      ParameterAttribute, no bias is defined. If the parameter is set
+                      to True, the bias is initialized to zero.
     :type boot_bias: ParameterAttribute | None
-    :param boot_bias_active_type: boot layer's active type.
+    :param boot_bias_active_type: Activation type for the memory's bias at the first
+                                  time step. LinearActivation is the default activation.
     :type boot_bias_active_type: BaseActivation
-    :param boot_with_const_id: boot layer's id.
+    :param boot_with_const_id: This parameter specifies the memory's output at the
+                               first time step, and the output is an index.
     :type boot_with_const_id: int
-    :return: LayerOutput object which is a memory.
+    :return: LayerOutput object.
     :rtype: LayerOutput
     """
     if boot_bias_active_type is None:
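A plain-Python sketch of the memory semantics described above: inside a recurrent group the memory at step t exposes the named layer's output from step t-1, and at the first step it exposes either zeros (the default) or the boot_layer's output. The step function and sizes here are placeholders for illustration, not PaddlePaddle API.

    import numpy as np

    def run_recurrent(inputs, step, state_size, boot_output=None):
        # inputs: list of per-time-step arrays; step(x_t, mem_t) -> new state
        mem = np.zeros(state_size) if boot_output is None else boot_output
        outputs = []
        for x_t in inputs:            # mem plays the role of memory(name='state')
            state = step(x_t, mem)    # e.g. state = fc_layer(input=mem, ...)
            outputs.append(state)
            mem = state               # the next step sees the previous output
        return outputs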
diff --git a/python/paddle/v2/framework/tests/test_beam_search_op.py b/python/paddle/v2/fluid/tests/test_beam_search_op.py
similarity index 94%
rename from python/paddle/v2/framework/tests/test_beam_search_op.py
rename to python/paddle/v2/fluid/tests/test_beam_search_op.py
index a5a0cc0c96ab6c77abcc20c387eef7b3b1c8ac93..cc7c09bb59de3f83e47b4d95c1203f7f050c5132 100644
--- a/python/paddle/v2/framework/tests/test_beam_search_op.py
+++ b/python/paddle/v2/fluid/tests/test_beam_search_op.py
@@ -1,6 +1,6 @@
 import logging
-from paddle.v2.framework.op import Operator, DynamicRecurrentOp
-import paddle.v2.framework.core as core
+from paddle.v2.fluid.op import Operator, DynamicRecurrentOp
+import paddle.v2.fluid.core as core
 import unittest
 import numpy as np
 
diff --git a/python/paddle/v2/fluid/tests/test_sequence_slice_op.py b/python/paddle/v2/fluid/tests/test_sequence_slice_op.py
index 80f4bfbdd1160c1423c984e73cac54fbd3831241..4351d8e6d77c16e0012f9ae163b118fdbb793a8f 100755
--- a/python/paddle/v2/fluid/tests/test_sequence_slice_op.py
+++ b/python/paddle/v2/fluid/tests/test_sequence_slice_op.py
@@ -9,16 +9,16 @@ class TestSequenceSliceOp(OpTest):
         # only supprot one level LoD
         x = np.random.random(self.x_dim).astype('float32')
         lod = self.x_lod
-        offset = np.array(self.offset).flatten().astype("int64")
-        length = np.array(self.length).flatten().astype("int64")
+        offset = np.array(self.offset).astype("int64")
+        length = np.array(self.length).astype("int64")
         self.inputs = {'X': (x, lod), 'Offset': offset, 'Length': length}
 
         outs = []  #np.zeros((100, 3, 2)).astype('float32')
         out_lod = [[0]]
         out_lod_offset = 0
         for i in range(len(offset)):
-            sub_x = x[lod[0][i] + offset[i]: lod[0]
-                      [i] + offset[i] + length[i], :]
+            sub_x = x[lod[0][i] + offset[i, 0]: lod[0]
+                      [i] + offset[i, 0] + length[i, 0], :]
             out_lod_offset = out_lod_offset + len(sub_x)
             outs.append(sub_x)
             out_lod[0].append(out_lod_offset)
@@ -28,8 +28,8 @@ class TestSequenceSliceOp(OpTest):
     def init_test_case(self):
         self.x_dim = (100, 3, 2)
         self.x_lod = [[0, 20, 40, 60, 80, 100]]
-        self.offset = [1, 2, 3, 4, 5]
-        self.length = [10, 8, 6, 4, 2]
+        self.offset = [[1], [2], [3], [4], [5]]
+        self.length = [[10], [8], [6], [4], [2]]
 
     def setUp(self):
         self.op_type = "sequence_slice"
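For the configuration in init_test_case above, the reference loop in the test accumulates len(sub_x) per sequence, so the expected output LoD can be checked by hand with a few lines (an illustration only, not part of the test file):

    lengths = [10, 8, 6, 4, 2]           # the column values of self.length
    out_lod = [0]
    for l in lengths:
        out_lod.append(out_lod[-1] + l)  # each slice contributes `length` rows
    assert out_lod == [0, 10, 18, 24, 28, 30]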