diff --git a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
index d29e981ad66a59e3606178834c701df908ec2221..c631c5ef3a9ebf976dc374e513c52e2d47f1e346 100644
--- a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
+++ b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
@@ -26,8 +26,10 @@ namespace paddle {
  * If SequenceLevel = kNonseq:
  *   Output: a sequence containing only the last instance of the input sequence
  *   If stride_ > 0:
- *      Output: a shorten sequence containing several last instances of the
- *              input sequence with stride window.
+ *      Output: a shortened sequence. The operation of getting the last
+ *              instance is performed independently on every slice of the
+ *              input sequence, where the slices are obtained by sliding a
+ *              window whose size is stride_.
  * If SequenceLevel = kSeq:
  *   Check input sequence must has sub-sequence
  *   Output: a sequence containing only the last instance of each sub-sequence
diff --git a/paddle/gserver/layers/SequencePoolLayer.h b/paddle/gserver/layers/SequencePoolLayer.h
index ff67c0ccadd20de5ec6a9b3a85c536a09c753873..8e183ecda80a1ea262591d83987345224d7d4166 100644
--- a/paddle/gserver/layers/SequencePoolLayer.h
+++ b/paddle/gserver/layers/SequencePoolLayer.h
@@ -27,9 +27,9 @@ namespace paddle {
  *    output[i] = seqlastin/average/max_{for each instance in this
  * sequence}{input[i]}
  *    If stride_ > 0:
- *        Check input sequence must don't have sub-sequence
+ *        Check that the input sequence does not have sub-sequences
  *        Output: a shorten sequence, pooling is performed upon a small local
- * area
+ *                area
  * If SequenceLevel = kSeq:
  *    Check input sequence must has sub-sequence
  *    Output: output size is the number of input sub-sequences
@@ -47,9 +47,9 @@ protected:
   size_t newBatchSize_;
   ICpuGpuVectorPtr startPositions_;
   int stride_;
-  // store the start position of each stride window
+  // store the start position of each window
   IVectorPtr stridePositions_;
-  // Whether it is reversed sequence
+  // Whether the input sequence is reversed or not
   bool reversed_ = false;
 
 public:
diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp
index afbda8bdc403f205f918cdf77388361687b568b9..3fa1e50d1e79882ff1e8f798a27e88637d18b47b 100644
--- a/paddle/parameter/Argument.cpp
+++ b/paddle/parameter/Argument.cpp
@@ -589,7 +589,7 @@ void Argument::poolSequenceWithStride(const Argument& input,
     } else {
       int size = ceil((float)seqLength / stride);
       tgtBuf[seqId + 1] = tgtBuf[seqId] + size;
-      for (int i = 0; i < size - 1; i++) {
+      for (int i = 0; i < size - 1; ++i) {
         int cur = reversed ? starts[seqId + 1] - (size - 1 - i) * stride
                            : stridePos.back() + stride;
         stridePos.emplace_back(cur);
diff --git a/paddle/parameter/Argument.h b/paddle/parameter/Argument.h
index 49a0660ccf155f24f70788f54fe0e42f718b6169..91aca98e186aef0ad6b345cf4791ef80c616e3fe 100644
--- a/paddle/parameter/Argument.h
+++ b/paddle/parameter/Argument.h
@@ -294,7 +294,7 @@ struct Argument {
   /*
    After pooling with stride n (n is smaller than sequence length),
    a long sequence will be shorten.
-   This function is not suitable for sequence with sub-sequence now.
+   This function does not support sequences that have sub-sequences.
    */
   void poolSequenceWithStride(const Argument& input,
                               size_t stride,
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 5f3250e7224fff8dcbf13cace33f10b272064bdc..e98b1dfc8f9ad3bebd5e58813a86e5a8928bfa49 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -1347,9 +1347,10 @@ def last_seq(input,
     """
     Get Last Timestamp Activation of a sequence.
 
-    If stride > 0, get last timestamp upon a stride window of sequence. 
-    And a long sequence will be shorten. Note that for sequence with 
-    sub-sequence, stride is default -1 now.
+    If stride > 0, this layer slides a window whose size is determined by
+    stride, and returns the last value of each window as the output. Thus, a
+    long sequence will be shortened. Note that for a sequence with
+    sub-sequences, the default value of stride is -1.
 
     The simple usage is:
 
@@ -1362,7 +1363,7 @@ def last_seq(input,
     :type name: basestring
     :param input: Input layer name.
     :type input: LayerOutput
-    :param stride: parameter of stride window.  
+    :param stride: The size of the sliding window.
     :type stride: Int
     :param layer_attr: extra layer attributes.
     :type layer_attr: ExtraLayerAttribute.
@@ -1402,9 +1403,10 @@ def first_seq(input,
     """
     Get First Timestamp Activation of a sequence.
 
-    If stride > 0, get first timestamp upon a stride window of sequence,
-    and a long sequence will be shorten. Note that for sequence with 
-    sub-sequence, stride is default -1 now.
+    If stride > 0, this layer slides a window whose size is determined by
+    stride, and returns the first value of each window as the output. Thus, a
+    long sequence will be shortened. Note that for a sequence with
+    sub-sequences, the default value of stride is -1.
 
     The simple usage is:
 
@@ -1417,7 +1419,7 @@ def first_seq(input,
     :type name: basestring
     :param input: Input layer name.
     :type input: LayerOutput
-    :param stride: parameter of stride window.  
+    :param stride: The size of the sliding window.
     :type stride: Int
     :param layer_attr: extra layer attributes.
     :type layer_attr: ExtraLayerAttribute.