diff --git a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp index d29e981ad66a59e3606178834c701df908ec2221..c631c5ef3a9ebf976dc374e513c52e2d47f1e346 100644 --- a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp +++ b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp @@ -26,8 +26,10 @@ namespace paddle { * If SequenceLevel = kNonseq: * Output: a sequence containing only the last instance of the input sequence * If stride_ > 0: - * Output: a shorten sequence containing several last instances of the - * input sequence with stride window. + * Output: a shortened sequence. The operation of getting the last instance of a + * sequence is independently performed on every slice of the input + * sequence, which is obtained by sliding a window with the window + * size set to stride_. * If SequenceLevel = kSeq: * Check input sequence must has sub-sequence * Output: a sequence containing only the last instance of each sub-sequence diff --git a/paddle/gserver/layers/SequencePoolLayer.h b/paddle/gserver/layers/SequencePoolLayer.h index ff67c0ccadd20de5ec6a9b3a85c536a09c753873..8e183ecda80a1ea262591d83987345224d7d4166 100644 --- a/paddle/gserver/layers/SequencePoolLayer.h +++ b/paddle/gserver/layers/SequencePoolLayer.h @@ -27,9 +27,9 @@ namespace paddle { * output[i] = seqlastin/average/max_{for each instance in this * sequence}{input[i]} * If stride_ > 0: - * Check input sequence must don't have sub-sequence + * Check input sequence must not have sub-sequence * Output: a shorten sequence, pooling is performed upon a small local - * area + * area * If SequenceLevel = kSeq: * Check input sequence must has sub-sequence * Output: output size is the number of input sub-sequences @@ -47,9 +47,9 @@ protected: size_t newBatchSize_; ICpuGpuVectorPtr startPositions_; int stride_; - // store the start position of each stride window + // store the start position of each window IVectorPtr stridePositions_; - // Whether it is reversed 
sequence + // Whether the input sequence is reversed or not bool reversed_ = false; public: diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp index afbda8bdc403f205f918cdf77388361687b568b9..3fa1e50d1e79882ff1e8f798a27e88637d18b47b 100644 --- a/paddle/parameter/Argument.cpp +++ b/paddle/parameter/Argument.cpp @@ -589,7 +589,7 @@ void Argument::poolSequenceWithStride(const Argument& input, } else { int size = ceil((float)seqLength / stride); tgtBuf[seqId + 1] = tgtBuf[seqId] + size; - for (int i = 0; i < size - 1; i++) { + for (int i = 0; i < size - 1; ++i) { int cur = reversed ? starts[seqId + 1] - (size - 1 - i) * stride : stridePos.back() + stride; stridePos.emplace_back(cur); diff --git a/paddle/parameter/Argument.h b/paddle/parameter/Argument.h index 49a0660ccf155f24f70788f54fe0e42f718b6169..91aca98e186aef0ad6b345cf4791ef80c616e3fe 100644 --- a/paddle/parameter/Argument.h +++ b/paddle/parameter/Argument.h @@ -294,7 +294,7 @@ struct Argument { /* After pooling with stride n (n is smaller than sequence length), a long sequence will be shorten. - This function is not suitable for sequence with sub-sequence now. + This function is invalid for sequences having sub-sequences. */ void poolSequenceWithStride(const Argument& input, size_t stride, diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 5f3250e7224fff8dcbf13cace33f10b272064bdc..e98b1dfc8f9ad3bebd5e58813a86e5a8928bfa49 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -1347,9 +1347,10 @@ def last_seq(input, """ Get Last Timestamp Activation of a sequence. - If stride > 0, get last timestamp upon a stride window of sequence. - And a long sequence will be shorten. Note that for sequence with - sub-sequence, stride is default -1 now. 
+ If stride > 0, this layer slides a window whose size is determined by stride, + and returns the last value of the window as the output. Thus, a long sequence + will be shortened. Note that for a sequence with sub-sequences, the default value + of stride is -1. The simple usage is: @@ -1362,7 +1363,7 @@ def last_seq(input, :type name: basestring :param input: Input layer name. :type input: LayerOutput - :param stride: parameter of stride window. + :param stride: window size. :type stride: Int :param layer_attr: extra layer attributes. :type layer_attr: ExtraLayerAttribute. @@ -1402,9 +1403,10 @@ def first_seq(input, """ Get First Timestamp Activation of a sequence. - If stride > 0, get first timestamp upon a stride window of sequence, - and a long sequence will be shorten. Note that for sequence with - sub-sequence, stride is default -1 now. + If stride > 0, this layer slides a window whose size is determined by stride, + and returns the first value of the window as the output. Thus, a long sequence + will be shortened. Note that for a sequence with sub-sequences, the default value + of stride is -1. The simple usage is: @@ -1417,7 +1419,7 @@ def first_seq(input, :type name: basestring :param input: Input layer name. :type input: LayerOutput - :param stride: parameter of stride window. + :param stride: window size. :type stride: Int :param layer_attr: extra layer attributes. :type layer_attr: ExtraLayerAttribute.