diff --git a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
index 7a13cd7ad0fecf202613d8da365ea832b41ab04e..944c7051668dccf39dd2ace14986d43c8a14e452 100644
--- a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
+++ b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
@@ -25,6 +25,11 @@ namespace paddle {
  * Input: a sequence
  * If SequenceLevel = kNonseq:
  *   Output: a sequence containing only the last instance of the input sequence
+ *   If stride_ > 0:
+ *     Output: a shortened sequence. Taking the last instance of a sequence
+ *             is performed independently on every slice of the input
+ *             sequence, which is obtained by sliding a window whose size is
+ *             set to stride_.
  * If SequenceLevel = kSeq:
  *   Check input sequence must has sub-sequence
  *   Output: a sequence containing only the last instance of each sub-sequence
@@ -37,6 +42,7 @@ class SequenceLastInstanceLayer : public SequencePoolLayer {
 protected:
   MatrixPtr tmpSrc_;
   MatrixPtr tmpDest_;
+  std::vector<int> instanceIds_;
 
 public:
   explicit SequenceLastInstanceLayer(const LayerConfig& config)
@@ -54,6 +60,7 @@ REGISTER_LAYER(seqlastins, SequenceLastInstanceLayer);
 bool SequenceLastInstanceLayer::init(const LayerMap& layerMap,
                                      const ParameterMap& parameterMap) {
   SequencePoolLayer::init(layerMap, parameterMap);
+  reversed_ = config_.select_first();
 
   tmpSrc_ =
       Matrix::create(nullptr, /* height= */ 1, 1, /* trans= */ false, useGpu_);
@@ -66,7 +73,8 @@ bool SequenceLastInstanceLayer::init(const LayerMap& layerMap,
 void SequenceLastInstanceLayer::forward(PassType passType) {
   SequencePoolLayer::forward(passType);
 
-  const int* starts = startPositions_->getData(false);
+  auto starts = (stride_ > 0) ? stridePositions_->getData()
+                              : startPositions_->getData(false);
   MatrixPtr inputValue = getInputValue(0);
   MatrixPtr outputValue = getOutputValue();
 
@@ -74,9 +82,10 @@ void SequenceLastInstanceLayer::forward(PassType passType) {
     AsyncGpuBlock asyncGpuBlock;
     REGISTER_TIMER_INFO("SequenceLastInstanceLayerForward", getName().c_str());
 
+    instanceIds_.clear();
     for (size_t seqId = 0; seqId < newBatchSize_; ++seqId) {
-      int insId =
-          config_.select_first() ? starts[seqId] : starts[seqId + 1] - 1;
+      int insId = reversed_ ? starts[seqId] : starts[seqId + 1] - 1;
+      instanceIds_.push_back(insId);
 
       outputValue->subMatrix(seqId, 1, tmpDest_)
           ->assign(*(inputValue->subMatrix(insId, 1, tmpSrc_)));
@@ -96,18 +105,13 @@ void SequenceLastInstanceLayer::backward(const UpdateCallback& callback) {
   MatrixPtr inputGrad = getInputGrad(0);
   MatrixPtr outputGrad = getOutputGrad();
-  const int* starts = startPositions_->getData(false);
-  size_t numSequences = startPositions_->getSize() - 1;
 
   if (inputGrad) {
     AsyncGpuBlock asyncGpuBlock;
     REGISTER_TIMER_INFO("SequenceLastInstanceLayerBackward", getName().c_str());
 
-    for (size_t seqId = 0; seqId < numSequences; ++seqId) {
-      int insId =
-          config_.select_first() ? starts[seqId] : starts[seqId + 1] - 1;
-
-      inputGrad->subMatrix(insId, 1, tmpDest_)
+    for (size_t seqId = 0; seqId < newBatchSize_; ++seqId) {
+      inputGrad->subMatrix(instanceIds_[seqId], 1, tmpDest_)
           ->add(*(outputGrad->subMatrix(seqId, 1, tmpSrc_)));
     }
   }
diff --git a/paddle/gserver/layers/SequencePoolLayer.cpp b/paddle/gserver/layers/SequencePoolLayer.cpp
index 5807c4249620db44fed82a6bb69a77d807d9f0a0..8c49502011582b534a2ba4113ffeffaa2f06a51c 100644
--- a/paddle/gserver/layers/SequencePoolLayer.cpp
+++ b/paddle/gserver/layers/SequencePoolLayer.cpp
@@ -37,6 +37,7 @@ bool SequencePoolLayer::init(const LayerMap& layerMap,
   } else {
     LOG(FATAL) << "Unknown trans_type: " << config_.trans_type();
   }
+  stride_ = config_.seq_pool_stride();
   setNeedSequenceInfo(false);
   return true;
 }
@@ -55,8 +56,6 @@ void SequencePoolLayer::forward(PassType passType) {
   CHECK_EQ(starts->getData()[newBatchSize_], input.getBatchSize());
   CHECK_EQ(newBatchSize_, starts->getSize() - 1);
 
-  resetOutput(newBatchSize_, dim);
-
   /* If type_ = kNonSeq, both seq has or not has sub-seq degrade to a non-seq,
    * thus, in this case, output_ has no sequenceStartPositions.
    * If type_ = kSeq, seq has sub-seq degrades to a seq, thus, only in this
@@ -67,6 +66,15 @@ void SequencePoolLayer::forward(PassType passType) {
         << "when trans_type = seq, input must hasSubseq";
     output_.degradeSequence(input);
   }
+  if (stride_ > 0) {
+    CHECK_EQ(input.hasSubseq(), 0UL)
+        << "sequence stride pooling does not support sub-sequences yet";
+    output_.poolSequenceWithStride(
+        input, stride_, &stridePositions_, reversed_);
+    newBatchSize_ = stridePositions_->getSize() - 1;
+  }
+
+  resetOutput(newBatchSize_, dim);
 }
 
 void SequencePoolLayer::backward(const UpdateCallback& callback) {
diff --git a/paddle/gserver/layers/SequencePoolLayer.h b/paddle/gserver/layers/SequencePoolLayer.h
index 85b51ccd1dc7e7eb7aa9344b0f7ec6f70a35a0b4..293d1bf27823ffb0ebddba95461883d646f159ae 100644
--- a/paddle/gserver/layers/SequencePoolLayer.h
+++ b/paddle/gserver/layers/SequencePoolLayer.h
@@ -26,6 +26,10 @@ namespace paddle {
  *   Output: output size is the number of input sequences (NOT input instances)
  *           output[i] = seqlastin/average/max_{for each instance in this
  *           sequence}{input[i]}
+ *   If stride_ > 0:
+ *     Check input sequence must not have sub-sequence
+ *     Output: a shortened sequence; pooling is performed within each
+ *             sliding window of size stride_
  * If SequenceLevel = kSeq:
  *   Check input sequence must has sub-sequence
  *   Output: output size is the number of input sub-sequences
@@ -42,6 +46,11 @@ protected:
   enum SequenceLevel { kNonSeq = 0, kSeq = 1 };
   size_t newBatchSize_;
   ICpuGpuVectorPtr startPositions_;
+  int stride_;
+  // Store the start position of each window.
+  IVectorPtr stridePositions_;
+  // Whether the input sequence is reversed or not.
+  bool reversed_ = false;
 
 public:
   explicit SequencePoolLayer(const LayerConfig& config) : Layer(config) {}
diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index 0c22896d6e58f8705f4284b95d0a6e132cb8903d..193b876c31626cee2e6763b3ab540e1a808fe1b0 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -804,10 +804,14 @@ TEST(Layer, ExpandLayer) {
   testExpandLayer("seq", true);    // seq expand to hasSubseq
 }
 
-void testDegradeLayer(bool hasSubseq, string layer_type, string trans_type) {
+void testDegradeLayer(bool hasSubseq,
+                      string layer_type,
+                      string trans_type,
+                      int stride) {
   TestConfig config;
   config.layerConfig.set_type(layer_type);
   config.layerConfig.set_size(10);
+  config.layerConfig.set_seq_pool_stride(stride);
   config.biasSize = 0;
 
   config.inputDefs.push_back(
@@ -827,36 +831,46 @@ void testDegradeLayer(bool hasSubseq, string layer_type, string trans_type) {
   if (layer_type == "average") {
     for (auto strategy : {"average", "sum", "squarerootn"}) {
       LOG(INFO) << " hasSubseq=" << hasSubseq << " trans_type=" << trans_type
-                << " average_strategy=" << strategy;
+                << " average_strategy=" << strategy
+                << " seq_pool_stride=" << stride;
       config.layerConfig.set_average_strategy(strategy);
       testDegradeLayerGrad(config, layer_type);
     }
   } else {
-    LOG(INFO) << " hasSubseq=" << hasSubseq << " trans_type=" << trans_type;
+    LOG(INFO) << " hasSubseq=" << hasSubseq << " trans_type=" << trans_type
+              << " seq_pool_stride=" << stride;
     testDegradeLayerGrad(config, layer_type);
   }
 }
 
 TEST(Layer, MaxLayer) {
-  testDegradeLayer(false, "max", "non-seq");  // seq max to non-seq
-  testDegradeLayer(true, "max", "non-seq");   // hasSubseq max to non-seq
-  testDegradeLayer(true, "max", "seq");       // hasSubseq max to seq
+  testDegradeLayer(false, "max", "non-seq", -1);  // seq max to non-seq
+  testDegradeLayer(true, "max", "non-seq", -1);   // hasSubseq max to non-seq
+  testDegradeLayer(true, "max", "seq", -1);       // hasSubseq max to seq
 }
 
 TEST(Layer, SequenceLastInstanceLayer) {
   testDegradeLayer(false,
                    "seqlastins",
-                   "non-seq");  // seq seqlastins to non-seq
+                   "non-seq",
+                   -1);  // seq seqlastins to non-seq
+  testDegradeLayer(false,
+                   "seqlastins",
+                   "non-seq",
+                   5);  // seq seqlastins to a shortened seq, stride window = 5
   testDegradeLayer(true,
                    "seqlastins",
-                   "non-seq");  // hasSubseq seqlastins to non-seq
-  testDegradeLayer(true, "seqlastins", "seq");  // hasSubseq seqlastins to seq
+                   "non-seq",
+                   -1);  // hasSubseq seqlastins to non-seq
+  testDegradeLayer(
+      true, "seqlastins", "seq", -1);  // hasSubseq seqlastins to seq
 }
 
 TEST(Layer, AverageLayer) {
-  testDegradeLayer(false, "average", "non-seq");  // seq average to non-seq
-  testDegradeLayer(true, "average", "non-seq");  // hasSubseq average to non-seq
-  testDegradeLayer(true, "average", "seq");      // hasSubseq average to seq
+  testDegradeLayer(false, "average", "non-seq", -1);  // seq average to non-seq
+  testDegradeLayer(
+      true, "average", "non-seq", -1);  // hasSubseq average to non-seq
+  testDegradeLayer(true, "average", "seq", -1);  // hasSubseq average to seq
 }
 
 TEST(Layer, SequenceConcatLayer) {
diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp
index 4139f59a2c8e665daf410b5b16539ff74b77ecfe..645bf737990638df042723ed827d0823cb201e72 100644
--- a/paddle/parameter/Argument.cpp
+++ b/paddle/parameter/Argument.cpp
@@ -559,6 +559,49 @@ void Argument::degradeSequence(const Argument& input) {
   tgtBuf[numSequences] = numSubSequences;
 }
 
+void Argument::poolSequenceWithStride(const Argument& input,
+                                      size_t stride,
+                                      IVectorPtr* stridePositions,
+                                      bool reversed) {
+  // If input.sequenceStartPositions = [0, 9, 14, 17, 30] and stride = 5,
+  // then sequenceStartPositions = [0, 2, 3, 4, 7].
+  // If reversed = false, stridePositions = [0, 5, 9, 14, 17, 22, 27, 30];
+  // if reversed = true, stridePositions = [0, 4, 9, 14, 17, 20, 25, 30].
+
+  CHECK(input.sequenceStartPositions);
+  CHECK_EQ(input.hasSubseq(), 0UL);
+  CHECK_GT(stride, 0) << "stride must be larger than 0";
+  size_t numSequences = input.getNumSequences();
+  ICpuGpuVector::resizeOrCreate(
+      sequenceStartPositions, numSequences + 1, false);
+  const int* starts = input.sequenceStartPositions->getData(false);
+  int* tgtBuf = sequenceStartPositions->getMutableData(false);
+  // first index of target sequence and stride positions are both 0
+  tgtBuf[0] = 0;
+  std::vector<int> stridePos;
+  for (size_t seqId = 0; seqId < numSequences; ++seqId) {
+    size_t seqLength = starts[seqId + 1] - starts[seqId];
+    stridePos.emplace_back(starts[seqId]);
+    if (seqLength == 0) {
+      // empty sequence
+      tgtBuf[seqId + 1] = tgtBuf[seqId];
+    } else {
+      int size = ceil((float)seqLength / stride);
+      tgtBuf[seqId + 1] = tgtBuf[seqId] + size;
+      for (int i = 0; i < size - 1; ++i) {
+        int cur = reversed ? starts[seqId + 1] - (size - 1 - i) * stride
+                           : stridePos.back() + stride;
+        stridePos.emplace_back(cur);
+      }
+    }
+  }
+  stridePos.emplace_back(starts[numSequences]);
+  int size = stridePos.size();
+  CHECK_EQ(size - 1, tgtBuf[numSequences]);
+  IVector::resizeOrCreate(*stridePositions, size, false);
+  (*stridePositions)->copyFrom(stridePos.data(), size);
+}
+
 void Argument::getValueString(
     std::unordered_map<std::string, std::string>* out) const {
   if (value) {
diff --git a/paddle/parameter/Argument.h b/paddle/parameter/Argument.h
index 9fd84bc4b7e0aa54d81f5d5df9e5acb3fbb70d29..91aca98e186aef0ad6b345cf4791ef80c616e3fe 100644
--- a/paddle/parameter/Argument.h
+++ b/paddle/parameter/Argument.h
@@ -291,6 +291,15 @@ struct Argument {
   */
   void degradeSequence(const Argument& input);
 
+  /*
+   After pooling with stride n (n is smaller than the sequence length),
+   a long sequence will be shortened.
+   This function does not support sequences that have sub-sequences.
+  */
+  void poolSequenceWithStride(const Argument& input,
+                              size_t stride,
+                              IVectorPtr* stridePositions,
+                              bool reversed = false);
   /**
    * @brief getValueString will return the argument's output in string. There
    * are several kinds of output. The keys of output dictionary are 'value',
diff --git a/paddle/parameter/tests/CMakeLists.txt b/paddle/parameter/tests/CMakeLists.txt
index cab264db8e5000e8eb61830ec07e9f590c103119..181ccdc1f099e8d61a44c1741116abe7afe0f11d 100644
--- a/paddle/parameter/tests/CMakeLists.txt
+++ b/paddle/parameter/tests/CMakeLists.txt
@@ -1 +1,2 @@
 add_simple_unittest(test_common)
+add_simple_unittest(test_argument)
diff --git a/paddle/parameter/tests/test_argument.cpp b/paddle/parameter/tests/test_argument.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..81fe4ee397351a013c8616ad08fb8cb4b8dae4d0
--- /dev/null
+++ b/paddle/parameter/tests/test_argument.cpp
@@ -0,0 +1,57 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gtest/gtest.h>
+#include <paddle/parameter/Argument.h>
+
+using namespace paddle;  // NOLINT
+
+TEST(Argument, poolSequenceWithStride) {
+  Argument input, output;
+  ICpuGpuVector::resizeOrCreate(input.sequenceStartPositions, 5, false);
+  int* inStart = input.sequenceStartPositions->getMutableData(false);
+  inStart[0] = 0;
+  inStart[1] = 9;
+  inStart[2] = 14;
+  inStart[3] = 17;
+  inStart[4] = 30;
+
+  int strideResult[] = {0, 5, 9, 14, 17, 22, 27, 30};
+  int strideResultReversed[] = {0, 4, 9, 14, 17, 20, 25, 30};
+
+  for (auto reversed : {false, true}) {
+    IVectorPtr stridePositions;
+    output.poolSequenceWithStride(
+        input, 5 /* stride */, &stridePositions, reversed);
+
+    const int* outStart = output.sequenceStartPositions->getData(false);
+    CHECK_EQ(outStart[0], 0);
+    CHECK_EQ(outStart[1], 2);
+    CHECK_EQ(outStart[2], 3);
+    CHECK_EQ(outStart[3], 4);
+    CHECK_EQ(outStart[4], 7);
+
+    CHECK_EQ(stridePositions->getSize(), 8);
+    auto result = reversed ? strideResultReversed : strideResult;
+    for (int i = 0; i < 8; i++) {
+      CHECK_EQ(stridePositions->getData()[i], result[i]);
+    }
+  }
+}
+
+int main(int argc, char** argv) {
+  testing::InitGoogleTest(&argc, argv);
+  initMain(argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto
index 65d5d50277b665e7c355202d6e8043f656ae92f1..4f9b53d6f6553e55406dd000029a598a92fd2fb6 100644
--- a/proto/ModelConfig.proto
+++ b/proto/ModelConfig.proto
@@ -441,6 +441,11 @@ message LayerConfig {
 
   // blank label used in ctc loss
   optional uint32 blank = 52 [default = 0];
+
+  // stride parameter for the seqlastins layer, AverageLayer, and MaxLayer;
+  // it controls the scope of the pooling operation and can be set > 0.
+  // Leave it empty or set it to -1 to disable stride pooling.
+  optional int32 seq_pool_stride = 53 [default = -1];
 }
 
 message EvaluatorConfig {
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 77361f8bc751446d89d8a812f48d33cd3dffc665..dc89419c40f8d527a3de0dc90ede0397f6f650c5 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -2485,6 +2485,7 @@ class SequenceLastInstanceLayer(LayerBase):
                  active_type='linear',
                  trans_type='non-seq',
                  bias=False,
+                 stride=-1,
                  **xargs):
         super(SequenceLastInstanceLayer, self).__init__(
             name,
@@ -2495,10 +2496,11 @@ class SequenceLastInstanceLayer(LayerBase):
             **xargs)
         config_assert(
             len(inputs) == 1, 'SequenceLastInstanceLayer must have 1 input')
+        if trans_type == 'seq':
+            config_assert(stride == -1, 'subseq does not support stride window')
         self.config.trans_type = trans_type
-        for input_index in xrange(len(self.inputs)):
-            input_layer = self.get_input_layer(input_index)
-            self.set_layer_size(input_layer.size)
+        self.config.seq_pool_stride = stride
+        self.set_layer_size(self.get_input_layer(0).size)
         self.create_bias_parameter(bias, self.config.size)
 
 
@@ -2510,10 +2512,16 @@ class SequenceFirstInstanceLayer(SequenceLastInstanceLayer):
                  active_type='linear',
                  trans_type='non-seq',
                  bias=False,
+                 stride=-1,
                  **xargs):
         super(SequenceFirstInstanceLayer, self).__init__(
-            name, inputs=inputs, active_type=active_type, bias=bias, **xargs)
-        self.config.trans_type = trans_type
+            name,
+            inputs=inputs,
+            active_type=active_type,
+            trans_type=trans_type,
+            bias=bias,
+            stride=stride,
+            **xargs)
         self.config.select_first = True
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index a89b7eb0c91973879839f0e7c955db1413bbf1d8..97db3c2d4c018355aaaa1d517cc65db3aefad74a 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -1342,10 +1342,16 @@ def grumemory(input,
 def last_seq(input,
              name=None,
              agg_level=AggregateLevel.EACH_TIMESTEP,
+             stride=-1,
              layer_attr=None):
     """
     Get Last Timestamp Activation of a sequence.
 
+    If stride > 0, this layer slides a window whose size is determined by
+    stride and returns the last value in each window as its output; a long
+    sequence is thereby shortened. Note that for input with sub-sequences,
+    stride must keep its default value of -1.
+
     The simple usage is:
 
     .. code-block:: python
@@ -1357,6 +1363,8 @@ def last_seq(input,
     :type name: basestring
     :param input: Input layer name.
     :type input: LayerOutput
+    :param stride: window size; -1 disables stride pooling.
+    :type stride: int
     :param layer_attr: extra layer attributes.
     :type layer_attr: ExtraLayerAttribute.
     :return: LayerOutput object.
@@ -1368,11 +1376,15 @@ def last_seq(input,
                       " series information at all. Maybe you want to use"
                       " first_seq instead.")
 
+    if agg_level == AggregateLevel.EACH_SEQUENCE:
+        assert stride == -1
+
     Layer(
         name=name,
         type=LayerType.SEQUENCE_LAST_INSTANCE,
         inputs=[input.name],
         trans_type=agg_level,
+        stride=stride,
         **ExtraLayerAttribute.to_kwargs(layer_attr))
     return LayerOutput(
         name,
@@ -1386,10 +1398,16 @@ def last_seq(input,
 def first_seq(input,
               name=None,
               agg_level=AggregateLevel.EACH_TIMESTEP,
+              stride=-1,
               layer_attr=None):
     """
     Get First Timestamp Activation of a sequence.
 
+    If stride > 0, this layer slides a window whose size is determined by
+    stride and returns the first value in each window as its output; a long
+    sequence is thereby shortened. Note that for input with sub-sequences,
+    stride must keep its default value of -1.
+
     The simple usage is:
 
     .. code-block:: python
 
@@ -1401,6 +1419,8 @@ def first_seq(input,
     :type name: basestring
     :param input: Input layer name.
     :type input: LayerOutput
+    :param stride: window size; -1 disables stride pooling.
+    :type stride: int
     :param layer_attr: extra layer attributes.
     :type layer_attr: ExtraLayerAttribute.
     :return: LayerOutput object.
@@ -1413,11 +1433,15 @@ def first_seq(input,
                       ' time series information at all. Maybe you want to use'
                       ' last_seq instead.')
 
+    if agg_level == AggregateLevel.EACH_SEQUENCE:
+        assert stride == -1
+
     Layer(
         name=name,
         type=LayerType.SEQUENCE_FIRST_INSTANCE,
         inputs=[input.name],
         trans_type=agg_level,
+        stride=stride,
         **ExtraLayerAttribute.to_kwargs(layer_attr))
     return LayerOutput(
         name,
diff --git a/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py b/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py
index 3a1a0132b64bb928e857905f99c0be2e81ccbda2..3c6dbc95e54898ca1e44c3dc010c9fb73a3bee30 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py
@@ -14,4 +14,7 @@ for op in seq_op:
     for al in agg_level:
         opts.append(op(input=din, agg_level=al))
 
+for op in seq_op:
+    opts.append(op(input=din, agg_level=AggregateLevel.EACH_TIMESTEP, stride=5))
+
 outputs(opts)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/last_first_seq.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/last_first_seq.protostr
index 7b2911f8e367ebf9d0797e815a7532c714ef698e..12b2255f3a41119792d0f993ce2e03ce9ee3e994 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/last_first_seq.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/last_first_seq.protostr
@@ -15,6 +15,7 @@ layers {
   }
   select_first: true
   trans_type: "seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__first_seq_1__"
@@ -26,6 +27,7 @@ layers {
   }
   select_first: true
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__last_seq_0__"
@@ -36,6 +38,7 @@ layers {
     input_layer_name: "data"
   }
   trans_type: "seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__last_seq_1__"
@@ -46,12 +49,38 @@ layers {
     input_layer_name: "data"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
+}
+layers {
+  name: "__first_seq_2__"
+  type: "seqlastins"
+  size: 30
+  active_type: "linear"
+  inputs {
+    input_layer_name: "data"
+  }
+  select_first: true
+  trans_type: "non-seq"
+  seq_pool_stride: 5
+}
+layers {
+  name: "__last_seq_2__"
+  type: "seqlastins"
+  size: 30
+  active_type: "linear"
+  inputs {
+    input_layer_name: "data"
+  }
+  trans_type: "non-seq"
+  seq_pool_stride: 5
 }
 input_layer_names: "data"
 output_layer_names: "__first_seq_0__"
 output_layer_names: "__first_seq_1__"
 output_layer_names: "__last_seq_0__"
 output_layer_names: "__last_seq_1__"
+output_layer_names: "__first_seq_2__"
+output_layer_names: "__last_seq_2__"
 sub_models {
   name: "root"
   layer_names: "data"
@@ -59,11 +88,15 @@ sub_models {
   layer_names: "__first_seq_1__"
   layer_names: "__last_seq_0__"
   layer_names: "__last_seq_1__"
+  layer_names: "__first_seq_2__"
+  layer_names: "__last_seq_2__"
   input_layer_names: "data"
   output_layer_names: "__first_seq_0__"
   output_layer_names: "__first_seq_1__"
   output_layer_names: "__last_seq_0__"
   output_layer_names: "__last_seq_1__"
+  output_layer_names: "__first_seq_2__"
+  output_layer_names: "__last_seq_2__"
   is_recurrent_layer_group: false
 }
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr
index b6905824f0cb090375a38ff67e39fc626df0b2f6..64530146a1458933d4ba0edffc1b1b7e60a21187 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr
@@ -128,6 +128,7 @@ layers {
     input_layer_name: "__simple_gru_0__"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__last_seq_1__"
@@ -138,6 +139,7 @@ layers {
     input_layer_name: "__simple_gru_1__"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__fc_layer_0__"
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr
index 0a83499b724806666a241489467207f3c7151a3a..79fa4c74f081aebadd258e06333de9eafe6a5ee3 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr
@@ -210,6 +210,7 @@ layers {
     input_layer_name: "__lstm_group_0__"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__last_seq_1__"
@@ -220,6 +221,7 @@ layers {
     input_layer_name: "__lstm_group_1__"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__fc_layer_0__"
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/simple_rnn_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/simple_rnn_layers.protostr
index dacb40185f863025528c2d4eeb8b325425953a93..68fa881b4f1408b8cd20f2417062ce035c0fda54 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/simple_rnn_layers.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/simple_rnn_layers.protostr
@@ -143,6 +143,7 @@ layers {
     input_layer_name: "__recurrent_layer_0__"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__first_seq_0__"
@@ -154,6 +155,7 @@ layers {
   }
   select_first: true
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__last_seq_1__"
@@ -164,6 +166,7 @@ layers {
     input_layer_name: "__lstmemory_0__"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__first_seq_1__"
@@ -175,6 +178,7 @@ layers {
   }
   select_first: true
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__last_seq_2__"
@@ -185,6 +189,7 @@ layers {
     input_layer_name: "__gru_0__"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__first_seq_2__"
@@ -196,6 +201,7 @@ layers {
   }
   select_first: true
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 parameters {
   name: "___fc_layer_0__.w0"
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
index a0fb729e062bdf6fd7d2a7c2ae364d1a2b32811d..77b447aa9db2a6c323fd3c322e7e9ca1ed19a6dd 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
@@ -96,6 +96,7 @@ layers {
     input_layer_name: "rnn_forward"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__recurrent_group_1__"
@@ -145,6 +146,7 @@ layers {
   }
   select_first: true
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__recurrent_group_2__"
@@ -193,6 +195,7 @@ layers {
     input_layer_name: "rnn_subseq_forward"
  }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
"__lstm_group_0___recurrent_group" @@ -282,6 +285,7 @@ layers { input_layer_name: "__lstm_group_0__" } trans_type: "non-seq" + seq_pool_stride: -1 } layers { name: "__gru_group_0___recurrent_group" @@ -330,6 +334,7 @@ layers { input_layer_name: "__gru_group_0__" } trans_type: "non-seq" + seq_pool_stride: -1 } layers { name: "__recurrent_group_3__" @@ -378,6 +383,7 @@ layers { input_layer_name: "__fc_layer_0__" } trans_type: "non-seq" + seq_pool_stride: -1 } parameters { name: "___mixed_0__.w0"