From 9298a9ec0d291aa919d59f57ce6a8562d781bd85 Mon Sep 17 00:00:00 2001
From: Luo Tao
Date: Fri, 24 Mar 2017 14:35:39 +0800
Subject: [PATCH] stride pooling for seqlastin and seqfirstin

---
 .../layers/SequenceLastInstanceLayer.cpp      | 23 ++++----
 paddle/gserver/layers/SequencePoolLayer.cpp   | 11 +++-
 paddle/gserver/layers/SequencePoolLayer.h     |  7 +++
 paddle/gserver/tests/test_LayerGrad.cpp       | 16 ++++--
 paddle/parameter/Argument.cpp                 | 40 ++++++++++++++
 paddle/parameter/Argument.h                   |  8 +++
 paddle/parameter/tests/CMakeLists.txt         |  1 +
 paddle/parameter/tests/test_argument.cpp      | 52 +++++++++++++++++++
 proto/ModelConfig.proto                       |  5 ++
 python/paddle/trainer/config_parser.py        | 18 +++++--
 .../paddle/trainer_config_helpers/layers.py   | 21 ++++++++
 .../tests/configs/last_first_seq.py           |  3 ++
 .../configs/protostr/last_first_seq.protostr  | 33 ++++++++++++
 .../configs/protostr/shared_gru.protostr      |  2 +
 .../configs/protostr/shared_lstm.protostr     |  2 +
 .../protostr/simple_rnn_layers.protostr       |  6 +++
 .../configs/protostr/test_rnn_group.protostr  |  6 +++
 17 files changed, 235 insertions(+), 19 deletions(-)
 create mode 100644 paddle/parameter/tests/test_argument.cpp

diff --git a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
index 7a13cd7ad0..7ac087663a 100644
--- a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
+++ b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp
@@ -25,6 +25,9 @@ namespace paddle {
  * Input: a sequence
  * If SequenceLevel = kNonseq:
  *   Output: a sequence containing only the last instance of the input sequence
+ * If stride_ > 0:
+ *   Output: a shortened sequence containing several last instances of the
+ *           input sequence, one per stride window.
  * If SequenceLevel = kSeq:
  *   Check input sequence must has sub-sequence
  *   Output: a sequence containing only the last instance of each sub-sequence
@@ -37,6 +40,8 @@ class SequenceLastInstanceLayer : public SequencePoolLayer {
 protected:
   MatrixPtr tmpSrc_;
   MatrixPtr tmpDest_;
+  bool select_first_;
+  std::vector<int> insId_;
 
 public:
   explicit SequenceLastInstanceLayer(const LayerConfig& config)
@@ -54,6 +59,7 @@ REGISTER_LAYER(seqlastins, SequenceLastInstanceLayer);
 bool SequenceLastInstanceLayer::init(const LayerMap& layerMap,
                                      const ParameterMap& parameterMap) {
   SequencePoolLayer::init(layerMap, parameterMap);
+  select_first_ = config_.select_first();
 
   tmpSrc_ =
       Matrix::create(nullptr, /* height= */ 1, 1, /* trans= */ false, useGpu_);
@@ -74,9 +80,13 @@ void SequenceLastInstanceLayer::forward(PassType passType) {
     AsyncGpuBlock asyncGpuBlock;
     REGISTER_TIMER_INFO("SequenceLastInstanceLayerForward", getName().c_str());
 
+    insId_.clear();
     for (size_t seqId = 0; seqId < newBatchSize_; ++seqId) {
-      int insId =
-          config_.select_first() ? starts[seqId] : starts[seqId + 1] - 1;
+      int insId = (stride_ > 0)
+                      ? (select_first_ ? stridePositions_[seqId]
+                                       : stridePositions_[seqId + 1] - 1)
+                      : (select_first_ ? starts[seqId] : starts[seqId + 1] - 1);
+      insId_.push_back(insId);
 
       outputValue->subMatrix(seqId, 1, tmpDest_)
           ->assign(*(inputValue->subMatrix(insId, 1, tmpSrc_)));
@@ -96,18 +106,13 @@ void SequenceLastInstanceLayer::backward(const UpdateCallback& callback) {
   MatrixPtr inputGrad = getInputGrad(0);
   MatrixPtr outputGrad = getOutputGrad();
-  const int* starts = startPositions_->getData(false);
-  size_t numSequences = startPositions_->getSize() - 1;
 
   if (inputGrad) {
     AsyncGpuBlock asyncGpuBlock;
     REGISTER_TIMER_INFO("SequenceLastInstanceLayerBackward", getName().c_str());
 
-    for (size_t seqId = 0; seqId < numSequences; ++seqId) {
-      int insId =
-          config_.select_first() ? starts[seqId] : starts[seqId + 1] - 1;
-
-      inputGrad->subMatrix(insId, 1, tmpDest_)
+    for (size_t seqId = 0; seqId < newBatchSize_; ++seqId) {
+      inputGrad->subMatrix(insId_[seqId], 1, tmpDest_)
           ->add(*(outputGrad->subMatrix(seqId, 1, tmpSrc_)));
     }
   }
diff --git a/paddle/gserver/layers/SequencePoolLayer.cpp b/paddle/gserver/layers/SequencePoolLayer.cpp
index 5807c42496..2bf180a043 100644
--- a/paddle/gserver/layers/SequencePoolLayer.cpp
+++ b/paddle/gserver/layers/SequencePoolLayer.cpp
@@ -37,6 +37,7 @@ bool SequencePoolLayer::init(const LayerMap& layerMap,
   } else {
     LOG(FATAL) << "Unknown trans_type: " << config_.trans_type();
   }
+  stride_ = config_.seq_pool_stride();
   setNeedSequenceInfo(false);
   return true;
 }
@@ -55,8 +56,6 @@ void SequencePoolLayer::forward(PassType passType) {
   CHECK_EQ(starts->getData()[newBatchSize_], input.getBatchSize());
   CHECK_EQ(newBatchSize_, starts->getSize() - 1);
 
-  resetOutput(newBatchSize_, dim);
-
   /* If type_ = kNonSeq, both seq has or not has sub-seq degrade to a non-seq,
    * thus, in this case, output_ has no sequenceStartPositions.
    * If type_ = kSeq, seq has sub-seq degrades to a seq, thus, only in this
@@ -67,6 +66,14 @@ void SequencePoolLayer::forward(PassType passType) {
         << "when trans_type = seq, input must hasSubseq";
     output_.degradeSequence(input);
   }
+  if (stride_ > 0) {
+    CHECK_EQ(input.hasSubseq(), 0UL)
+        << "sequence stride pooling does not support sub-sequences yet";
+    output_.poolSequenceWithStride(input, stride_, &stridePositions_);
+    newBatchSize_ = stridePositions_.size() - 1;
+  }
+
+  resetOutput(newBatchSize_, dim);
 }
 
 void SequencePoolLayer::backward(const UpdateCallback& callback) {
diff --git a/paddle/gserver/layers/SequencePoolLayer.h b/paddle/gserver/layers/SequencePoolLayer.h
index 85b51ccd1d..5ca1c1a82b 100644
--- a/paddle/gserver/layers/SequencePoolLayer.h
+++ b/paddle/gserver/layers/SequencePoolLayer.h
@@ -26,6 +26,10 @@ namespace paddle {
  *    Output: output size is the number of input sequences (NOT input instances)
  *    output[i] = seqlastin/average/max_{for each instance in this
  *    sequence}{input[i]}
+ * If stride_ > 0:
+ *    Check that the input sequence has no sub-sequences
+ *    Output: a shortened sequence; pooling is performed over each local
+ *            stride window
  * If SequenceLevel = kSeq:
  *    Check input sequence must has sub-sequence
  *    Output: output size is the number of input sub-sequences
@@ -42,6 +46,9 @@ protected:
   enum SequenceLevel { kNonSeq = 0, kSeq = 1 };
   size_t newBatchSize_;
   ICpuGpuVectorPtr startPositions_;
+  int stride_;
+  // store the start position of each stride window
+  std::vector<int> stridePositions_;
 
 public:
   explicit SequencePoolLayer(const LayerConfig& config) : Layer(config) {}
diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index 5f8a7b79a0..ce83531416 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -804,10 +804,14 @@ TEST(Layer, ExpandLayer) {
   testExpandLayer("seq", true);  // seq expand to hasSubseq
 }
 
-void testDegradeLayer(bool hasSubseq, string layer_type, string trans_type) {
+void testDegradeLayer(bool hasSubseq,
+                      string layer_type,
+                      string trans_type,
+                      int stride = -1) {
   TestConfig config;
   config.layerConfig.set_type(layer_type);
   config.layerConfig.set_size(10);
+  config.layerConfig.set_seq_pool_stride(stride);
   config.biasSize = 0;
 
   config.inputDefs.push_back(
@@ -827,12 +831,14 @@ void testDegradeLayer(bool hasSubseq, string layer_type, string trans_type) {
   if (layer_type == "average") {
     for (auto strategy : {"average", "sum", "squarerootn"}) {
       LOG(INFO) << " hasSubseq=" << hasSubseq << " trans_type=" << trans_type
-                << " average_strategy=" << strategy;
+                << " average_strategy=" << strategy
+                << " seq_pool_stride=" << stride;
       config.layerConfig.set_average_strategy(strategy);
       testDegradeLayerGrad(config, layer_type);
     }
   } else {
-    LOG(INFO) << " hasSubseq=" << hasSubseq << " trans_type=" << trans_type;
+    LOG(INFO) << " hasSubseq=" << hasSubseq << " trans_type=" << trans_type
+              << " seq_pool_stride=" << stride;
     testDegradeLayerGrad(config, layer_type);
   }
 }
@@ -847,6 +853,10 @@ TEST(Layer, SequenceLastInstanceLayer) {
   testDegradeLayer(false,
                    "seqlastins",
                    "non-seq");  // seq seqlastins to non-seq
+  testDegradeLayer(false,
+                   "seqlastins",
+                   "non-seq",
+                   5);  // seq seqlastins to a shortened seq, stride window = 5
   testDegradeLayer(true,
                    "seqlastins",
                    "non-seq");  // hasSubseq seqlastins to non-seq
diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp
index 4139f59a2c..2657c00ebb 100644
--- a/paddle/parameter/Argument.cpp
+++ b/paddle/parameter/Argument.cpp
@@ -559,6 +559,46 @@ void Argument::degradeSequence(const Argument& input) {
   tgtBuf[numSequences] = numSubSequences;
 }
 
+void Argument::poolSequenceWithStride(const Argument& input,
+                                      size_t stride,
+                                      std::vector<int>* stridePositions) {
+  /*
+   * If input.sequenceStartPositions = [0, 9, 14, 17, 30] and stride = 5,
+   * then sequenceStartPositions = [0, 2, 3, 4, 7],
+   * and stridePositions = [0, 5, 9, 14, 17, 22, 27, 30]
+   */
+  CHECK(input.sequenceStartPositions);
+  CHECK_EQ(input.hasSubseq(), 0UL);
+  CHECK_GT(stride, 0) << "stride must be larger than 0";
+  size_t numSequences = input.getNumSequences();
+  ICpuGpuVector::resizeOrCreate(
+      sequenceStartPositions, numSequences + 1, false);
+  const int* starts = input.sequenceStartPositions->getData(false);
+  int* tgtBuf = sequenceStartPositions->getMutableData(false);
+  // first index of target sequence and stride positions are both 0
+  tgtBuf[0] = 0;
+  stridePositions->clear();
+  for (size_t seqId = 0; seqId < numSequences; ++seqId) {
+    size_t seqLength = starts[seqId + 1] - starts[seqId];
+    stridePositions->emplace_back(starts[seqId]);
+    if (seqLength == 0) {
+      // empty sequence
+      tgtBuf[seqId + 1] = tgtBuf[seqId];
+    } else if (seqLength < stride) {
+      tgtBuf[seqId + 1] = tgtBuf[seqId] + 1;
+    } else {
+      tgtBuf[seqId + 1] = tgtBuf[seqId] + ceil((float)seqLength / stride);
+      int size =
+          (seqLength % stride) ? seqLength / stride : seqLength / stride - 1;
+      for (int i = 0; i < size; i++) {
+        stridePositions->emplace_back(stridePositions->back() + stride);
+      }
+    }
+  }
+  stridePositions->emplace_back(starts[numSequences]);
+  CHECK_EQ(stridePositions->size() - 1, tgtBuf[numSequences]);
+}
+
 void Argument::getValueString(
     std::unordered_map<std::string, std::string>* out) const {
   if (value) {
diff --git a/paddle/parameter/Argument.h b/paddle/parameter/Argument.h
index 9fd84bc4b7..760029c2fe 100644
--- a/paddle/parameter/Argument.h
+++ b/paddle/parameter/Argument.h
@@ -291,6 +291,14 @@ struct Argument {
   */
   void degradeSequence(const Argument& input);
 
+  /*
+   After pooling with stride n (n smaller than the sequence length),
+   a long sequence will be shortened.
+   This function does not support sequences with sub-sequences for now.
+   */
+  void poolSequenceWithStride(const Argument& input,
+                              size_t stride,
+                              std::vector<int>* stridePositions);
 /**
  * @brief getValueString will return the argument's output in string. There
  * are several kinds of output. The keys of output dictionary are 'value',
diff --git a/paddle/parameter/tests/CMakeLists.txt b/paddle/parameter/tests/CMakeLists.txt
index cab264db8e..181ccdc1f0 100644
--- a/paddle/parameter/tests/CMakeLists.txt
+++ b/paddle/parameter/tests/CMakeLists.txt
@@ -1 +1,2 @@
 add_simple_unittest(test_common)
+add_simple_unittest(test_argument)
diff --git a/paddle/parameter/tests/test_argument.cpp b/paddle/parameter/tests/test_argument.cpp
new file mode 100644
index 0000000000..ba17e8a298
--- /dev/null
+++ b/paddle/parameter/tests/test_argument.cpp
@@ -0,0 +1,52 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gtest/gtest.h>
+#include <paddle/parameter/Argument.h>
+
+using namespace paddle;  // NOLINT
+
+TEST(Argument, poolSequenceWithStride) {
+  Argument input, output;
+  ICpuGpuVector::resizeOrCreate(input.sequenceStartPositions, 5, false);
+  int* inStart = input.sequenceStartPositions->getMutableData(false);
+  inStart[0] = 0;
+  inStart[1] = 9;
+  inStart[2] = 14;
+  inStart[3] = 17;
+  inStart[4] = 30;
+
+  std::vector<int> stridePositions;
+  stridePositions.clear();
+  output.poolSequenceWithStride(input, 5 /* stride */, &stridePositions);
+
+  const int* outStart = output.sequenceStartPositions->getData(false);
+  CHECK_EQ(outStart[0], 0);
+  CHECK_EQ(outStart[1], 2);
+  CHECK_EQ(outStart[2], 3);
+  CHECK_EQ(outStart[3], 4);
+  CHECK_EQ(outStart[4], 7);
+
+  CHECK_EQ(stridePositions.size(), 8UL);
+  int strideResult[] = {0, 5, 9, 14, 17, 22, 27, 30};
+  for (int i = 0; i < 8; i++) {
+    CHECK_EQ(stridePositions[i], strideResult[i]);
+  }
+}
+
+int main(int argc, char** argv) {
+  testing::InitGoogleTest(&argc, argv);
+  initMain(argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto
index 65d5d50277..4f9b53d6f6 100644
--- a/proto/ModelConfig.proto
+++ b/proto/ModelConfig.proto
@@ -441,6 +441,11 @@ message LayerConfig {
 
   // blank label used in ctc loss
   optional uint32 blank = 52 [default = 0];
+
+  // Stride parameter for the seqlastins layer, AverageLayer and MaxLayer,
+  // which controls the scope of the pooling operation. Can be set > 0.
+  // Leave empty or set to -1 to disable stride pooling.
+  optional int32 seq_pool_stride = 53 [default = -1];
 }
 
 message EvaluatorConfig {
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 1394773b4f..bfe7150175 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -2480,6 +2480,7 @@ class SequenceLastInstanceLayer(LayerBase):
                  active_type='linear',
                  trans_type='non-seq',
                  bias=False,
+                 stride=-1,
                  **xargs):
         super(SequenceLastInstanceLayer, self).__init__(
             name,
@@ -2490,10 +2491,11 @@ class SequenceLastInstanceLayer(LayerBase):
             **xargs)
         config_assert(
             len(inputs) == 1, 'SequenceLastInstanceLayer must have 1 input')
+        if trans_type == 'seq':
+            config_assert(stride == -1, 'subseq does not support stride window')
         self.config.trans_type = trans_type
-        for input_index in xrange(len(self.inputs)):
-            input_layer = self.get_input_layer(input_index)
-            self.set_layer_size(input_layer.size)
+        self.config.seq_pool_stride = stride
+        self.set_layer_size(self.get_input_layer(0).size)
         self.create_bias_parameter(bias, self.config.size)
 
 
@@ -2505,10 +2507,16 @@ class SequenceFirstInstanceLayer(SequenceLastInstanceLayer):
                  active_type='linear',
                  trans_type='non-seq',
                  bias=False,
+                 stride=-1,
                  **xargs):
         super(SequenceFirstInstanceLayer, self).__init__(
-            name, inputs=inputs, active_type=active_type, bias=bias, **xargs)
-        self.config.trans_type = trans_type
+            name,
+            inputs=inputs,
+            active_type=active_type,
+            trans_type=trans_type,
+            bias=bias,
+            stride=stride,
+            **xargs)
         self.config.select_first = True
 
 
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index b006eb46d9..9e4ca5794b 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -1301,10 +1301,15 @@ def grumemory(input,
 def last_seq(input,
              name=None,
             agg_level=AggregateLevel.EACH_TIMESTEP,
+             stride=-1,
              layer_attr=None):
     """
     Get Last Timestamp Activation of a sequence.
 
+    If stride > 0, the last timestamp from each stride window of the
+    sequence is taken, and a long sequence will be shortened. Note that
+    for a sequence with sub-sequences, stride keeps its default value -1.
+
     The simple usage is:
 
     .. code-block:: python
 
@@ -1316,6 +1321,8 @@ def last_seq(input,
     :type name: basestring
     :param input: Input layer name.
     :type input: LayerOutput
+    :param stride: the step size of the stride window; -1 disables stride pooling.
+    :type stride: int
     :param layer_attr: extra layer attributes.
     :type layer_attr: ExtraLayerAttribute.
     :return: LayerOutput object.
@@ -1327,11 +1334,15 @@ def last_seq(input,
                       " series information at all. Maybe you want to use"
                       " first_seq instead.")
 
+    if agg_level == AggregateLevel.EACH_SEQUENCE:
+        assert stride == -1
+
     Layer(
         name=name,
         type=LayerType.SEQUENCE_LAST_INSTANCE,
         inputs=[input.name],
         trans_type=agg_level,
+        stride=stride,
         **ExtraLayerAttribute.to_kwargs(layer_attr))
     return LayerOutput(
         name,
@@ -1345,10 +1356,16 @@ def first_seq(input,
              name=None,
              agg_level=AggregateLevel.EACH_TIMESTEP,
+             stride=-1,
              layer_attr=None):
     """
     Get First Timestamp Activation of a sequence.
 
+    If stride > 0, the first timestamp from each stride window of the
+    sequence is taken, and a long sequence will be shortened. Note that
+    for a sequence with sub-sequences, stride keeps its default value -1.
+
+
     The simple usage is:
 
     .. code-block:: python
 
@@ -1372,11 +1389,15 @@ def first_seq(input,
                       ' time series information at all. Maybe you want to use'
                       ' last_seq instead.')
 
+    if agg_level == AggregateLevel.EACH_SEQUENCE:
+        assert stride == -1
+
     Layer(
         name=name,
         type=LayerType.SEQUENCE_FIRST_INSTANCE,
         inputs=[input.name],
         trans_type=agg_level,
+        stride=stride,
         **ExtraLayerAttribute.to_kwargs(layer_attr))
     return LayerOutput(
         name,
diff --git a/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py b/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py
index 3a1a0132b6..3c6dbc95e5 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py
@@ -14,4 +14,7 @@ for op in seq_op:
     for al in agg_level:
         opts.append(op(input=din, agg_level=al))
 
+for op in seq_op:
+    opts.append(op(input=din, agg_level=AggregateLevel.EACH_TIMESTEP, stride=5))
+
 outputs(opts)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/last_first_seq.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/last_first_seq.protostr
index 7b2911f8e3..12b2255f3a 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/last_first_seq.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/last_first_seq.protostr
@@ -15,6 +15,7 @@ layers {
   }
   select_first: true
   trans_type: "seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__first_seq_1__"
@@ -26,6 +27,7 @@ layers {
   }
   select_first: true
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__last_seq_0__"
@@ -36,6 +38,7 @@ layers {
     input_layer_name: "data"
   }
   trans_type: "seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__last_seq_1__"
@@ -46,12 +49,38 @@ layers {
     input_layer_name: "data"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
+}
+layers {
+  name: "__first_seq_2__"
+  type: "seqlastins"
+  size: 30
+  active_type: "linear"
+  inputs {
+    input_layer_name: "data"
+  }
+  select_first: true
+  trans_type: "non-seq"
+  seq_pool_stride: 5
+}
+layers {
+  name: "__last_seq_2__"
+  type: "seqlastins"
+  size: 30
+  active_type: "linear"
+  inputs {
+    input_layer_name: "data"
+  }
+  trans_type: "non-seq"
+  seq_pool_stride: 5
 }
 input_layer_names: "data"
 output_layer_names: "__first_seq_0__"
 output_layer_names: "__first_seq_1__"
 output_layer_names: "__last_seq_0__"
 output_layer_names: "__last_seq_1__"
+output_layer_names: "__first_seq_2__"
+output_layer_names: "__last_seq_2__"
 sub_models {
   name: "root"
   layer_names: "data"
@@ -59,11 +88,15 @@ sub_models {
   layer_names: "__first_seq_1__"
   layer_names: "__last_seq_0__"
   layer_names: "__last_seq_1__"
+  layer_names: "__first_seq_2__"
+  layer_names: "__last_seq_2__"
   input_layer_names: "data"
   output_layer_names: "__first_seq_0__"
   output_layer_names: "__first_seq_1__"
   output_layer_names: "__last_seq_0__"
   output_layer_names: "__last_seq_1__"
+  output_layer_names: "__first_seq_2__"
+  output_layer_names: "__last_seq_2__"
   is_recurrent_layer_group: false
 }
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr
index b6905824f0..64530146a1 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr
@@ -128,6 +128,7 @@ layers {
     input_layer_name: "__simple_gru_0__"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__last_seq_1__"
@@ -138,6 +139,7 @@ layers {
     input_layer_name: "__simple_gru_1__"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__fc_layer_0__"
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr
index 0a83499b72..79fa4c74f0 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr
@@ -210,6 +210,7 @@ layers {
     input_layer_name: "__lstm_group_0__"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__last_seq_1__"
@@ -220,6 +221,7 @@ layers {
     input_layer_name: "__lstm_group_1__"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__fc_layer_0__"
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/simple_rnn_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/simple_rnn_layers.protostr
index dacb40185f..68fa881b4f 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/simple_rnn_layers.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/simple_rnn_layers.protostr
@@ -143,6 +143,7 @@ layers {
     input_layer_name: "__recurrent_layer_0__"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__first_seq_0__"
@@ -154,6 +155,7 @@ layers {
   }
   select_first: true
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__last_seq_1__"
@@ -164,6 +166,7 @@ layers {
     input_layer_name: "__lstmemory_0__"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__first_seq_1__"
@@ -175,6 +178,7 @@ layers {
   }
   select_first: true
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__last_seq_2__"
@@ -185,6 +189,7 @@ layers {
     input_layer_name: "__gru_0__"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__first_seq_2__"
@@ -196,6 +201,7 @@ layers {
   }
   select_first: true
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 parameters {
   name: "___fc_layer_0__.w0"
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
index a0fb729e06..77b447aa9d 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
@@ -96,6 +96,7 @@ layers {
     input_layer_name: "rnn_forward"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__recurrent_group_1__"
@@ -145,6 +146,7 @@ layers {
   }
   select_first: true
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__recurrent_group_2__"
@@ -193,6 +195,7 @@ layers {
     input_layer_name: "rnn_subseq_forward"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__lstm_group_0___recurrent_group"
@@ -282,6 +285,7 @@ layers {
     input_layer_name: "__lstm_group_0__"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__gru_group_0___recurrent_group"
@@ -330,6 +334,7 @@ layers {
     input_layer_name: "__gru_group_0__"
  }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 layers {
   name: "__recurrent_group_3__"
@@ -378,6 +383,7 @@ layers {
     input_layer_name: "__fc_layer_0__"
   }
   trans_type: "non-seq"
+  seq_pool_stride: -1
 }
 parameters {
   name: "___mixed_0__.w0"
--
GitLab
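
The core of this patch is Argument::poolSequenceWithStride: each input sequence is split into windows of `stride` timesteps (the last window of a sequence may be partial), and the start index of every window is recorded so that seqlastins can pick one instance per window. The following is a minimal Python sketch that mirrors the C++ logic above; pool_sequence_with_stride is a hypothetical name used only for illustration and is not part of the patch:

    import math

    def pool_sequence_with_stride(seq_starts, stride):
        # seq_starts holds the cumulative start indices of the input
        # sequences, e.g. [0, 9, 14, 17, 30].
        # Returns (new_seq_starts, stride_positions).
        assert stride > 0, "stride must be larger than 0"
        new_starts = [0]       # start indices of the pooled (shortened) sequences
        stride_positions = []  # start index of every stride window
        for seq_id in range(len(seq_starts) - 1):
            begin, end = seq_starts[seq_id], seq_starts[seq_id + 1]
            length = end - begin
            stride_positions.append(begin)
            if length == 0:
                # an empty sequence contributes no output instance
                new_starts.append(new_starts[-1])
            elif length < stride:
                # shorter than one window: a single partial window
                new_starts.append(new_starts[-1] + 1)
            else:
                # one output instance per window, the last possibly partial
                new_starts.append(new_starts[-1] +
                                  int(math.ceil(float(length) / stride)))
                extra = (length // stride if length % stride
                         else length // stride - 1)
                for _ in range(extra):
                    stride_positions.append(stride_positions[-1] + stride)
        stride_positions.append(seq_starts[-1])
        return new_starts, stride_positions

    # The example from the patch comment and from test_argument.cpp:
    print(pool_sequence_with_stride([0, 9, 14, 17, 30], 5))
    # -> ([0, 2, 3, 4, 7], [0, 5, 9, 14, 17, 22, 27, 30])

On the configuration side, the feature is used exactly as in the updated last_first_seq.py test config; the variable names below are illustrative:

    from paddle.trainer_config_helpers import *

    din = data_layer(name='data', size=30)
    # Keep the last/first timestep of every 5-step stride window, yielding a
    # shortened sequence instead of a single instance per input sequence.
    last = last_seq(input=din, agg_level=AggregateLevel.EACH_TIMESTEP, stride=5)
    first = first_seq(input=din, agg_level=AggregateLevel.EACH_TIMESTEP, stride=5)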