From 9a9de9240dc3fa03fcb8c0982b26d1b1a2ed01f2 Mon Sep 17 00:00:00 2001
From: xuwei06
Date: Fri, 9 Sep 2016 10:51:54 -0700
Subject: [PATCH] Correctly handle memory in RecurrentGradientMachine for
 hierarchical RNN

Change-Id: I8e0a8ea6fc2760652d9c76440a539c90860062d3
---
 .../RecurrentGradientMachine.cpp            | 11 ++-
 .../RecurrentGradientMachine.h              |  4 +
 paddle/gserver/tests/Sequence/dummy.list    |  1 +
 paddle/gserver/tests/rnn_data_provider.py   | 35 ++++++++++
 paddle/gserver/tests/sequenceGen.py         |  3 -
 paddle/gserver/tests/sequence_nest_rnn.conf | 75 +++++++++++++++++++
 paddle/gserver/tests/sequence_rnn.conf      | 57 ++++++++++++++
 .../tests/test_RecurrentGradientMachine.cpp | 21 +++++-
 paddle/parameter/Argument.h                 |  9 +++
 9 files changed, 207 insertions(+), 9 deletions(-)
 create mode 100644 paddle/gserver/tests/Sequence/dummy.list
 create mode 100644 paddle/gserver/tests/rnn_data_provider.py
 create mode 100644 paddle/gserver/tests/sequence_nest_rnn.conf
 create mode 100644 paddle/gserver/tests/sequence_rnn.conf

diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
index e000bb2e5..96b0e1988 100644
--- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
+++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
@@ -519,7 +519,6 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
         dynamic_cast<GatherAgentLayer*>(outFrameLine.agentLayer.get());
     gatherAgent->addRealLayer(outFrameLine.frames[i]);
   }
-
   // connect memory links
   // Adopt info_[0].idIndex because seq which has_subseq=True
   // doesn't support Memory with !hasSubseq bootlayer;
@@ -529,7 +528,7 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
       NeuralNetwork::connect(
           memoryFrameLine.agents[i],
          i == 0 ?
              memoryFrameLine.bootLayer : memoryFrameLine.frames[i - 1],
-          idSize /*height of agent*/);
+          numSeqs_[i] /*height of agent*/);
     }
   }
 
@@ -622,6 +621,8 @@ void RecurrentGradientMachine::createInFrameInfo(int inlinks_id,
   // numSequences: # samples(sequences) in a batch
   size_t numSequences = input.getNumSequences();
   std::vector<int> allIds;
+
+  numSeqs_.clear();
   Info* inlink_info = &info_[inlinks_id];
   inlink_info->idIndex.clear();
   inlink_info->idIndex.push_back(0);  // first idIndex = 0
@@ -634,10 +635,12 @@ void RecurrentGradientMachine::createInFrameInfo(int inlinks_id,
     // maxSequenceLength_: max number of sentences(subseq) in allsamples
     for (int i = 0; i < maxSequenceLength_; ++i) {
       sequenceStartPositions.push_back(0);  // first element = 0
+      int numSeqs = 0;
       for (size_t j = 0; j < numSubSequences; ++j) {  // for each sentence
         // seqLengthAndStart_[inlinks_id][j]:
         // a 4-tuple including <subSeqLength, subSeqStart, seqIndex, subSeqIndex>
         if (std::get<3>(seqLengthAndStart_[inlinks_id][j]) == i) {
+          ++numSeqs;
           // subseqstart: the cpuSubSequenceStartPositions of this subseq
           int subSeqStart = std::get<1>(seqLengthAndStart_[inlinks_id][j]);
           int subSeqLength = std::get<0>(seqLengthAndStart_[inlinks_id][j]);
@@ -650,6 +653,7 @@ void RecurrentGradientMachine::createInFrameInfo(int inlinks_id,
       }
       inlink_info->idIndex.push_back(allIds.size());
       inlink_info->seqStartPosIndex.push_back(sequenceStartPositions.size());
+      numSeqs_.push_back(numSeqs);
     }
     // inFrameLine create sequenceStartPositions one time
     CHECK_EQ(sequenceStartPositions.size(),
@@ -659,16 +663,19 @@ void RecurrentGradientMachine::createInFrameInfo(int inlinks_id,
     createSeqPos(sequenceStartPositions, &inlink_info->sequenceStartPositions);
   } else {  // for scatterAgentLayer
     for (int i = 0; i < maxSequenceLength_; ++i) {
+      int numSeqs = 0;
       for (size_t j = 0; j < numSequences; ++j) {
         int seqLength = std::get<0>(seqLengthAndStart_[inlinks_id][j]);
         if (i >= seqLength) {
           break;
         }
+        ++numSeqs;
         int seqStart = std::get<1>(seqLengthAndStart_[inlinks_id][j]);
         allIds.push_back(reversed_ ? (seqStart + seqLength - 1 - i)
                                    : (seqStart + i));
       }
       inlink_info->idIndex.push_back(allIds.size());
+      numSeqs_.push_back(numSeqs);
     }
   }
 
diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.h b/paddle/gserver/gradientmachines/RecurrentGradientMachine.h
index 4ca545b50..d9901ad81 100644
--- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.h
+++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.h
@@ -333,6 +333,10 @@ protected:
   };
   std::vector<Info> info_;
 
+  // numSeqs_[i] is the number of sequences that are longer than i (for
+  // sequence data) or have more than i subsequences (for subsequence data)
+  std::vector<int> numSeqs_;
+
   // each inlinks has a "std::vector<std::tuple<int, int, int, int>>" denotes
   // its sequence info:
   // if hasSubSeq, tuple of (subSeqLength, subSeqStart, seqIndex, subSeqIndex)
diff --git a/paddle/gserver/tests/Sequence/dummy.list b/paddle/gserver/tests/Sequence/dummy.list
new file mode 100644
index 000000000..0e52665e1
--- /dev/null
+++ b/paddle/gserver/tests/Sequence/dummy.list
@@ -0,0 +1 @@
+dummy_file_no_use
diff --git a/paddle/gserver/tests/rnn_data_provider.py b/paddle/gserver/tests/rnn_data_provider.py
new file mode 100644
index 000000000..85a83554c
--- /dev/null
+++ b/paddle/gserver/tests/rnn_data_provider.py
@@ -0,0 +1,35 @@
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle.trainer.PyDataProvider2 import *
+
+data = [
+    [[[1, 3, 2], [4, 5, 2]], 0],
+    [[[0, 2], [2, 5], [0, 1, 2]], 1],
+]
+
+
+@provider(input_types=[integer_value_sub_sequence(10),
+                       integer_value(2)])
+def process_subseq(settings, file_name):
+    for d in data:
+        yield d
+
+
+@provider(input_types=[integer_value_sequence(10),
+                       integer_value(2)])
+def process_seq(settings, file_name):
+    for d in data:
+        seq = []
+        for subseq in d[0]:
+            seq += subseq
+        yield seq, d[1]
diff --git a/paddle/gserver/tests/sequenceGen.py b/paddle/gserver/tests/sequenceGen.py
index e4727e472..cb83d79d7 100644
--- a/paddle/gserver/tests/sequenceGen.py
+++ b/paddle/gserver/tests/sequenceGen.py
@@ -1,6 +1,3 @@
-#!/usr/bin/env python
-#coding=utf-8
-
 # Copyright (c) 2016 Baidu, Inc. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/paddle/gserver/tests/sequence_nest_rnn.conf b/paddle/gserver/tests/sequence_nest_rnn.conf
new file mode 100644
index 000000000..03eef7a21
--- /dev/null
+++ b/paddle/gserver/tests/sequence_nest_rnn.conf
@@ -0,0 +1,75 @@
+#edit-mode: -*- python -*-
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle.trainer_config_helpers import *
+
+######################## data source ################################
+define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list',
+                        test_list=None,
+                        module='rnn_data_provider',
+                        obj='process_subseq')
+
+
+settings(batch_size=2, learning_rate=0.01)
+######################## network configure ################################
+dict_dim = 10
+word_dim = 8
+hidden_dim = 8
+label_dim = 3
+
+data = data_layer(name="word", size=dict_dim)
+
+emb = embedding_layer(input=data, size=word_dim)
+
+# This hierarchical RNN is designed to be equivalent to the simple RNN in
+# sequence_rnn.conf
+
+def outer_step(x):
+    outer_mem = memory(name="outer_rnn_state", size=hidden_dim)
+
+    def inner_step(y):
+        inner_mem = memory(name="inner_rnn_state",
+                           size=hidden_dim,
+                           boot_layer=outer_mem)
+        return fc_layer(input=[y, inner_mem],
+                        size=hidden_dim,
+                        act=TanhActivation(),
+                        bias_attr=True,
+                        name="inner_rnn_state")
+
+    inner_rnn_output = recurrent_group(
+        step=inner_step,
+        input=x)
+    last = last_seq(input=inner_rnn_output, name="outer_rnn_state")
+
+    # "return last" should also work. But currently RecurrentGradientMachine
+    # does not handle it correctly. The current implementation requires that
+    # all the out links are from sequences, but it does not report an error
+    # when an out link is not a sequence.
+    return inner_rnn_output
+
+out = recurrent_group(
+    step=outer_step,
+    input=SubsequenceInput(emb))
+
+value_printer_evaluator(input=out)
+
+rep = last_seq(input=out)
+prob = fc_layer(size=label_dim,
+                input=rep,
+                act=SoftmaxActivation(),
+                bias_attr=True)
+
+outputs(classification_cost(input=prob,
+                            label=data_layer(name="label", size=label_dim)))
diff --git a/paddle/gserver/tests/sequence_rnn.conf b/paddle/gserver/tests/sequence_rnn.conf
new file mode 100644
index 000000000..73e7a5935
--- /dev/null
+++ b/paddle/gserver/tests/sequence_rnn.conf
@@ -0,0 +1,57 @@
+#edit-mode: -*- python -*-
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle.trainer_config_helpers import *
+
+######################## data source ################################
+define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list',
+                        test_list=None,
+                        module='rnn_data_provider',
+                        obj='process_seq')
+
+
+settings(batch_size=2, learning_rate=0.01)
+######################## network configure ################################
+dict_dim = 10
+word_dim = 8
+hidden_dim = 8
+label_dim = 3
+
+data = data_layer(name="word", size=dict_dim)
+
+emb = embedding_layer(input=data, size=word_dim)
+
+def step(y):
+    mem = memory(name="rnn_state", size=hidden_dim)
+    return fc_layer(input=[y, mem],
+                    size=hidden_dim,
+                    act=TanhActivation(),
+                    bias_attr=True,
+                    name="rnn_state")
+
+out = recurrent_group(
+    step=step,
+    input=emb)
+
+value_printer_evaluator(input=out)
+
+rep = last_seq(input=out)
+prob = fc_layer(size=label_dim,
+                input=rep,
+                act=SoftmaxActivation(),
+                bias_attr=True)
+
+outputs(classification_cost(input=prob,
+                            label=data_layer(name="label", size=label_dim)))
diff --git a/paddle/gserver/tests/test_RecurrentGradientMachine.cpp b/paddle/gserver/tests/test_RecurrentGradientMachine.cpp
index 35d6ee7f4..f6989e9a6 100644
--- a/paddle/gserver/tests/test_RecurrentGradientMachine.cpp
+++ b/paddle/gserver/tests/test_RecurrentGradientMachine.cpp
@@ -21,6 +21,8 @@ limitations under the License. */
 
 #include <gtest/gtest.h>
 #include <vector>
+P_DECLARE_int32(seed);
+
 using namespace paddle;  // NOLINT
 using namespace std;     // NOLINT
 class TrainerForTest : public paddle::Trainer {
@@ -68,7 +70,9 @@ void CalCost(const string& conf, const string& dir, real* cost,
   CpuVector vecMomentum(dim);
 
   // vecW needs to be assigned, otherwise the variable is an uncertain value.
-  vecW.zeroMem();
+
+  *ThreadLocalRand::getSeed() = FLAGS_seed;
+  vecW.randnorm(0, 0.1);
 
   trainer.startTrain();
   for (int i = 0; i < num_passes; ++i) {
@@ -88,15 +92,13 @@ void CalCost(const string& conf, const string& dir, real* cost,
   rmDir(dir.c_str());
 }
 
-TEST(RecurrentGradientMachine, HasSubSequence) {
+void test(const string& conf1, const string& conf2) {
   int num_passes = 5;
 
   real* cost1 = new real[num_passes];
-  const string conf1 = "gserver/tests/sequence_layer_group.conf";
   const string dir1 = "gserver/tests/t1";
   CalCost(conf1, dir1, cost1, num_passes);
 
   real* cost2 = new real[num_passes];
-  const string conf2 = "gserver/tests/sequence_nest_layer_group.conf";
   const string dir2 = "gserver/tests/t2";
   CalCost(conf2, dir2, cost2, num_passes);
 
@@ -109,6 +111,17 @@ void CalCost(const string& conf, const string& dir, real* cost,
   delete[] cost2;
 }
 
+TEST(RecurrentGradientMachine, HasSubSequence) {
+  test("gserver/tests/sequence_layer_group.conf",
+       "gserver/tests/sequence_nest_layer_group.conf");
+}
+
+TEST(RecurrentGradientMachine, rnn) {
+  test("gserver/tests/sequence_rnn.conf",
+       "gserver/tests/sequence_nest_rnn.conf");
+}
+
+
 int main(int argc, char** argv) {
   if (paddle::version::isWithPyDataProvider()) {
     if (!paddle::version::isWithGpu()) {
diff --git a/paddle/parameter/Argument.h b/paddle/parameter/Argument.h
index 3cab87c70..5474a05b8 100644
--- a/paddle/parameter/Argument.h
+++ b/paddle/parameter/Argument.h
@@ -255,6 +255,15 @@ struct Argument {
 
   /*
    Get Sequence Length, startPositions and max Length according to input
+   1. For sequence data:
+      Each tuple is (seq_length, seq_start, seq_id, seq_id)
+      The tuples are sorted in decreasing order of seq_length
+      *maxSequenceLength is the maximal sequence length
+
+   2. For subsequence data:
+      Each tuple is (subseq_length, subseq_start, seq_id, subseq_id)
+      The tuples are not sorted. They are in the original order.
+      *maxSequenceLength is the maximal number of subsequences in each sequence.
   */
   void getSeqLengthAndStart(
       std::vector<std::tuple<int, int, int, int>>* seqLengthAndStart,
--
GitLab
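
Notes on the change.

The height passed to NeuralNetwork::connect is the point of the fix: at time step i, the memory agent must cover exactly the sequences that are still active, which is what numSeqs_[i] records in createInFrameInfo. A minimal plain-Python sketch of that bookkeeping (a hypothetical helper, not part of the patch) mirrors the scatterAgentLayer branch, assuming sequence lengths come sorted in decreasing order as seqLengthAndStart_ does for plain sequence data:

def num_active_per_step(seq_lengths):
    # seq_lengths: lengths of the sequences in a batch, sorted in
    # decreasing order. Returns, for each time step i, how many
    # sequences are still active, i.e. the value stored in numSeqs_[i]
    # and used as the agent height.
    num_seqs = []
    for i in range(max(seq_lengths)):
        active = 0
        for length in seq_lengths:
            if i >= length:
                break  # sorted, so no later sequence is still active
            active += 1
        num_seqs.append(active)
    return num_seqs

# Two sequences of lengths 3 and 2: both are active at steps 0 and 1,
# only the longer one at step 2.
assert num_active_per_step([3, 2]) == [2, 2, 1]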
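sequence_nest_rnn.conf and sequence_rnn.conf are designed to compute the same thing: booting the inner memory from the outer memory carries state across subsequence boundaries exactly as the flat RNN carries it across the concatenated sequence, which is why the new test can compare their costs. A toy check of that equivalence (plain Python with a stand-in update function, not Paddle code):

def run_rnn(xs, state, step):
    for x in xs:
        state = step(x, state)
    return state

step = lambda x, s: 0.5 * s + x  # stand-in for the fc_layer update

subseqs = [[1, 3, 2], [4, 5, 2]]  # first sample in rnn_data_provider.py

# Flat RNN over the concatenated sequence (sequence_rnn.conf).
flat_state = run_rnn(sum(subseqs, []), 0.0, step)

# Nested RNN (sequence_nest_rnn.conf): each inner RNN is booted with
# the outer memory, i.e. the last state of the previous subsequence.
outer_state = 0.0
for sub in subseqs:
    outer_state = run_rnn(sub, outer_state, step)

assert abs(flat_state - outer_state) < 1e-12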
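The test change replaces the zero initialization of vecW with a seeded random one, so both configurations start from identical, non-degenerate weights and their per-pass costs can be compared exactly. The same strategy, sketched in plain Python (an illustration, not the C++ test):

import random

def init_weights(dim, seed):
    # Deterministic draw from N(0, 0.1), mirroring
    # *ThreadLocalRand::getSeed() = FLAGS_seed; vecW.randnorm(0, 0.1);
    rng = random.Random(seed)
    return [rng.gauss(0.0, 0.1) for _ in range(dim)]

# The same seed yields the same initialization for both runs, so any
# difference in cost comes from the network configuration alone.
assert init_weights(8, 1234) == init_weights(8, 1234)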