提交 9a9de924 编写于 作者: X xuwei06 提交者: Yu Yang

Correctly handle memory in RecurrentGradientMachine for hierarchical RNN

Change-Id: I8e0a8ea6fc2760652d9c76440a539c90860062d3
上级 699d5f26
...@@ -519,7 +519,6 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs, ...@@ -519,7 +519,6 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
dynamic_cast<GatherAgentLayer*>(outFrameLine.agentLayer.get()); dynamic_cast<GatherAgentLayer*>(outFrameLine.agentLayer.get());
gatherAgent->addRealLayer(outFrameLine.frames[i]); gatherAgent->addRealLayer(outFrameLine.frames[i]);
} }
// connect memory links // connect memory links
// Adopt info_[0].idIndex because seq which has_subseq=True // Adopt info_[0].idIndex because seq which has_subseq=True
// doesn't support Memory with !hasSubseq bootlayer; // doesn't support Memory with !hasSubseq bootlayer;
...@@ -529,7 +528,7 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs, ...@@ -529,7 +528,7 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
NeuralNetwork::connect( NeuralNetwork::connect(
memoryFrameLine.agents[i], memoryFrameLine.agents[i],
i == 0 ? memoryFrameLine.bootLayer : memoryFrameLine.frames[i - 1], i == 0 ? memoryFrameLine.bootLayer : memoryFrameLine.frames[i - 1],
idSize /*height of agent*/); numSeqs_[i] /*height of agent*/);
} }
} }
...@@ -622,6 +621,8 @@ void RecurrentGradientMachine::createInFrameInfo(int inlinks_id, ...@@ -622,6 +621,8 @@ void RecurrentGradientMachine::createInFrameInfo(int inlinks_id,
// numSequences: # samples(sequences) in a batch // numSequences: # samples(sequences) in a batch
size_t numSequences = input.getNumSequences(); size_t numSequences = input.getNumSequences();
std::vector<int> allIds; std::vector<int> allIds;
numSeqs_.clear();
Info* inlink_info = &info_[inlinks_id]; Info* inlink_info = &info_[inlinks_id];
inlink_info->idIndex.clear(); inlink_info->idIndex.clear();
inlink_info->idIndex.push_back(0); // first idIndex = 0 inlink_info->idIndex.push_back(0); // first idIndex = 0
...@@ -634,10 +635,12 @@ void RecurrentGradientMachine::createInFrameInfo(int inlinks_id, ...@@ -634,10 +635,12 @@ void RecurrentGradientMachine::createInFrameInfo(int inlinks_id,
// maxSequenceLength_: max number of sentences(subseq) in allsamples // maxSequenceLength_: max number of sentences(subseq) in allsamples
for (int i = 0; i < maxSequenceLength_; ++i) { for (int i = 0; i < maxSequenceLength_; ++i) {
sequenceStartPositions.push_back(0); // first element = 0 sequenceStartPositions.push_back(0); // first element = 0
int numSeqs = 0;
for (size_t j = 0; j < numSubSequences; ++j) { // for each sentence for (size_t j = 0; j < numSubSequences; ++j) { // for each sentence
// seqLengthAndStart_[inlinks_id][j]: // seqLengthAndStart_[inlinks_id][j]:
// a 4-tuple including <subseqlen, subseqstart, seqid, subseqid> // a 4-tuple including <subseqlen, subseqstart, seqid, subseqid>
if (std::get<3>(seqLengthAndStart_[inlinks_id][j]) == i) { if (std::get<3>(seqLengthAndStart_[inlinks_id][j]) == i) {
++numSeqs;
// subseqstart: the cpuSubSequenceStartPositions of this subseq // subseqstart: the cpuSubSequenceStartPositions of this subseq
int subSeqStart = std::get<1>(seqLengthAndStart_[inlinks_id][j]); int subSeqStart = std::get<1>(seqLengthAndStart_[inlinks_id][j]);
int subSeqLength = std::get<0>(seqLengthAndStart_[inlinks_id][j]); int subSeqLength = std::get<0>(seqLengthAndStart_[inlinks_id][j]);
...@@ -650,6 +653,7 @@ void RecurrentGradientMachine::createInFrameInfo(int inlinks_id, ...@@ -650,6 +653,7 @@ void RecurrentGradientMachine::createInFrameInfo(int inlinks_id,
} }
inlink_info->idIndex.push_back(allIds.size()); inlink_info->idIndex.push_back(allIds.size());
inlink_info->seqStartPosIndex.push_back(sequenceStartPositions.size()); inlink_info->seqStartPosIndex.push_back(sequenceStartPositions.size());
numSeqs_.push_back(numSeqs);
} }
// inFrameLine create sequenceStartPositions one time // inFrameLine create sequenceStartPositions one time
CHECK_EQ(sequenceStartPositions.size(), CHECK_EQ(sequenceStartPositions.size(),
...@@ -659,16 +663,19 @@ void RecurrentGradientMachine::createInFrameInfo(int inlinks_id, ...@@ -659,16 +663,19 @@ void RecurrentGradientMachine::createInFrameInfo(int inlinks_id,
createSeqPos(sequenceStartPositions, &inlink_info->sequenceStartPositions); createSeqPos(sequenceStartPositions, &inlink_info->sequenceStartPositions);
} else { // for scatterAgentLayer } else { // for scatterAgentLayer
for (int i = 0; i < maxSequenceLength_; ++i) { for (int i = 0; i < maxSequenceLength_; ++i) {
int numSeqs = 0;
for (size_t j = 0; j < numSequences; ++j) { for (size_t j = 0; j < numSequences; ++j) {
int seqLength = std::get<0>(seqLengthAndStart_[inlinks_id][j]); int seqLength = std::get<0>(seqLengthAndStart_[inlinks_id][j]);
if (i >= seqLength) { if (i >= seqLength) {
break; break;
} }
++numSeqs;
int seqStart = std::get<1>(seqLengthAndStart_[inlinks_id][j]); int seqStart = std::get<1>(seqLengthAndStart_[inlinks_id][j]);
allIds.push_back(reversed_ ? (seqStart + seqLength - 1 - i) allIds.push_back(reversed_ ? (seqStart + seqLength - 1 - i)
: (seqStart + i)); : (seqStart + i));
} }
inlink_info->idIndex.push_back(allIds.size()); inlink_info->idIndex.push_back(allIds.size());
numSeqs_.push_back(numSeqs);
} }
} }
......
...@@ -333,6 +333,10 @@ protected: ...@@ -333,6 +333,10 @@ protected:
}; };
std::vector<Info> info_; std::vector<Info> info_;
// numSeqs_[i] is the number of sequences which are longer than i (for
// sequence data) or which have more than i subsequences (for subsequence data)
std::vector<int> numSeqs_;
// each inlinks has a "std::vector<std::tuple<int, int, int, int>>" denotes // each inlinks has a "std::vector<std::tuple<int, int, int, int>>" denotes
// its sequence info: // its sequence info:
// if hasSubSeq, tuple of (subSeqLength, subSeqStart, seqIndex, subSeqIndex) // if hasSubSeq, tuple of (subSeqLength, subSeqStart, seqIndex, subSeqIndex)
......
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer.PyDataProvider2 import *
# Toy dataset used by the process_subseq and process_seq providers below.
# Each sample is [subsequences, label]: a list of integer subsequences
# followed by an integer label.
data = [
[[[1, 3, 2], [4, 5, 2]], 0],
[[[0, 2], [2, 5], [0, 1, 2]], 1],
]
# Nested-sequence provider: yields every sample of `data` unchanged, i.e. as
# its [subsequences, label] pair. `settings` and `file_name` are not used.
@provider(input_types=[integer_value_sub_sequence(10), integer_value(2)])
def process_subseq(settings, file_name):
    for sample in data:
        yield sample
# Flat-sequence provider: for every sample of `data`, concatenates its
# subsequences (in order) into one sequence and yields (sequence, label).
# `settings` and `file_name` are not used.
@provider(input_types=[integer_value_sequence(10), integer_value(2)])
def process_seq(settings, file_name):
    for sample in data:
        subsequences, label = sample[0], sample[1]
        flat = [token for subsequence in subsequences for token in subsequence]
        yield flat, label
#!/usr/bin/env python
#coding=utf-8
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved # Copyright (c) 2016 Baidu, Inc. All Rights Reserved
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
......
#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
######################## data source ################################
# Training samples come from the `process_subseq` object of the
# `rnn_data_provider` module; no test list is configured.
define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list',
test_list=None,
module='rnn_data_provider',
obj='process_subseq')
settings(batch_size=2, learning_rate=0.01)
######################## network configure ################################
# Sizes shared by the layers defined below.
dict_dim = 10
word_dim = 8
hidden_dim = 8
# NOTE(review): label_dim is 3 while the process_subseq provider declares its
# label as integer_value(2) -- confirm the mismatch is intended.
label_dim = 3
# Input layer "word" (size dict_dim) feeding an embedding of size word_dim.
data = data_layer(name="word", size=dict_dim)
emb = embedding_layer(input=data, size=word_dim)
# This hierarchical RNN is designed to be equivalent to the simple RNN in
# sequence_rnn.conf
def outer_step(x):
    # Step function of the outer recurrent group. It runs an inner recurrent
    # group over `x` and returns that inner group's output.

    # Memory named "outer_rnn_state"; presumably linked by name to the
    # last_seq layer created at the end of this step.
    outer_state = memory(name="outer_rnn_state", size=hidden_dim)

    def inner_step(y):
        # The inner memory is booted from the outer memory (boot_layer), and
        # shares its name with the fc_layer returned below.
        inner_state = memory(
            name="inner_rnn_state",
            size=hidden_dim,
            boot_layer=outer_state)
        return fc_layer(
            input=[y, inner_state],
            size=hidden_dim,
            act=TanhActivation(),
            bias_attr=True,
            name="inner_rnn_state")

    inner_out = recurrent_group(step=inner_step, input=x)

    # Called only for its side effect of creating a layer named
    # "outer_rnn_state"; the returned layer object itself is not used.
    last_seq(input=inner_out, name="outer_rnn_state")

    # Returning that last_seq layer should also work. But currently
    # RecurrentGradientMachine does not handle it correctly. The current
    # implementation requires that all the out links are from sequences.
    # However, it does not report an error when the out links are not
    # sequences.
    return inner_out
# Outer recurrent group: applies outer_step to the embedding, which is wrapped
# in SubsequenceInput.
out = recurrent_group(
step=outer_step,
input=SubsequenceInput(emb))
# Attach a value printer evaluator to the recurrent group's output.
value_printer_evaluator(input=out)
# last_seq of the group output is the input to the classifier below.
rep = last_seq(input=out)
# Softmax classifier with label_dim outputs.
prob = fc_layer(size=label_dim,
input=rep,
act=SoftmaxActivation(),
bias_attr=True)
# The network output is the classification cost of `prob` against the "label"
# data layer.
outputs(classification_cost(input=prob,
label=data_layer(name="label", size=label_dim)))
#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
######################## data source ################################
# Training samples come from the `process_seq` object of the
# `rnn_data_provider` module; no test list is configured.
define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list',
test_list=None,
module='rnn_data_provider',
obj='process_seq')
settings(batch_size=2, learning_rate=0.01)
######################## network configure ################################
# Sizes shared by the layers defined below.
dict_dim = 10
word_dim = 8
hidden_dim = 8
# NOTE(review): label_dim is 3 while the process_seq provider declares its
# label as integer_value(2) -- confirm the mismatch is intended.
label_dim = 3
# Input layer "word" (size dict_dim) feeding an embedding of size word_dim.
data = data_layer(name="word", size=dict_dim)
emb = embedding_layer(input=data, size=word_dim)
def step(y):
    # Step function of the recurrent group: a tanh fc_layer over the current
    # input `y` and the memory "rnn_state". The layer shares that name with
    # the memory.
    prev_state = memory(name="rnn_state", size=hidden_dim)
    new_state = fc_layer(
        input=[y, prev_state],
        size=hidden_dim,
        act=TanhActivation(),
        bias_attr=True,
        name="rnn_state")
    return new_state
# Recurrent group: applies step to the embedding.
out = recurrent_group(
step=step,
input=emb)
# Attach a value printer evaluator to the recurrent group's output.
value_printer_evaluator(input=out)
# last_seq of the group output is the input to the classifier below.
rep = last_seq(input=out)
# Softmax classifier with label_dim outputs.
prob = fc_layer(size=label_dim,
input=rep,
act=SoftmaxActivation(),
bias_attr=True)
# The network output is the classification cost of `prob` against the "label"
# data layer.
outputs(classification_cost(input=prob,
label=data_layer(name="label", size=label_dim)))
...@@ -21,6 +21,8 @@ limitations under the License. */ ...@@ -21,6 +21,8 @@ limitations under the License. */
#include <paddle/trainer/TrainerInternal.h> #include <paddle/trainer/TrainerInternal.h>
#include <paddle/gserver/gradientmachines/GradientMachine.h> #include <paddle/gserver/gradientmachines/GradientMachine.h>
P_DECLARE_int32(seed);
using namespace paddle; // NOLINT using namespace paddle; // NOLINT
using namespace std; // NOLINT using namespace std; // NOLINT
class TrainerForTest : public paddle::Trainer { class TrainerForTest : public paddle::Trainer {
...@@ -68,7 +70,9 @@ void CalCost(const string& conf, const string& dir, real* cost, ...@@ -68,7 +70,9 @@ void CalCost(const string& conf, const string& dir, real* cost,
CpuVector vecMomentum(dim); CpuVector vecMomentum(dim);
// vecW needs to be assigned, otherwise the variable is an uncertain value. // vecW needs to be assigned, otherwise the variable is an uncertain value.
vecW.zeroMem();
*ThreadLocalRand::getSeed() = FLAGS_seed;
vecW.randnorm(0, 0.1);
trainer.startTrain(); trainer.startTrain();
for (int i = 0; i < num_passes; ++i) { for (int i = 0; i < num_passes; ++i) {
...@@ -88,15 +92,13 @@ void CalCost(const string& conf, const string& dir, real* cost, ...@@ -88,15 +92,13 @@ void CalCost(const string& conf, const string& dir, real* cost,
rmDir(dir.c_str()); rmDir(dir.c_str());
} }
TEST(RecurrentGradientMachine, HasSubSequence) { void test(const string& conf1, const string& conf2) {
int num_passes = 5; int num_passes = 5;
real* cost1 = new real[num_passes]; real* cost1 = new real[num_passes];
const string conf1 = "gserver/tests/sequence_layer_group.conf";
const string dir1 = "gserver/tests/t1"; const string dir1 = "gserver/tests/t1";
CalCost(conf1, dir1, cost1, num_passes); CalCost(conf1, dir1, cost1, num_passes);
real* cost2 = new real[num_passes]; real* cost2 = new real[num_passes];
const string conf2 = "gserver/tests/sequence_nest_layer_group.conf";
const string dir2 = "gserver/tests/t2"; const string dir2 = "gserver/tests/t2";
CalCost(conf2, dir2, cost2, num_passes); CalCost(conf2, dir2, cost2, num_passes);
...@@ -109,6 +111,17 @@ TEST(RecurrentGradientMachine, HasSubSequence) { ...@@ -109,6 +111,17 @@ TEST(RecurrentGradientMachine, HasSubSequence) {
delete[] cost2; delete[] cost2;
} }
// Runs the flat config sequence_layer_group.conf and the nested config
// sequence_nest_layer_group.conf through test(), which computes per-pass costs
// for both (presumably to check that they agree -- the comparison is not
// visible here).
TEST(RecurrentGradientMachine, HasSubSequence) {
test("gserver/tests/sequence_layer_group.conf",
"gserver/tests/sequence_nest_layer_group.conf");
}
// Runs the flat config sequence_rnn.conf and the nested config
// sequence_nest_rnn.conf through test(), which computes per-pass costs for
// both (presumably to check that they agree -- the comparison is not visible
// here).
TEST(RecurrentGradientMachine, rnn) {
test("gserver/tests/sequence_rnn.conf",
"gserver/tests/sequence_nest_rnn.conf");
}
int main(int argc, char** argv) { int main(int argc, char** argv) {
if (paddle::version::isWithPyDataProvider()) { if (paddle::version::isWithPyDataProvider()) {
if (!paddle::version::isWithGpu()) { if (!paddle::version::isWithGpu()) {
......
...@@ -255,6 +255,15 @@ struct Argument { ...@@ -255,6 +255,15 @@ struct Argument {
/* /*
Get Sequence Length, startPositions and max Length according to input Get Sequence Length, startPositions and max Length according to input
1. For sequence data:
Each tuple is (seq_length, seq_start, seq_id, seq_id)
The tuples are sorted according to seq_length or subseq_length
*maxSequenceLength is the maximal sequence length
2. For subsequence data:
Each tuple is (subseq_length, subseq_start, seq_id, subseq_id)
The tuples are not sorted. They are in the original order.
*maxSequenceLength is the maximal number of subsequences in each sequence.
*/ */
void getSeqLengthAndStart( void getSeqLengthAndStart(
std::vector<std::tuple<int, int, int, int>>* seqLengthAndStart, std::vector<std::tuple<int, int, int, int>>* seqLengthAndStart,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册