From 1c2ebe467bb3c88b858f5b583ab48c8944c7f0f2 Mon Sep 17 00:00:00 2001
From: Zrachel
Date: Sat, 8 Oct 2016 11:28:57 +0800
Subject: [PATCH] add interface and test of RecurrentGradientMachine (#156)

* add interface and unit test of RecurrentGradientMachine for multiple
  Subsequence inlinks with unequal token lengths
---
 paddle/gserver/tests/rnn_data_provider.py     |  30 ++++-
 ...ce_nest_rnn_multi_unequalength_inputs.conf | 106 ++++++++++++++++++
 ...equence_rnn_multi_unequalength_inputs.conf |  75 +++++++++++++
 .../tests/test_RecurrentGradientMachine.cpp   |   9 ++
 .../paddle/trainer_config_helpers/layers.py   |  30 ++++-
 5 files changed, 246 insertions(+), 4 deletions(-)
 create mode 100644 paddle/gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.conf
 create mode 100644 paddle/gserver/tests/sequence_rnn_multi_unequalength_inputs.conf

diff --git a/paddle/gserver/tests/rnn_data_provider.py b/paddle/gserver/tests/rnn_data_provider.py
index 347d5891b90..5c3b062309c 100644
--- a/paddle/gserver/tests/rnn_data_provider.py
+++ b/paddle/gserver/tests/rnn_data_provider.py
@@ -21,7 +21,7 @@ data = [
 
 @provider(input_types=[integer_value_sub_sequence(10),
-                       integer_value(2)],
+                       integer_value(3)],
           should_shuffle=False)
 def process_subseq(settings, file_name):
     for d in data:
@@ -29,7 +29,7 @@ def process_subseq(settings, file_name):
 
 @provider(input_types=[integer_value_sequence(10),
-                       integer_value(2)],
+                       integer_value(3)],
           should_shuffle=False)
 def process_seq(settings, file_name):
     for d in data:
@@ -37,3 +37,29 @@ def process_seq(settings, file_name):
         for subseq in d[0]:
             seq += subseq
         yield seq, d[1]
+
+# Two samples; each holds two sub-sequence inputs and a class label.
+data2 = [
+    [[[1, 2], [4, 5, 2]], [[5, 4, 1], [3, 1]], 0],
+    [[[0, 2], [2, 5], [0, 1, 2]], [[1, 5], [4], [2, 3, 6, 1]], 1],
+]
+
+
+@provider(input_types=[integer_value_sub_sequence(10),
+                       integer_value_sub_sequence(10),
+                       integer_value(2)],
+          should_shuffle=False)
+def process_unequalength_subseq(settings, file_name):
+    for d in data2:
+        yield d
+
+
+@provider(input_types=[integer_value_sequence(10),
+                       integer_value_sequence(10),
+                       integer_value(2)],
+          should_shuffle=False)
+def process_unequalength_seq(settings, file_name):
+    for d in data2:
+        # Flatten each nested input into a plain sequence of tokens.
+        words1 = reduce(lambda x, y: x + y, d[0])
+        words2 = reduce(lambda x, y: x + y, d[1])
+        yield words1, words2, d[2]

diff --git a/paddle/gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.conf b/paddle/gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.conf
new file mode 100644
index 00000000000..d0b9450f4b9
--- /dev/null
+++ b/paddle/gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.conf
@@ -0,0 +1,106 @@
+#edit-mode: -*- python -*-
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
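+
+# Note on the input data (see data2 in rnn_data_provider.py above): each
+# sample carries two sub-sequence inputs with unequal token lengths, e.g.
+#   word1: [[1, 2], [4, 5, 2]]    # 2 subsequences with 2 and 3 tokens
+#   word2: [[5, 4, 1], [3, 1]]    # 2 subsequences with 3 and 2 tokens
+# The two inlinks always contain the same number of subsequences, while the
+# token counts of corresponding subsequences differ.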
+
+from paddle.trainer_config_helpers import *
+
+######################## data source ################################
+define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list',
+                        test_list=None,
+                        module='rnn_data_provider',
+                        obj='process_unequalength_subseq')
+
+settings(batch_size=2, learning_rate=0.01)
+
+######################## network configuration ################################
+dict_dim = 10
+word_dim = 8
+hidden_dim = 8
+label_dim = 2
+
+speaker1 = data_layer(name="word1", size=dict_dim)
+speaker2 = data_layer(name="word2", size=dict_dim)
+
+emb1 = embedding_layer(input=speaker1, size=word_dim)
+emb2 = embedding_layer(input=speaker2, size=word_dim)
+
+# This hierarchical RNN is designed to be equivalent to the simple RNN in
+# sequence_rnn_multi_unequalength_inputs.conf.
+
+def outer_step(x1, x2):
+    outer_mem1 = memory(name="outer_rnn_state1", size=hidden_dim)
+    outer_mem2 = memory(name="outer_rnn_state2", size=hidden_dim)
+
+    def inner_step1(y):
+        inner_mem = memory(name='inner_rnn_state_' + y.name,
+                           size=hidden_dim,
+                           boot_layer=outer_mem1)
+        out = fc_layer(input=[y, inner_mem],
+                       size=hidden_dim,
+                       act=TanhActivation(),
+                       bias_attr=True,
+                       name='inner_rnn_state_' + y.name)
+        return out
+
+    def inner_step2(y):
+        inner_mem = memory(name='inner_rnn_state_' + y.name,
+                           size=hidden_dim,
+                           boot_layer=outer_mem2)
+        out = fc_layer(input=[y, inner_mem],
+                       size=hidden_dim,
+                       act=TanhActivation(),
+                       bias_attr=True,
+                       name='inner_rnn_state_' + y.name)
+        return out
+
+    encoder1 = recurrent_group(step=inner_step1, name='inner1', input=x1)
+    encoder2 = recurrent_group(step=inner_step2, name='inner2', input=x2)
+
+    # These last_seq layers carry the names of the outer memories, so
+    # outer_mem1/outer_mem2 read them at the next outer time step.
+    sentence_last_state1 = last_seq(input=encoder1, name='outer_rnn_state1')
+    sentence_last_state2 = last_seq(input=encoder2, name='outer_rnn_state2')
+
+    encoder1_expand = expand_layer(input=sentence_last_state1,
+                                   expand_as=encoder2)
+
+    return [encoder1_expand, encoder2]
+
+encoder1_rep, encoder2_rep = recurrent_group(
+    name="outer",
+    step=outer_step,
+    input=[SubsequenceInput(emb1), SubsequenceInput(emb2)],
+    targetInlink=emb2)
+
+encoder1_last = last_seq(input=encoder1_rep)
+encoder1_expandlast = expand_layer(input=encoder1_last,
+                                   expand_as=encoder2_rep)
+context = mixed_layer(input=[identity_projection(encoder1_expandlast),
+                             identity_projection(encoder2_rep)],
+                      size=hidden_dim)
+
+rep = last_seq(input=context)
+prob = fc_layer(size=label_dim,
+                input=rep,
+                act=SoftmaxActivation(),
+                bias_attr=True)
+
+outputs(classification_cost(input=prob,
+                            label=data_layer(name="label", size=label_dim)))

diff --git a/paddle/gserver/tests/sequence_rnn_multi_unequalength_inputs.conf b/paddle/gserver/tests/sequence_rnn_multi_unequalength_inputs.conf
new file mode 100644
index 00000000000..28b1cb98cf1
--- /dev/null
+++ b/paddle/gserver/tests/sequence_rnn_multi_unequalength_inputs.conf
@@ -0,0 +1,75 @@
+#edit-mode: -*- python -*-
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle.trainer_config_helpers import *
+
+######################## data source ################################
+define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list',
+                        test_list=None,
+                        module='rnn_data_provider',
+                        obj='process_unequalength_seq')
+
+settings(batch_size=2, learning_rate=0.01)
+
+######################## network configuration ################################
+dict_dim = 10
+word_dim = 8
+hidden_dim = 8
+label_dim = 2
+
+speaker1 = data_layer(name="word1", size=dict_dim)
+speaker2 = data_layer(name="word2", size=dict_dim)
+
+emb1 = embedding_layer(input=speaker1, size=word_dim)
+emb2 = embedding_layer(input=speaker2, size=word_dim)
+
+# This simple RNN is designed to be equivalent to the hierarchical RNN in
+# sequence_nest_rnn_multi_unequalength_inputs.conf.
+
+def step(x1, x2):
+    def calrnn(y):
+        mem = memory(name='rnn_state_' + y.name, size=hidden_dim)
+        out = fc_layer(input=[y, mem],
+                       size=hidden_dim,
+                       act=TanhActivation(),
+                       bias_attr=True,
+                       name='rnn_state_' + y.name)
+        return out
+
+    encoder1 = calrnn(x1)
+    encoder2 = calrnn(x2)
+    return [encoder1, encoder2]
+
+encoder1_rep, encoder2_rep = recurrent_group(
+    name="stepout",
+    step=step,
+    input=[emb1, emb2])
+
+encoder1_last = last_seq(input=encoder1_rep)
+encoder1_expandlast = expand_layer(input=encoder1_last,
+                                   expand_as=encoder2_rep)
+context = mixed_layer(input=[identity_projection(encoder1_expandlast),
+                             identity_projection(encoder2_rep)],
+                      size=hidden_dim)
+
+rep = last_seq(input=context)
+prob = fc_layer(size=label_dim,
+                input=rep,
+                act=SoftmaxActivation(),
+                bias_attr=True)
+
+outputs(classification_cost(input=prob,
+                            label=data_layer(name="label", size=label_dim)))

diff --git a/paddle/gserver/tests/test_RecurrentGradientMachine.cpp b/paddle/gserver/tests/test_RecurrentGradientMachine.cpp
index 550df0a3184..ae7f617371c 100644
--- a/paddle/gserver/tests/test_RecurrentGradientMachine.cpp
+++ b/paddle/gserver/tests/test_RecurrentGradientMachine.cpp
@@ -73,6 +73,7 @@ void CalCost(const string& conf, const string& dir, real* cost,
   *ThreadLocalRand::getSeed() = FLAGS_seed;
   vecW.randnorm(0, 0.1);
+  vecMomentum.randnorm(0, 0.1);
 
   trainer.startTrain();
   for (int i = 0; i < num_passes; ++i) {
@@ -140,6 +141,14 @@ TEST(RecurrentGradientMachine, rnn_multi_input) {
   }
 }
 
+// Check that the simple RNN config and the nested (hierarchical) RNN config
+// compute the same cost for multiple inlinks with unequal token lengths.
+TEST(RecurrentGradientMachine, rnn_multi_unequalength_input) {
+  for (bool useGpu : {false, true}) {
+    test("gserver/tests/sequence_rnn_multi_unequalength_inputs.conf",
+         "gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.conf",
+         1e-6, useGpu);
+  }
+}
+
 int main(int argc, char** argv) {
   if (paddle::version::isWithPyDataProvider()) {
     if (!paddle::version::isWithGpu()) {

diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index c355dc042ac..47db197f422 100644
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -2347,7 +2347,7 @@ class SubsequenceInput(object):
 
 @wrap_name_default("recurrent_group")
-def recurrent_group(step, input, reverse=False, name=None):
+def recurrent_group(step, input, reverse=False, name=None, targetInlink=None):
     """
     Recurrent layer group is an extremely flexible recurrent unit in
     PaddlePaddle.
    As long as the user defines the calculation done within a
@@ -2401,6 +2401,17 @@ def recurrent_group(step, input, reverse=False, name=None):
     :param reverse: If reverse is set true, the recurrent unit will process the
                     input sequence in a reverse order.
     :type reverse: bool
+
+    :param targetInlink: the input layer which shares sequence info with the
+                         layer group's outputs.
+
+                         Param input may specify multiple input layers. When
+                         the inputs are SubsequenceInput, the config should
+                         designate one input layer that shares sequence info
+                         (the number of sentences and the number of words in
+                         each sentence) with all of the layer group's outputs;
+                         targetInlink must be one of the layer group's inputs.
+
+    :type targetInlink: LayerOutput|SubsequenceInput
+
     :return: LayerOutput object.
     :rtype: LayerOutput
     """
@@ -2419,6 +2430,20 @@ def recurrent_group(step, input, reverse=False, name=None):
 
     in_links = filter(is_in_links, input)
 
+    # True iff targetInlink is (or wraps) one of the group's in_links.
+    def targetInlink_in_inlinks():
+        for inlink in in_links:
+            if isinstance(inlink, SubsequenceInput):
+                if targetInlink == inlink.input:
+                    return True
+            elif targetInlink == inlink:
+                return True
+        return False
+
+    assert targetInlink is None or targetInlink_in_inlinks()
+    targetInlinkName = None if targetInlink is None \
+        else targetInlink.name if isinstance(targetInlink, LayerOutput) \
+        else targetInlink.input.name
+
     contains_sub_seq = [False]
 
     def map_in_links(x):
@@ -2430,7 +2455,8 @@ def recurrent_group(step, input, reverse=False, name=None):
     RecurrentLayerGroupWithoutOutLinksBegin(
         name=name,
         in_links=map(map_in_links, in_links),
-        seq_reversed=reverse,
+        seq_reversed=reverse,
+        target_inlinkname=targetInlinkName)
     in_args = []
     for each_input in input:
         assert is_single_input(each_input)
-- 
GitLab
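
Note: a minimal usage sketch of the new targetInlink parameter, distilled
from sequence_nest_rnn_multi_unequalength_inputs.conf above. Layer names and
sizes follow that config; outer_step stands for the step function with the
two inner recurrent_groups defined there.

    from paddle.trainer_config_helpers import *

    emb1 = embedding_layer(input=data_layer(name="word1", size=10), size=8)
    emb2 = embedding_layer(input=data_layer(name="word2", size=10), size=8)

    # Both inlinks are sub-sequences whose token lengths differ, so the
    # group's outputs can share sequence info with only one of them.
    # targetInlink selects that inlink; it must be one of the group's inputs.
    encoder1_rep, encoder2_rep = recurrent_group(
        name="outer",
        step=outer_step,
        input=[SubsequenceInput(emb1), SubsequenceInput(emb2)],
        targetInlink=emb2)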