add interface and test of RecurrentGradientMachine (#156)

* add interface and unittest of RecurrentGradientMachine for the function of multiple Subsequence inlinks with unequal token length

add interface and test of RecurrentGradientMachine (#156)
* add interface and unittest of RecurrentGradientMachine for the function of multiple Subsequence inlinks with unequal token length
1c2ebe46 · Zrachel · luotao1 · 1c09e9d5 · 1c2ebe46 · 1c2ebe46
5 changed file
--- a/paddle/gserver/tests/rnn_data_provider.py
+++ b/paddle/gserver/tests/rnn_data_provider.py
@@ -21,7 +21,7 @@ data = [


 @provider(input_types=[integer_value_sub_sequence(10),
-                       integer_value(2)],
+                       integer_value(3)],
          should_shuffle=False)
 def process_subseq(settings, file_name):
    for d in data:
@@ -29,7 +29,7 @@ def process_subseq(settings, file_name):


 @provider(input_types=[integer_value_sequence(10),
-                       integer_value(2)],
+                       integer_value(3)],
          should_shuffle=False)
 def process_seq(settings, file_name):
    for d in data:
@@ -37,3 +37,29 @@ def process_seq(settings, file_name):
        for subseq in d[0]:
            seq += subseq
        yield seq, d[1]
+
+# Each sample is [sub-sequences of input 1, sub-sequences of input 2, label].
+# Within a sample both inputs hold the same number of sub-sequences, but the
+# number of tokens in matching sub-sequences differs.
+data2 = [
+    [
+        [[1, 2], [4, 5, 2]],
+        [[5, 4, 1], [3, 1]],
+        0,
+    ],
+    [
+        [[0, 2], [2, 5], [0, 1, 2]],
+        [[1, 5], [4], [2, 3, 6, 1]],
+        1,
+    ],
+]
+
+@provider(
+    input_types=[
+        integer_value_sub_sequence(10),
+        integer_value_sub_sequence(10),
+        integer_value(2),
+    ],
+    should_shuffle=False)
+def process_unequalength_subseq(settings, file_name):
+    """Yield every data2 sample unchanged, keeping its nested sub-sequences.
+
+    Neither settings nor file_name is read; all samples come from data2.
+    """
+    for sample in data2:
+        yield sample
+
+
+@provider(
+    input_types=[
+        integer_value_sequence(10),
+        integer_value_sequence(10),
+        integer_value(2),
+    ],
+    should_shuffle=False)
+def process_unequalength_seq(settings, file_name):
+    """Yield data2 samples with each input's sub-sequences joined together.
+
+    Every yielded item is (tokens of input 1, tokens of input 2, label), where
+    each token list is that input's sub-sequences concatenated in order.
+    Neither settings nor file_name is read.
+    """
+    def concat(head, tail):
+        return head + tail
+
+    for sample in data2:
+        first_tokens = reduce(concat, sample[0])
+        second_tokens = reduce(concat, sample[1])
+        yield first_tokens, second_tokens, sample[2]
+
+
--- a/paddle/gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.conf
+++ b/paddle/gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.conf
+#edit-mode: -*- python -*-
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle.trainer_config_helpers import *
+
+######################## data source ################################
+# Samples are produced by process_unequalength_subseq in rnn_data_provider;
+# no test list is configured.
+define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list',
+                        test_list=None,
+                        module='rnn_data_provider',
+                        obj='process_unequalength_subseq')
+
+
+settings(batch_size=2, learning_rate=0.01)
+######################## network configure ################################
+# dict_dim sizes both word inputs, word_dim the embeddings, hidden_dim the
+# recurrent layers and label_dim the label input and the final fc_layer.
+dict_dim = 10
+word_dim = 8
+hidden_dim = 8
+label_dim = 2
+
+# Two word inputs, each followed by its own embedding layer.
+speaker1 = data_layer(name="word1", size=dict_dim)
+speaker2 = data_layer(name="word2", size=dict_dim)
+
+emb1 = embedding_layer(input=speaker1, size=word_dim)
+emb2 = embedding_layer(input=speaker2, size=word_dim)
+
+# This hierarchical RNN is designed to be equivalent to the simple RNN in
+# sequence_rnn_multi_unequalength_inputs.conf
+
+def outer_step(x1, x2):
+    # Step function of the "outer" recurrent_group; x1 and x2 are its two
+    # in-links (presumably one sub-sequence of each input per outer step).
+    # The outer memories share their names with the last_seq layers created
+    # near the end of this function.
+    outer_mem1 = memory(name = "outer_rnn_state1", size = hidden_dim)
+    outer_mem2 = memory(name = "outer_rnn_state2", size = hidden_dim)
+    def inner_step1(y):
+        # Inner recurrence for x1: the memory is booted from outer_mem1 and
+        # shares its name with the fc_layer that produces the new state.
+        inner_mem = memory(name = 'inner_rnn_state_' + y.name,
+                           size = hidden_dim,
+                           boot_layer = outer_mem1)
+        out = fc_layer(input = [y, inner_mem],
+                       size = hidden_dim,
+                       act = TanhActivation(),
+                       bias_attr = True,
+                       name = 'inner_rnn_state_' + y.name)
+        return out
+
+    def inner_step2(y):
+        # Same recurrence as inner_step1, but booted from outer_mem2.
+        inner_mem = memory(name = 'inner_rnn_state_' + y.name,
+                           size = hidden_dim,
+                           boot_layer = outer_mem2)
+        out = fc_layer(input = [y, inner_mem],
+                       size = hidden_dim,
+                       act = TanhActivation(),
+                       bias_attr = True,
+                       name = 'inner_rnn_state_' + y.name)
+        return out
+
+    # One inner recurrent_group per in-link.
+    encoder1 = recurrent_group(
+        step = inner_step1,
+        name = 'inner1',
+        input = x1)
+
+    encoder2 = recurrent_group(
+        step = inner_step2,
+        name = 'inner2',
+        input = x2)
+
+    # These last_seq layers carry the names used by outer_mem1/outer_mem2.
+    # sentence_last_state2_ is not referenced again in this function; the call
+    # is what creates the layer named 'outer_rnn_state2'.
+    sentence_last_state1 = last_seq(input = encoder1, name = 'outer_rnn_state1')
+    sentence_last_state2_ = last_seq(input = encoder2, name = 'outer_rnn_state2')
+
+    # Expand encoder1's last state as encoder2 (presumably so that both
+    # returned outputs follow the layout of the second in-link - TODO confirm).
+    encoder1_expand = expand_layer(input = sentence_last_state1,
+                                   expand_as = encoder2)
+
+    return [encoder1_expand, encoder2]
+
+
+# Outer group over both inputs wrapped as SubsequenceInput; emb2 is named as
+# the targetInlink of the group.
+encoder1_rep, encoder2_rep = recurrent_group(
+    name="outer",
+    step=outer_step,
+    input=[SubsequenceInput(emb1), SubsequenceInput(emb2)],
+    targetInlink=emb2)
+
+# Take the last step of encoder1_rep, expand it as encoder2_rep, and combine
+# it with encoder2_rep through identity projections in one mixed_layer.
+encoder1_last = last_seq(input = encoder1_rep)
+encoder1_expandlast = expand_layer(input = encoder1_last,
+                                   expand_as = encoder2_rep)
+context = mixed_layer(input = [identity_projection(encoder1_expandlast),
+                               identity_projection(encoder2_rep)],
+                      size = hidden_dim)
+
+# Classify from the last step of the combined sequence.
+rep = last_seq(input=context)
+prob = fc_layer(size=label_dim,
+                input=rep,
+                act=SoftmaxActivation(),
+                bias_attr=True)
+
+outputs(classification_cost(input=prob,
+                            label=data_layer(name="label", size=label_dim)))
+
--- a/paddle/gserver/tests/sequence_rnn_multi_unequalength_inputs.conf
+++ b/paddle/gserver/tests/sequence_rnn_multi_unequalength_inputs.conf
+#edit-mode: -*- python -*-
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle.trainer_config_helpers import *
+
+######################## data source ################################
+# Samples are produced by process_unequalength_seq in rnn_data_provider;
+# no test list is configured.
+define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list',
+                        test_list=None,
+                        module='rnn_data_provider',
+                        obj='process_unequalength_seq')
+
+
+settings(batch_size=2, learning_rate=0.01)
+######################## network configure ################################
+# dict_dim sizes both word inputs, word_dim the embeddings, hidden_dim the
+# recurrent layers and label_dim the label input and the final fc_layer.
+dict_dim = 10
+word_dim = 8
+hidden_dim = 8
+label_dim = 2
+
+# Two word inputs, each followed by its own embedding layer.
+speaker1 = data_layer(name="word1", size=dict_dim)
+speaker2 = data_layer(name="word2", size=dict_dim)
+
+emb1 = embedding_layer(input=speaker1, size=word_dim)
+emb2 = embedding_layer(input=speaker2, size=word_dim)
+
+# This simple RNN is designed to be equivalent to the hierarchical RNN in
+# sequence_nest_rnn_multi_unequalength_inputs.conf
+
+def step(x1, x2):
+    # Step function of the "stepout" recurrent_group: the same recurrence is
+    # built once for each of the two in-links x1 and x2, and both results are
+    # returned as the group's outputs.
+    def calrnn(y):
+        # The memory and the fc_layer share a name derived from y.name, so
+        # each in-link gets its own recurrent state.
+        mem = memory(name = 'rnn_state_' + y.name, size = hidden_dim)
+        out = fc_layer(input = [y, mem],
+                       size = hidden_dim,
+                       act = TanhActivation(),
+                       bias_attr = True,
+                       name = 'rnn_state_' + y.name)
+        return out
+
+    encoder1 = calrnn(x1)
+    encoder2 = calrnn(x2)
+    return [encoder1, encoder2]
+
+# A single recurrent_group runs step over both embedded inputs.
+encoder1_rep, encoder2_rep = recurrent_group(
+    name="stepout",
+    step=step,
+    input=[emb1, emb2])
+
+# Take the last step of encoder1_rep, expand it as encoder2_rep, and combine
+# it with encoder2_rep through identity projections in one mixed_layer.
+encoder1_last = last_seq(input = encoder1_rep)
+encoder1_expandlast = expand_layer(input = encoder1_last,
+                                   expand_as = encoder2_rep)
+context = mixed_layer(input = [identity_projection(encoder1_expandlast),
+                               identity_projection(encoder2_rep)],
+                      size = hidden_dim)
+
+# Classify from the last step of the combined sequence.
+rep = last_seq(input=context)
+prob = fc_layer(size=label_dim,
+                input=rep,
+                act=SoftmaxActivation(),
+                bias_attr=True)
+
+outputs(classification_cost(input=prob,
+                            label=data_layer(name="label", size=label_dim)))
+
--- a/paddle/gserver/tests/test_RecurrentGradientMachine.cpp
+++ b/paddle/gserver/tests/test_RecurrentGradientMachine.cpp
@@ -73,6 +73,7 @@ void CalCost(const string& conf, const string& dir, real* cost,

  *ThreadLocalRand::getSeed() = FLAGS_seed;
  vecW.randnorm(0, 0.1);
+  vecMomentum.randnorm(0, 0.1);

  trainer.startTrain();
  for (int i = 0; i < num_passes; ++i) {
@@ -140,6 +141,14 @@ TEST(RecurrentGradientMachine, rnn_multi_input) {
  }
 }

+TEST(RecurrentGradientMachine, rnn_multi_unequalength_input) {
+  // Run the flat and the nested multi-input configs through test(), once
+  // with useGpu == false and once with useGpu == true. 1e-6 is presumably
+  // the tolerance test() allows between the two configs.
+  for (bool useGpu : {false, true}) {
+    test("gserver/tests/sequence_rnn_multi_unequalength_inputs.conf",
+         "gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.conf",
+         1e-6,
+         useGpu);
+  }
+}
+
 int main(int argc, char** argv) {
  if (paddle::version::isWithPyDataProvider()) {
    if (!paddle::version::isWithGpu()) {

--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -2347,7 +2347,7 @@ class SubsequenceInput(object):


 @wrap_name_default("recurrent_group")
-def recurrent_group(step, input, reverse=False, name=None):
+def recurrent_group(step, input, reverse=False, name=None, targetInlink=None):
    """
    Recurrent layer group is an extremely flexible recurrent unit in
    PaddlePaddle. As long as the user defines the calculation done within a
@@ -2401,6 +2401,17 @@ def recurrent_group(step, input, reverse=False, name=None):
    :param reverse: If reverse is set true, the recurrent unit will process the
                    input sequence in a reverse order.
    :type reverse: bool
+
+    :param targetInlink: the input layer that shares info with the layer group's outputs
+
+                         Param input specifies multiple input layers. For
+                         SubsequenceInput inputs, config should assign one input
+                         layer that shares info (the number of sentences and the number
+                         of words in each sentence) with all of the layer group's outputs.
+                         targetInlink should be one of the layer group's inputs.
+
+    :type targetInlink: LayerOutput|SubsequenceInput
+
    :return: LayerOutput object.
    :rtype: LayerOutput
    """
@@ -2419,6 +2430,20 @@ def recurrent_group(step, input, reverse=False, name=None):

    in_links = filter(is_in_links, input)

+    # targetInlink may be given as a plain LayerOutput or wrapped in a
+    # SubsequenceInput. Unwrap it once so that both forms are validated and
+    # named the same way; comparing the wrapper itself against inlink.input
+    # could never match.
+    if isinstance(targetInlink, SubsequenceInput):
+        target_layer = targetInlink.input
+    else:
+        target_layer = targetInlink
+
+    def targetInlink_in_inlinks():
+        # True when the target layer is one of the group's in-links, looking
+        # through any SubsequenceInput wrapper around the in-link.
+        for inlink in in_links:
+            if isinstance(inlink, SubsequenceInput):
+                inlink = inlink.input
+            if target_layer == inlink:
+                return True
+        return False
+
+    assert target_layer is None or targetInlink_in_inlinks(), \
+        "targetInlink should be one of the layer group's input"
+    # Only the layer name is handed on to the layer-group config below.
+    targetInlinkName = None if target_layer is None else target_layer.name
+
+
    contains_sub_seq = [False]

    def map_in_links(x):
@@ -2430,7 +2455,8 @@ def recurrent_group(step, input, reverse=False, name=None):

    RecurrentLayerGroupWithoutOutLinksBegin(
        name=name, in_links=map(map_in_links, in_links),
-        seq_reversed=reverse)
+        seq_reversed=reverse,
+        target_inlinkname=targetInlinkName)
    in_args = []
    for each_input in input:
        assert is_single_input(each_input)