modify RecurrentGradientMachine to support unequal length inputs

* modify RecurrentGradientMachine to support hasSubSeq sequence inlinks with the same number of sentences but a different number of tokens for each sentence. Change-Id: Ic71f00a4bb346b4fa93e650dfb4b1a0d8d2338b0

modify RecurrentGradientMachine to support unequal length inputs
* modify RecurrentGradientMachine to support hasSubSeq sequence inlinks with the same number of sentences but a different number of tokens for each sentence. Change-Id: Ic71f00a4bb346b4fa93e650dfb4b1a0d8d2338b0
699d5f26 · zhangruiqing01 · Yu Yang · 0f91ea7e · 699d5f26 · 699d5f26
4 changed files
--- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
+++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
--- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.h
+++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.h
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-
 #pragma once

 #include "GradientMachine.h"
@@ -101,7 +100,7 @@ public:
   * Return true if this prefix or candidate is expected to be dropped.
   */
  typedef std::function<bool(int seqId, const std::vector<int>&,
-      const std::vector<real>&)> DropCallback;
+                             const std::vector<real>&)> DropCallback;

  /**
    * @brief NormOrDropNodeCallback
@@ -117,7 +116,7 @@ public:
    * The fourth parameter is the probability of the whole path.
    */
  typedef std::function<void(int seqId, const std::vector<int>&,
-      std::vector<real>&, real*)> NormOrDropNodeCallback;
+                             std::vector<real>&, real*)> NormOrDropNodeCallback;

  /**
   * @brief Register beam search control callbacks. Used for prediction.
@@ -192,7 +191,7 @@ public:

    int machineId;  // index of sample in frame
    int topIndex;   // index of MaxIdLayer output in one sample
-    int seqId;  // index of sequence in batch generation
+    int seqId;      // index of sequence in batch generation
    std::vector<int> machineIdVec;

    /**
@@ -206,7 +205,10 @@ public:
    /**
     * @brief Path default ctor, first logProb is 0.
     */
-    Path() { logProb = 0; seqId = 0; }
+    Path() {
+      logProb = 0;
+      seqId = 0;
+    }
    explicit Path(size_t seqId) : seqId(seqId) { logProb = 0; }

    /**
@@ -319,21 +321,33 @@ protected:
  };
  std::vector<MemoryFrameLine> memoryFrameLines_;

-  // All inFrameLines and outFrameLines have the same element as follows.
+  // Each inFrameLine (inlink) has its own Info (the elements below),
+  // and all outFrameLines (outlinks) share the Info of one inFrameLine,
+  // which is selected by targetInfoInlinkId_.
  struct Info {
    IVectorPtr allIds;         // scattered id of realLayer
    std::vector<int> idIndex;  // index of allIds
    ICpuGpuVectorPtr
-        sequenceStartPositions;      // scattered sequenceStartPositions
+        sequenceStartPositions;         // scattered sequenceStartPositions
    std::vector<int> seqStartPosIndex;  // index of sequenceStartPositions
  };
-  Info info_;
+  std::vector<Info> info_;

-  // if no subSeq, tuple of (seqLength, seqStart, seqIndex, seqIndex)
-  // else, tuple of (subSeqLength, subSeqStart, seqIndex, subSeqIndex)
-  std::vector<std::tuple<int, int, int, int>> seqLengthAndStart_;
+  // Each inlink has a "std::vector<std::tuple<int, int, int, int>>" that
+  // denotes its sequence info:
+  //  if hasSubSeq, tuple of (subSeqLength, subSeqStart, seqIndex, subSeqIndex)
+  //  else, tuple of (seqLength, seqStart, seqIndex, seqIndex)
+  std::vector<std::vector<std::tuple<int, int, int, int>>> seqLengthAndStart_;

-  void createInFrameInfo(const Argument& input, PassType passType);
+  // the id of the inlink which shares its info with the outlinks
+  int targetInfoInlinkId_;
+
+  /* create scattered id information for all realLayer of inFrameLines at once.
+  *  If hasSubseq, also create scattered sequenceStartPositions information
+  *  for all realLayer of inFrameLines at once.
+  */
+  void createInFrameInfo(int inlinks_id, const Argument& input,
+                         PassType passType);

  void createMemoryFrameInfo(MemoryFrameLine* memoryFrameLine,
                             PassType passType);
@@ -363,6 +377,9 @@ protected:

  NeuralNetwork* rootNetwork_;
  bool reversed_;
+
+  // if hasSubseq: max number of sentences (subseq) in batchsize samples
+  // else: max number of tokens in batchsize samples (sentences)
  int maxSequenceLength_;
  bool useGpu_;
  bool stopBeamSearch_;
@@ -415,7 +432,7 @@ private:
   * @param machineIdVec : select a row of output matrix in each frame
   * that the generation process expanded.
   */
-  void createDataOutlink(std::vector<int> & machineIdVec);
+  void createDataOutlink(std::vector<int>& machineIdVec);

  /*
   * @brief used in beam search, connect previous frame to form recurrent link

--- a/proto/ModelConfig.proto.m4
+++ b/proto/ModelConfig.proto.m4
@@ -452,6 +452,9 @@ message SubModelConfig {
  repeated LinkConfig out_links = 10;

  optional GeneratorConfig generator = 11;
+
+  // the id of the inlink which shares its info with the outlinks, used in recurrent layer group
+  optional int32 target_inlinkid = 12;
 }

 message ModelConfig {

--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -303,7 +303,8 @@ def MakeLayerNameInSubmodel(name, submodel_name = None):
 @config_func
 def RecurrentLayerGroupWithoutOutLinksBegin(name,
                                            in_links,
-                                            seq_reversed=False):
+                                            seq_reversed=False,
+                                            target_inlinkname=""):
    global g_current_submodel
    config_assert(g_config.model_config.type == "recurrent_nn",
                  "RecurrentLayerGroup should be used only in recurrent_nn")
@@ -311,14 +312,19 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name,
    SubModelBegin(name)
    g_current_submodel.is_recurrent_layer_group = True
    g_current_submodel.reversed = seq_reversed
+    g_current_submodel.target_inlinkid = -1
    in_links_count = 0
-    for link in in_links:
+    for linkid, link in enumerate(in_links):
        if isinstance(link, basestring):
            name = link
            has_subseq = False
        else:
            name = link.link_name
            has_subseq = link.has_subseq
+        # assign target_inlinkid according to target_inlinkname
+        if target_inlinkname == name:
+            g_current_submodel.target_inlinkid = linkid
+
        if in_links_count == 0:
            in_links_has_subseq = has_subseq
        else:
@@ -331,6 +337,7 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name,
            SequenceScatterAgentLayer(name=name, size=layer.size)
        else:
            ScatterAgentLayer(name=name, size=layer.size)
+
        pair = g_current_submodel.in_links.add()
        pair.layer_name = layer_name
        pair.link_name = MakeLayerNameInSubmodel(name)
@@ -362,10 +369,12 @@ def RecurrentLayerGroupBegin(name,
                             in_links,
                             out_links,
                             generator=None,
+                             target_inlinkname="",
                             seq_reversed=False):
    RecurrentLayerGroupWithoutOutLinksBegin(name,
                                            in_links,
-                                            seq_reversed)
+                                            seq_reversed,
+                                            target_inlinkname)
    for link in out_links:
        RecurrentLayerGroupSetOutLink(link)