add op comment and python layer

f4a4a4cb · fengjiayi · ce182d90 · f4a4a4cb · f4a4a4cb · f4a4a4cb
3 changed files
--- a/paddle/fluid/operators/math/sequence_padding.cu
+++ b/paddle/fluid/operators/math/sequence_padding.cu
@@ -66,6 +66,9 @@ class PaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
    if (pad_seq_len == -1) {
      pad_seq_len = max_seq_len;
    }
+    PADDLE_ENFORCE_GE(pad_seq_len, max_seq_len,
+                      "The pad_seq_len must be equal to or greater than the "
+                      "original max sequence length.");
    int step_width = seq_tensor.numel() / seq_tensor_dims[0];
    int seq_num = seq_offsets.size() - 1;


--- a/paddle/fluid/operators/sequence_pad_op.cc
+++ b/paddle/fluid/operators/sequence_pad_op.cc
@@ -101,6 +101,52 @@ class SequencePadOpMaker : public framework::OpProtoAndCheckerMaker {
        "sequence.")
        .SetDefault(-1);
    AddComment(R"DOC(
+      Sequence Pad Operator
+
+      This operator pads sequences in the same batch to a consistent length.
+      The length is specified by attribute 'padded_length'. New elements,
+      whose values are specified by input 'PadValue', will be appended to
+      the end of each sequence, to make their final lengths consistent.
+
+      Following are cases to better explain how this works:
+
+      Case 1:
+
+      Given a 1-level LoDTensor input(X):
+          X.lod = [[0, 2,       5]]
+          X.data = [a, b, c, d, e]
+      and Input(PadValue):
+          PadValue.data = [0]
+      and attribute 'padded_length' = 4,
+      then we get 1-level LoDTensor:
+          Out.lod = [[0,       4,          8]]
+          Out.data = [a, b, 0, 0, c, d, e, 0]
+      
+      Case 2:
+
+      Given a 1-level LoDTensor input(X):
+          X.lod = [[0,               2,                           5]]
+          X.data = [[a1, a2], [b1, b2], [c1, c2], [d1, d2], [e1, e2]]
+      and Input(PadValue):
+          PadValue.data = [0]
+      and attribute 'padded_length' = -1, which means using the length
+      of the longest input sequence (3 in this case),
+      then we get 1-level LoDTensor:
+          Out.lod = [[0,                       3,                           6]]
+          Out.data = [[a1, a2], [b1, b2], [0, 0], [c1, c2], [d1, d2], [e1, e2]]
+
+      Case 3:
+
+      Given a 1-level LoDTensor input(X):
+          X.lod = [[0,               2,                           5]]
+          X.data = [[a1, a2], [b1, b2], [c1, c2], [d1, d2], [e1, e2]]
+      and Input(PadValue):
+          PadValue.data = [p1, p2]
+      and attribute 'padded_length' = -1, which means using the length
+      of the longest input sequence (3 in this case),
+      then we get 1-level LoDTensor:
+          Out.lod = [[0,                         3,                           6]]
+          Out.data = [[a1, a2], [b1, b2], [p1, p2], [c1, c2], [d1, d2], [e1, e2]]

    )DOC");
  }

--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -2662,6 +2662,51 @@ def sequence_expand(x, y, ref_level=-1, name=None):
    return tmp


+@templatedoc()
+def sequence_pad(x, pad_value, maxlen=None):
+    """
+    ${comment}
+
+    Args:
+        x(Variable): Input variable which should contain lod information.
+        pad_value(Variable): The Variable that holds values that will be filled
+            into padded steps. It can be a scalar or a tensor whose shape
+            equals that of a time step in the sequences. If it's a scalar, it
+            will be automatically broadcast to the shape of a time step.
+        maxlen(int, default None): The length of padded sequences. It can be
+            None or any positive int. When it is None, all sequences will be
+            padded up to the length of the longest one among them; when it is a
+            certain positive value, it must be greater than or equal to the
+            length of the longest original sequence.
+    
+    Returns:
+        Variable: The padded sequence batch. All sequences have the same length.
+    
+    Examples:
+        .. code-block:: python
+
+            import numpy
+
+            x = fluid.layers.data(name='x', shape=[10, 5],
+                             dtype='float32', lod_level=1)
+            pad_value = fluid.layers.assign(input=numpy.array([0]))
+            out = fluid.layers.sequence_pad(x=x, pad_value=pad_value)
+    """
+
+    helper = LayerHelper('sequence_pad', input=x, **locals())
+    dtype = helper.input_dtype()
+    out = helper.create_tmp_variable(dtype)
+    if maxlen is None:
+        maxlen = -1
+    helper.append_op(
+        type='sequence_pad',
+        inputs={'X': x,
+                'PadValue': pad_value},
+        outputs={'Out': out},
+        attrs={'padded_length': maxlen})
+    return out
+
+
 def beam_search(pre_ids,
                pre_scores,
                ids,