diff --git a/paddle/fluid/operators/math/sequence_padding.cu b/paddle/fluid/operators/math/sequence_padding.cu
index 93d239351aaaf6080b38bc2b36180b9094384753..f94e8dbc3a4c4d082e3a4be0f16fd04d93771501 100644
--- a/paddle/fluid/operators/math/sequence_padding.cu
+++ b/paddle/fluid/operators/math/sequence_padding.cu
@@ -66,6 +66,9 @@ class PaddingLoDTensorFunctor {
     if (pad_seq_len == -1) {
       pad_seq_len = max_seq_len;
     }
+    PADDLE_ENFORCE_GE(pad_seq_len, max_seq_len,
+                      "The pad_seq_len must be equal to or greater than the "
+                      "original max sequence length.");
     int step_width = seq_tensor.numel() / seq_tensor_dims[0];
     int seq_num = seq_offsets.size() - 1;
diff --git a/paddle/fluid/operators/sequence_pad_op.cc b/paddle/fluid/operators/sequence_pad_op.cc
index f23710cf4dec92df1154e5c17c2f8b80d9953420..a08804cfba28aaca86a14788701e64a6ce6d1d4f 100644
--- a/paddle/fluid/operators/sequence_pad_op.cc
+++ b/paddle/fluid/operators/sequence_pad_op.cc
@@ -101,6 +101,52 @@ class SequencePadOpMaker : public framework::OpProtoAndCheckerMaker {
                  "sequence.")
         .SetDefault(-1);
     AddComment(R"DOC(
+      Sequence Pad Operator
+
+      This operator pads sequences in the same batch to a consistent length.
+      The length is specified by the attribute 'padded_length'. New elements,
+      whose values are specified by input 'PadValue', will be appended to
+      the end of each sequence, to make their final lengths consistent.
+
+      The following cases explain how this works:
+
+      Case 1:
+
+      Given a 1-level LoDTensor input(X):
+          X.lod = [[0, 2, 5]]
+          X.data = [a, b, c, d, e]
+      and Input(PadValue):
+          PadValue.data = [0]
+      and attribute 'padded_length' = 4,
+      then we get a 1-level LoDTensor:
+          Out.lod = [[0, 4, 8]]
+          Out.data = [a, b, 0, 0, c, d, e, 0]
+
+      Case 2:
+
+      Given a 1-level LoDTensor input(X):
+          X.lod = [[0, 2, 5]]
+          X.data = [[a1, a2], [b1, b2], [c1, c2], [d1, d2], [e1, e2]]
+      and Input(PadValue):
+          PadValue.data = [0]
+      and attribute 'padded_length' = -1, which means using the length
+      of the longest input sequence (3 in this case),
+      then we get a 1-level LoDTensor:
+          Out.lod = [[0, 3, 6]]
+          Out.data = [[a1, a2], [b1, b2], [0, 0], [c1, c2], [d1, d2], [e1, e2]]
+
+      Case 3:
+
+      Given a 1-level LoDTensor input(X):
+          X.lod = [[0, 2, 5]]
+          X.data = [[a1, a2], [b1, b2], [c1, c2], [d1, d2], [e1, e2]]
+      and Input(PadValue):
+          PadValue.data = [p1, p2]
+      and attribute 'padded_length' = -1, which means using the length
+      of the longest input sequence (3 in this case),
+      then we get a 1-level LoDTensor:
+          Out.lod = [[0, 3, 6]]
+          Out.data = [[a1, a2], [b1, b2], [p1, p2], [c1, c2], [d1, d2], [e1, e2]]
 )DOC");
   }
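Note (reviewer aside, not part of the patch): the three cases above reduce to
per-sequence right-padding over the LoD offsets. A minimal numpy sketch of
Case 1's arithmetic, with concrete numbers standing in for a..e:

    import numpy as np

    lod = [0, 2, 5]                         # sequence offsets from X.lod
    x = np.array([1., 2., 3., 4., 5.])      # stands in for [a, b, c, d, e]
    pad_value, padded_length = 0.0, 4
    # Pad each sequence x[s:e] on the right up to padded_length.
    out = np.concatenate([
        np.pad(x[s:e], (0, padded_length - (e - s)),
               mode='constant', constant_values=pad_value)
        for s, e in zip(lod[:-1], lod[1:])
    ])
    # out == [1., 2., 0., 0., 3., 4., 5., 0.], matching Out.data in Case 1.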
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index c5c92619ed6d61b17a06d4c0f671eeb1d8c04d17..990174be2098f0a747a212cad7d9cacd870442ba 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -2657,6 +2657,53 @@ def sequence_expand(x, y, ref_level=-1, name=None):
     return tmp
 
 
+@templatedoc()
+def sequence_pad(x, pad_value, maxlen=None):
+    """
+    ${comment}
+
+    Args:
+        x(Variable): Input variable which should contain lod information.
+        pad_value(Variable): The Variable that holds values that will be
+            filled into the padded steps. It can be a scalar or a tensor
+            whose shape equals that of a time step. If it is a scalar, it
+            will be automatically broadcast to the shape of a time step.
+        maxlen(int, default None): The length of padded sequences. It can be
+            None or any positive int. When it is None, all sequences will be
+            padded up to the length of the longest one among them; when it
+            is a certain positive value, it must be no less than the length
+            of the longest original sequence.
+
+    Returns:
+        Variable: The padded sequence batch. All sequences have the same
+            length.
+
+    Examples:
+        .. code-block:: python
+
+            import numpy
+            import paddle.fluid as fluid
+
+            x = fluid.layers.data(name='x', shape=[10, 5],
+                                  dtype='float32', lod_level=1)
+            pad_value = fluid.layers.assign(
+                input=numpy.array([0], dtype=numpy.float32))
+            out = fluid.layers.sequence_pad(x=x, pad_value=pad_value)
+    """
+
+    helper = LayerHelper('sequence_pad', input=x, **locals())
+    dtype = helper.input_dtype()
+    out = helper.create_tmp_variable(dtype)
+    if maxlen is None:
+        maxlen = -1
+    helper.append_op(
+        type='sequence_pad',
+        inputs={'X': x,
+                'PadValue': pad_value},
+        outputs={'Out': out},
+        attrs={'padded_length': maxlen})
+    return out
+
+
 def beam_search(pre_ids, pre_scores, ids,
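Note on usage (reviewer aside, not part of the patch): a minimal sketch of how
the new layer composes with the rest of the Fluid layers API, mirroring Case 3
of the op comment. The variable names and the per-step pad vector are
illustrative assumptions; the executor/feed plumbing is omitted.

    import numpy
    import paddle.fluid as fluid

    # Each time step is a 2-wide vector, so PadValue may be a 2-element
    # tensor (Case 3) instead of a scalar.
    x = fluid.layers.data(name='x', shape=[2], dtype='float32', lod_level=1)
    pad_value = fluid.layers.assign(
        input=numpy.array([-1.0, -1.0], dtype=numpy.float32))
    # maxlen=None is lowered to attribute padded_length = -1, i.e. pad every
    # sequence up to the length of the longest sequence in the batch.
    out = fluid.layers.sequence_pad(x=x, pad_value=pad_value)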