diff --git a/paddle/fluid/operators/sequence_mask_op.cc b/paddle/fluid/operators/sequence_mask_op.cc
index e45c18d6aff65ecac565ef05e36b2d47ad8744b8..798211f481659eb71248f7a6210e6522273d387f 100644
--- a/paddle/fluid/operators/sequence_mask_op.cc
+++ b/paddle/fluid/operators/sequence_mask_op.cc
@@ -23,4 +23,8 @@ REGISTER_OP_CPU_KERNEL(
     paddle::operators::SequenceMaskKernel<paddle::platform::CPUDeviceContext,
                                           int>,
     paddle::operators::SequenceMaskKernel<paddle::platform::CPUDeviceContext,
-                                          int64_t>);
+                                          int64_t>,
+    paddle::operators::SequenceMaskKernel<paddle::platform::CPUDeviceContext,
+                                          float>,
+    paddle::operators::SequenceMaskKernel<paddle::platform::CPUDeviceContext,
+                                          double>);
diff --git a/paddle/fluid/operators/sequence_mask_op.cu b/paddle/fluid/operators/sequence_mask_op.cu
index ff5acf4d9edd5f0f15cbcb22eae212c2d49ccaab..2ad23774579533b62b9189c1564ad7c7db5c298a 100644
--- a/paddle/fluid/operators/sequence_mask_op.cu
+++ b/paddle/fluid/operators/sequence_mask_op.cu
@@ -19,4 +19,8 @@ REGISTER_OP_CUDA_KERNEL(
     paddle::operators::SequenceMaskKernel<paddle::platform::CUDADeviceContext,
                                           int>,
     paddle::operators::SequenceMaskKernel<paddle::platform::CUDADeviceContext,
-                                          int64_t>);
+                                          int64_t>,
+    paddle::operators::SequenceMaskKernel<paddle::platform::CUDADeviceContext,
+                                          float>,
+    paddle::operators::SequenceMaskKernel<paddle::platform::CUDADeviceContext,
+                                          double>);
diff --git a/paddle/fluid/operators/sequence_pad_op.cc b/paddle/fluid/operators/sequence_pad_op.cc
index 44d73aa4076abfe15c906478702ac7c4a55303d4..4583b26256ba2e084bf7477c54d468df860d9b43 100644
--- a/paddle/fluid/operators/sequence_pad_op.cc
+++ b/paddle/fluid/operators/sequence_pad_op.cc
@@ -29,10 +29,12 @@ class SequencePadOp : public framework::OperatorWithKernel {
                    "Input(PadValue) of SequencePadOp should not be null.");
     PADDLE_ENFORCE(ctx->HasOutput("Out"),
                    "Output(Out) of SequencePadOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("Length"),
+                   "Output(Length) of SequencePadOp should not be null.");
 
     auto x_dims = ctx->GetInputDim("X");
     PADDLE_ENFORCE_GE(x_dims.size(), 2,
-                      "The rank of Input(x) can't be less than 2.");
+                      "The rank of Input(X) can't be less than 2.");
     auto time_step_dims = framework::slice_ddim(x_dims, 1, x_dims.size());
     auto pad_value_dims = ctx->GetInputDim("PadValue");
     PADDLE_ENFORCE(pad_value_dims == framework::make_ddim({1}) ||
@@ -41,8 +43,8 @@
                    "shape equals to time steps in sequences");
 
     int out_dim_0 = -1;
-    int out_dim_1 = -1;
 
+    int padded_length = ctx->Attrs().Get<int>("padded_length");
     if (ctx->IsRuntime()) {
       // run time
       framework::Variable* x_var =
@@ -58,7 +60,6 @@
 
       int seq_num = x_lod_0.size() - 1;
       int max_seq_len = math::MaximumSequenceLength(x_lod_0);
-      int padded_length = ctx->Attrs().Get<int>("padded_length");
       if (padded_length == -1) {
         padded_length = max_seq_len;
       }
@@ -66,19 +67,30 @@
                         "The Attr(padded_length) must be -1 or an int greater "
                         "than the length of the longest original sequence.");
       out_dim_0 = seq_num;
-      out_dim_1 = padded_length;
     } else {
       // compile time
+      if (padded_length == -1) {
+        padded_length = 1;
+      }
       framework::VarDesc* x_desc =
           boost::get<framework::VarDesc*>(ctx->GetInputVarPtrs("X")[0]);
       PADDLE_ENFORCE_GE(x_desc->GetLoDLevel(), 1);
     }
 
-    std::vector<int> out_dims_vec{out_dim_0, out_dim_1};
+    std::vector<int> out_dims_vec{out_dim_0, padded_length};
+    std::vector<int> len_dims_vec{out_dim_0, 1};
     auto time_step_dims_vec = framework::vectorize2int(time_step_dims);
     out_dims_vec.insert(out_dims_vec.end(), time_step_dims_vec.begin(),
                         time_step_dims_vec.end());
     ctx->SetOutputDim("Out", framework::make_ddim(out_dims_vec));
+    ctx->SetOutputDim("Length", framework::make_ddim(len_dims_vec));
+  }
+
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    auto data_type = framework::GetDataTypeOfVar(ctx.InputVar("X"));
+    return framework::OpKernelType(data_type, ctx.device_context());
   }
 };
 
@@ -96,6 +108,10 @@ class SequencePadOpMaker : public framework::OpProtoAndCheckerMaker {
     AddOutput(
         "Out",
         "(LoDTensor) The output vairable, which contains padded sequences.");
+    AddOutput(
+        "Length",
+        "(LoDTensor) The output variable, which contains the actual length of "
+        "sequences before padding.");
     AddAttr<int>(
         "padded_length",
         "The length of padded sequences. It can be setted to -1 or "
@@ -125,6 +141,7 @@ class SequencePadOpMaker : public framework::OpProtoAndCheckerMaker {
       then we get LoDTensor:
           Out.data = [[a, b, 0, 0],
                       [c, d, e, 0]]
+          Length.data = [[2], [3]]
 
       Case 2:
 
@@ -138,7 +155,8 @@ class SequencePadOpMaker : public framework::OpProtoAndCheckerMaker {
       then we get LoDTensor:
           Out.data = [[[a1, a2], [b1, b2], [0, 0]],
                       [[c1, c2], [d1, d2], [e1, e2]]]
-
+          Length.data = [[2], [3]]
+
       Case 3:
 
       Given a 1-level LoDTensor input(X):
@@ -151,6 +169,7 @@ class SequencePadOpMaker : public framework::OpProtoAndCheckerMaker {
       then we get LoDTensor:
          Out.data = [[[a1, a2], [b1, b2], [p1, p2]],
                      [[c1, c2], [d1, d2], [e1, e2]]]
+          Length.data = [[2], [3]]
 
 )DOC");
   }
@@ -171,6 +190,13 @@ class SequencePadGradOp : public framework::OperatorWithKernel {
       ctx->ShareLoD("X", /*->*/ framework::GradVarName("X"));
     }
   }
+
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    auto data_type = framework::GetDataTypeOfVar(ctx.InputVar("X"));
+    return framework::OpKernelType(data_type, ctx.device_context());
+  }
 };
 
 }  // namespace operators
diff --git a/paddle/fluid/operators/sequence_pad_op.h b/paddle/fluid/operators/sequence_pad_op.h
index 5fc9da69d787ff3aeffa716689d44772ad8f7bd2..840bd39a7f3eaca6cb03bca59016fc032e9a3068 100644
--- a/paddle/fluid/operators/sequence_pad_op.h
+++ b/paddle/fluid/operators/sequence_pad_op.h
@@ -32,6 +32,7 @@ class SequencePadOpKernel : public framework::OpKernel<T> {
   void Compute(const framework::ExecutionContext& ctx) const override {
     const auto* x = ctx.Input<LoDTensor>("X");
     auto* out = ctx.Output<LoDTensor>("Out");
+    auto* len_t = ctx.Output<LoDTensor>("Length");
     out->mutable_data<T>(ctx.GetPlace());
 
     const auto* pad_value = ctx.Input<LoDTensor>("PadValue");
@@ -41,6 +42,15 @@
     math::PaddingLoDTensorFunctor<DeviceContext, T>()(
         ctx.template device_context<DeviceContext>(), *x, out, *pad_value,
         padded_length, 0, false, math::kBatchLengthWidth);
+
+    LoDTensor seq_len;
+    seq_len.Resize(len_t->dims());
+    int64_t* len_data = seq_len.mutable_data<int64_t>(platform::CPUPlace());
+    for (size_t i = 1; i < x->lod()[0].size(); ++i) {
+      len_data[i - 1] = x->lod()[0][i] - x->lod()[0][i - 1];
+    }
+    framework::TensorCopy(seq_len, ctx.GetPlace(),
+                          ctx.template device_context<DeviceContext>(), len_t);
   }
 };
 
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 25e7373699a24faa785b1775295e80614f4201a4..c6de22f996184c7f07b22b6255829b5a65aad32a 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -2776,7 +2776,8 @@ def sequence_pad(x, pad_value, maxlen=None):
             longest original sequence."
 
     Returns:
-        Variable: The padded sequence batch. All sequences has the same length.
+        Variable: The padded sequence batch and the original lengths before
+                  padding. All sequences have the same length.
 
     Examples:
        .. code-block:: python
@@ -2792,15 +2793,21 @@ def sequence_pad(x, pad_value, maxlen=None):
     helper = LayerHelper('sequence_pad', input=x, **locals())
     dtype = helper.input_dtype()
     out = helper.create_tmp_variable(dtype)
+    length = helper.create_tmp_variable(dtype)
+
+    pad_value.stop_gradient = True
+    length.stop_gradient = True
+
     if maxlen is None:
         maxlen = -1
     helper.append_op(
         type='sequence_pad',
         inputs={'X': x,
                 'PadValue': pad_value},
-        outputs={'Out': out},
+        outputs={'Out': out,
+                 'Length': length},
         attrs={'padded_length': maxlen})
-    return out
+    return out, length
 
 
 def beam_search(pre_ids,
@@ -6017,7 +6024,7 @@ def sequence_mask(x, maxlen=None, dtype='int64', name=None):
         inputs={'X': [x]},
         outputs={'Y': out},
         attrs={
-            'max_len': maxlen if maxlen is not None else -1,
+            'maxlen': maxlen if maxlen is not None else -1,
             'out_dtype': out.dtype
         })
     return out
diff --git a/python/paddle/fluid/tests/unittests/test_sequence_pad_op.py b/python/paddle/fluid/tests/unittests/test_sequence_pad_op.py
index 471515c817541976a06eb024fa3d4f77b78f920d..3067294e5bb3edcb2f1ce77f5e60b885a39a6475 100644
--- a/python/paddle/fluid/tests/unittests/test_sequence_pad_op.py
+++ b/python/paddle/fluid/tests/unittests/test_sequence_pad_op.py
@@ -62,7 +62,8 @@ class TestSequencePadOp(OpTest):
             start_idx = end_idx
 
         out_data = np.array(padded_sequences)
-        self.outputs = {'Out': out_data}
+        length = np.array(self.x_len_lod[0]).reshape((-1, 1))
+        self.outputs = {'Out': out_data, 'Length': length}
 
     def setUp(self):
         self.op_type = 'sequence_pad'
@@ -129,3 +130,7 @@
         self.pad_value = [1.0]
         self.padded_length = 7
         self.dtype = 'float32'
+
+
+if __name__ == '__main__':
+    unittest.main()
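
Note: with this change `fluid.layers.sequence_pad` returns a pair instead of a single
Variable, so existing call sites must unpack two values. A minimal sketch of an
updated call site, assuming the `fluid.layers` API of this branch (the `x` and
`pad_value` setup is illustrative, modeled on the layer's docstring example):

    import numpy
    import paddle.fluid as fluid

    # x carries 1-level LoD sequences; pad_value is the scalar filler tensor
    x = fluid.layers.data(name='x', shape=[10, 5], dtype='float32', lod_level=1)
    pad_value = fluid.layers.assign(
        input=numpy.array([0.0], dtype=numpy.float32))

    # sequence_pad now returns the padded batch plus the pre-padding lengths
    out, length = fluid.layers.sequence_pad(x=x, pad_value=pad_value)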
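The new Length output is simply the first difference of the input's level-0 LoD
offsets, which is what the CPU loop added in sequence_pad_op.h computes before
copying the result to the target place. A small numpy sketch of the same
arithmetic, using the LoD from Case 1 of the op doc above:

    import numpy as np

    lod0 = [0, 2, 5]  # X.lod[0] offsets from Case 1
    # mirrors len_data[i - 1] = lod[0][i] - lod[0][i - 1] in the kernel loop
    length = np.diff(np.array(lod0)).reshape((-1, 1))
    # length -> [[2], [3]], matching Length.data in the doc examples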