Merge pull request #13400 from kuke/fix_seq_pad

Get sequence length in sequence_pad op & fix sequence_mask op

Merge pull request #13400 from kuke/fix_seq_pad
Get sequence length in sequence_pad op & fix sequence_mask op
9cc1937d · Yibing Liu · GitHub · 020d13c1 · 1c87558c · 9cc1937d
6 changed files
--- a/paddle/fluid/operators/sequence_mask_op.cc
+++ b/paddle/fluid/operators/sequence_mask_op.cc
@@ -23,4 +23,8 @@ REGISTER_OP_CPU_KERNEL(
    paddle::operators::SequenceMaskKernel<paddle::platform::CPUDeviceContext,
                                          int>,
    paddle::operators::SequenceMaskKernel<paddle::platform::CPUDeviceContext,
-                                          int64_t>);
+                                          int64_t>,
+    paddle::operators::SequenceMaskKernel<paddle::platform::CPUDeviceContext,
+                                          float>,
+    paddle::operators::SequenceMaskKernel<paddle::platform::CPUDeviceContext,
+                                          double>);
--- a/paddle/fluid/operators/sequence_mask_op.cu
+++ b/paddle/fluid/operators/sequence_mask_op.cu
@@ -19,4 +19,8 @@ REGISTER_OP_CUDA_KERNEL(
    paddle::operators::SequenceMaskKernel<paddle::platform::CUDADeviceContext,
                                          int>,
    paddle::operators::SequenceMaskKernel<paddle::platform::CUDADeviceContext,
-                                          int64_t>);
+                                          int64_t>,
+    paddle::operators::SequenceMaskKernel<paddle::platform::CUDADeviceContext,
+                                          float>,
+    paddle::operators::SequenceMaskKernel<paddle::platform::CUDADeviceContext,
+                                          double>);
--- a/paddle/fluid/operators/sequence_pad_op.cc
+++ b/paddle/fluid/operators/sequence_pad_op.cc
@@ -29,10 +29,12 @@ class SequencePadOp : public framework::OperatorWithKernel {
                   "Input(PadValue) of SequencePadOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
                   "Output(Out) of SequencePadOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("Length"),
+                   "Output(Length) of SequencePadOp should not be null.");
    auto x_dims = ctx->GetInputDim("X");
    PADDLE_ENFORCE_GE(x_dims.size(), 2,
-                      "The rank of Input(x) can't be less than 2.");
+                      "The rank of Input(X) can't be less than 2.");
    auto time_step_dims = framework::slice_ddim(x_dims, 1, x_dims.size());
    auto pad_value_dims = ctx->GetInputDim("PadValue");
    PADDLE_ENFORCE(pad_value_dims == framework::make_ddim({1}) ||
@@ -41,8 +43,8 @@ class SequencePadOp : public framework::OperatorWithKernel {
                   "shape equals to time steps in sequences");
    int out_dim_0 = -1;
-    int out_dim_1 = -1;
+    int padded_length = ctx->Attrs().Get<int>("padded_length");
    if (ctx->IsRuntime()) {
      // run time
      framework::Variable* x_var =
@@ -58,7 +60,6 @@ class SequencePadOp : public framework::OperatorWithKernel {
      int seq_num = x_lod_0.size() - 1;
      int max_seq_len = math::MaximumSequenceLength(x_lod_0);
-      int padded_length = ctx->Attrs().Get<int>("padded_length");
      if (padded_length == -1) {
        padded_length = max_seq_len;
      }
@@ -66,19 +67,30 @@ class SequencePadOp : public framework::OperatorWithKernel {
                        "The Attr(padded_length) must be -1 or an int greater "
                        "than the length of the longest original sequence.");
      out_dim_0 = seq_num;
-      out_dim_1 = padded_length;
    } else {
      // compile time
+      if (padded_length == -1) {
+        padded_length = 1;
+      }
      framework::VarDesc* x_desc =
          boost::get<framework::VarDesc*>(ctx->GetInputVarPtrs("X")[0]);
      PADDLE_ENFORCE_GE(x_desc->GetLoDLevel(), 1);
    }
-    std::vector<int> out_dims_vec{out_dim_0, out_dim_1};
+    std::vector<int> out_dims_vec{out_dim_0, padded_length};
+    std::vector<int> len_dims_vec{out_dim_0, 1};
    auto time_step_dims_vec = framework::vectorize2int(time_step_dims);
    out_dims_vec.insert(out_dims_vec.end(), time_step_dims_vec.begin(),
                        time_step_dims_vec.end());
    ctx->SetOutputDim("Out", framework::make_ddim(out_dims_vec));
+    ctx->SetOutputDim("Length", framework::make_ddim(len_dims_vec));
+  }
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    auto data_type = framework::GetDataTypeOfVar(ctx.InputVar("X"));
+    return framework::OpKernelType(data_type, ctx.device_context());
  }
 };
@@ -96,6 +108,10 @@ class SequencePadOpMaker : public framework::OpProtoAndCheckerMaker {
    AddOutput(
        "Out",
        "(LoDTensor) The output vairable, which contains padded sequences.");
+    AddOutput(
+        "Length",
+        "(LoDTensor) The output vairable, which contains the actual length of "
+        "sequences before padding.");
    AddAttr<int>(
        "padded_length",
        "The length of padded sequences. It can be setted to -1 or "
@@ -125,6 +141,7 @@ class SequencePadOpMaker : public framework::OpProtoAndCheckerMaker {
      then we get LoDTensor:
          Out.data = [[a, b, 0, 0], 
                      [c, d, e, 0]]
+          Length.data = [[2], [3]]
      Case 2:
@@ -138,7 +155,8 @@ class SequencePadOpMaker : public framework::OpProtoAndCheckerMaker {
      then we get LoDTensor:
          Out.data = [[[a1, a2], [b1, b2], [0, 0]], 
                      [[c1, c2], [d1, d2], [e1, e2]]]
+          Length.data = [[2], [3]]
      Case 3:
      Given a 1-level LoDTensor input(X):
@@ -151,6 +169,7 @@ class SequencePadOpMaker : public framework::OpProtoAndCheckerMaker {
      then we get LoDTensor:
          Out.data = [[[a1, a2], [b1, b2], [p1, p2]], 
                      [[c1, c2], [d1, d2], [e1, e2]]]
+          Length.data = [[2], [3]]
    )DOC");
  }
@@ -171,6 +190,13 @@ class SequencePadGradOp : public framework::OperatorWithKernel {
      ctx->ShareLoD("X", /*->*/ framework::GradVarName("X"));
    }
  }
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    auto data_type = framework::GetDataTypeOfVar(ctx.InputVar("X"));
+    return framework::OpKernelType(data_type, ctx.device_context());
+  }
 };
 }  // namespace operators

--- a/paddle/fluid/operators/sequence_pad_op.h
+++ b/paddle/fluid/operators/sequence_pad_op.h
@@ -32,6 +32,7 @@ class SequencePadOpKernel : public framework::OpKernel<T> {
  void Compute(const framework::ExecutionContext& ctx) const override {
    const auto* x = ctx.Input<LoDTensor>("X");
    auto* out = ctx.Output<LoDTensor>("Out");
+    auto* len_t = ctx.Output<LoDTensor>("Length");
    out->mutable_data<T>(ctx.GetPlace());
    const auto* pad_value = ctx.Input<LoDTensor>("PadValue");
@@ -41,6 +42,15 @@ class SequencePadOpKernel : public framework::OpKernel<T> {
    math::PaddingLoDTensorFunctor<DeviceContext, T>()(
        ctx.template device_context<DeviceContext>(), *x, out, *pad_value,
        padded_length, 0, false, math::kBatchLengthWidth);
+    LoDTensor seq_len;
+    seq_len.Resize(len_t->dims());
+    int64_t* len_data = seq_len.mutable_data<int64_t>(platform::CPUPlace());
+    for (size_t i = 1; i < x->lod()[0].size(); ++i) {
+      len_data[i - 1] = x->lod()[0][i] - x->lod()[0][i - 1];
+    }
+    framework::TensorCopy(seq_len, ctx.GetPlace(),
+                          ctx.template device_context<DeviceContext>(), len_t);
  }
 };

--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -2776,7 +2776,8 @@ def sequence_pad(x, pad_value, maxlen=None):
            longest original sequence."
    Returns:
-        Variable: The padded sequence batch. All sequences has the same length.
+        Variable: The padded sequence batch and the original lengths before 
+                  padding. All sequences has the same length.
    Examples:
        .. code-block:: python
@@ -2792,15 +2793,21 @@ def sequence_pad(x, pad_value, maxlen=None):
    helper = LayerHelper('sequence_pad', input=x, **locals())
    dtype = helper.input_dtype()
    out = helper.create_tmp_variable(dtype)
+    length = helper.create_tmp_variable(dtype)
+    pad_value.stop_gradient = True
+    length.stop_gradient = True
    if maxlen is None:
        maxlen = -1
    helper.append_op(
        type='sequence_pad',
        inputs={'X': x,
                'PadValue': pad_value},
-        outputs={'Out': out},
+        outputs={'Out': out,
+                 'Length': length},
        attrs={'padded_length': maxlen})
-    return out
+    return out, length
 def beam_search(pre_ids,
@@ -6017,7 +6024,7 @@ def sequence_mask(x, maxlen=None, dtype='int64', name=None):
        inputs={'X': [x]},
        outputs={'Y': out},
        attrs={
-            'max_len': maxlen if maxlen is not None else -1,
+            'maxlen': maxlen if maxlen is not None else -1,
            'out_dtype': out.dtype
        })
    return out

--- a/python/paddle/fluid/tests/unittests/test_sequence_pad_op.py
+++ b/python/paddle/fluid/tests/unittests/test_sequence_pad_op.py
@@ -62,7 +62,8 @@ class TestSequencePadOp(OpTest):
            start_idx = end_idx
        out_data = np.array(padded_sequences)
-        self.outputs = {'Out': out_data}
+        length = np.array(self.x_len_lod[0]).reshape((-1, 1))
+        self.outputs = {'Out': out_data, 'Length': length}
    def setUp(self):
        self.op_type = 'sequence_pad'
@@ -129,3 +130,7 @@ class TestSequencePadOp7(TestSequencePadOp):
        self.pad_value = [1.0]
        self.padded_length = 7
        self.dtype = 'float32'
+if __name__ == '__main__':
+    unittest.main()