Refactor linear chain crf op & crf decoding op (#19982) (#20171)

* Update crf_decoding api & example * Update api spec * Fix linear chain crf api * Avoid sharing data pointer with input * Simplify the logic in linear_chain_crf_decoding * Add unittest for crf_decoding when label & path both are set * Update API spec * Add unittest for layers && correct infer_shape in chunk_eval test=release/1.6

Refactor linear chain crf op & crf decoding op (#19982) (#20171)
* Update crf_decoding api & example * Update api spec * Fix linear chain crf api * Avoid sharing data pointer with input * Simplify the logic in linear_chain_crf_decoding * Add unittest for crf_decoding when label & path both are set * Update API spec * Add unittest for layers && correct infer_shape in chunk_eval test=release/1.6
ea065e32 · Yibing Liu · GitHub · 3b49372f · ea065e32 · ea065e32
10 changed files
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -132,8 +132,8 @@ paddle.fluid.layers.dynamic_lstm (ArgSpec(args=['input', 'size', 'h_0', 'c_0', '
 paddle.fluid.layers.dynamic_lstmp (ArgSpec(args=['input', 'size', 'proj_size', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'proj_activation', 'dtype', 'name', 'h_0', 'c_0', 'cell_clip', 'proj_clip'], varargs=None, keywords=None, defaults=(None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'tanh', 'float32', None, None, None, None, None)), ('document', 'c37d51aad655c8a9f9b045c64717320a'))
 paddle.fluid.layers.dynamic_gru (ArgSpec(args=['input', 'size', 'param_attr', 'bias_attr', 'is_reverse', 'gate_activation', 'candidate_activation', 'h_0', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, False, 'sigmoid', 'tanh', None, False)), ('document', '83617c165827e030636c80486d5de6f3'))
 paddle.fluid.layers.gru_unit (ArgSpec(args=['input', 'hidden', 'size', 'param_attr', 'bias_attr', 'activation', 'gate_activation', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, 'tanh', 'sigmoid', False)), ('document', '33974b9bfa69f2f1eb85e6f956dff04e'))
-paddle.fluid.layers.linear_chain_crf (ArgSpec(args=['input', 'label', 'param_attr', 'length'], varargs=None, keywords=None, defaults=(None, None)), ('document', '9045b8971e4232132ec9952695f4c3ae'))
-paddle.fluid.layers.crf_decoding (ArgSpec(args=['input', 'param_attr', 'label'], varargs=None, keywords=None, defaults=(None,)), ('document', '5ce117258e243be1c81539e254178d90'))
+paddle.fluid.layers.linear_chain_crf (ArgSpec(args=['input', 'label', 'param_attr', 'length'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'bc7a0fd2bb2b35dfd2f54947320e78fa'))
+paddle.fluid.layers.crf_decoding (ArgSpec(args=['input', 'param_attr', 'label', 'length'], varargs=None, keywords=None, defaults=(None, None)), ('document', '933b7e268c4ffa3d5c3ef953a5ee9f0b'))
 paddle.fluid.layers.cos_sim (ArgSpec(args=['X', 'Y'], varargs=None, keywords=None, defaults=None), ('document', '8e6ce424cf9e261ef32ee229c06a6e66'))
 paddle.fluid.layers.cross_entropy (ArgSpec(args=['input', 'label', 'soft_label', 'ignore_index'], varargs=None, keywords=None, defaults=(False, -100)), ('document', '789a141e97fd0b37241f630935936d08'))
 paddle.fluid.layers.bpr_loss (ArgSpec(args=['input', 'label', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6263dfdeb6c670fa0922c9cbc8fb1bf4'))

--- a/paddle/fluid/operators/chunk_eval_op.cc
+++ b/paddle/fluid/operators/chunk_eval_op.cc
@@ -24,37 +24,45 @@ class ChunkEvalOp : public framework::OperatorWithKernel {
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext *ctx) const override {
-    PADDLE_ENFORCE(ctx->HasInput("Inference"),
-                   "Input(Inference) of ChunkEvalOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("Label"),
-                   "Input(Label) of ChunkEvalOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasOutput("Precision"),
-                   "Output(Precision) of ChunkEvalOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasOutput("Recall"),
-                   "Output(Recall) of ChunkEvalOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasOutput("F1-Score"),
-                   "Output(F1-Score) of ChunkEvalOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasOutput("NumInferChunks"),
-                   "Output(NumInferChunks) of ChunkEvalOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasOutput("NumLabelChunks"),
-                   "Output(NumLabelChunks) of ChunkEvalOp should not be null.");
-    PADDLE_ENFORCE(
-        ctx->HasOutput("NumCorrectChunks"),
+    PADDLE_ENFORCE_EQ(ctx->HasInput("Inference"), true,
+                      "Input(Inference) of ChunkEvalOp should not be null.");
+    PADDLE_ENFORCE_EQ(ctx->HasInput("Label"), true,
+                      "Input(Label) of ChunkEvalOp should not be null.");
+    PADDLE_ENFORCE_EQ(ctx->HasOutput("Precision"), true,
+                      "Output(Precision) of ChunkEvalOp should not be null.");
+    PADDLE_ENFORCE_EQ(ctx->HasOutput("Recall"), true,
+                      "Output(Recall) of ChunkEvalOp should not be null.");
+    PADDLE_ENFORCE_EQ(ctx->HasOutput("F1-Score"), true,
+                      "Output(F1-Score) of ChunkEvalOp should not be null.");
+    PADDLE_ENFORCE_EQ(
+        ctx->HasOutput("NumInferChunks"), true,
+        "Output(NumInferChunks) of ChunkEvalOp should not be null.");
+    PADDLE_ENFORCE_EQ(
+        ctx->HasOutput("NumLabelChunks"), true,
+        "Output(NumLabelChunks) of ChunkEvalOp should not be null.");
+    PADDLE_ENFORCE_EQ(
+        ctx->HasOutput("NumCorrectChunks"), true,
        "Output(NumCorrectChunks) of ChunkEvalOp should not be null.");

    auto inference_dim = ctx->GetInputDim("Inference");
    auto label_dim = ctx->GetInputDim("Label");

-    PADDLE_ENFORCE(inference_dim == label_dim,
-                   "Inference's shape must be the same as Label's shape.");
+    PADDLE_ENFORCE_EQ(
+        inference_dim, label_dim,
+        "Input(Inference)'s shape must be the same as Input(Label)'s shape.");

    bool use_padding = ctx->HasInput("SeqLength");
    if (use_padding) {
-      PADDLE_ENFORCE(inference_dim.size() == 3,
-                     "when SeqLength is provided, Inference should be of dim 3 "
-                     "(batch, bucket, 1)");
+      PADDLE_ENFORCE_EQ((inference_dim.size() == 3 && inference_dim[2] == 1) ||
+                            inference_dim.size() == 2,
+                        true,
+                        "when Input(SeqLength) is provided, Input(Inference) "
+                        "should be of dim 3 (batch_size, bucket, 1) or dim 2 "
+                        "(batch_size, bucket).");
      auto seq_length_dim = ctx->GetInputDim("SeqLength");
-      PADDLE_ENFORCE(seq_length_dim.size() == 1, "seq_length should be rank 1");
+      PADDLE_ENFORCE_LE(
+          seq_length_dim.size(), 2,
+          "Input(SeqLength)'s rank should not be greater than 2.");
    }

    ctx->SetOutputDim("Precision", {1});

--- a/paddle/fluid/operators/crf_decoding_op.cc
+++ b/paddle/fluid/operators/crf_decoding_op.cc
@@ -39,8 +39,7 @@ class CRFDecodingOpMaker : public framework::OpProtoAndCheckerMaker {
        "Label",
        "(Tensor<int64_t>/LoDTensor<int64_t>). The ground truth with shape "
        "[N x 1] (for LoDTensor) or [B x S] (for Tensor). This input is "
-        "optional. "
-        "See more details in the operator's comments.")
+        "optional. See more details in the operator's comments.")
        .AsDispensable();
    AddOutput(
        "ViterbiPath",
@@ -126,12 +125,24 @@ class CRFDecodingOp : public framework::OperatorWithKernel {
    }
    if (ctx->HasInput("Label")) {
      auto label_dims = ctx->GetInputDim("Label");
-      PADDLE_ENFORCE_EQ(label_dims.size(), 2UL,
-                        "The Input(Label) should be a 2-D tensor");
+      if (ctx->HasInput("Length")) {
+        PADDLE_ENFORCE_EQ(
+            (label_dims.size() == 3UL && label_dims[2] == 1) ||
+                label_dims.size() == 2UL,
+            true,
+            "The Input(Label) should be a 3-D tensor with last dimension "
+            "fixed to 1 or a 2-D tensor in padding mode.");
+      } else {
+        PADDLE_ENFORCE_EQ((label_dims.size() == 2UL && label_dims[1] == 1) ||
+                              label_dims.size() == 1UL,
+                          true,
+                          "The Input(Label) should be a 2-D tensor with last "
+                          "dimension fixed to 1 or a 1-D tensor.");
+      }
      if (ctx->IsRuntime() || (emission_dims[0] > 0 && label_dims[0] > 0)) {
        PADDLE_ENFORCE_EQ(
            emission_dims[0], label_dims[0],
-            "The height of Input(Emission) and the height of Input(Label) "
+            "The first dimension of Input(Emission) and Input(Label) "
            "should be the same.");
      }
    }

--- a/paddle/fluid/operators/crf_decoding_op.h
+++ b/paddle/fluid/operators/crf_decoding_op.h
@@ -46,23 +46,34 @@ class CRFDecodingOpKernel : public framework::OpKernel<T> {
      const int64_t* length_data = length->data<int64_t>();
      auto in_dims = emission_weights->dims();

-      auto& dev_ctx = ctx.template device_context<DeviceContext>();
-      framework::Tensor emission_weights_tmp =
-          ctx.AllocateTmpTensor<T, DeviceContext>(emission_weights->dims(),
-                                                  dev_ctx);
-      emission_weights_tmp.ShareDataWith(*emission_weights);
+      Tensor emission_weights_tmp = *emission_weights;
      emission_weights_tmp.Resize({in_dims[0] * in_dims[1], in_dims[2]});

      decoded_path->Resize({in_dims[0] * in_dims[1], 1});
      for (size_t i = 0; i < seq_num; ++i) {
        if (length_data[i] == 0) continue;
-        int start_pos = i * in_dims[1];
-        int end_pos = start_pos + static_cast<int>(length_data[i]);
+        int64_t start_pos = i * in_dims[1];
+        int64_t end_pos = start_pos + static_cast<int64_t>(length_data[i]);
        Tensor decoded_path_one_seq = decoded_path->Slice(start_pos, end_pos);
        Decode(emission_weights_tmp.Slice(start_pos, end_pos),
               *transition_weights, &decoded_path_one_seq);
      }
      decoded_path->Resize({in_dims[0], in_dims[1]});
+
+      if (label) {
+        const int64_t* label_value = label->data<int64_t>();
+        for (size_t i = 0; i < seq_num; ++i) {
+          for (int64_t j = 0; j < in_dims[1]; ++j) {
+            int64_t start_pos = i * in_dims[1];
+            if (j < length_data[i]) {
+              path[start_pos + j] =
+                  label_value[start_pos + j] == path[start_pos + j] ? 1 : 0;
+            } else {
+              path[start_pos + j] = 0;
+            }
+          }
+        }
+      }
    } else {
      PADDLE_ENFORCE_EQ(emission_weights->NumLevels(), 1UL,
                        "The Input(Emission) should be a sequence.");
@@ -73,22 +84,20 @@ class CRFDecodingOpKernel : public framework::OpKernel<T> {

      for (size_t i = 0; i < seq_num; ++i) {
        if (lod[level][i] == lod[level][i + 1]) continue;
-        int start_pos = static_cast<int>(lod[level][i]);
-        int end_pos = static_cast<int>(lod[level][i + 1]);
+        int64_t start_pos = static_cast<int64_t>(lod[level][i]);
+        int64_t end_pos = static_cast<int64_t>(lod[level][i + 1]);
        Tensor decoded_path_one_seq = decoded_path->Slice(start_pos, end_pos);
        Decode(emission_weights->Slice(start_pos, end_pos), *transition_weights,
               &decoded_path_one_seq);
      }
-    }
-    if (label) {
-      if (!has_length) {
+      if (label) {
        PADDLE_ENFORCE_EQ(label->NumLevels(), 1UL,
                          "The Input(Label) should be a sequence.");
-      }
-      const int64_t* label_value = label->data<int64_t>();
-      size_t numel = label->numel();
-      for (size_t i = 0; i < numel; ++i) {
-        path[i] = label_value[i] == path[i] ? 1 : 0;
+        const int64_t* label_value = label->data<int64_t>();
+        size_t numel = label->numel();
+        for (size_t i = 0; i < numel; ++i) {
+          path[i] = label_value[i] == path[i] ? 1 : 0;
+        }
      }
    }
  }

--- a/paddle/fluid/operators/linear_chain_crf_op.cc
+++ b/paddle/fluid/operators/linear_chain_crf_op.cc
@@ -22,13 +22,14 @@ namespace operators {
 class LinearChainCRFOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
-    AddInput("Emission",
-             "(LoDTensor/Tensor<float>). When a LoDTensor input,A 2-D LoDTensor"
-             " with shape [N x D], where N is the size of the "
-             "mini-batch and D is the total tag number. The unscaled emission "
-             "weight matrix for the linear chain CRF. When a Tensor input,"
-             "A Tensor with shape [N x S x D], where N is batch number,"
-             "S is max length of sequences, D is the total tag number.");
+    AddInput(
+        "Emission",
+        "(LoDTensor/Tensor<float>). When a LoDTensor input, A 2-D LoDTensor"
+        " with shape [N x D], where N is the size of the "
+        "mini-batch and D is the total tag number. The unscaled emission "
+        "weight matrix for the linear chain CRF. When a Tensor input,"
+        "A Tensor with shape [N x S x D], where N is batch size,"
+        "S is max length of sequences, D is the total tag number.");
    AddInput("Transition",
             "(Tensor, default Tensor<float>) A 2-D Tensor with shape "
             "[(D + 2) x D]. The learnable parameter for the linear_chain_crf "
@@ -38,7 +39,7 @@ class LinearChainCRFOpMaker : public framework::OpProtoAndCheckerMaker {
             "[N x 1], where N is the total element number in a mini-batch. "
             "when a Tensor input, [N x S], where N is batch number. "
             "S is max length of sequences. The ground truth.");
-    AddInput("length",
+    AddInput("Length",
             "(Tensor, default Tensor<int64_t>) A Tensor with shape "
             "[M x 1], where M is the sequence number in a mini-batch.")
        .AsDispensable();
@@ -169,12 +170,16 @@ class LinearChainCRFOp : public framework::OperatorWithKernel {
    auto emission_dims = ctx->GetInputDim("Emission");
    PADDLE_ENFORCE_NE(emission_dims[0], 0,
                      "An empty mini-batch is not allowed.");
-    if (ctx->HasInput("length")) {
+    if (ctx->HasInput("Length")) {
      PADDLE_ENFORCE_EQ(emission_dims.size(), 3,
                        "The Input(Emission) should be a 3-D tensor.");
      auto label_dims = ctx->GetInputDim("Label");
-      PADDLE_ENFORCE_EQ(label_dims.size(), 3,
-                        "The Input(Label) should be a 3-D tensor");
+      PADDLE_ENFORCE_EQ(
+          (label_dims.size() == 3UL && label_dims[2] == 1) ||
+              (label_dims.size() == 2UL),
+          true,
+          "The Input(Label) should be a 3-D tensor with last "
+          "dimension fixed to 1 or a 2-D tensor in padding mode.");
      PADDLE_INFERSHAPE_ENFORCE_EQ(
          ctx, emission_dims[0], label_dims[0],
          "The batch size of Input(Emission) and Input(Label) "
@@ -249,7 +254,7 @@ class LinearChainCRFGradOp : public framework::OperatorWithKernel {

    auto emission_exps_dims = ctx->GetInputDim("EmissionExps");
    auto label_dims = ctx->GetInputDim("Label");
-    if (ctx->HasInput("length")) {
+    if (ctx->HasInput("Length")) {
      PADDLE_ENFORCE_EQ(emission_exps_dims.size(), 3,
                        "The Input(EmissionExps) should be a 3-D tensor.");
      PADDLE_INFERSHAPE_ENFORCE_EQ(
@@ -281,7 +286,7 @@ class LinearChainCRFGradOp : public framework::OperatorWithKernel {

    if (ctx->HasOutput(framework::GradVarName("Emission"))) {
      ctx->SetOutputDim(framework::GradVarName("Emission"), emission_exps_dims);
-      if (ctx->HasInput("length") == false) {
+      if (ctx->HasInput("Length") == false) {
        ctx->ShareLoD("Emission", framework::GradVarName("Emission"));
      }
    }
@@ -320,8 +325,8 @@ class LinearChainCRFGradDescMaker : public framework::SingleGradOpDescMaker {
    op->SetInput("Alpha", Output("Alpha"));
    op->SetInput("EmissionExps", Output("EmissionExps"));
    op->SetInput("TransitionExps", Output("TransitionExps"));
-    if (ForwardOp().Inputs().count("length") > 0) {
-      op->SetInput("length", Input("length"));
+    if (ForwardOp().Inputs().count("Length") > 0) {
+      op->SetInput("Length", Input("Length"));
    }
    op->SetInput(framework::GradVarName("LogLikelihood"),
                 OutputGrad("LogLikelihood"));

--- a/paddle/fluid/operators/linear_chain_crf_op.h
+++ b/paddle/fluid/operators/linear_chain_crf_op.h
@@ -65,62 +65,51 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> {

    // Because the computation codes only runs on CPU, here the memory for all
    // the outputs is FIXED to be allocated on the CPU memory.
-    auto* emission_exps_data =
-        emission_exps->mutable_data<T>(platform::CPUPlace());
-    auto* alpha_data = alpha->mutable_data<T>(platform::CPUPlace());
+    emission_exps->mutable_data<T>(platform::CPUPlace());
+    alpha->mutable_data<T>(platform::CPUPlace());
    transition_exps->mutable_data<T>(platform::CPUPlace());
-    // Resize the output tensor to its correct dimension.
-    memset(emission_exps_data, 0, emission_exps->numel() * sizeof(T));
-    memset(alpha_data, 0, alpha->numel() * sizeof(T));
    auto emission_dims = emission_weights->dims();

    const Tensor* label = ctx.Input<framework::Tensor>("Label");
-    auto& dev_ctx = ctx.template device_context<DeviceContext>();
-    Tensor emission_weights_tmp = ctx.AllocateTmpTensor<T, DeviceContext>(
-        emission_weights->dims(), dev_ctx);
-    emission_weights_tmp.ShareDataWith(*emission_weights);
-    Tensor label_tmp =
-        ctx.AllocateTmpTensor<T, DeviceContext>(label->dims(), dev_ctx);
-    label_tmp.ShareDataWith(*label);
-    Tensor emission_exps_tmp =
-        ctx.AllocateTmpTensor<T, DeviceContext>(emission_exps->dims(), dev_ctx);
-    emission_exps_tmp.ShareDataWith(*emission_exps);
-    Tensor alpha_tmp =
-        ctx.AllocateTmpTensor<T, DeviceContext>(alpha->dims(), dev_ctx);
-    alpha_tmp.ShareDataWith(*alpha);
-    size_t seq_num = 0;
-    size_t batch_size;
-    size_t tag_num;
+    Tensor emission_weights_tmp = *emission_weights;
+    Tensor label_tmp = *label;
+    Tensor emission_exps_tmp = *emission_exps;
+    Tensor alpha_tmp = *alpha;
+    int64_t seq_num = 0;
+    int64_t batch_size;
+    int64_t tag_num;
    const int64_t* length_data = nullptr;
-    framework::Vector<size_t> in_lod;
-    if (ctx.HasInput("length")) {
-      const Tensor* label_length = ctx.Input<framework::Tensor>("length");
+    framework::LoD in_lod;
+    if (ctx.HasInput("Length")) {
+      const Tensor* label_length = ctx.Input<framework::Tensor>("Length");
      length_data = label_length->data<int64_t>();
      seq_num = label_length->numel();
-      batch_size = emission_dims[0] * emission_dims[1];
-      tag_num = emission_dims[2];
-      emission_weights_tmp.Resize(
-          {emission_dims[0] * emission_dims[1], emission_dims[2]});
-      auto label_dims = label->dims();
-      label_tmp.Resize({label_dims[0] * label_dims[1], label_dims[2]});
-      alpha_tmp.Resize({emission_dims[0] * emission_dims[1], emission_dims[2]});
-      emission_exps_tmp.Resize(
-          {emission_dims[0] * emission_dims[1], emission_dims[2]});
      PADDLE_ENFORCE_EQ(seq_num, emission_dims[0],
                        "the size of Input(length) must be equal to "
                        "emission_dims[0].");
+      auto label_dims = label->dims();
      PADDLE_ENFORCE_EQ(seq_num, label_dims[0],
                        "the size of Input(length) must be equal to "
                        "label_dims[0].");
+
+      batch_size = emission_dims[0] * emission_dims[1];
+      tag_num = emission_dims[2];
+      emission_weights_tmp.Resize({batch_size, tag_num});
+      label_tmp.Resize({batch_size, 1});
+      alpha_tmp.Resize({batch_size, tag_num});
+      emission_exps_tmp.Resize({batch_size, tag_num});
+      math::set_constant(ctx.device_context(), emission_exps, 0.0);
+      math::set_constant(ctx.device_context(), alpha, 0.0);
    } else {
-      seq_num = ctx.Input<LoDTensor>("Label")->lod()[0].size() - 1;
+      in_lod = ctx.Input<LoDTensor>("Label")->lod();
+      PADDLE_ENFORCE_NE(in_lod.size(), 0, "Input(Label) must be a sequence.");
+      seq_num = in_lod[0].size() - 1;
      batch_size = emission_dims[0];
      tag_num = emission_dims[1];
-      in_lod = ctx.Input<LoDTensor>("Label")->lod()[0];
-      PADDLE_ENFORCE_NE(in_lod.size(), 0, "Input(Label) must be a sequence.");
    }

-    ll->Resize({static_cast<int>(seq_num), 1});
+    // Resize the output tensor to its correct dimension.
+    ll->Resize({seq_num, 1});
    ll->mutable_data<T>(platform::CPUPlace());
    // Now, all the inputs and outputs should be on the CPU memory.
    Tensor emission_row_max;
@@ -141,16 +130,15 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> {
    auto w_exps = EigenMatrix<T>::From(*transition_exps);
    w_exps.device(place) = w.exp();
    T* log_likelihood = ll->data<T>();
-    for (size_t i = 0; i < seq_num; ++i) {
-      int start_pos = 0;
-      int end_pos = 0;
-      if (ctx.HasInput("length")) {
-        if (length_data[i] == 0) continue;
+    for (int64_t i = 0; i < seq_num; ++i) {
+      int64_t start_pos = 0;
+      int64_t end_pos = 0;
+      if (ctx.HasInput("Length")) {
        start_pos = i * emission_dims[1];
-        end_pos = start_pos + static_cast<int>(length_data[i]);
+        end_pos = start_pos + length_data[i];
      } else {
-        start_pos = static_cast<int>(in_lod[i]);
-        end_pos = static_cast<int>(in_lod[i + 1]);
+        start_pos = static_cast<int64_t>(in_lod[0][i]);
+        end_pos = static_cast<int64_t>(in_lod[0][i + 1]);
      }
      if (end_pos == start_pos) {
        // If an empty input sequence is given, pad 0 for its cost.
@@ -239,44 +227,35 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel<T> {
    const Tensor* alpha = ctx.Input<Tensor>("Alpha");
    const T* ll_grad =
        ctx.Input<Tensor>(framework::GradVarName("LogLikelihood"))->data<T>();
-    auto& dev_ctx = ctx.template device_context<DeviceContext>();
    Tensor* emission_grad =
        ctx.Output<Tensor>(framework::GradVarName("Emission"));
    auto* emission_grad_data =
        emission_grad->mutable_data<T>(platform::CPUPlace());
    memset(emission_grad_data, 0, emission_grad->numel() * sizeof(T));
-    Tensor alpha_tmp =
-        ctx.AllocateTmpTensor<T, DeviceContext>(alpha->dims(), dev_ctx);
-    alpha_tmp.ShareDataWith(*alpha);
-    Tensor label_tmp =
-        ctx.AllocateTmpTensor<T, DeviceContext>(label->dims(), dev_ctx);
-    label_tmp.ShareDataWith(*label);
-    Tensor emission_exps_tmp =
-        ctx.AllocateTmpTensor<T, DeviceContext>(emission_exps->dims(), dev_ctx);
-    emission_exps_tmp.ShareDataWith(*emission_exps);
-    Tensor emission_grad_tmp =
-        ctx.AllocateTmpTensor<T, DeviceContext>(emission_grad->dims(), dev_ctx);
-    emission_grad_tmp.ShareDataWith(*emission_grad);
+    Tensor alpha_tmp = *alpha;
+    Tensor label_tmp = *label;
+    Tensor emission_exps_tmp = *emission_exps;
+    Tensor emission_grad_tmp = *emission_grad;
    // getting seq_num  using padding or not
-    size_t seq_num = 0;
-    framework::Vector<size_t> lod;
+    int64_t seq_num = 0;
+    framework::LoD in_lod;
    const int64_t* length_data = nullptr;
-    if (ctx.HasInput("length")) {
-      const Tensor* label_length = ctx.Input<framework::Tensor>("length");
+    if (ctx.HasInput("Length")) {
+      const Tensor* label_length = ctx.Input<framework::Tensor>("Length");
      length_data = label_length->data<int64_t>();
      seq_num = label_length->numel();
      auto emission_dims = emission_grad->dims();
      auto label_dims = label->dims();
      emission_grad_tmp.Resize(
          {emission_dims[0] * emission_dims[1], emission_dims[2]});
-      label_tmp.Resize({label_dims[0] * label_dims[1], label_dims[2]});
+      label_tmp.Resize({label_dims[0] * label_dims[1], 1});
      alpha_tmp.Resize({emission_dims[0] * emission_dims[1], emission_dims[2]});
      emission_exps_tmp.Resize(
          {emission_dims[0] * emission_dims[1], emission_dims[2]});
    } else {
-      seq_num = ctx.Input<LoDTensor>("Label")->lod()[0].size() - 1;
-      lod = ctx.Input<LoDTensor>("Label")->lod()[0];
-      PADDLE_ENFORCE_NE(lod.size(), 0, "Input(Label) must be a sequence.");
+      in_lod = ctx.Input<LoDTensor>("Label")->lod();
+      PADDLE_ENFORCE_NE(in_lod.size(), 0, "Input(Label) must be a sequence.");
+      seq_num = static_cast<int64_t>(in_lod[0].size() - 1);
    }

    Tensor* transition_grad =
@@ -295,21 +274,24 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel<T> {
    // captures the unnormalized probabilities of partial sequences starting
    // at position i.
    Tensor beta;
-    auto* beta_data = beta.mutable_data<T>(emission_dims, platform::CPUPlace());
-    memset(beta_data, 0, beta.numel() * sizeof(T));
-    if (ctx.HasInput("length")) {
+    beta.mutable_data<T>(emission_dims, platform::CPUPlace());
+    if (ctx.HasInput("Length")) {
      beta.Resize({emission_dims[0] * emission_dims[1], emission_dims[2]});
    }
-    for (size_t i = 0; i < seq_num; ++i) {
-      int start_pos = 0;
-      int end_pos = 0;
-      if (ctx.HasInput("length")) {
-        if (length_data[i] == 0) continue;
+
+    for (int64_t i = 0; i < seq_num; ++i) {
+      int64_t start_pos = 0;
+      int64_t end_pos = 0;
+      if (ctx.HasInput("Length")) {
        start_pos = i * emission_dims[1];
-        end_pos = start_pos + static_cast<int>(length_data[i]);
+        end_pos = start_pos + length_data[i];
      } else {
-        start_pos = static_cast<int>(lod[i]);
-        end_pos = static_cast<int>(lod[i + 1]);
+        start_pos = static_cast<int64_t>(in_lod[0][i]);
+        end_pos = static_cast<int64_t>(in_lod[0][i + 1]);
+      }
+
+      if (end_pos == start_pos) {
+        continue;
      }
      const Tensor one_seq_emission_exps =
          emission_exps_tmp.Slice(start_pos, end_pos);

--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -1491,7 +1491,7 @@ def linear_chain_crf(input, label, param_attr=None, length=None):
            print(transition)
    """
    helper = LayerHelper('linear_chain_crf', **locals())
-    size = input.shape[1]
+    size = input.shape[2] if length else input.shape[1]
    transition = helper.create_parameter(
        attr=helper.param_attr,
        shape=[size + 2, size],
@@ -1510,7 +1510,7 @@ def linear_chain_crf(input, label, param_attr=None, length=None):
        "Label": [label]
    }
    if length:
-        this_inputs['length'] = [length]
+        this_inputs['Length'] = [length]
    helper.append_op(
        type='linear_chain_crf',
        inputs=this_inputs,
@@ -1525,7 +1525,7 @@ def linear_chain_crf(input, label, param_attr=None, length=None):


 @templatedoc()
-def crf_decoding(input, param_attr, label=None):
+def crf_decoding(input, param_attr, label=None, length=None):
    """
    ${comment}

@@ -1535,6 +1535,8 @@ def crf_decoding(input, param_attr, label=None):
        param_attr(ParamAttr): The parameter attribute for training.

        label(${label_type}): ${label_comment}
+        
+        label(${length_type}): ${length_comment}

    Returns:
        Variable: ${viterbi_path_comment}
@@ -1543,23 +1545,41 @@ def crf_decoding(input, param_attr, label=None):
        .. code-block:: python

           import paddle.fluid as fluid
-           images = fluid.layers.data(name='pixel', shape=[784], dtype='float32')
-           label = fluid.layers.data(name='label', shape=[1], dtype='int32')
-           hidden = fluid.layers.fc(input=images, size=2)
-           crf = fluid.layers.linear_chain_crf(input=hidden, label=label, 
+
+           # LoDTensor-based example
+           num_labels = 10
+           feature = fluid.layers.data(name='word_emb', shape=[784], dtype='float32', lod_level=1)
+           label = fluid.layers.data(name='label', shape=[1], dtype='int64', lod_level=1)
+           emission = fluid.layers.fc(input=feature, size=num_labels)
+           
+           crf_cost = fluid.layers.linear_chain_crf(input=emission, label=label, 
                     param_attr=fluid.ParamAttr(name="crfw"))
-           crf_decode = fluid.layers.crf_decoding(input=hidden, 
+           crf_decode = fluid.layers.crf_decoding(input=emission, 
                     param_attr=fluid.ParamAttr(name="crfw"))
+
+           # Common tensor example
+           num_labels, max_len = 10, 20
+           feature = fluid.layers.data(name='word_emb_pad', shape=[max_len, 784], dtype='float32')
+           label = fluid.layers.data(name='label_pad', shape=[max_len, 1], dtype='int64')
+           length = fluid.layers.data(name='length', shape=[1], dtype='int64')
+           emission = fluid.layers.fc(input=feature, size=num_labels,
+                                      num_flatten_dims=2)
+           
+           crf_cost = fluid.layers.linear_chain_crf(input=emission, label=label, length=length, 
+                     param_attr=fluid.ParamAttr(name="crfw_pad"))
+           crf_decode = fluid.layers.crf_decoding(input=emission, length=length,
+                     param_attr=fluid.ParamAttr(name="crfw_pad"))
    """
    helper = LayerHelper('crf_decoding', **locals())
    transition = helper.get_parameter(param_attr.name)
    viterbi_path = helper.create_variable_for_type_inference(
        dtype=helper.input_dtype())
+    inputs = {"Emission": [input], "Transition": transition, "Label": label}
+    if length:
+        inputs['Length'] = length
    helper.append_op(
        type='crf_decoding',
-        inputs={"Emission": [input],
-                "Transition": transition,
-                "Label": label},
+        inputs=inputs,
        outputs={"ViterbiPath": [viterbi_path]})

    return viterbi_path

--- a/python/paddle/fluid/tests/unittests/test_crf_decoding_op.py
+++ b/python/paddle/fluid/tests/unittests/test_crf_decoding_op.py
@@ -176,22 +176,23 @@ class TestCRFDecodingOp4(TestCRFDecodingOp2):
        self.lod = [[0, 2, 3, 0]]


+def seq_pad(data, length):
+    max_len = np.max(length)
+    shape = [len(length), max_len] + list(data.shape[1:])
+    padded = np.zeros(shape).astype(data.dtype)
+    offset = 0
+    for i, l in enumerate(length):
+        padded[i, 0:l] = data[offset:offset + l]
+        offset += l
+    return np.squeeze(padded)
+
+
 class TestCRFDecodingOp5(OpTest):
    """
    Compare the dynamic program with random generated parameters and inputs
    with grouth truth not being given.
    """

-    def seq_pad(self, data, length):
-        max_len = np.max(length)
-        shape = [len(length), max_len] + list(data.shape[1:])
-        padded = np.zeros(shape).astype(data.dtype)
-        offset = 0
-        for i, l in enumerate(length):
-            padded[i, 0:l] = data[offset:offset + l]
-            offset += l
-        return np.squeeze(padded)
-
    def set_test_data(self):
        SEQ_NUM = 3
        TAG_NUM = 17
@@ -208,7 +209,7 @@ class TestCRFDecodingOp5(OpTest):
                                       [TAG_NUM + 2, TAG_NUM]).astype("float64")

        self.inputs = {
-            "Emission": self.seq_pad(emission, lod[0]),
+            "Emission": seq_pad(emission, lod[0]),
            "Transition": transition,
            "Length": np.array(lod).astype('int64'),
        }
@@ -216,7 +217,7 @@ class TestCRFDecodingOp5(OpTest):
        decoder = CRFDecoding(emission, transition, lod[0])
        decoded_path = decoder.decode()

-        self.outputs = {"ViterbiPath": self.seq_pad(decoded_path, lod[0])}
+        self.outputs = {"ViterbiPath": seq_pad(decoded_path, lod[0])}

    def setUp(self):
        self.op_type = "crf_decoding"
@@ -226,5 +227,45 @@ class TestCRFDecodingOp5(OpTest):
        self.check_output()


+class TestCRFDecodingOp6(OpTest):
+    """crf_decoding on padded input when both Label and Length are fed.
+
+    Every row of Emission and Transition is [0, 1, ..., TAG_NUM - 1]; the
+    test expects the decoded tag to be TAG_NUM - 1 at every step, so
+    ViterbiPath is 1 where the random label equals TAG_NUM - 1, else 0.
+    """
+
+    def init_lod(self):
+        # One level of sequence lengths: four sequences, ten steps in total.
+        self.lod = [[1, 2, 3, 4]]
+
+    def setUp(self):
+        self.op_type = "crf_decoding"
+        TAG_NUM = 5
+        self.init_lod()
+        seq_lens = self.lod[0]
+        total_len = sum(self.lod[-1])
+
+        tag_row = np.arange(TAG_NUM, dtype="float64").reshape(1, TAG_NUM)
+        transition = np.repeat(tag_row, TAG_NUM + 2, axis=0)
+        emission = np.repeat(tag_row, total_len, axis=0)
+
+        labels = np.random.randint(
+            low=0, high=TAG_NUM, size=(total_len, 1), dtype="int64")
+        best_tag = np.full((total_len, 1), TAG_NUM - 1, dtype="int64")
+        hit_mask = (labels == best_tag).astype("int64")
+
+        self.inputs = {
+            "Emission": seq_pad(emission, seq_lens),
+            "Transition": transition,
+            "Label": seq_pad(labels, seq_lens),
+            "Length": np.array(self.lod).astype('int64'),
+        }
+        self.outputs = {"ViterbiPath": seq_pad(hit_mask, seq_lens)}
+
+    def test_check_output(self):
+        self.check_output()
+
+
 if __name__ == "__main__":
    unittest.main()
--- a/python/paddle/fluid/tests/unittests/test_layers.py
+++ b/python/paddle/fluid/tests/unittests/test_layers.py
@@ -2556,21 +2556,46 @@ class TestBook(LayerTest):
                    input=fc_out, size=4 * hidden_dim, proj_size=proj_dim))

    def test_linear_chain_crf(self):
-        # TODO(minqiyang): dygraph do not support lod now
        with self.static_graph():
            label_dict_len = 10
-            images = layers.data(name='pixel', shape=[784], dtype='float32')
-            label = layers.data(name='label', shape=[1], dtype='int32')
-            hidden = layers.fc(input=images, size=2)
+            feature = layers.data(name='feature', shape=[784], dtype='float32')
+            label = layers.data(name='label', shape=[1], dtype='int64')
+            emission = layers.fc(input=feature, size=10)
            crf = layers.linear_chain_crf(
-                input=hidden, label=label, param_attr=ParamAttr(name="crfw"))
+                input=emission, label=label, param_attr=ParamAttr(name="crfw"))
+            crf_decode = layers.crf_decoding(
+                input=emission, param_attr=ParamAttr(name="crfw"))
+            self.assertFalse(crf is None)
+            self.assertFalse(crf_decode is None)
+            return layers.chunk_eval(
+                input=crf_decode,
+                label=label,
+                chunk_scheme="IOB",
+                num_chunk_types=(label_dict_len - 1) // 2)
+
+    def test_linear_chain_crf_padding(self):
+        with self.static_graph():
+            label_dict_len, max_len = 10, 20
+            feature = layers.data(
+                name='feature', shape=[max_len, 784], dtype='float32')
+            label = layers.data(name='label', shape=[max_len], dtype='int64')
+            length = layers.data(name='length', shape=[1], dtype='int64')
+            emission = layers.fc(input=feature, size=10, num_flatten_dims=2)
+            crf = layers.linear_chain_crf(
+                input=emission,
+                label=label,
+                length=length,
+                param_attr=ParamAttr(name="crfw"))
            crf_decode = layers.crf_decoding(
-                input=hidden, param_attr=ParamAttr(name="crfw"))
+                input=emission,
+                length=length,
+                param_attr=ParamAttr(name="crfw"))
            self.assertFalse(crf is None)
            self.assertFalse(crf_decode is None)
            return layers.chunk_eval(
                input=crf_decode,
                label=label,
+                seq_length=length,
                chunk_scheme="IOB",
                num_chunk_types=(label_dict_len - 1) // 2)


--- a/python/paddle/fluid/tests/unittests/test_linear_chain_crf_op.py
+++ b/python/paddle/fluid/tests/unittests/test_linear_chain_crf_op.py
@@ -205,7 +205,7 @@ class TestLinearChainCrfPaddingTensor(OpTest):
            "Emission": self.seq_pad(emission, lod[0]),
            "Transition": transition,
            "Label": self.seq_pad(labels, lod[0]),
-            "length": np.array(lod).astype("int64")
+            "Length": np.array(lod).astype("int64")
        }
        crf = LinearChainCrfForward(seq_start_pos, emission, emission_row_max,
                                    emission_exps, transition, transition_exps,