From 01ad8d2e069bdd149d3bcbb07e73ab784374a501 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Mon, 7 Oct 2019 10:51:32 +0800 Subject: [PATCH] Refactor linear chain crf op & crf decoding op (#19982) * Update crf_decoding api & example test=develop * Update api spec test=develop * Fix linear chain crf api test=develop * Avoid sharing data pointer with input test=develop * Simplify the logic in linear_chain_crf_decoding * Add unittest for crf_decoding when label & path both are set test=develop * Update API spec test=develop * Add unittest for layers && correct infer_shape in chunk_eval test=develop --- paddle/fluid/API.spec | 4 +- paddle/fluid/operators/chunk_eval_op.cc | 52 ++++--- paddle/fluid/operators/crf_decoding_op.cc | 21 ++- paddle/fluid/operators/crf_decoding_op.h | 43 +++--- paddle/fluid/operators/linear_chain_crf_op.cc | 35 +++-- paddle/fluid/operators/linear_chain_crf_op.h | 138 ++++++++---------- python/paddle/fluid/layers/nn.py | 42 ++++-- .../tests/unittests/test_crf_decoding_op.py | 65 +++++++-- .../fluid/tests/unittests/test_layers.py | 37 ++++- .../unittests/test_linear_chain_crf_op.py | 2 +- 10 files changed, 270 insertions(+), 169 deletions(-) mode change 100755 => 100644 paddle/fluid/operators/linear_chain_crf_op.h diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 05b73730a23..26616f77201 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -132,8 +132,8 @@ paddle.fluid.layers.dynamic_lstm (ArgSpec(args=['input', 'size', 'h_0', 'c_0', ' paddle.fluid.layers.dynamic_lstmp (ArgSpec(args=['input', 'size', 'proj_size', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'proj_activation', 'dtype', 'name', 'h_0', 'c_0', 'cell_clip', 'proj_clip'], varargs=None, keywords=None, defaults=(None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'tanh', 'float32', None, None, None, None, None)), ('document', 'c37d51aad655c8a9f9b045c64717320a')) 
paddle.fluid.layers.dynamic_gru (ArgSpec(args=['input', 'size', 'param_attr', 'bias_attr', 'is_reverse', 'gate_activation', 'candidate_activation', 'h_0', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, False, 'sigmoid', 'tanh', None, False)), ('document', '83617c165827e030636c80486d5de6f3')) paddle.fluid.layers.gru_unit (ArgSpec(args=['input', 'hidden', 'size', 'param_attr', 'bias_attr', 'activation', 'gate_activation', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, 'tanh', 'sigmoid', False)), ('document', '33974b9bfa69f2f1eb85e6f956dff04e')) -paddle.fluid.layers.linear_chain_crf (ArgSpec(args=['input', 'label', 'param_attr', 'length'], varargs=None, keywords=None, defaults=(None, None)), ('document', '9045b8971e4232132ec9952695f4c3ae')) -paddle.fluid.layers.crf_decoding (ArgSpec(args=['input', 'param_attr', 'label'], varargs=None, keywords=None, defaults=(None,)), ('document', '5ce117258e243be1c81539e254178d90')) +paddle.fluid.layers.linear_chain_crf (ArgSpec(args=['input', 'label', 'param_attr', 'length'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'bc7a0fd2bb2b35dfd2f54947320e78fa')) +paddle.fluid.layers.crf_decoding (ArgSpec(args=['input', 'param_attr', 'label', 'length'], varargs=None, keywords=None, defaults=(None, None)), ('document', '933b7e268c4ffa3d5c3ef953a5ee9f0b')) paddle.fluid.layers.cos_sim (ArgSpec(args=['X', 'Y'], varargs=None, keywords=None, defaults=None), ('document', '8e6ce424cf9e261ef32ee229c06a6e66')) paddle.fluid.layers.cross_entropy (ArgSpec(args=['input', 'label', 'soft_label', 'ignore_index'], varargs=None, keywords=None, defaults=(False, -100)), ('document', '789a141e97fd0b37241f630935936d08')) paddle.fluid.layers.bpr_loss (ArgSpec(args=['input', 'label', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6263dfdeb6c670fa0922c9cbc8fb1bf4')) diff --git a/paddle/fluid/operators/chunk_eval_op.cc b/paddle/fluid/operators/chunk_eval_op.cc index 
21dfaf912a1..2987deda54e 100644 --- a/paddle/fluid/operators/chunk_eval_op.cc +++ b/paddle/fluid/operators/chunk_eval_op.cc @@ -24,37 +24,45 @@ class ChunkEvalOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext *ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("Inference"), - "Input(Inference) of ChunkEvalOp should not be null."); - PADDLE_ENFORCE(ctx->HasInput("Label"), - "Input(Label) of ChunkEvalOp should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("Precision"), - "Output(Precision) of ChunkEvalOp should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("Recall"), - "Output(Recall) of ChunkEvalOp should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("F1-Score"), - "Output(F1-Score) of ChunkEvalOp should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("NumInferChunks"), - "Output(NumInferChunks) of ChunkEvalOp should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("NumLabelChunks"), - "Output(NumLabelChunks) of ChunkEvalOp should not be null."); - PADDLE_ENFORCE( - ctx->HasOutput("NumCorrectChunks"), + PADDLE_ENFORCE_EQ(ctx->HasInput("Inference"), true, + "Input(Inference) of ChunkEvalOp should not be null."); + PADDLE_ENFORCE_EQ(ctx->HasInput("Label"), true, + "Input(Label) of ChunkEvalOp should not be null."); + PADDLE_ENFORCE_EQ(ctx->HasOutput("Precision"), true, + "Output(Precision) of ChunkEvalOp should not be null."); + PADDLE_ENFORCE_EQ(ctx->HasOutput("Recall"), true, + "Output(Recall) of ChunkEvalOp should not be null."); + PADDLE_ENFORCE_EQ(ctx->HasOutput("F1-Score"), true, + "Output(F1-Score) of ChunkEvalOp should not be null."); + PADDLE_ENFORCE_EQ( + ctx->HasOutput("NumInferChunks"), true, + "Output(NumInferChunks) of ChunkEvalOp should not be null."); + PADDLE_ENFORCE_EQ( + ctx->HasOutput("NumLabelChunks"), true, + "Output(NumLabelChunks) of ChunkEvalOp should not be null."); + PADDLE_ENFORCE_EQ( + ctx->HasOutput("NumCorrectChunks"), true, 
"Output(NumCorrectChunks) of ChunkEvalOp should not be null."); auto inference_dim = ctx->GetInputDim("Inference"); auto label_dim = ctx->GetInputDim("Label"); - PADDLE_ENFORCE(inference_dim == label_dim, - "Inference's shape must be the same as Label's shape."); + PADDLE_ENFORCE_EQ( + inference_dim, label_dim, + "Input(Inference)'s shape must be the same as Input(Label)'s shape."); bool use_padding = ctx->HasInput("SeqLength"); if (use_padding) { - PADDLE_ENFORCE(inference_dim.size() == 3, - "when SeqLength is provided, Inference should be of dim 3 " - "(batch, bucket, 1)"); + PADDLE_ENFORCE_EQ((inference_dim.size() == 3 && inference_dim[2] == 1) || + inference_dim.size() == 2, + true, + "when Input(SeqLength) is provided, Input(Inference) " + "should be of dim 3 (batch_size, bucket, 1) or dim 2 " + "(batch_size, bucket)."); auto seq_length_dim = ctx->GetInputDim("SeqLength"); - PADDLE_ENFORCE(seq_length_dim.size() == 1, "seq_length should be rank 1"); + PADDLE_ENFORCE_LE( + seq_length_dim.size(), 2, + "Input(SeqLength)'s rank should not be greater than 2."); } ctx->SetOutputDim("Precision", {1}); diff --git a/paddle/fluid/operators/crf_decoding_op.cc b/paddle/fluid/operators/crf_decoding_op.cc index 2de714e0d46..671623dcee6 100644 --- a/paddle/fluid/operators/crf_decoding_op.cc +++ b/paddle/fluid/operators/crf_decoding_op.cc @@ -39,8 +39,7 @@ class CRFDecodingOpMaker : public framework::OpProtoAndCheckerMaker { "Label", "(Tensor/LoDTensor). The ground truth with shape " "[N x 1] (for LoDTensor) or [B x S] (for Tensor). This input is " - "optional. " - "See more details in the operator's comments.") + "optional. 
See more details in the operator's comments.") .AsDispensable(); AddOutput( "ViterbiPath", @@ -126,12 +125,24 @@ class CRFDecodingOp : public framework::OperatorWithKernel { } if (ctx->HasInput("Label")) { auto label_dims = ctx->GetInputDim("Label"); - PADDLE_ENFORCE_EQ(label_dims.size(), 2UL, - "The Input(Label) should be a 2-D tensor"); + if (ctx->HasInput("Length")) { + PADDLE_ENFORCE_EQ( + (label_dims.size() == 3UL && label_dims[2] == 1) || + label_dims.size() == 2UL, + true, + "The Input(Label) should be a 3-D tensor with last dimension " + "fixed to 1 or a 2-D tensor in padding mode."); + } else { + PADDLE_ENFORCE_EQ((label_dims.size() == 2UL && label_dims[1] == 1) || + label_dims.size() == 1UL, + true, + "The Input(Label) should be a 2-D tensor with last " + "dimension fixed to 1 or a 1-D tensor."); + } if (ctx->IsRuntime() || (emission_dims[0] > 0 && label_dims[0] > 0)) { PADDLE_ENFORCE_EQ( emission_dims[0], label_dims[0], - "The height of Input(Emission) and the height of Input(Label) " + "The first dimension of Input(Emission) and Input(Label) " "should be the same."); } } diff --git a/paddle/fluid/operators/crf_decoding_op.h b/paddle/fluid/operators/crf_decoding_op.h index 74b9cb20a9d..eb868602ff4 100644 --- a/paddle/fluid/operators/crf_decoding_op.h +++ b/paddle/fluid/operators/crf_decoding_op.h @@ -46,23 +46,34 @@ class CRFDecodingOpKernel : public framework::OpKernel { const int64_t* length_data = length->data(); auto in_dims = emission_weights->dims(); - auto& dev_ctx = ctx.template device_context(); - framework::Tensor emission_weights_tmp = - ctx.AllocateTmpTensor(emission_weights->dims(), - dev_ctx); - emission_weights_tmp.ShareDataWith(*emission_weights); + Tensor emission_weights_tmp = *emission_weights; emission_weights_tmp.Resize({in_dims[0] * in_dims[1], in_dims[2]}); decoded_path->Resize({in_dims[0] * in_dims[1], 1}); for (size_t i = 0; i < seq_num; ++i) { if (length_data[i] == 0) continue; - int start_pos = i * in_dims[1]; - int end_pos = 
start_pos + static_cast(length_data[i]); + int64_t start_pos = i * in_dims[1]; + int64_t end_pos = start_pos + static_cast(length_data[i]); Tensor decoded_path_one_seq = decoded_path->Slice(start_pos, end_pos); Decode(emission_weights_tmp.Slice(start_pos, end_pos), *transition_weights, &decoded_path_one_seq); } decoded_path->Resize({in_dims[0], in_dims[1]}); + + if (label) { + const int64_t* label_value = label->data(); + for (size_t i = 0; i < seq_num; ++i) { + for (int64_t j = 0; j < in_dims[1]; ++j) { + int64_t start_pos = i * in_dims[1]; + if (j < length_data[i]) { + path[start_pos + j] = + label_value[start_pos + j] == path[start_pos + j] ? 1 : 0; + } else { + path[start_pos + j] = 0; + } + } + } + } } else { PADDLE_ENFORCE_EQ(emission_weights->NumLevels(), 1UL, "The Input(Emission) should be a sequence."); @@ -73,22 +84,20 @@ class CRFDecodingOpKernel : public framework::OpKernel { for (size_t i = 0; i < seq_num; ++i) { if (lod[level][i] == lod[level][i + 1]) continue; - int start_pos = static_cast(lod[level][i]); - int end_pos = static_cast(lod[level][i + 1]); + int64_t start_pos = static_cast(lod[level][i]); + int64_t end_pos = static_cast(lod[level][i + 1]); Tensor decoded_path_one_seq = decoded_path->Slice(start_pos, end_pos); Decode(emission_weights->Slice(start_pos, end_pos), *transition_weights, &decoded_path_one_seq); } - } - if (label) { - if (!has_length) { + if (label) { PADDLE_ENFORCE_EQ(label->NumLevels(), 1UL, "The Input(Label) should be a sequence."); - } - const int64_t* label_value = label->data(); - size_t numel = label->numel(); - for (size_t i = 0; i < numel; ++i) { - path[i] = label_value[i] == path[i] ? 1 : 0; + const int64_t* label_value = label->data(); + size_t numel = label->numel(); + for (size_t i = 0; i < numel; ++i) { + path[i] = label_value[i] == path[i] ? 
1 : 0; + } } } } diff --git a/paddle/fluid/operators/linear_chain_crf_op.cc b/paddle/fluid/operators/linear_chain_crf_op.cc index ed09c64ffda..a056670692f 100644 --- a/paddle/fluid/operators/linear_chain_crf_op.cc +++ b/paddle/fluid/operators/linear_chain_crf_op.cc @@ -22,13 +22,14 @@ namespace operators { class LinearChainCRFOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput("Emission", - "(LoDTensor/Tensor). When a LoDTensor input,A 2-D LoDTensor" - " with shape [N x D], where N is the size of the " - "mini-batch and D is the total tag number. The unscaled emission " - "weight matrix for the linear chain CRF. When a Tensor input," - "A Tensor with shape [N x S x D], where N is batch number," - "S is max length of sequences, D is the total tag number."); + AddInput( + "Emission", + "(LoDTensor/Tensor). When a LoDTensor input, A 2-D LoDTensor" + " with shape [N x D], where N is the size of the " + "mini-batch and D is the total tag number. The unscaled emission " + "weight matrix for the linear chain CRF. When a Tensor input," + "A Tensor with shape [N x S x D], where N is batch size," + "S is max length of sequences, D is the total tag number."); AddInput("Transition", "(Tensor, default Tensor) A 2-D Tensor with shape " "[(D + 2) x D]. The learnable parameter for the linear_chain_crf " @@ -38,7 +39,7 @@ class LinearChainCRFOpMaker : public framework::OpProtoAndCheckerMaker { "[N x 1], where N is the total element number in a mini-batch. " "when a Tensor input, [N x S], where N is batch number. " "S is max length of sequences. 
The ground truth."); - AddInput("length", + AddInput("Length", "(Tensor, default Tensor) A Tensor with shape " "[M x 1], where M is the sequence number in a mini-batch.") .AsDispensable(); @@ -169,12 +170,16 @@ class LinearChainCRFOp : public framework::OperatorWithKernel { auto emission_dims = ctx->GetInputDim("Emission"); PADDLE_ENFORCE_NE(emission_dims[0], 0, "An empty mini-batch is not allowed."); - if (ctx->HasInput("length")) { + if (ctx->HasInput("Length")) { PADDLE_ENFORCE_EQ(emission_dims.size(), 3, "The Input(Emission) should be a 3-D tensor."); auto label_dims = ctx->GetInputDim("Label"); - PADDLE_ENFORCE_EQ(label_dims.size(), 3, - "The Input(Label) should be a 3-D tensor"); + PADDLE_ENFORCE_EQ( + (label_dims.size() == 3UL && label_dims[2] == 1) || + (label_dims.size() == 2UL), + true, + "The Input(Label) should be a 3-D tensor with last " + "dimension fixed to 1 or a 2-D tensor in padding mode."); PADDLE_INFERSHAPE_ENFORCE_EQ( ctx, emission_dims[0], label_dims[0], "The batch size of Input(Emission) and Input(Label) " @@ -249,7 +254,7 @@ class LinearChainCRFGradOp : public framework::OperatorWithKernel { auto emission_exps_dims = ctx->GetInputDim("EmissionExps"); auto label_dims = ctx->GetInputDim("Label"); - if (ctx->HasInput("length")) { + if (ctx->HasInput("Length")) { PADDLE_ENFORCE_EQ(emission_exps_dims.size(), 3, "The Input(EmissionExps) should be a 3-D tensor."); PADDLE_INFERSHAPE_ENFORCE_EQ( @@ -281,7 +286,7 @@ class LinearChainCRFGradOp : public framework::OperatorWithKernel { if (ctx->HasOutput(framework::GradVarName("Emission"))) { ctx->SetOutputDim(framework::GradVarName("Emission"), emission_exps_dims); - if (ctx->HasInput("length") == false) { + if (ctx->HasInput("Length") == false) { ctx->ShareLoD("Emission", framework::GradVarName("Emission")); } } @@ -320,8 +325,8 @@ class LinearChainCRFGradDescMaker : public framework::SingleGradOpDescMaker { op->SetInput("Alpha", Output("Alpha")); op->SetInput("EmissionExps", Output("EmissionExps")); 
op->SetInput("TransitionExps", Output("TransitionExps")); - if (ForwardOp().Inputs().count("length") > 0) { - op->SetInput("length", Input("length")); + if (ForwardOp().Inputs().count("Length") > 0) { + op->SetInput("Length", Input("Length")); } op->SetInput(framework::GradVarName("LogLikelihood"), OutputGrad("LogLikelihood")); diff --git a/paddle/fluid/operators/linear_chain_crf_op.h b/paddle/fluid/operators/linear_chain_crf_op.h old mode 100755 new mode 100644 index 8cd3cdadc91..488cbc6d517 --- a/paddle/fluid/operators/linear_chain_crf_op.h +++ b/paddle/fluid/operators/linear_chain_crf_op.h @@ -65,62 +65,51 @@ class LinearChainCRFOpKernel : public framework::OpKernel { // Because the computation codes only runs on CPU, here the memory for all // the outputs is FIXED to be allocated on the CPU memory. - auto* emission_exps_data = - emission_exps->mutable_data(platform::CPUPlace()); - auto* alpha_data = alpha->mutable_data(platform::CPUPlace()); + emission_exps->mutable_data(platform::CPUPlace()); + alpha->mutable_data(platform::CPUPlace()); transition_exps->mutable_data(platform::CPUPlace()); - // Resize the output tensor to its correct dimension. 
- memset(emission_exps_data, 0, emission_exps->numel() * sizeof(T)); - memset(alpha_data, 0, alpha->numel() * sizeof(T)); auto emission_dims = emission_weights->dims(); const Tensor* label = ctx.Input("Label"); - auto& dev_ctx = ctx.template device_context(); - Tensor emission_weights_tmp = ctx.AllocateTmpTensor( - emission_weights->dims(), dev_ctx); - emission_weights_tmp.ShareDataWith(*emission_weights); - Tensor label_tmp = - ctx.AllocateTmpTensor(label->dims(), dev_ctx); - label_tmp.ShareDataWith(*label); - Tensor emission_exps_tmp = - ctx.AllocateTmpTensor(emission_exps->dims(), dev_ctx); - emission_exps_tmp.ShareDataWith(*emission_exps); - Tensor alpha_tmp = - ctx.AllocateTmpTensor(alpha->dims(), dev_ctx); - alpha_tmp.ShareDataWith(*alpha); - size_t seq_num = 0; - size_t batch_size; - size_t tag_num; + Tensor emission_weights_tmp = *emission_weights; + Tensor label_tmp = *label; + Tensor emission_exps_tmp = *emission_exps; + Tensor alpha_tmp = *alpha; + int64_t seq_num = 0; + int64_t batch_size; + int64_t tag_num; const int64_t* length_data = nullptr; - framework::Vector in_lod; - if (ctx.HasInput("length")) { - const Tensor* label_length = ctx.Input("length"); + framework::LoD in_lod; + if (ctx.HasInput("Length")) { + const Tensor* label_length = ctx.Input("Length"); length_data = label_length->data(); seq_num = label_length->numel(); - batch_size = emission_dims[0] * emission_dims[1]; - tag_num = emission_dims[2]; - emission_weights_tmp.Resize( - {emission_dims[0] * emission_dims[1], emission_dims[2]}); - auto label_dims = label->dims(); - label_tmp.Resize({label_dims[0] * label_dims[1], label_dims[2]}); - alpha_tmp.Resize({emission_dims[0] * emission_dims[1], emission_dims[2]}); - emission_exps_tmp.Resize( - {emission_dims[0] * emission_dims[1], emission_dims[2]}); PADDLE_ENFORCE_EQ(seq_num, emission_dims[0], "the size of Input(length) must be equal to " "emission_dims[0]."); + auto label_dims = label->dims(); PADDLE_ENFORCE_EQ(seq_num, label_dims[0], "the 
size of Input(length) must be equal to " "label_dims[0]."); + + batch_size = emission_dims[0] * emission_dims[1]; + tag_num = emission_dims[2]; + emission_weights_tmp.Resize({batch_size, tag_num}); + label_tmp.Resize({batch_size, 1}); + alpha_tmp.Resize({batch_size, tag_num}); + emission_exps_tmp.Resize({batch_size, tag_num}); + math::set_constant(ctx.device_context(), emission_exps, 0.0); + math::set_constant(ctx.device_context(), alpha, 0.0); } else { - seq_num = ctx.Input("Label")->lod()[0].size() - 1; + in_lod = ctx.Input("Label")->lod(); + PADDLE_ENFORCE_NE(in_lod.size(), 0, "Input(Label) must be a sequence."); + seq_num = in_lod[0].size() - 1; batch_size = emission_dims[0]; tag_num = emission_dims[1]; - in_lod = ctx.Input("Label")->lod()[0]; - PADDLE_ENFORCE_NE(in_lod.size(), 0, "Input(Label) must be a sequence."); } - ll->Resize({static_cast(seq_num), 1}); + // Resize the output tensor to its correct dimension. + ll->Resize({seq_num, 1}); ll->mutable_data(platform::CPUPlace()); // Now, all the inputs and outputs should be on the CPU memory. Tensor emission_row_max; @@ -141,16 +130,15 @@ class LinearChainCRFOpKernel : public framework::OpKernel { auto w_exps = EigenMatrix::From(*transition_exps); w_exps.device(place) = w.exp(); T* log_likelihood = ll->data(); - for (size_t i = 0; i < seq_num; ++i) { - int start_pos = 0; - int end_pos = 0; - if (ctx.HasInput("length")) { - if (length_data[i] == 0) continue; + for (int64_t i = 0; i < seq_num; ++i) { + int64_t start_pos = 0; + int64_t end_pos = 0; + if (ctx.HasInput("Length")) { start_pos = i * emission_dims[1]; - end_pos = start_pos + static_cast(length_data[i]); + end_pos = start_pos + length_data[i]; } else { - start_pos = static_cast(in_lod[i]); - end_pos = static_cast(in_lod[i + 1]); + start_pos = static_cast(in_lod[0][i]); + end_pos = static_cast(in_lod[0][i + 1]); } if (end_pos == start_pos) { // If an empty input sequence is given, pad 0 for its cost. 
@@ -239,44 +227,35 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel { const Tensor* alpha = ctx.Input("Alpha"); const T* ll_grad = ctx.Input(framework::GradVarName("LogLikelihood"))->data(); - auto& dev_ctx = ctx.template device_context(); Tensor* emission_grad = ctx.Output(framework::GradVarName("Emission")); auto* emission_grad_data = emission_grad->mutable_data(platform::CPUPlace()); memset(emission_grad_data, 0, emission_grad->numel() * sizeof(T)); - Tensor alpha_tmp = - ctx.AllocateTmpTensor(alpha->dims(), dev_ctx); - alpha_tmp.ShareDataWith(*alpha); - Tensor label_tmp = - ctx.AllocateTmpTensor(label->dims(), dev_ctx); - label_tmp.ShareDataWith(*label); - Tensor emission_exps_tmp = - ctx.AllocateTmpTensor(emission_exps->dims(), dev_ctx); - emission_exps_tmp.ShareDataWith(*emission_exps); - Tensor emission_grad_tmp = - ctx.AllocateTmpTensor(emission_grad->dims(), dev_ctx); - emission_grad_tmp.ShareDataWith(*emission_grad); + Tensor alpha_tmp = *alpha; + Tensor label_tmp = *label; + Tensor emission_exps_tmp = *emission_exps; + Tensor emission_grad_tmp = *emission_grad; // getting seq_num using padding or not - size_t seq_num = 0; - framework::Vector lod; + int64_t seq_num = 0; + framework::LoD in_lod; const int64_t* length_data = nullptr; - if (ctx.HasInput("length")) { - const Tensor* label_length = ctx.Input("length"); + if (ctx.HasInput("Length")) { + const Tensor* label_length = ctx.Input("Length"); length_data = label_length->data(); seq_num = label_length->numel(); auto emission_dims = emission_grad->dims(); auto label_dims = label->dims(); emission_grad_tmp.Resize( {emission_dims[0] * emission_dims[1], emission_dims[2]}); - label_tmp.Resize({label_dims[0] * label_dims[1], label_dims[2]}); + label_tmp.Resize({label_dims[0] * label_dims[1], 1}); alpha_tmp.Resize({emission_dims[0] * emission_dims[1], emission_dims[2]}); emission_exps_tmp.Resize( {emission_dims[0] * emission_dims[1], emission_dims[2]}); } else { - seq_num = 
ctx.Input("Label")->lod()[0].size() - 1; - lod = ctx.Input("Label")->lod()[0]; - PADDLE_ENFORCE_NE(lod.size(), 0, "Input(Label) must be a sequence."); + in_lod = ctx.Input("Label")->lod(); + PADDLE_ENFORCE_NE(in_lod.size(), 0, "Input(Label) must be a sequence."); + seq_num = static_cast(in_lod[0].size() - 1); } Tensor* transition_grad = @@ -295,21 +274,24 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel { // captures the unnormalized probabilities of partial sequences starting // at position i. Tensor beta; - auto* beta_data = beta.mutable_data(emission_dims, platform::CPUPlace()); - memset(beta_data, 0, beta.numel() * sizeof(T)); - if (ctx.HasInput("length")) { + beta.mutable_data(emission_dims, platform::CPUPlace()); + if (ctx.HasInput("Length")) { beta.Resize({emission_dims[0] * emission_dims[1], emission_dims[2]}); } - for (size_t i = 0; i < seq_num; ++i) { - int start_pos = 0; - int end_pos = 0; - if (ctx.HasInput("length")) { - if (length_data[i] == 0) continue; + + for (int64_t i = 0; i < seq_num; ++i) { + int64_t start_pos = 0; + int64_t end_pos = 0; + if (ctx.HasInput("Length")) { start_pos = i * emission_dims[1]; - end_pos = start_pos + static_cast(length_data[i]); + end_pos = start_pos + length_data[i]; } else { - start_pos = static_cast(lod[i]); - end_pos = static_cast(lod[i + 1]); + start_pos = static_cast(in_lod[0][i]); + end_pos = static_cast(in_lod[0][i + 1]); + } + + if (end_pos == start_pos) { + continue; } const Tensor one_seq_emission_exps = emission_exps_tmp.Slice(start_pos, end_pos); diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index d5d8c2d89b1..261ebd9fb30 100755 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -1491,7 +1491,7 @@ def linear_chain_crf(input, label, param_attr=None, length=None): print(transition) """ helper = LayerHelper('linear_chain_crf', **locals()) - size = input.shape[1] + size = input.shape[2] if length else input.shape[1] transition = 
helper.create_parameter( attr=helper.param_attr, shape=[size + 2, size], @@ -1510,7 +1510,7 @@ def linear_chain_crf(input, label, param_attr=None, length=None): "Label": [label] } if length: - this_inputs['length'] = [length] + this_inputs['Length'] = [length] helper.append_op( type='linear_chain_crf', inputs=this_inputs, @@ -1525,7 +1525,7 @@ def linear_chain_crf(input, label, param_attr=None, length=None): @templatedoc() -def crf_decoding(input, param_attr, label=None): +def crf_decoding(input, param_attr, label=None, length=None): """ ${comment} @@ -1535,6 +1535,8 @@ def crf_decoding(input, param_attr, label=None): param_attr(ParamAttr): The parameter attribute for training. label(${label_type}): ${label_comment} + + length(${length_type}): ${length_comment} Returns: Variable: ${viterbi_path_comment} @@ -1543,23 +1545,41 @@ def crf_decoding(input, param_attr, label=None): .. code-block:: python import paddle.fluid as fluid - images = fluid.layers.data(name='pixel', shape=[784], dtype='float32') - label = fluid.layers.data(name='label', shape=[1], dtype='int32') - hidden = fluid.layers.fc(input=images, size=2) - crf = fluid.layers.linear_chain_crf(input=hidden, label=label, + + # LoDTensor-based example + num_labels = 10 + feature = fluid.layers.data(name='word_emb', shape=[784], dtype='float32', lod_level=1) + label = fluid.layers.data(name='label', shape=[1], dtype='int64', lod_level=1) + emission = fluid.layers.fc(input=feature, size=num_labels) + + crf_cost = fluid.layers.linear_chain_crf(input=emission, label=label, param_attr=fluid.ParamAttr(name="crfw")) - crf_decode = fluid.layers.crf_decoding(input=hidden, + crf_decode = fluid.layers.crf_decoding(input=emission, param_attr=fluid.ParamAttr(name="crfw")) + + # Common tensor example + num_labels, max_len = 10, 20 + feature = fluid.layers.data(name='word_emb_pad', shape=[max_len, 784], dtype='float32') + label = fluid.layers.data(name='label_pad', shape=[max_len, 1], dtype='int64') + length =
fluid.layers.data(name='length', shape=[1], dtype='int64') + emission = fluid.layers.fc(input=feature, size=num_labels, + num_flatten_dims=2) + + crf_cost = fluid.layers.linear_chain_crf(input=emission, label=label, length=length, + param_attr=fluid.ParamAttr(name="crfw_pad")) + crf_decode = fluid.layers.crf_decoding(input=emission, length=length, + param_attr=fluid.ParamAttr(name="crfw_pad")) """ helper = LayerHelper('crf_decoding', **locals()) transition = helper.get_parameter(param_attr.name) viterbi_path = helper.create_variable_for_type_inference( dtype=helper.input_dtype()) + inputs = {"Emission": [input], "Transition": transition, "Label": label} + if length: + inputs['Length'] = length helper.append_op( type='crf_decoding', - inputs={"Emission": [input], - "Transition": transition, - "Label": label}, + inputs=inputs, outputs={"ViterbiPath": [viterbi_path]}) return viterbi_path diff --git a/python/paddle/fluid/tests/unittests/test_crf_decoding_op.py b/python/paddle/fluid/tests/unittests/test_crf_decoding_op.py index 5c8682a0756..6f594d16074 100644 --- a/python/paddle/fluid/tests/unittests/test_crf_decoding_op.py +++ b/python/paddle/fluid/tests/unittests/test_crf_decoding_op.py @@ -176,22 +176,23 @@ class TestCRFDecodingOp4(TestCRFDecodingOp2): self.lod = [[0, 2, 3, 0]] +def seq_pad(data, length): + max_len = np.max(length) + shape = [len(length), max_len] + list(data.shape[1:]) + padded = np.zeros(shape).astype(data.dtype) + offset = 0 + for i, l in enumerate(length): + padded[i, 0:l] = data[offset:offset + l] + offset += l + return np.squeeze(padded) + + class TestCRFDecodingOp5(OpTest): """ Compare the dynamic program with random generated parameters and inputs with grouth truth not being given. 
""" - def seq_pad(self, data, length): - max_len = np.max(length) - shape = [len(length), max_len] + list(data.shape[1:]) - padded = np.zeros(shape).astype(data.dtype) - offset = 0 - for i, l in enumerate(length): - padded[i, 0:l] = data[offset:offset + l] - offset += l - return np.squeeze(padded) - def set_test_data(self): SEQ_NUM = 3 TAG_NUM = 17 @@ -208,7 +209,7 @@ class TestCRFDecodingOp5(OpTest): [TAG_NUM + 2, TAG_NUM]).astype("float64") self.inputs = { - "Emission": self.seq_pad(emission, lod[0]), + "Emission": seq_pad(emission, lod[0]), "Transition": transition, "Length": np.array(lod).astype('int64'), } @@ -216,7 +217,7 @@ class TestCRFDecodingOp5(OpTest): decoder = CRFDecoding(emission, transition, lod[0]) decoded_path = decoder.decode() - self.outputs = {"ViterbiPath": self.seq_pad(decoded_path, lod[0])} + self.outputs = {"ViterbiPath": seq_pad(decoded_path, lod[0])} def setUp(self): self.op_type = "crf_decoding" @@ -226,5 +227,45 @@ class TestCRFDecodingOp5(OpTest): self.check_output() +class TestCRFDecodingOp6(OpTest): + def init_lod(self): + self.lod = [[1, 2, 3, 4]] + + def setUp(self): + self.op_type = "crf_decoding" + TAG_NUM = 5 + + self.init_lod() + total_len = sum(self.lod[-1]) + transition = np.repeat( + np.arange( + TAG_NUM, dtype="float64").reshape(1, TAG_NUM), + TAG_NUM + 2, + axis=0) + emission = np.repeat( + np.arange( + TAG_NUM, dtype="float64").reshape(1, TAG_NUM), + total_len, + axis=0) + + labels = np.random.randint( + low=0, high=TAG_NUM, size=(total_len, 1), dtype="int64") + predicted_labels = np.ones( + (total_len, 1), dtype="int64") * (TAG_NUM - 1) + expected_output = (labels == predicted_labels).astype("int64") + + self.inputs = { + "Emission": seq_pad(emission, self.lod[0]), + "Transition": transition, + "Label": seq_pad(labels, self.lod[0]), + "Length": np.array(self.lod).astype('int64'), + } + + self.outputs = {"ViterbiPath": seq_pad(expected_output, self.lod[0])} + + def test_check_output(self): + self.check_output() + + if 
__name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 920a212c6f7..8497be489e5 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -2556,21 +2556,46 @@ class TestBook(LayerTest): input=fc_out, size=4 * hidden_dim, proj_size=proj_dim)) def test_linear_chain_crf(self): - # TODO(minqiyang): dygraph do not support lod now with self.static_graph(): label_dict_len = 10 - images = layers.data(name='pixel', shape=[784], dtype='float32') - label = layers.data(name='label', shape=[1], dtype='int32') - hidden = layers.fc(input=images, size=2) + feature = layers.data(name='feature', shape=[784], dtype='float32') + label = layers.data(name='label', shape=[1], dtype='int64') + emission = layers.fc(input=feature, size=10) crf = layers.linear_chain_crf( - input=hidden, label=label, param_attr=ParamAttr(name="crfw")) + input=emission, label=label, param_attr=ParamAttr(name="crfw")) + crf_decode = layers.crf_decoding( + input=emission, param_attr=ParamAttr(name="crfw")) + self.assertFalse(crf is None) + self.assertFalse(crf_decode is None) + return layers.chunk_eval( + input=crf_decode, + label=label, + chunk_scheme="IOB", + num_chunk_types=(label_dict_len - 1) // 2) + + def test_linear_chain_crf_padding(self): + with self.static_graph(): + label_dict_len, max_len = 10, 20 + feature = layers.data( + name='feature', shape=[max_len, 784], dtype='float32') + label = layers.data(name='label', shape=[max_len], dtype='int64') + length = layers.data(name='length', shape=[1], dtype='int64') + emission = layers.fc(input=feature, size=10, num_flatten_dims=2) + crf = layers.linear_chain_crf( + input=emission, + label=label, + length=length, + param_attr=ParamAttr(name="crfw")) crf_decode = layers.crf_decoding( - input=hidden, param_attr=ParamAttr(name="crfw")) + input=emission, + length=length, + 
param_attr=ParamAttr(name="crfw")) self.assertFalse(crf is None) self.assertFalse(crf_decode is None) return layers.chunk_eval( input=crf_decode, label=label, + seq_length=length, chunk_scheme="IOB", num_chunk_types=(label_dict_len - 1) // 2) diff --git a/python/paddle/fluid/tests/unittests/test_linear_chain_crf_op.py b/python/paddle/fluid/tests/unittests/test_linear_chain_crf_op.py index b86d9586019..27ee3b08a4e 100755 --- a/python/paddle/fluid/tests/unittests/test_linear_chain_crf_op.py +++ b/python/paddle/fluid/tests/unittests/test_linear_chain_crf_op.py @@ -205,7 +205,7 @@ class TestLinearChainCrfPaddingTensor(OpTest): "Emission": self.seq_pad(emission, lod[0]), "Transition": transition, "Label": self.seq_pad(labels, lod[0]), - "length": np.array(lod).astype("int64") + "Length": np.array(lod).astype("int64") } crf = LinearChainCrfForward(seq_start_pos, emission, emission_row_max, emission_exps, transition, transition_exps, -- GitLab