diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index c472bca325b909848d163a9d50aefba890a126bd..d14167bdf7603187d058b242c138d4446b644e30 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -116,7 +116,7 @@ paddle.fluid.layers.dynamic_lstmp (ArgSpec(args=['input', 'size', 'proj_size', ' paddle.fluid.layers.dynamic_gru (ArgSpec(args=['input', 'size', 'param_attr', 'bias_attr', 'is_reverse', 'gate_activation', 'candidate_activation', 'h_0', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, False, 'sigmoid', 'tanh', None, False)), ('document', '83617c165827e030636c80486d5de6f3')) paddle.fluid.layers.gru_unit (ArgSpec(args=['input', 'hidden', 'size', 'param_attr', 'bias_attr', 'activation', 'gate_activation', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, 'tanh', 'sigmoid', False)), ('document', '33974b9bfa69f2f1eb85e6f956dff04e')) paddle.fluid.layers.linear_chain_crf (ArgSpec(args=['input', 'label', 'param_attr'], varargs=None, keywords=None, defaults=(None,)), ('document', '34f96be41684b0959897a9e735997e20')) -paddle.fluid.layers.crf_decoding (ArgSpec(args=['input', 'param_attr', 'label'], varargs=None, keywords=None, defaults=(None,)), ('document', 'c469f22029f7b5d41ecd44dfa1e81ffd')) +paddle.fluid.layers.crf_decoding (ArgSpec(args=['input', 'param_attr', 'label'], varargs=None, keywords=None, defaults=(None,)), ('document', '5ce117258e243be1c81539e254178d90')) paddle.fluid.layers.cos_sim (ArgSpec(args=['X', 'Y'], varargs=None, keywords=None, defaults=None), ('document', '8e6ce424cf9e261ef32ee229c06a6e66')) paddle.fluid.layers.cross_entropy (ArgSpec(args=['input', 'label', 'soft_label', 'ignore_index'], varargs=None, keywords=None, defaults=(False, -100)), ('document', 'f43c659ca1749a3f0ff2231e6dfda07d')) paddle.fluid.layers.bpr_loss (ArgSpec(args=['input', 'label', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6263dfdeb6c670fa0922c9cbc8fb1bf4')) diff --git 
a/paddle/fluid/operators/crf_decoding_op.cc b/paddle/fluid/operators/crf_decoding_op.cc index c701e895af00baffe49838d130d451319ae42c46..2de714e0d4615c9c65c29dd76524f4760433e1ee 100644 --- a/paddle/fluid/operators/crf_decoding_op.cc +++ b/paddle/fluid/operators/crf_decoding_op.cc @@ -19,14 +19,17 @@ namespace operators { class CRFDecodingOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput("Emission", - "(LoDTensor, default: LoDTensor). A LoDTensor with shape " - "[N x D] where N is the size of the mini-batch and D is the total " - "tag number. This input is the unscaled emission weight matrix of " - "the linear_chain_crf operator."); + AddInput( + "Emission", + "(Tensor/LoDTensor). For a LoDTensor input, its " + "shape is [N x D] where N is the total sequence length of the " + "mini-batch and D is the total tag number. While for a tensor " + "input, its shape is [B x S x D] with B the batch size and S the " + "sequence length of each sample after padding. This input is the " + "unscaled emission weight matrix of the linear_chain_crf operator."); AddInput( "Transition", - "(Tensor, default: Tensor). A Tensor with shape [(D + 2) x D]. " + "(Tensor). A Tensor with shape [(D + 2) x D]. " "This input is the transition weights learned by the linear_chain_crf " "operator, denoted as w. The 1st row of w are transition weights for " "the start mask. The 2nd row of w are transition weights for the end " @@ -34,15 +37,24 @@ class CRFDecodingOpMaker : public framework::OpProtoAndCheckerMaker { "w. See more details in comments of the linear_chain_crf operator."); AddInput( "Label", - "(LoDTensor, LoDTensor). The ground truth with shape " - "[N x 1]. This input is optional. See more details in the operator's " - "comments.") + "(Tensor/LoDTensor). The ground truth with shape " + "[N x 1] (for LoDTensor) or [B x S] (for Tensor). This input is " + "optional. 
" + "See more details in the operator's comments.") .AsDispensable(); AddOutput( "ViterbiPath", - "(LoDTensor, LoDTensor). The decoding results. What to " + "(Tensor/LoDTensor). The decoding results. What to " "return changes depending on whether the Input(Label) (the ground " "truth) is given. See more details in the operator's comment."); + AddInput("Length", + "(Tensor). The actual length of each sample before " + "padding with shape [B x 1]. It means the Input(Emission), " + "Input(Label) " + "and Output(ViterbiPath) are common tensors with padding when " + "this input " + "is given.") + .AsDispensable(); AddComment(R"DOC( The crf_decoding operator reads the emission feature weights and the transition feature weights learned by the linear_chain_crf operator. It implements the @@ -55,15 +67,16 @@ The output of this operator changes according to whether Input(Label) is given: 1. Input(Label) is given: This happens in training. This operator is used to co-work with the chunk_eval operator. - When Input(Label) is given, the crf_decoding operator returns a row vector - with shape [N x 1] whose values are fixed to be 0, indicating an incorrect - prediction, or 1 indicating a tag is correctly predicted. Such an output is the - input to chunk_eval operator. + When Input(Label) is given, the crf_decoding operator returns tensor with the + sampe shape as Input(Label) whose values are fixed to be 0, indicating an + incorrect prediction, or 1 indicating a tag is correctly predicted. Such an + output is the input to chunk_eval operator. 2. Input(Label) is not given: This is the standard decoding process. -The crf_decoding operator returns a row vector with shape [N x 1] whose values +The crf_decoding operator returns a row vector with shape [N x 1]/[B x S], here +the shape depends on the inputs are LoDTensors or common tensors, whose values range from 0 to maximum tag number - 1, Each element indicates an index of a predicted tag. 
)DOC"); @@ -75,37 +88,46 @@ class CRFDecodingOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("Emission"), - "Input(Emission) should be not null."); - PADDLE_ENFORCE(ctx->HasInput("Transition"), - "Input(Transition) should be not null."); + PADDLE_ENFORCE_EQ(ctx->HasInput("Emission"), true, + "Input(Emission) should be not null."); + PADDLE_ENFORCE_EQ(ctx->HasInput("Transition"), true, + "Input(Transition) should be not null."); - PADDLE_ENFORCE(ctx->HasOutput("ViterbiPath"), - "Output(ViterbiPath) should be not null."); + PADDLE_ENFORCE_EQ(ctx->HasOutput("ViterbiPath"), true, + "Output(ViterbiPath) should be not null."); auto emission_dims = ctx->GetInputDim("Emission"); - PADDLE_ENFORCE_EQ(emission_dims.size(), 2, - "The Input(Emission) should be a 2-D tensor."); - PADDLE_ENFORCE(emission_dims[0], "An empty mini-batch is not allowed."); + bool has_length = ctx->HasInput("Length"); + + if (has_length) { + PADDLE_ENFORCE_EQ(emission_dims.size(), 3, + "The Input(Emission) should be a 3-D tensor."); + } else { + PADDLE_ENFORCE_EQ(emission_dims.size(), 2, + "The Input(Emission) should be a 2-D tensor."); + } + PADDLE_ENFORCE_NE(emission_dims[0], 0, + "An empty mini-batch is not allowed."); auto transition_dims = ctx->GetInputDim("Transition"); - PADDLE_ENFORCE_EQ(transition_dims.size(), 2, + PADDLE_ENFORCE_EQ(transition_dims.size(), 2UL, "The Input(Transition) should be a 2-D tensor."); PADDLE_ENFORCE_EQ( transition_dims[0] - 2, transition_dims[1], "An invalid dimension for the Input(Transition), which should " "be a 2-D tensor with shape [(D + 2) x D]."); - if (ctx->IsRuntime() || (emission_dims[1] > 0 && transition_dims[1] > 0)) { + if (ctx->IsRuntime() || (emission_dims[emission_dims.size() - 1] > 0 && + transition_dims[transition_dims.size() - 1] > 0)) { PADDLE_ENFORCE_EQ( - emission_dims[1], transition_dims[1], - 
"The 2nd dimension of the Input(Emission) and the Input(Transition) " + emission_dims[emission_dims.size() - 1], + transition_dims[transition_dims.size() - 1], + "The last dimension of the Input(Emission) and the Input(Transition) " "should be equal to the tag number."); } if (ctx->HasInput("Label")) { auto label_dims = ctx->GetInputDim("Label"); - PADDLE_ENFORCE(label_dims.size() == 2UL && label_dims[1] == 1UL, - "The Input(Label) should be a 2-D tensor with the 2nd " - "dimensions fixed to 1."); + PADDLE_ENFORCE_EQ(label_dims.size(), 2UL, + "The Input(Label) should be a 2-D tensor"); if (ctx->IsRuntime() || (emission_dims[0] > 0 && label_dims[0] > 0)) { PADDLE_ENFORCE_EQ( emission_dims[0], label_dims[0], @@ -115,7 +137,11 @@ class CRFDecodingOp : public framework::OperatorWithKernel { } ctx->ShareLoD("Emission", /*->*/ "ViterbiPath"); - ctx->SetOutputDim("ViterbiPath", {emission_dims[0], 1}); + if (has_length) { + ctx->SetOutputDim("ViterbiPath", {emission_dims[0], emission_dims[1]}); + } else { + ctx->SetOutputDim("ViterbiPath", {emission_dims[0], 1}); + } } protected: diff --git a/paddle/fluid/operators/crf_decoding_op.h b/paddle/fluid/operators/crf_decoding_op.h index 13a587dc4b9a96d263c3137ef9a7576e111fdca2..74b9cb20a9d8606db081d3005e9b6aacdf03708f 100644 --- a/paddle/fluid/operators/crf_decoding_op.h +++ b/paddle/fluid/operators/crf_decoding_op.h @@ -35,31 +35,59 @@ class CRFDecodingOpKernel : public framework::OpKernel { auto* label = ctx.Input("Label"); auto* decoded_path = ctx.Output("ViterbiPath"); - PADDLE_ENFORCE_EQ(emission_weights->NumLevels(), 1UL, - "The Input(Emission) should be a sequence."); - auto lod = emission_weights->lod(); - PADDLE_ENFORCE(lod.size(), "Input(Emission) must be a sequence."); - const size_t level = 0; - const size_t seq_num = lod[level].size() - 1; - int64_t* path = decoded_path->mutable_data(platform::CPUPlace()); math::SetConstant()( ctx.template device_context(), decoded_path, 0); - for (size_t i = 0; i < seq_num; ++i) { 
- if (lod[level][i] == lod[level][i + 1]) continue; - int start_pos = static_cast(lod[level][i]); - int end_pos = static_cast(lod[level][i + 1]); - Tensor decoded_path_one_seq = decoded_path->Slice(start_pos, end_pos); - Decode(emission_weights->Slice(start_pos, end_pos), *transition_weights, - &decoded_path_one_seq); - } + bool has_length = ctx.HasInput("Length"); + if (has_length) { + auto* length = ctx.Input("Length"); + const size_t seq_num = length->numel(); + const int64_t* length_data = length->data(); + auto in_dims = emission_weights->dims(); + + auto& dev_ctx = ctx.template device_context(); + framework::Tensor emission_weights_tmp = + ctx.AllocateTmpTensor(emission_weights->dims(), + dev_ctx); + emission_weights_tmp.ShareDataWith(*emission_weights); + emission_weights_tmp.Resize({in_dims[0] * in_dims[1], in_dims[2]}); + + decoded_path->Resize({in_dims[0] * in_dims[1], 1}); + for (size_t i = 0; i < seq_num; ++i) { + if (length_data[i] == 0) continue; + int start_pos = i * in_dims[1]; + int end_pos = start_pos + static_cast(length_data[i]); + Tensor decoded_path_one_seq = decoded_path->Slice(start_pos, end_pos); + Decode(emission_weights_tmp.Slice(start_pos, end_pos), + *transition_weights, &decoded_path_one_seq); + } + decoded_path->Resize({in_dims[0], in_dims[1]}); + } else { + PADDLE_ENFORCE_EQ(emission_weights->NumLevels(), 1UL, + "The Input(Emission) should be a sequence."); + auto lod = emission_weights->lod(); + PADDLE_ENFORCE_GT(lod.size(), 0, "Input(Emission) must be a sequence."); + const size_t level = 0; + const size_t seq_num = lod[level].size() - 1; + + for (size_t i = 0; i < seq_num; ++i) { + if (lod[level][i] == lod[level][i + 1]) continue; + int start_pos = static_cast(lod[level][i]); + int end_pos = static_cast(lod[level][i + 1]); + Tensor decoded_path_one_seq = decoded_path->Slice(start_pos, end_pos); + Decode(emission_weights->Slice(start_pos, end_pos), *transition_weights, + &decoded_path_one_seq); + } + } if (label) { - 
PADDLE_ENFORCE_EQ(label->NumLevels(), 1UL, - "The Input(Label) should be a sequence."); + if (!has_length) { + PADDLE_ENFORCE_EQ(label->NumLevels(), 1UL, + "The Input(Label) should be a sequence."); + } const int64_t* label_value = label->data(); - size_t batch_size = emission_weights->dims()[0]; - for (size_t i = 0; i < batch_size; ++i) { + size_t numel = label->numel(); + for (size_t i = 0; i < numel; ++i) { path[i] = label_value[i] == path[i] ? 1 : 0; } } diff --git a/python/paddle/fluid/tests/unittests/test_crf_decoding_op.py b/python/paddle/fluid/tests/unittests/test_crf_decoding_op.py index 89af7210760b88a362649571282873903be60395..5c8682a0756910897b0a708d20cc41690d870db3 100644 --- a/python/paddle/fluid/tests/unittests/test_crf_decoding_op.py +++ b/python/paddle/fluid/tests/unittests/test_crf_decoding_op.py @@ -176,5 +176,55 @@ class TestCRFDecodingOp4(TestCRFDecodingOp2): self.lod = [[0, 2, 3, 0]] +class TestCRFDecodingOp5(OpTest): + """ + Compare the dynamic program with randomly generated parameters and inputs + with ground truth not being given. 
+ """ + + def seq_pad(self, data, length): + max_len = np.max(length) + shape = [len(length), max_len] + list(data.shape[1:]) + padded = np.zeros(shape).astype(data.dtype) + offset = 0 + for i, l in enumerate(length): + padded[i, 0:l] = data[offset:offset + l] + offset += l + return np.squeeze(padded) + + def set_test_data(self): + SEQ_NUM = 3 + TAG_NUM = 17 + MAX_SEQ_LEN = 10 + + lod = [[]] + total_len = 0 + for i in range(SEQ_NUM): + lod[-1].append(random.randint(1, MAX_SEQ_LEN)) + total_len += lod[-1][-1] + emission = np.random.uniform(-1, 1, + [total_len, TAG_NUM]).astype("float64") + transition = np.random.uniform(-0.5, 0.5, + [TAG_NUM + 2, TAG_NUM]).astype("float64") + + self.inputs = { + "Emission": self.seq_pad(emission, lod[0]), + "Transition": transition, + "Length": np.array(lod).astype('int64'), + } + + decoder = CRFDecoding(emission, transition, lod[0]) + decoded_path = decoder.decode() + + self.outputs = {"ViterbiPath": self.seq_pad(decoded_path, lod[0])} + + def setUp(self): + self.op_type = "crf_decoding" + self.set_test_data() + + def test_check_output(self): + self.check_output() + + if __name__ == "__main__": unittest.main()