Unverified commit ea065e32, authored by Yibing Liu, committed by GitHub

Refactor linear chain crf op & crf decoding op (#19982) (#20171)

* Update crf_decoding api & example

* Update api spec

* Fix linear chain crf api

* Avoid sharing data pointer with input

* Simplify the logic in linear_chain_crf_decoding

* Add unittest for crf_decoding when label & path both are set

* Update API spec

* Add unittest for layers && correct infer_shape in chunk_eval

test=release/1.6
Parent 3b49372f
@@ -132,8 +132,8 @@ paddle.fluid.layers.dynamic_lstm (ArgSpec(args=['input', 'size', 'h_0', 'c_0', '
 paddle.fluid.layers.dynamic_lstmp (ArgSpec(args=['input', 'size', 'proj_size', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'proj_activation', 'dtype', 'name', 'h_0', 'c_0', 'cell_clip', 'proj_clip'], varargs=None, keywords=None, defaults=(None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'tanh', 'float32', None, None, None, None, None)), ('document', 'c37d51aad655c8a9f9b045c64717320a'))
 paddle.fluid.layers.dynamic_gru (ArgSpec(args=['input', 'size', 'param_attr', 'bias_attr', 'is_reverse', 'gate_activation', 'candidate_activation', 'h_0', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, False, 'sigmoid', 'tanh', None, False)), ('document', '83617c165827e030636c80486d5de6f3'))
 paddle.fluid.layers.gru_unit (ArgSpec(args=['input', 'hidden', 'size', 'param_attr', 'bias_attr', 'activation', 'gate_activation', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, 'tanh', 'sigmoid', False)), ('document', '33974b9bfa69f2f1eb85e6f956dff04e'))
-paddle.fluid.layers.linear_chain_crf (ArgSpec(args=['input', 'label', 'param_attr', 'length'], varargs=None, keywords=None, defaults=(None, None)), ('document', '9045b8971e4232132ec9952695f4c3ae'))
-paddle.fluid.layers.crf_decoding (ArgSpec(args=['input', 'param_attr', 'label'], varargs=None, keywords=None, defaults=(None,)), ('document', '5ce117258e243be1c81539e254178d90'))
+paddle.fluid.layers.linear_chain_crf (ArgSpec(args=['input', 'label', 'param_attr', 'length'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'bc7a0fd2bb2b35dfd2f54947320e78fa'))
+paddle.fluid.layers.crf_decoding (ArgSpec(args=['input', 'param_attr', 'label', 'length'], varargs=None, keywords=None, defaults=(None, None)), ('document', '933b7e268c4ffa3d5c3ef953a5ee9f0b'))
 paddle.fluid.layers.cos_sim (ArgSpec(args=['X', 'Y'], varargs=None, keywords=None, defaults=None), ('document', '8e6ce424cf9e261ef32ee229c06a6e66'))
 paddle.fluid.layers.cross_entropy (ArgSpec(args=['input', 'label', 'soft_label', 'ignore_index'], varargs=None, keywords=None, defaults=(False, -100)), ('document', '789a141e97fd0b37241f630935936d08'))
 paddle.fluid.layers.bpr_loss (ArgSpec(args=['input', 'label', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6263dfdeb6c670fa0922c9cbc8fb1bf4'))
...
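The API.spec change above amounts to one new optional argument on each layer. A minimal sketch of the updated call sites in LoDTensor mode (variable names are illustrative, not taken from the patch):

    import paddle.fluid as fluid

    # Illustrative LoDTensor-mode usage of the refreshed signatures.
    emission = fluid.layers.data(name='emission', shape=[10], dtype='float32', lod_level=1)
    label = fluid.layers.data(name='label', shape=[1], dtype='int64', lod_level=1)

    # linear_chain_crf(input, label, param_attr=None, length=None)
    crf_cost = fluid.layers.linear_chain_crf(
        input=emission, label=label, param_attr=fluid.ParamAttr(name='crfw'))

    # crf_decoding(input, param_attr, label=None, length=None)
    decoded = fluid.layers.crf_decoding(
        input=emission, param_attr=fluid.ParamAttr(name='crfw'))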
@@ -24,37 +24,45 @@ class ChunkEvalOp : public framework::OperatorWithKernel {
   using framework::OperatorWithKernel::OperatorWithKernel;

   void InferShape(framework::InferShapeContext *ctx) const override {
-    PADDLE_ENFORCE(ctx->HasInput("Inference"),
-                   "Input(Inference) of ChunkEvalOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("Label"),
-                   "Input(Label) of ChunkEvalOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasOutput("Precision"),
-                   "Output(Precision) of ChunkEvalOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasOutput("Recall"),
-                   "Output(Recall) of ChunkEvalOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasOutput("F1-Score"),
-                   "Output(F1-Score) of ChunkEvalOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasOutput("NumInferChunks"),
-                   "Output(NumInferChunks) of ChunkEvalOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasOutput("NumLabelChunks"),
-                   "Output(NumLabelChunks) of ChunkEvalOp should not be null.");
-    PADDLE_ENFORCE(
-        ctx->HasOutput("NumCorrectChunks"),
-        "Output(NumCorrectChunks) of ChunkEvalOp should not be null.");
+    PADDLE_ENFORCE_EQ(ctx->HasInput("Inference"), true,
+                      "Input(Inference) of ChunkEvalOp should not be null.");
+    PADDLE_ENFORCE_EQ(ctx->HasInput("Label"), true,
+                      "Input(Label) of ChunkEvalOp should not be null.");
+    PADDLE_ENFORCE_EQ(ctx->HasOutput("Precision"), true,
+                      "Output(Precision) of ChunkEvalOp should not be null.");
+    PADDLE_ENFORCE_EQ(ctx->HasOutput("Recall"), true,
+                      "Output(Recall) of ChunkEvalOp should not be null.");
+    PADDLE_ENFORCE_EQ(ctx->HasOutput("F1-Score"), true,
+                      "Output(F1-Score) of ChunkEvalOp should not be null.");
+    PADDLE_ENFORCE_EQ(
+        ctx->HasOutput("NumInferChunks"), true,
+        "Output(NumInferChunks) of ChunkEvalOp should not be null.");
+    PADDLE_ENFORCE_EQ(
+        ctx->HasOutput("NumLabelChunks"), true,
+        "Output(NumLabelChunks) of ChunkEvalOp should not be null.");
+    PADDLE_ENFORCE_EQ(
+        ctx->HasOutput("NumCorrectChunks"), true,
+        "Output(NumCorrectChunks) of ChunkEvalOp should not be null.");

     auto inference_dim = ctx->GetInputDim("Inference");
     auto label_dim = ctx->GetInputDim("Label");
-    PADDLE_ENFORCE(inference_dim == label_dim,
-                   "Inference's shape must be the same as Label's shape.");
+    PADDLE_ENFORCE_EQ(
+        inference_dim, label_dim,
+        "Input(Inference)'s shape must be the same as Input(Label)'s shape.");

     bool use_padding = ctx->HasInput("SeqLength");
     if (use_padding) {
-      PADDLE_ENFORCE(inference_dim.size() == 3,
-                     "when SeqLength is provided, Inference should be of dim 3 "
-                     "(batch, bucket, 1)");
+      PADDLE_ENFORCE_EQ((inference_dim.size() == 3 && inference_dim[2] == 1) ||
+                            inference_dim.size() == 2,
+                        true,
+                        "when Input(SeqLength) is provided, Input(Inference) "
+                        "should be of dim 3 (batch_size, bucket, 1) or dim 2 "
+                        "(batch_size, bucket).");
       auto seq_length_dim = ctx->GetInputDim("SeqLength");
-      PADDLE_ENFORCE(seq_length_dim.size() == 1, "seq_length should be rank 1");
+      PADDLE_ENFORCE_LE(
+          seq_length_dim.size(), 2,
+          "Input(SeqLength)'s rank should not be greater than 2.");
     }

     ctx->SetOutputDim("Precision", {1});
...
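The relaxed check above means a padded Inference tensor may now be rank 2 as well as rank 3 with a trailing 1. A small Python sketch of the same shape rule (hypothetical helper, not part of the patch):

    def inference_shape_ok(dims, has_seq_length):
        # Mirrors ChunkEvalOp::InferShape: with SeqLength, Inference may be
        # [batch_size, bucket, 1] or [batch_size, bucket].
        if not has_seq_length:
            return True
        return (len(dims) == 3 and dims[2] == 1) or len(dims) == 2

    assert inference_shape_ok((4, 20, 1), has_seq_length=True)
    assert inference_shape_ok((4, 20), has_seq_length=True)
    assert not inference_shape_ok((4, 20, 5), has_seq_length=True)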
@@ -39,8 +39,7 @@ class CRFDecodingOpMaker : public framework::OpProtoAndCheckerMaker {
         "Label",
         "(Tensor<int64_t>/LoDTensor<int64_t>). The ground truth with shape "
         "[N x 1] (for LoDTensor) or [B x S] (for Tensor). This input is "
-        "optional. "
-        "See more details in the operator's comments.")
+        "optional. See more details in the operator's comments.")
         .AsDispensable();
     AddOutput(
         "ViterbiPath",
@@ -126,12 +125,24 @@ class CRFDecodingOp : public framework::OperatorWithKernel {
     }
     if (ctx->HasInput("Label")) {
       auto label_dims = ctx->GetInputDim("Label");
-      PADDLE_ENFORCE_EQ(label_dims.size(), 2UL,
-                        "The Input(Label) should be a 2-D tensor");
+      if (ctx->HasInput("Length")) {
+        PADDLE_ENFORCE_EQ(
+            (label_dims.size() == 3UL && label_dims[2] == 1) ||
+                label_dims.size() == 2UL,
+            true,
+            "The Input(Label) should be a 3-D tensor with last dimension "
+            "fixed to 1 or a 2-D tensor in padding mode.");
+      } else {
+        PADDLE_ENFORCE_EQ((label_dims.size() == 2UL && label_dims[1] == 1) ||
+                              label_dims.size() == 1UL,
+                          true,
+                          "The Input(Label) should be a 2-D tensor with last "
+                          "dimension fixed to 1 or a 1-D tensor.");
+      }
       if (ctx->IsRuntime() || (emission_dims[0] > 0 && label_dims[0] > 0)) {
         PADDLE_ENFORCE_EQ(
             emission_dims[0], label_dims[0],
-            "The height of Input(Emission) and the height of Input(Label) "
+            "The first dimension of Input(Emission) and Input(Label) "
             "should be the same.");
       }
     }
...
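Put differently, the accepted Label ranks now depend on whether Length is fed. A sketch of the rule (hypothetical helper for illustration):

    def label_shape_ok(dims, has_length):
        # Padding mode: [N, S, 1] or [N, S]; LoD mode: [N, 1] or [N].
        if has_length:
            return (len(dims) == 3 and dims[2] == 1) or len(dims) == 2
        return (len(dims) == 2 and dims[1] == 1) or len(dims) == 1

    assert label_shape_ok((4, 20, 1), has_length=True)
    assert label_shape_ok((4, 20), has_length=True)
    assert label_shape_ok((80, 1), has_length=False)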
@@ -46,23 +46,34 @@ class CRFDecodingOpKernel : public framework::OpKernel<T> {
       const int64_t* length_data = length->data<int64_t>();
       auto in_dims = emission_weights->dims();
-      auto& dev_ctx = ctx.template device_context<DeviceContext>();
-      framework::Tensor emission_weights_tmp =
-          ctx.AllocateTmpTensor<T, DeviceContext>(emission_weights->dims(),
-                                                  dev_ctx);
-      emission_weights_tmp.ShareDataWith(*emission_weights);
+      Tensor emission_weights_tmp = *emission_weights;
       emission_weights_tmp.Resize({in_dims[0] * in_dims[1], in_dims[2]});
       decoded_path->Resize({in_dims[0] * in_dims[1], 1});
       for (size_t i = 0; i < seq_num; ++i) {
         if (length_data[i] == 0) continue;
-        int start_pos = i * in_dims[1];
-        int end_pos = start_pos + static_cast<int>(length_data[i]);
+        int64_t start_pos = i * in_dims[1];
+        int64_t end_pos = start_pos + static_cast<int64_t>(length_data[i]);
         Tensor decoded_path_one_seq = decoded_path->Slice(start_pos, end_pos);
         Decode(emission_weights_tmp.Slice(start_pos, end_pos),
                *transition_weights, &decoded_path_one_seq);
       }
       decoded_path->Resize({in_dims[0], in_dims[1]});
+      if (label) {
+        const int64_t* label_value = label->data<int64_t>();
+        for (size_t i = 0; i < seq_num; ++i) {
+          for (int64_t j = 0; j < in_dims[1]; ++j) {
+            int64_t start_pos = i * in_dims[1];
+            if (j < length_data[i]) {
+              path[start_pos + j] =
+                  label_value[start_pos + j] == path[start_pos + j] ? 1 : 0;
+            } else {
+              path[start_pos + j] = 0;
+            }
+          }
+        }
+      }
     } else {
       PADDLE_ENFORCE_EQ(emission_weights->NumLevels(), 1UL,
                         "The Input(Emission) should be a sequence.");
@@ -73,22 +84,20 @@ class CRFDecodingOpKernel : public framework::OpKernel<T> {
       for (size_t i = 0; i < seq_num; ++i) {
         if (lod[level][i] == lod[level][i + 1]) continue;
-        int start_pos = static_cast<int>(lod[level][i]);
-        int end_pos = static_cast<int>(lod[level][i + 1]);
+        int64_t start_pos = static_cast<int64_t>(lod[level][i]);
+        int64_t end_pos = static_cast<int64_t>(lod[level][i + 1]);
         Tensor decoded_path_one_seq = decoded_path->Slice(start_pos, end_pos);
         Decode(emission_weights->Slice(start_pos, end_pos), *transition_weights,
                &decoded_path_one_seq);
       }
-    }
-    if (label) {
-      if (!has_length) {
-        PADDLE_ENFORCE_EQ(label->NumLevels(), 1UL,
-                          "The Input(Label) should be a sequence.");
-      }
-      const int64_t* label_value = label->data<int64_t>();
-      size_t numel = label->numel();
-      for (size_t i = 0; i < numel; ++i) {
-        path[i] = label_value[i] == path[i] ? 1 : 0;
+      if (label) {
+        PADDLE_ENFORCE_EQ(label->NumLevels(), 1UL,
+                          "The Input(Label) should be a sequence.");
+        const int64_t* label_value = label->data<int64_t>();
+        size_t numel = label->numel();
+        for (size_t i = 0; i < numel; ++i) {
+          path[i] = label_value[i] == path[i] ? 1 : 0;
+        }
       }
     }
   }
...
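With this change, feeding Label turns the padded-mode output into a per-position 0/1 agreement mask, with padded positions forced to 0. A numpy sketch of the same semantics (hypothetical helper; assumes a flattened [batch * max_len] path):

    import numpy as np

    def mask_with_label(path, label, lengths, max_len):
        # 1 where the Viterbi tag matches the gold tag, 0 elsewhere and at padding.
        out = np.zeros_like(path)
        for i, l in enumerate(lengths):
            s = i * max_len
            out[s:s + l] = (label[s:s + l] == path[s:s + l]).astype(path.dtype)
        return out

    path = np.array([4, 4, 4, 4, 0, 0], dtype=np.int64)   # decoded, max_len=3
    label = np.array([4, 1, 4, 4, 0, 0], dtype=np.int64)
    print(mask_with_label(path, label, lengths=[2, 1], max_len=3))  # [1 0 0 1 0 0]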
@@ -22,13 +22,14 @@ namespace operators {
 class LinearChainCRFOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
-    AddInput("Emission",
-             "(LoDTensor/Tensor<float>). When a LoDTensor input,A 2-D LoDTensor"
-             " with shape [N x D], where N is the size of the "
-             "mini-batch and D is the total tag number. The unscaled emission "
-             "weight matrix for the linear chain CRF. When a Tensor input,"
-             "A Tensor with shape [N x S x D], where N is batch number,"
-             "S is max length of sequences, D is the total tag number.");
+    AddInput(
+        "Emission",
+        "(LoDTensor/Tensor<float>). When a LoDTensor input, A 2-D LoDTensor"
+        " with shape [N x D], where N is the size of the "
+        "mini-batch and D is the total tag number. The unscaled emission "
+        "weight matrix for the linear chain CRF. When a Tensor input,"
+        "A Tensor with shape [N x S x D], where N is batch size,"
+        "S is max length of sequences, D is the total tag number.");
     AddInput("Transition",
              "(Tensor, default Tensor<float>) A 2-D Tensor with shape "
              "[(D + 2) x D]. The learnable parameter for the linear_chain_crf "
@@ -38,7 +39,7 @@ class LinearChainCRFOpMaker : public framework::OpProtoAndCheckerMaker {
              "[N x 1], where N is the total element number in a mini-batch. "
              "when a Tensor input, [N x S], where N is batch number. "
              "S is max length of sequences. The ground truth.");
-    AddInput("length",
+    AddInput("Length",
              "(Tensor, default Tensor<int64_t>) A Tensor with shape "
              "[M x 1], where M is the sequence number in a mini-batch.")
         .AsDispensable();
@@ -169,12 +170,16 @@ class LinearChainCRFOp : public framework::OperatorWithKernel {
     auto emission_dims = ctx->GetInputDim("Emission");
     PADDLE_ENFORCE_NE(emission_dims[0], 0,
                       "An empty mini-batch is not allowed.");
-    if (ctx->HasInput("length")) {
+    if (ctx->HasInput("Length")) {
       PADDLE_ENFORCE_EQ(emission_dims.size(), 3,
                         "The Input(Emission) should be a 3-D tensor.");
       auto label_dims = ctx->GetInputDim("Label");
-      PADDLE_ENFORCE_EQ(label_dims.size(), 3,
-                        "The Input(Label) should be a 3-D tensor");
+      PADDLE_ENFORCE_EQ(
+          (label_dims.size() == 3UL && label_dims[2] == 1) ||
+              (label_dims.size() == 2UL),
+          true,
+          "The Input(Label) should be a 3-D tensor with last "
+          "dimension fixed to 1 or a 2-D tensor in padding mode.");
       PADDLE_INFERSHAPE_ENFORCE_EQ(
           ctx, emission_dims[0], label_dims[0],
           "The batch size of Input(Emission) and Input(Label) "
@@ -249,7 +254,7 @@ class LinearChainCRFGradOp : public framework::OperatorWithKernel {
     auto emission_exps_dims = ctx->GetInputDim("EmissionExps");
     auto label_dims = ctx->GetInputDim("Label");
-    if (ctx->HasInput("length")) {
+    if (ctx->HasInput("Length")) {
       PADDLE_ENFORCE_EQ(emission_exps_dims.size(), 3,
                         "The Input(EmissionExps) should be a 3-D tensor.");
       PADDLE_INFERSHAPE_ENFORCE_EQ(
@@ -281,7 +286,7 @@ class LinearChainCRFGradOp : public framework::OperatorWithKernel {
     if (ctx->HasOutput(framework::GradVarName("Emission"))) {
       ctx->SetOutputDim(framework::GradVarName("Emission"), emission_exps_dims);
-      if (ctx->HasInput("length") == false) {
+      if (ctx->HasInput("Length") == false) {
         ctx->ShareLoD("Emission", framework::GradVarName("Emission"));
       }
     }
@@ -320,8 +325,8 @@ class LinearChainCRFGradDescMaker : public framework::SingleGradOpDescMaker {
     op->SetInput("Alpha", Output("Alpha"));
     op->SetInput("EmissionExps", Output("EmissionExps"));
     op->SetInput("TransitionExps", Output("TransitionExps"));
-    if (ForwardOp().Inputs().count("length") > 0) {
-      op->SetInput("length", Input("length"));
+    if (ForwardOp().Inputs().count("Length") > 0) {
+      op->SetInput("Length", Input("Length"));
     }
     op->SetInput(framework::GradVarName("LogLikelihood"),
                  OutputGrad("LogLikelihood"));
...
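In padding mode, the shapes that the renamed Length input ties together look like this (a numpy sketch with illustrative sizes, not code from the patch):

    import numpy as np

    N, S, D = 4, 20, 10  # batch size, max sequence length, tag number
    emission = np.random.rand(N, S, D).astype('float32')  # must be 3-D
    label = np.zeros((N, S, 1), dtype='int64')            # or (N, S) in padding mode
    length = np.full((N, 1), S, dtype='int64')            # one length per sequence

    assert emission.ndim == 3
    assert label.shape[0] == emission.shape[0] == length.shape[0]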
@@ -65,62 +65,51 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> {
     // Because the computation codes only runs on CPU, here the memory for all
     // the outputs is FIXED to be allocated on the CPU memory.
-    auto* emission_exps_data =
-        emission_exps->mutable_data<T>(platform::CPUPlace());
-    auto* alpha_data = alpha->mutable_data<T>(platform::CPUPlace());
+    emission_exps->mutable_data<T>(platform::CPUPlace());
+    alpha->mutable_data<T>(platform::CPUPlace());
     transition_exps->mutable_data<T>(platform::CPUPlace());
-    // Resize the output tensor to its correct dimension.
-    memset(emission_exps_data, 0, emission_exps->numel() * sizeof(T));
-    memset(alpha_data, 0, alpha->numel() * sizeof(T));
     auto emission_dims = emission_weights->dims();
     const Tensor* label = ctx.Input<framework::Tensor>("Label");
-    auto& dev_ctx = ctx.template device_context<DeviceContext>();
-    Tensor emission_weights_tmp = ctx.AllocateTmpTensor<T, DeviceContext>(
-        emission_weights->dims(), dev_ctx);
-    emission_weights_tmp.ShareDataWith(*emission_weights);
-    Tensor label_tmp =
-        ctx.AllocateTmpTensor<T, DeviceContext>(label->dims(), dev_ctx);
-    label_tmp.ShareDataWith(*label);
-    Tensor emission_exps_tmp =
-        ctx.AllocateTmpTensor<T, DeviceContext>(emission_exps->dims(), dev_ctx);
-    emission_exps_tmp.ShareDataWith(*emission_exps);
-    Tensor alpha_tmp =
-        ctx.AllocateTmpTensor<T, DeviceContext>(alpha->dims(), dev_ctx);
-    alpha_tmp.ShareDataWith(*alpha);
-    size_t seq_num = 0;
-    size_t batch_size;
-    size_t tag_num;
+    Tensor emission_weights_tmp = *emission_weights;
+    Tensor label_tmp = *label;
+    Tensor emission_exps_tmp = *emission_exps;
+    Tensor alpha_tmp = *alpha;
+    int64_t seq_num = 0;
+    int64_t batch_size;
+    int64_t tag_num;
     const int64_t* length_data = nullptr;
-    framework::Vector<size_t> in_lod;
-    if (ctx.HasInput("length")) {
-      const Tensor* label_length = ctx.Input<framework::Tensor>("length");
+    framework::LoD in_lod;
+    if (ctx.HasInput("Length")) {
+      const Tensor* label_length = ctx.Input<framework::Tensor>("Length");
       length_data = label_length->data<int64_t>();
       seq_num = label_length->numel();
-      batch_size = emission_dims[0] * emission_dims[1];
-      tag_num = emission_dims[2];
-      emission_weights_tmp.Resize(
-          {emission_dims[0] * emission_dims[1], emission_dims[2]});
-      auto label_dims = label->dims();
-      label_tmp.Resize({label_dims[0] * label_dims[1], label_dims[2]});
-      alpha_tmp.Resize({emission_dims[0] * emission_dims[1], emission_dims[2]});
-      emission_exps_tmp.Resize(
-          {emission_dims[0] * emission_dims[1], emission_dims[2]});
       PADDLE_ENFORCE_EQ(seq_num, emission_dims[0],
                         "the size of Input(length) must be equal to "
                         "emission_dims[0].");
+      auto label_dims = label->dims();
       PADDLE_ENFORCE_EQ(seq_num, label_dims[0],
                         "the size of Input(length) must be equal to "
                         "label_dims[0].");
+      batch_size = emission_dims[0] * emission_dims[1];
+      tag_num = emission_dims[2];
+      emission_weights_tmp.Resize({batch_size, tag_num});
+      label_tmp.Resize({batch_size, 1});
+      alpha_tmp.Resize({batch_size, tag_num});
+      emission_exps_tmp.Resize({batch_size, tag_num});
+      math::set_constant(ctx.device_context(), emission_exps, 0.0);
+      math::set_constant(ctx.device_context(), alpha, 0.0);
     } else {
-      seq_num = ctx.Input<LoDTensor>("Label")->lod()[0].size() - 1;
+      in_lod = ctx.Input<LoDTensor>("Label")->lod();
+      PADDLE_ENFORCE_NE(in_lod.size(), 0, "Input(Label) must be a sequence.");
+      seq_num = in_lod[0].size() - 1;
       batch_size = emission_dims[0];
       tag_num = emission_dims[1];
-      in_lod = ctx.Input<LoDTensor>("Label")->lod()[0];
-      PADDLE_ENFORCE_NE(in_lod.size(), 0, "Input(Label) must be a sequence.");
     }
-    ll->Resize({static_cast<int>(seq_num), 1});
+    // Resize the output tensor to its correct dimension.
+    ll->Resize({seq_num, 1});
     ll->mutable_data<T>(platform::CPUPlace());
     // Now, all the inputs and outputs should be on the CPU memory.
     Tensor emission_row_max;
@@ -141,16 +130,15 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> {
     auto w_exps = EigenMatrix<T>::From(*transition_exps);
     w_exps.device(place) = w.exp();
     T* log_likelihood = ll->data<T>();
-    for (size_t i = 0; i < seq_num; ++i) {
-      int start_pos = 0;
-      int end_pos = 0;
-      if (ctx.HasInput("length")) {
-        if (length_data[i] == 0) continue;
+    for (int64_t i = 0; i < seq_num; ++i) {
+      int64_t start_pos = 0;
+      int64_t end_pos = 0;
+      if (ctx.HasInput("Length")) {
         start_pos = i * emission_dims[1];
-        end_pos = start_pos + static_cast<int>(length_data[i]);
+        end_pos = start_pos + length_data[i];
       } else {
-        start_pos = static_cast<int>(in_lod[i]);
-        end_pos = static_cast<int>(in_lod[i + 1]);
+        start_pos = static_cast<int64_t>(in_lod[0][i]);
+        end_pos = static_cast<int64_t>(in_lod[0][i + 1]);
       }
       if (end_pos == start_pos) {
         // If an empty input sequence is given, pad 0 for its cost.
@@ -239,44 +227,35 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel<T> {
     const Tensor* alpha = ctx.Input<Tensor>("Alpha");
     const T* ll_grad =
        ctx.Input<Tensor>(framework::GradVarName("LogLikelihood"))->data<T>();
-    auto& dev_ctx = ctx.template device_context<DeviceContext>();
     Tensor* emission_grad =
         ctx.Output<Tensor>(framework::GradVarName("Emission"));
     auto* emission_grad_data =
         emission_grad->mutable_data<T>(platform::CPUPlace());
     memset(emission_grad_data, 0, emission_grad->numel() * sizeof(T));
-    Tensor alpha_tmp =
-        ctx.AllocateTmpTensor<T, DeviceContext>(alpha->dims(), dev_ctx);
-    alpha_tmp.ShareDataWith(*alpha);
-    Tensor label_tmp =
-        ctx.AllocateTmpTensor<T, DeviceContext>(label->dims(), dev_ctx);
-    label_tmp.ShareDataWith(*label);
-    Tensor emission_exps_tmp =
-        ctx.AllocateTmpTensor<T, DeviceContext>(emission_exps->dims(), dev_ctx);
-    emission_exps_tmp.ShareDataWith(*emission_exps);
-    Tensor emission_grad_tmp =
-        ctx.AllocateTmpTensor<T, DeviceContext>(emission_grad->dims(), dev_ctx);
-    emission_grad_tmp.ShareDataWith(*emission_grad);
+    Tensor alpha_tmp = *alpha;
+    Tensor label_tmp = *label;
+    Tensor emission_exps_tmp = *emission_exps;
+    Tensor emission_grad_tmp = *emission_grad;
     // getting seq_num using padding or not
-    size_t seq_num = 0;
-    framework::Vector<size_t> lod;
+    int64_t seq_num = 0;
+    framework::LoD in_lod;
     const int64_t* length_data = nullptr;
-    if (ctx.HasInput("length")) {
-      const Tensor* label_length = ctx.Input<framework::Tensor>("length");
+    if (ctx.HasInput("Length")) {
+      const Tensor* label_length = ctx.Input<framework::Tensor>("Length");
       length_data = label_length->data<int64_t>();
       seq_num = label_length->numel();
       auto emission_dims = emission_grad->dims();
       auto label_dims = label->dims();
       emission_grad_tmp.Resize(
           {emission_dims[0] * emission_dims[1], emission_dims[2]});
-      label_tmp.Resize({label_dims[0] * label_dims[1], label_dims[2]});
+      label_tmp.Resize({label_dims[0] * label_dims[1], 1});
       alpha_tmp.Resize({emission_dims[0] * emission_dims[1], emission_dims[2]});
       emission_exps_tmp.Resize(
           {emission_dims[0] * emission_dims[1], emission_dims[2]});
     } else {
-      seq_num = ctx.Input<LoDTensor>("Label")->lod()[0].size() - 1;
-      lod = ctx.Input<LoDTensor>("Label")->lod()[0];
-      PADDLE_ENFORCE_NE(lod.size(), 0, "Input(Label) must be a sequence.");
+      in_lod = ctx.Input<LoDTensor>("Label")->lod();
+      PADDLE_ENFORCE_NE(in_lod.size(), 0, "Input(Label) must be a sequence.");
+      seq_num = static_cast<int64_t>(in_lod[0].size() - 1);
     }

     Tensor* transition_grad =
@@ -295,21 +274,24 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel<T> {
     // captures the unnormalized probabilities of partial sequences starting
     // at position i.
     Tensor beta;
-    auto* beta_data = beta.mutable_data<T>(emission_dims, platform::CPUPlace());
-    memset(beta_data, 0, beta.numel() * sizeof(T));
-    if (ctx.HasInput("length")) {
+    beta.mutable_data<T>(emission_dims, platform::CPUPlace());
+    if (ctx.HasInput("Length")) {
       beta.Resize({emission_dims[0] * emission_dims[1], emission_dims[2]});
     }
-    for (size_t i = 0; i < seq_num; ++i) {
-      int start_pos = 0;
-      int end_pos = 0;
-      if (ctx.HasInput("length")) {
-        if (length_data[i] == 0) continue;
+
+    for (int64_t i = 0; i < seq_num; ++i) {
+      int64_t start_pos = 0;
+      int64_t end_pos = 0;
+      if (ctx.HasInput("Length")) {
         start_pos = i * emission_dims[1];
-        end_pos = start_pos + static_cast<int>(length_data[i]);
+        end_pos = start_pos + length_data[i];
       } else {
-        start_pos = static_cast<int>(lod[i]);
-        end_pos = static_cast<int>(lod[i + 1]);
+        start_pos = static_cast<int64_t>(in_lod[0][i]);
+        end_pos = static_cast<int64_t>(in_lod[0][i + 1]);
+      }
+      if (end_pos == start_pos) {
+        continue;
       }
       const Tensor one_seq_emission_exps =
           emission_exps_tmp.Slice(start_pos, end_pos);
...
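Both kernels now share the same flatten-and-slice pattern for padded input: view [N, S, D] as [N * S, D] and process rows [i * S, i * S + length[i]) for each sequence. A numpy sketch of that indexing (illustrative sizes):

    import numpy as np

    N, S, D = 3, 5, 4  # batch, max length, tags
    emission = np.random.rand(N, S, D)
    lengths = np.array([5, 2, 0])

    flat = emission.reshape(N * S, D)  # the Resize({N * S, D}) view
    for i, l in enumerate(lengths):
        if l == 0:
            continue  # empty sequences contribute nothing, as in the kernels
        seq = flat[i * S:i * S + l]  # valid rows of sequence i
        assert seq.shape == (l, D)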
@@ -1491,7 +1491,7 @@ def linear_chain_crf(input, label, param_attr=None, length=None):
             print(transition)
     """
     helper = LayerHelper('linear_chain_crf', **locals())
-    size = input.shape[1]
+    size = input.shape[2] if length else input.shape[1]
     transition = helper.create_parameter(
         attr=helper.param_attr,
         shape=[size + 2, size],
@@ -1510,7 +1510,7 @@ def linear_chain_crf(input, label, param_attr=None, length=None):
         "Label": [label]
     }
     if length:
-        this_inputs['length'] = [length]
+        this_inputs['Length'] = [length]
     helper.append_op(
         type='linear_chain_crf',
         inputs=this_inputs,
@@ -1525,7 +1525,7 @@ def linear_chain_crf(input, label, param_attr=None, length=None):
 @templatedoc()
-def crf_decoding(input, param_attr, label=None):
+def crf_decoding(input, param_attr, label=None, length=None):
     """
     ${comment}
@@ -1535,6 +1535,8 @@ def crf_decoding(input, param_attr, label=None):
         param_attr(ParamAttr): The parameter attribute for training.

         label(${label_type}): ${label_comment}

+        length(${length_type}): ${length_comment}
+
     Returns:
         Variable: ${viterbi_path_comment}
@@ -1543,23 +1545,41 @@ def crf_decoding(input, param_attr, label=None):
     .. code-block:: python

            import paddle.fluid as fluid
-           images = fluid.layers.data(name='pixel', shape=[784], dtype='float32')
-           label = fluid.layers.data(name='label', shape=[1], dtype='int32')
-           hidden = fluid.layers.fc(input=images, size=2)
-           crf = fluid.layers.linear_chain_crf(input=hidden, label=label,
-                     param_attr=fluid.ParamAttr(name="crfw"))
-           crf_decode = fluid.layers.crf_decoding(input=hidden,
-                     param_attr=fluid.ParamAttr(name="crfw"))
+
+           # LoDTensor-based example
+           num_labels = 10
+           feature = fluid.layers.data(name='word_emb', shape=[784], dtype='float32', lod_level=1)
+           label = fluid.layers.data(name='label', shape=[1], dtype='int64', lod_level=1)
+           emission = fluid.layers.fc(input=feature, size=num_labels)
+           crf_cost = fluid.layers.linear_chain_crf(input=emission, label=label,
+                     param_attr=fluid.ParamAttr(name="crfw"))
+           crf_decode = fluid.layers.crf_decoding(input=emission,
+                     param_attr=fluid.ParamAttr(name="crfw"))
+
+           # Common tensor example
+           num_labels, max_len = 10, 20
+           feature = fluid.layers.data(name='word_emb_pad', shape=[max_len, 784], dtype='float32')
+           label = fluid.layers.data(name='label_pad', shape=[max_len, 1], dtype='int64')
+           length = fluid.layers.data(name='length', shape=[1], dtype='int64')
+           emission = fluid.layers.fc(input=feature, size=num_labels,
+                                      num_flatten_dims=2)
+           crf_cost = fluid.layers.linear_chain_crf(input=emission, label=label, length=length,
+                     param_attr=fluid.ParamAttr(name="crfw_pad"))
+           crf_decode = fluid.layers.crf_decoding(input=emission, length=length,
+                     param_attr=fluid.ParamAttr(name="crfw_pad"))
     """
     helper = LayerHelper('crf_decoding', **locals())
     transition = helper.get_parameter(param_attr.name)
     viterbi_path = helper.create_variable_for_type_inference(
         dtype=helper.input_dtype())
+    inputs = {"Emission": [input], "Transition": transition, "Label": label}
+    if length:
+        inputs['Length'] = length
     helper.append_op(
         type='crf_decoding',
-        inputs={"Emission": [input],
-                "Transition": transition,
-                "Label": label},
+        inputs=inputs,
         outputs={"ViterbiPath": [viterbi_path]})
     return viterbi_path
...
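To run the padded-mode example from the docstring, one would feed plain dense arrays. A sketch of a matching feed dict (the executor wiring is illustrative and assumes the network above was built in the default program):

    import numpy as np
    import paddle.fluid as fluid

    batch_size, max_len, num_labels = 8, 20, 10
    feed = {
        'word_emb_pad': np.random.rand(batch_size, max_len, 784).astype('float32'),
        'label_pad': np.random.randint(0, num_labels, (batch_size, max_len, 1)).astype('int64'),
        'length': np.random.randint(1, max_len + 1, (batch_size, 1)).astype('int64'),
    }
    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())
    # viterbi, = exe.run(feed=feed, fetch_list=[crf_decode])  # crf_decode from the example above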
@@ -176,22 +176,23 @@ class TestCRFDecodingOp4(TestCRFDecodingOp2):
         self.lod = [[0, 2, 3, 0]]

+def seq_pad(data, length):
+    max_len = np.max(length)
+    shape = [len(length), max_len] + list(data.shape[1:])
+    padded = np.zeros(shape).astype(data.dtype)
+    offset = 0
+    for i, l in enumerate(length):
+        padded[i, 0:l] = data[offset:offset + l]
+        offset += l
+    return np.squeeze(padded)

 class TestCRFDecodingOp5(OpTest):
     """
     Compare the dynamic program with random generated parameters and inputs
     with ground truth not being given.
     """

-    def seq_pad(self, data, length):
-        max_len = np.max(length)
-        shape = [len(length), max_len] + list(data.shape[1:])
-        padded = np.zeros(shape).astype(data.dtype)
-        offset = 0
-        for i, l in enumerate(length):
-            padded[i, 0:l] = data[offset:offset + l]
-            offset += l
-        return np.squeeze(padded)

     def set_test_data(self):
         SEQ_NUM = 3
         TAG_NUM = 17
@@ -208,7 +209,7 @@ class TestCRFDecodingOp5(OpTest):
             [TAG_NUM + 2, TAG_NUM]).astype("float64")

         self.inputs = {
-            "Emission": self.seq_pad(emission, lod[0]),
+            "Emission": seq_pad(emission, lod[0]),
             "Transition": transition,
             "Length": np.array(lod).astype('int64'),
         }
@@ -216,7 +217,7 @@ class TestCRFDecodingOp5(OpTest):
         decoder = CRFDecoding(emission, transition, lod[0])
         decoded_path = decoder.decode()

-        self.outputs = {"ViterbiPath": self.seq_pad(decoded_path, lod[0])}
+        self.outputs = {"ViterbiPath": seq_pad(decoded_path, lod[0])}

     def setUp(self):
         self.op_type = "crf_decoding"
@@ -226,5 +227,45 @@ class TestCRFDecodingOp5(OpTest):
         self.check_output()
class TestCRFDecodingOp6(OpTest):
def init_lod(self):
self.lod = [[1, 2, 3, 4]]
def setUp(self):
self.op_type = "crf_decoding"
TAG_NUM = 5
self.init_lod()
total_len = sum(self.lod[-1])
transition = np.repeat(
np.arange(
TAG_NUM, dtype="float64").reshape(1, TAG_NUM),
TAG_NUM + 2,
axis=0)
emission = np.repeat(
np.arange(
TAG_NUM, dtype="float64").reshape(1, TAG_NUM),
total_len,
axis=0)
labels = np.random.randint(
low=0, high=TAG_NUM, size=(total_len, 1), dtype="int64")
predicted_labels = np.ones(
(total_len, 1), dtype="int64") * (TAG_NUM - 1)
expected_output = (labels == predicted_labels).astype("int64")
self.inputs = {
"Emission": seq_pad(emission, self.lod[0]),
"Transition": transition,
"Label": seq_pad(labels, self.lod[0]),
"Length": np.array(self.lod).astype('int64'),
}
self.outputs = {"ViterbiPath": seq_pad(expected_output, self.lod[0])}
def test_check_output(self):
self.check_output()
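For reference on why TestCRFDecodingOp6's expected output is a simple comparison: every transition and emission row equals arange(TAG_NUM), so tag TAG_NUM - 1 scores highest at every step and the Viterbi path is constant; the masked output is then just the per-position match against the random labels. The same reasoning in a few lines of numpy:

    import numpy as np

    TAG_NUM = 5
    scores = np.arange(TAG_NUM, dtype="float64")
    best_tag = int(np.argmax(scores))                 # TAG_NUM - 1 wins everywhere
    labels = np.random.randint(0, TAG_NUM, size=(10, 1))
    expected = (labels == best_tag).astype("int64")   # mirrors expected_output above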
if __name__ == "__main__":
    unittest.main()
@@ -2556,21 +2556,46 @@ class TestBook(LayerTest):
                 input=fc_out, size=4 * hidden_dim, proj_size=proj_dim))

     def test_linear_chain_crf(self):
-        # TODO(minqiyang): dygraph do not support lod now
         with self.static_graph():
             label_dict_len = 10
-            images = layers.data(name='pixel', shape=[784], dtype='float32')
-            label = layers.data(name='label', shape=[1], dtype='int32')
-            hidden = layers.fc(input=images, size=2)
+            feature = layers.data(name='feature', shape=[784], dtype='float32')
+            label = layers.data(name='label', shape=[1], dtype='int64')
+            emission = layers.fc(input=feature, size=10)
             crf = layers.linear_chain_crf(
-                input=hidden, label=label, param_attr=ParamAttr(name="crfw"))
+                input=emission, label=label, param_attr=ParamAttr(name="crfw"))
             crf_decode = layers.crf_decoding(
-                input=hidden, param_attr=ParamAttr(name="crfw"))
+                input=emission, param_attr=ParamAttr(name="crfw"))
+            self.assertFalse(crf is None)
+            self.assertFalse(crf_decode is None)
+            return layers.chunk_eval(
+                input=crf_decode,
+                label=label,
+                chunk_scheme="IOB",
+                num_chunk_types=(label_dict_len - 1) // 2)
+
+    def test_linear_chain_crf_padding(self):
+        with self.static_graph():
+            label_dict_len, max_len = 10, 20
+            feature = layers.data(
+                name='feature', shape=[max_len, 784], dtype='float32')
+            label = layers.data(name='label', shape=[max_len], dtype='int64')
+            length = layers.data(name='length', shape=[1], dtype='int64')
+            emission = layers.fc(input=feature, size=10, num_flatten_dims=2)
+            crf = layers.linear_chain_crf(
+                input=emission,
+                label=label,
+                length=length,
+                param_attr=ParamAttr(name="crfw"))
+            crf_decode = layers.crf_decoding(
+                input=emission,
+                length=length,
+                param_attr=ParamAttr(name="crfw"))
             self.assertFalse(crf is None)
             self.assertFalse(crf_decode is None)
             return layers.chunk_eval(
                 input=crf_decode,
                 label=label,
+                seq_length=length,
                 chunk_scheme="IOB",
                 num_chunk_types=(label_dict_len - 1) // 2)
...
@@ -205,7 +205,7 @@ class TestLinearChainCrfPaddingTensor(OpTest):
             "Emission": self.seq_pad(emission, lod[0]),
             "Transition": transition,
             "Label": self.seq_pad(labels, lod[0]),
-            "length": np.array(lod).astype("int64")
+            "Length": np.array(lod).astype("int64")
         }
         crf = LinearChainCrfForward(seq_start_pos, emission, emission_row_max,
                                     emission_exps, transition, transition_exps,
...