From 45c8a88a3e0ff4ca0f5440102103a5423432969e Mon Sep 17 00:00:00 2001 From: Qiao Longfei Date: Tue, 5 Dec 2017 16:08:32 +0800 Subject: [PATCH] add crf_decoding layer (#6274) * add crf_decoding layer * fix some typo * fix test_crf_decoding_op --- paddle/operators/crf_decoding_op.cc | 17 +++++++++-------- paddle/operators/crf_decoding_op.h | 10 +++++----- python/paddle/v2/fluid/framework.py | 2 +- python/paddle/v2/fluid/layer_helper.py | 8 +++++++- python/paddle/v2/fluid/layers.py | 18 ++++++++++++++++++ .../tests/book/test_label_semantic_roles.py | 12 +++++++++--- .../v2/fluid/tests/test_crf_decoding_op.py | 12 ++++++------ python/paddle/v2/fluid/tests/test_layers.py | 7 ++++++- 8 files changed, 61 insertions(+), 25 deletions(-) diff --git a/paddle/operators/crf_decoding_op.cc b/paddle/operators/crf_decoding_op.cc index f418f489c0..291b23ed1b 100644 --- a/paddle/operators/crf_decoding_op.cc +++ b/paddle/operators/crf_decoding_op.cc @@ -36,17 +36,18 @@ class CRFDecodingOpMaker : public framework::OpProtoAndCheckerMaker { "w. See more details in comments of the linear_chain_crf operator."); AddInput( "Label", - "(LoDTensor, LoDTensor). The ground truth with shape " + "(LoDTensor, LoDTensor). The ground truth with shape " "[N x 1]. This input is optional. See more details in the operator's " "comments.") .AsDispensable(); - AddOutput("ViterbiPath", - "(LoDTensor, LoDTensor). The decoding results. What to " - "return changes depending on whether the Input(Label) (the groud " - "truth) is given. See more details in the operator's comment."); + AddOutput( + "ViterbiPath", + "(LoDTensor, LoDTensor). The decoding results. What to " + "return changes depending on whether the Input(Label) (the ground " + "truth) is given. See more details in the operator's comment."); AddComment(R"DOC( The crf_decoding operator reads the emission feature weights and the transition -freature weights learned by the linear_chain_crf operator. It implements the +feature weights learned by the linear_chain_crf operator. It implements the Viterbi algorithm which is a dynamic programming algorithm for finding the most likely sequence of hidden states, called the Viterbi path, that results in a sequence of observed tags. @@ -60,14 +61,14 @@ operator. When Input(Label) is given, the crf_decoding operator returns a row vector with shape [N x 1] whose values are fixed to be 0, indicating an incorrect -prediction, or 1 indicating a tag is correctly predicted. Such an ouput is the +prediction, or 1 indicating a tag is correctly predicted. Such an output is the input to chunk_eval operator. 2. Input(Label) is not given: This is the standard decoding process. -The crf_decoding operator returns a row vecotr with shape [N x 1] whose values +The crf_decoding operator returns a row vector with shape [N x 1] whose values range from 0 to maximum tag number - 1. Each element indicates an index of a predicted tag. )DOC"); diff --git a/paddle/operators/crf_decoding_op.h b/paddle/operators/crf_decoding_op.h index 526e0c5dcb..57b5e21b3a 100644 --- a/paddle/operators/crf_decoding_op.h +++ b/paddle/operators/crf_decoding_op.h @@ -43,9 +43,9 @@ class CRFDecodingOpKernel : public framework::OpKernel { const size_t level = 0; const size_t seq_num = lod[level].size() - 1; - int* path = decoded_path->mutable_data(platform::CPUPlace()); - math::SetConstant()(ctx.device_context(), - decoded_path, 0); + int64_t* path = decoded_path->mutable_data(platform::CPUPlace()); + math::SetConstant()(ctx.device_context(), + decoded_path, 0); for (size_t i = 0; i < seq_num; ++i) { int start_pos = static_cast(lod[level][i]); int end_pos = static_cast(lod[level][i + 1]); @@ -57,7 +57,7 @@ class CRFDecodingOpKernel : public framework::OpKernel { if (label) { PADDLE_ENFORCE_EQ(label->NumLevels(), 1UL, "The Input(Label) should be a sequence."); - const int* label_value = label->data(); + const int64_t* label_value = label->data(); size_t batch_size = emission_weights->dims()[0]; for (size_t i = 0; i < batch_size; ++i) { path[i] = label_value[i] == path[i] ? 1 : 0; @@ -76,7 +76,7 @@ class CRFDecodingOpKernel : public framework::OpKernel { const T* x = emission_weights.data(); const T* w = transition_weights.data(); - int* path = decoded_path->data(); + int64_t* path = decoded_path->data(); // alpha is a memo table. An element alpha(k, v) records the score of the // best sequence of tags from position 1 to position k with v being the end diff --git a/python/paddle/v2/fluid/framework.py b/python/paddle/v2/fluid/framework.py index 49c6d89834..cd8bbe0836 100644 --- a/python/paddle/v2/fluid/framework.py +++ b/python/paddle/v2/fluid/framework.py @@ -237,7 +237,7 @@ class Operator(object): def find_name(var_list, name): for var_name in var_list: - if var_name == name: + if var_list[var_name] is not None and var_name == name: return True return False diff --git a/python/paddle/v2/fluid/layer_helper.py b/python/paddle/v2/fluid/layer_helper.py index 5b384e5cf5..cbee3fe637 100644 --- a/python/paddle/v2/fluid/layer_helper.py +++ b/python/paddle/v2/fluid/layer_helper.py @@ -1,7 +1,7 @@ import copy import itertools -from framework import Variable, default_main_program, default_startup_program, \ +from framework import Variable, Parameter, default_main_program, default_startup_program, \ unique_name, dtype_is_floating from paddle.v2.fluid.initializer import Constant, Xavier from param_attr import ParamAttr @@ -122,6 +122,12 @@ class LayerHelper(object): return self.main_program.global_block().create_parameter( dtype=dtype, shape=shape, **attr.to_kwargs()) + def get_parameter(self, name): + param = self.main_program.global_block().var(name) + if not isinstance(param, Parameter): + raise ValueError("no Parameter name %s found" % name) + return param + def create_tmp_variable(self, dtype): return self.main_program.current_block().create_var( name=unique_name(".".join([self.name, 'tmp'])), diff --git a/python/paddle/v2/fluid/layers.py b/python/paddle/v2/fluid/layers.py index 99d0ac4a1b..fc7b687263 100644 --- a/python/paddle/v2/fluid/layers.py +++ b/python/paddle/v2/fluid/layers.py @@ -477,6 +477,24 @@ def linear_chain_crf(input, return log_likelihood +def crf_decoding(input, + param_attr, + label=None, + main_program=None, + startup_program=None): + helper = LayerHelper('crf_decoding', **locals()) + transition = helper.get_parameter(param_attr.name) + viterbi_path = helper.create_tmp_variable(dtype=helper.input_dtype()) + helper.append_op( + type='crf_decoding', + inputs={"Emission": [input], + "Transition": transition, + "Label": label}, + outputs={"ViterbiPath": [viterbi_path]}) + + return viterbi_path + + def assign(input, output, main_program=None, startup_program=None): helper = LayerHelper('assign', **locals()) helper.append_op( diff --git a/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py index 0494c7cdca..0eb7cf600c 100644 --- a/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py +++ b/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py @@ -137,12 +137,19 @@ def main(): param_attr=fluid.ParamAttr( name='crfw', learning_rate=mix_hidden_lr)) avg_cost = fluid.layers.mean(x=crf_cost) + # TODO(qiao) - # 1. add crf_decode_layer and evaluator - # 2. use other optimizer and check why out will be NAN + # check other optimizers and check why out will be NAN sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.0001) sgd_optimizer.minimize(avg_cost) + # TODO(qiao) + # add dependency track and move this config before optimizer + crf_decode = fluid.layers.crf_decoding( + input=feature_out, + label=target, + param_attr=fluid.ParamAttr(name='crfw')) + train_data = paddle.batch( paddle.reader.shuffle( paddle.dataset.conll05.test(), buf_size=8192), @@ -168,7 +175,6 @@ def main(): feed=feeder.feed(data), fetch_list=[avg_cost]) avg_cost_val = np.array(outs[0]) - if batch_id % 10 == 0: print("avg_cost=" + str(avg_cost_val)) diff --git a/python/paddle/v2/fluid/tests/test_crf_decoding_op.py b/python/paddle/v2/fluid/tests/test_crf_decoding_op.py index ee2b996bf4..ab573da31d 100644 --- a/python/paddle/v2/fluid/tests/test_crf_decoding_op.py +++ b/python/paddle/v2/fluid/tests/test_crf_decoding_op.py @@ -20,14 +20,14 @@ class CRFDecoding(object): self.w = transition_weights[2:, :] self.track = np.zeros( - (seq_start_positions[-1], self.tag_num), dtype="int32") + (seq_start_positions[-1], self.tag_num), dtype="int64") self.decoded_path = np.zeros( - (seq_start_positions[-1], 1), dtype="int32") + (seq_start_positions[-1], 1), dtype="int64") def _decode_one_sequence(self, decoded_path, x): seq_len, tag_num = x.shape alpha = np.zeros((seq_len, tag_num), dtype="float64") - track = np.zeros((seq_len, tag_num), dtype="int32") + track = np.zeros((seq_len, tag_num), dtype="int64") for i in range(tag_num): alpha[0, i] = self.a[i] + x[0, i] @@ -125,10 +125,10 @@ class TestCRFDecodingOp2(OpTest): axis=0) labels = np.random.randint( - low=0, high=TAG_NUM, size=(lod[-1][-1], 1), dtype="int32") + low=0, high=TAG_NUM, size=(lod[-1][-1], 1), dtype="int64") predicted_labels = np.ones( - (lod[-1][-1], 1), dtype="int32") * (TAG_NUM - 1) - expected_output = (labels == predicted_labels).astype("int32") + (lod[-1][-1], 1), dtype="int64") * (TAG_NUM - 1) + expected_output = (labels == predicted_labels).astype("int64") self.inputs = { "Emission": (emission, lod), diff --git a/python/paddle/v2/fluid/tests/test_layers.py b/python/paddle/v2/fluid/tests/test_layers.py index a9d9d369c7..b2c31eecc1 100644 --- a/python/paddle/v2/fluid/tests/test_layers.py +++ b/python/paddle/v2/fluid/tests/test_layers.py @@ -4,6 +4,7 @@ import unittest import paddle.v2.fluid.layers as layers import paddle.v2.fluid.nets as nets from paddle.v2.fluid.framework import Program, program_guard +from paddle.v2.fluid.param_attr import ParamAttr class TestBook(unittest.TestCase): @@ -132,8 +133,12 @@ class TestBook(unittest.TestCase): images = layers.data(name='pixel', shape=[784], dtype='float32') label = layers.data(name='label', shape=[1], dtype='int32') hidden = layers.fc(input=images, size=128) - crf = layers.linear_chain_crf(input=hidden, label=label) + crf = layers.linear_chain_crf( + input=hidden, label=label, param_attr=ParamAttr(name="crfw")) + crf_decode = layers.crf_decoding( + input=hidden, param_attr=ParamAttr(name="crfw")) self.assertNotEqual(crf, None) + self.assertNotEqual(crf_decode, None) print(str(program)) -- GitLab