diff --git a/python/paddle/fluid/evaluator.py b/python/paddle/fluid/evaluator.py index 007337b9d0404fa264f7627ced35b5a3980a4701..b8ccfd083cafdea9e671aaca0c13fcc2de201b67 100644 --- a/python/paddle/fluid/evaluator.py +++ b/python/paddle/fluid/evaluator.py @@ -23,7 +23,6 @@ from .initializer import Constant from .layers import detection __all__ = [ - 'ChunkEvaluator', 'EditDistance', 'DetectionMAP', ] @@ -127,123 +126,6 @@ class Evaluator: return state -class ChunkEvaluator(Evaluator): - """ - Warning: This would be deprecated in the future. Please use fluid.metrics.ChunkEvaluator - instead. - - Accumulate counter numbers output by chunk_eval from mini-batches and - compute the precision recall and F1-score using the accumulated counter - numbers. - For some basics of chunking, please refer to - 'Chunking with Support Vector Machines '. - - Args: - input (Variable): prediction output of the network. - label (Variable): label of the test data set. - chunk_scheme (str): can be IOB/IOE/IOBES and IO. See the chunk_eval op for details. - num_chunk_types (int): the number of chunk type. - excluded_chunk_types (list): A list including chunk type ids, indicating chunk types that are not counted. - - Returns: - tuple: tuple containing: precision, recall, f1_score - - Examples: - .. code-block:: python - - exe = fluid.executor(place) - evaluator = fluid.Evaluator.ChunkEvaluator(input, label) - for epoch in PASS_NUM: - evaluator.reset(exe) - for data in batches: - loss = exe.run(fetch_list=[cost]) - distance, instance_error = distance_evaluator.eval(exe) - """ - - def __init__( - self, - input, - label, - chunk_scheme, - num_chunk_types, - excluded_chunk_types=None, - ): - super().__init__("chunk_eval") - main_program = self.helper.main_program - if main_program.current_block().idx != 0: - raise ValueError("You can only invoke Evaluator in root block") - - self.num_infer_chunks = self._create_state( - dtype='int64', shape=[1], suffix='num_infer_chunks' - ) - self.num_label_chunks = self._create_state( - dtype='int64', shape=[1], suffix='num_label_chunks' - ) - self.num_correct_chunks = self._create_state( - dtype='int64', shape=[1], suffix='num_correct_chunks' - ) - ( - precision, - recall, - f1_score, - num_infer_chunks, - num_label_chunks, - num_correct_chunks, - ) = layers.chunk_eval( - input=input, - label=label, - chunk_scheme=chunk_scheme, - num_chunk_types=num_chunk_types, - excluded_chunk_types=excluded_chunk_types, - ) - layers.sums( - input=[self.num_infer_chunks, num_infer_chunks], - out=self.num_infer_chunks, - ) - layers.sums( - input=[self.num_label_chunks, num_label_chunks], - out=self.num_label_chunks, - ) - layers.sums( - input=[self.num_correct_chunks, num_correct_chunks], - out=self.num_correct_chunks, - ) - - self.metrics.extend([precision, recall, f1_score]) - - def eval(self, executor, eval_program=None): - if eval_program is None: - eval_program = Program() - block = eval_program.current_block() - num_infer_chunks, num_label_chunks, num_correct_chunks = executor.run( - eval_program, - fetch_list=[_clone_var_(block, state) for state in self.states], - ) - num_infer_chunks = num_infer_chunks[0] - num_label_chunks = num_label_chunks[0] - num_correct_chunks = num_correct_chunks[0] - precision = ( - float(num_correct_chunks) / num_infer_chunks - if num_infer_chunks - else 0 - ) - recall = ( - float(num_correct_chunks) / num_label_chunks - if num_label_chunks - else 0 - ) - f1_score = ( - float(2 * precision * recall) / (precision + recall) - if num_correct_chunks - else 0 - ) - return ( - np.array([precision], dtype='float32'), - np.array([recall], dtype='float32'), - np.array([f1_score], dtype='float32'), - ) - - class EditDistance(Evaluator): """ Warning: This would be deprecated in the future. Please use fluid.metrics.EditDistance diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 57e8a24e0dc75311703d417ad6c247f870679629..d105ea892ccf2d6866b08142130707775575cb0a 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -68,7 +68,6 @@ __all__ = [ 'linear_chain_crf', 'crf_decoding', 'cos_sim', - 'chunk_eval', 'conv2d', 'conv3d', 'softmax', @@ -1254,168 +1253,6 @@ def dropout( return out -@templatedoc() -def chunk_eval( - input, - label, - chunk_scheme, - num_chunk_types, - excluded_chunk_types=None, - seq_length=None, -): - r""" - This operator computes the precision, recall and F1-score for chunk detection. - It is often used in sequence tagging tasks, such as Named Entity Recognition(NER). - - For some basics of chunking, please refer to - `Chunking with Support Vector Machines `_ . - - This operator supports IOB, IOE, IOBES and IO (also known as plain) tagging schemes. - Here is a NER example for the usage of these tagging schemes: - - .. code-block:: python - - ====== ====== ====== ===== == ============ ===== ===== ===== == ========= - Li Ming works at Agricultural Bank of China in Beijing. - ====== ====== ====== ===== == ============ ===== ===== ===== == ========= - IO I-PER I-PER O O I-ORG I-ORG I-ORG I-ORG O I-LOC - IOB B-PER I-PER O O B-ORG I-ORG I-ORG I-ORG O B-LOC - IOE I-PER E-PER O O I-ORG I-ORG I-ORG E-ORG O E-LOC - IOBES B-PER E-PER O O I-ORG I-ORG I-ORG E-ORG O S-LOC - ====== ====== ====== ===== == ============ ===== ===== ===== == ========= - - There are three chunk types(named entity types) including PER(person), ORG(organization) - and LOC(location), and we can see that the labels have the form `-` . - - Since the implementation of this operator actually uses label ids rather than - label strings, to make it work, there should be a way to map label ids to - tag types and chunk types. This operator uses the following way to do mapping: - - .. code-block:: python - - tag_type = label % num_tag_type - chunk_type = label / num_tag_type - - where `num_tag_type` is the num of tag types in the tagging scheme, `num_chunk_type` - is the num of chunk types, and `tag_type` get its value from the following table. - - .. code-block:: python - - Scheme Begin Inside End Single - plain 0 - - - - IOB 0 1 - - - IOE - 0 1 - - IOBES 0 1 2 3 - - Accordingly, in the above NER example, if the tagging scheme is IOB and chunk - types are ORG, PER and LOC, then the label ids would be as follows: - - .. code-block:: python - - B-ORG 0 - I-ORG 1 - B-PER 2 - I-PER 3 - B-LOC 4 - I-LOC 5 - O 6 - - With which we can map each label id to the corresponding tag type and chunk - type correctly. - - Args: - input (Tensor): A Tensor representing the predicted labels - from the network. Its shape would be `[N, M, 1]`, - where `N` stands for batch size, `M` for sequence length. - The data type should be int64. - label (Tensor): A Tensor representing the ground-truth labels. - It should have the same shape, lod and data type as ``input`` . - chunk_scheme (str): Indicate the tagging schemes used here. The value must - be IOB, IOE, IOBES or plain. - num_chunk_types (int): The number of chunk types. - excluded_chunk_types (list, optional): Indicate the chunk types shouldn't - be taken into account. It should be a list of chunk type ids(integer). - Default None. - seq_length(Tensor, optional): A 1D Tensor containing the length of each - sequence when ``input`` and ``label`` are Tensor. Default None. - - Returns: - tuple: A tuple including precision, recall, F1-score, chunk number detected, \ - chunk number in ground-truth, chunk number correctly detected. Each \ - is a Tensor with shape `[1]`. The data type of precision, recall and \ - F1-score all is float32, and the others' data type all is int64. - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - - dict_size = 10000 - label_dict_len = 7 - sequence = fluid.data( - name='id', shape=[None, 1], lod_level=1, dtype='int64') - embedding = fluid.embedding( - input=sequence, size=[dict_size, 512]) - hidden = fluid.layers.fc(input=embedding, size=512) - label = fluid.data( - name='label', shape=[None, 1], lod_level=1, dtype='int64') - crf = fluid.layers.linear_chain_crf( - input=hidden, label=label, param_attr=fluid.ParamAttr(name="crfw")) - crf_decode = fluid.layers.crf_decoding( - input=hidden, param_attr=fluid.ParamAttr(name="crfw")) - fluid.layers.chunk_eval( - input=crf_decode, - label=label, - chunk_scheme="IOB", - num_chunk_types=int((label_dict_len - 1) / 2)) - """ - helper = LayerHelper("chunk_eval", **locals()) - - check_variable_and_dtype(input, 'input', ['int64'], 'chunk_eval') - check_variable_and_dtype(label, 'label', ['int64'], 'chunk_eval') - - # prepare output - precision = helper.create_variable_for_type_inference(dtype="float32") - recall = helper.create_variable_for_type_inference(dtype="float32") - f1_score = helper.create_variable_for_type_inference(dtype="float32") - num_infer_chunks = helper.create_variable_for_type_inference(dtype="int64") - num_label_chunks = helper.create_variable_for_type_inference(dtype="int64") - num_correct_chunks = helper.create_variable_for_type_inference( - dtype="int64" - ) - - this_input = {"Inference": [input], "Label": [label]} - - if seq_length is not None: - this_input["SeqLength"] = [seq_length] - - helper.append_op( - type="chunk_eval", - inputs=this_input, - outputs={ - "Precision": [precision], - "Recall": [recall], - "F1-Score": [f1_score], - "NumInferChunks": [num_infer_chunks], - "NumLabelChunks": [num_label_chunks], - "NumCorrectChunks": [num_correct_chunks], - }, - attrs={ - "num_chunk_types": num_chunk_types, - "chunk_scheme": chunk_scheme, - "excluded_chunk_types": excluded_chunk_types or [], - }, - ) - return ( - precision, - recall, - f1_score, - num_infer_chunks, - num_label_chunks, - num_correct_chunks, - ) - - @deprecated(since="2.0.0", update_to="paddle.nn.functional.softmax") def softmax(input, use_cudnn=True, name=None, axis=-1): r""" diff --git a/python/paddle/fluid/tests/unittests/test_chunk_eval_op.py b/python/paddle/fluid/tests/unittests/test_chunk_eval_op.py index d6b2bcc28c0d3134c07846d28e472e934492c67d..1cbf8ebabb80422b47b69a24aa873ed593965e9c 100644 --- a/python/paddle/fluid/tests/unittests/test_chunk_eval_op.py +++ b/python/paddle/fluid/tests/unittests/test_chunk_eval_op.py @@ -16,8 +16,6 @@ import unittest import numpy as np from op_test import OpTest import numpy as np -from paddle.fluid import Program, program_guard -from paddle import fluid class Segment: @@ -283,50 +281,5 @@ class TestChunkEvalOpWithTensorInput(TestChunkEvalOp): } -class TestChunkEvalOpError(unittest.TestCase): - def test_errors(self): - with program_guard(Program(), Program()): - - def test_input(): - input_data = np.random.random(1, 1).astype("int64") - label_data = np.random.random(1).astype("int64") - fluid.layers.chunk_eval( - input=input_data, - label=label_data, - chunk_scheme="IOB", - num_chunk_types=3, - ) - - self.assertRaises(TypeError, test_input) - - def test_label(): - input_ = fluid.data( - name="input", shape=[None, 1], dtype="int64" - ) - label_data = np.random.random(1).astype("int64") - fluid.layers.chunk_eval( - input=input_, - label=label_data, - chunk_scheme="IOB", - num_chunk_types=3, - ) - - self.assertRaises(TypeError, test_label) - - def test_type(): - in_data = fluid.data( - name="input_", shape=[None, 1], dtype="int32" - ) - label = fluid.data(name="label_", shape=[1], dtype="int64") - fluid.layers.chunk_eval( - input=in_data, - label=label, - chunk_scheme="IOB", - num_chunk_types=3, - ) - - self.assertRaises(TypeError, test_type) - - if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 9f9e98bfca1c714be0154efe88788fe7f7a77813..328f719d40537f5a9fe372f624ab1d895c52ba55 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -4169,55 +4169,6 @@ class TestBook(LayerTest): ) ) - def test_linear_chain_crf(self): - with self.static_graph(): - label_dict_len = 10 - feature = layers.data(name='feature', shape=[784], dtype='float32') - label = layers.data(name='label', shape=[1], dtype='int64') - emission = layers.fc(input=feature, size=10) - crf = layers.linear_chain_crf( - input=emission, label=label, param_attr=ParamAttr(name="crfw") - ) - crf_decode = layers.crf_decoding( - input=emission, param_attr=ParamAttr(name="crfw") - ) - self.assertIsNotNone(crf) - self.assertIsNotNone(crf_decode) - return layers.chunk_eval( - input=crf_decode, - label=label, - chunk_scheme="IOB", - num_chunk_types=(label_dict_len - 1) // 2, - ) - - def test_linear_chain_crf_padding(self): - with self.static_graph(): - label_dict_len, max_len = 10, 20 - feature = layers.data( - name='feature', shape=[max_len, 784], dtype='float32' - ) - label = layers.data(name='label', shape=[max_len], dtype='int64') - length = layers.data(name='length', shape=[1], dtype='int64') - emission = layers.fc(input=feature, size=10, num_flatten_dims=2) - crf = layers.linear_chain_crf( - input=emission, - label=label, - length=length, - param_attr=ParamAttr(name="crfw"), - ) - crf_decode = layers.crf_decoding( - input=emission, length=length, param_attr=ParamAttr(name="crfw") - ) - self.assertIsNotNone(crf) - self.assertIsNotNone(crf_decode) - return layers.chunk_eval( - input=crf_decode, - label=label, - seq_length=length, - chunk_scheme="IOB", - num_chunk_types=(label_dict_len - 1) // 2, - ) - def test_im2sequence(self): # TODO(minqiyang): dygraph do not support lod now with self.static_graph():