Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
fd15390a
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
fd15390a
编写于
11月 16, 2022
作者:
C
ccrrong
提交者:
GitHub
11月 16, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
remove chunk_eval in nn.py under fluid (#47948)
* remove chunk_eval
上级
9cf3aa61
变更
4
隐藏空白更改
内联
并排
Showing
4 changed files
with
0 additions
and
377 deletions
+0
-377
python/paddle/fluid/evaluator.py
python/paddle/fluid/evaluator.py
+0
-118
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+0
-163
python/paddle/fluid/tests/unittests/test_chunk_eval_op.py
python/paddle/fluid/tests/unittests/test_chunk_eval_op.py
+0
-47
python/paddle/fluid/tests/unittests/test_layers.py
python/paddle/fluid/tests/unittests/test_layers.py
+0
-49
未找到文件。
python/paddle/fluid/evaluator.py
浏览文件 @
fd15390a
...
...
@@ -23,7 +23,6 @@ from .initializer import Constant
from
.layers
import
detection
__all__
=
[
'ChunkEvaluator'
,
'EditDistance'
,
'DetectionMAP'
,
]
...
...
@@ -127,123 +126,6 @@ class Evaluator:
return
state
class ChunkEvaluator(Evaluator):
    """
    Warning: This would be deprecated in the future. Please use fluid.metrics.ChunkEvaluator
    instead.

    Accumulate the counter numbers output by chunk_eval over mini-batches and
    compute precision, recall and F1-score from the accumulated counters.

    For some basics of chunking, please refer to
    'Chunking with Support Vector Machines <https://aclanthology.info/pdf/N/N01/N01-1025.pdf>'.

    Args:
        input (Variable): prediction output of the network.
        label (Variable): label of the test data set.
        chunk_scheme (str): the tagging scheme: IOB, IOE, IOBES or plain (IO).
            See the chunk_eval op for details.
        num_chunk_types (int): the number of chunk types.
        excluded_chunk_types (list): A list of chunk type ids that are not counted.

    Returns:
        tuple: tuple containing: precision, recall, f1_score

    Examples:
        .. code-block:: python

            exe = fluid.Executor(place)
            evaluator = fluid.evaluator.ChunkEvaluator(
                input, label, chunk_scheme="IOB", num_chunk_types=3
            )
            for epoch in range(PASS_NUM):
                evaluator.reset(exe)
                for data in batches:
                    loss = exe.run(fetch_list=[cost])
                precision, recall, f1_score = evaluator.eval(exe)
    """

    def __init__(
        self,
        input,
        label,
        chunk_scheme,
        num_chunk_types,
        excluded_chunk_types=None,
    ):
        super().__init__("chunk_eval")
        main_program = self.helper.main_program
        if main_program.current_block().idx != 0:
            raise ValueError("You can only invoke Evaluator in root block")

        # Accumulators for the three chunk counters.
        self.num_infer_chunks = self._create_state(
            dtype='int64', shape=[1], suffix='num_infer_chunks'
        )
        self.num_label_chunks = self._create_state(
            dtype='int64', shape=[1], suffix='num_label_chunks'
        )
        self.num_correct_chunks = self._create_state(
            dtype='int64', shape=[1], suffix='num_correct_chunks'
        )

        # Per-mini-batch metrics and counters produced by the chunk_eval op.
        (
            precision,
            recall,
            f1_score,
            num_infer_chunks,
            num_label_chunks,
            num_correct_chunks,
        ) = layers.chunk_eval(
            input=input,
            label=label,
            chunk_scheme=chunk_scheme,
            num_chunk_types=num_chunk_types,
            excluded_chunk_types=excluded_chunk_types,
        )

        # Add each mini-batch counter onto its accumulator, writing the sum
        # back into the accumulator itself.
        layers.sums(
            input=[self.num_infer_chunks, num_infer_chunks],
            out=self.num_infer_chunks,
        )
        layers.sums(
            input=[self.num_label_chunks, num_label_chunks],
            out=self.num_label_chunks,
        )
        layers.sums(
            input=[self.num_correct_chunks, num_correct_chunks],
            out=self.num_correct_chunks,
        )
        self.metrics.extend([precision, recall, f1_score])

    def eval(self, executor, eval_program=None):
        """
        Fetch the accumulated counters and compute the overall metrics.

        Args:
            executor: executor used to run ``eval_program``.
            eval_program: program to run; a new ``Program()`` is created when None.

        Returns:
            tuple: (precision, recall, f1_score), each a float32 numpy array
            holding a single value.
        """
        if eval_program is None:
            eval_program = Program()
        block = eval_program.current_block()
        # NOTE(review): assumes self.states holds exactly the three counters,
        # in the order they were created in __init__ -- confirm in Evaluator.
        num_infer_chunks, num_label_chunks, num_correct_chunks = executor.run(
            eval_program,
            fetch_list=[_clone_var_(block, state) for state in self.states],
        )
        num_infer_chunks = num_infer_chunks[0]
        num_label_chunks = num_label_chunks[0]
        num_correct_chunks = num_correct_chunks[0]

        # Each ratio falls back to 0 when its guard counter is zero, which
        # avoids dividing by zero.
        precision = (
            float(num_correct_chunks) / num_infer_chunks
            if num_infer_chunks
            else 0
        )
        recall = (
            float(num_correct_chunks) / num_label_chunks
            if num_label_chunks
            else 0
        )
        f1_score = (
            float(2 * precision * recall) / (precision + recall)
            if num_correct_chunks
            else 0
        )
        return (
            np.array([precision], dtype='float32'),
            np.array([recall], dtype='float32'),
            np.array([f1_score], dtype='float32'),
        )
class
EditDistance
(
Evaluator
):
"""
Warning: This would be deprecated in the future. Please use fluid.metrics.EditDistance
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
fd15390a
...
...
@@ -68,7 +68,6 @@ __all__ = [
'linear_chain_crf',
'crf_decoding',
'cos_sim',
'chunk_eval',
'conv2d',
'conv3d',
'softmax',
...
...
@@ -1254,168 +1253,6 @@ def dropout(
return out
@templatedoc()
def chunk_eval(
    input,
    label,
    chunk_scheme,
    num_chunk_types,
    excluded_chunk_types=None,
    seq_length=None,
):
    r"""
    This operator computes the precision, recall and F1-score for chunk detection.
    It is often used in sequence tagging tasks, such as Named Entity Recognition(NER).

    For some basics of chunking, please refer to
    `Chunking with Support Vector Machines <https://aclanthology.info/pdf/N/N01/N01-1025.pdf>`_ .

    This operator supports IOB, IOE, IOBES and IO (also known as plain) tagging schemes.
    Here is a NER example for the usage of these tagging schemes:

    .. code-block:: python

       ====== ====== ====== ===== == ============ ===== ===== ===== == =========
              Li     Ming   works at Agricultural Bank  of    China in Beijing.
       ====== ====== ====== ===== == ============ ===== ===== ===== == =========
       IO     I-PER  I-PER  O     O  I-ORG        I-ORG I-ORG I-ORG O  I-LOC
       IOB    B-PER  I-PER  O     O  B-ORG        I-ORG I-ORG I-ORG O  B-LOC
       IOE    I-PER  E-PER  O     O  I-ORG        I-ORG I-ORG E-ORG O  E-LOC
       IOBES  B-PER  E-PER  O     O  B-ORG        I-ORG I-ORG E-ORG O  S-LOC
       ====== ====== ====== ===== == ============ ===== ===== ===== == =========

    There are three chunk types(named entity types) including PER(person), ORG(organization)
    and LOC(location), and we can see that the labels have the form `<tag type>-<chunk type>` .

    Since the implementation of this operator actually uses label ids rather than
    label strings, to make it work, there should be a way to map label ids to
    tag types and chunk types. This operator uses the following way to do mapping:

    .. code-block:: python

       tag_type = label % num_tag_type
       chunk_type = label // num_tag_type

    where `num_tag_type` is the num of tag types in the tagging scheme, `num_chunk_type`
    is the num of chunk types, and `tag_type` gets its value from the following table.

    .. code-block:: python

       Scheme Begin Inside End   Single
       plain  0     -      -     -
       IOB    0     1      -     -
       IOE    -     0      1     -
       IOBES  0     1      2     3

    Accordingly, in the above NER example, if the tagging scheme is IOB and chunk
    types are ORG, PER and LOC, then the label ids would be as follows:

    .. code-block:: python

       B-ORG  0
       I-ORG  1
       B-PER  2
       I-PER  3
       B-LOC  4
       I-LOC  5
       O      6

    With which we can map each label id to the corresponding tag type and chunk
    type correctly.

    Args:
        input (Tensor): A Tensor representing the predicted labels
            from the network. Its shape would be `[N, M, 1]`,
            where `N` stands for batch size, `M` for sequence length.
            The data type should be int64.
        label (Tensor): A Tensor representing the ground-truth labels.
            It should have the same shape, lod and data type as ``input`` .
        chunk_scheme (str): Indicate the tagging schemes used here. The value must
            be IOB, IOE, IOBES or plain.
        num_chunk_types (int): The number of chunk types.
        excluded_chunk_types (list, optional): Indicate the chunk types shouldn't
            be taken into account. It should be a list of chunk type ids(integer).
            Default None.
        seq_length(Tensor, optional): A 1D Tensor containing the length of each
            sequence when ``input`` and ``label`` are Tensor. Default None.

    Returns:
        tuple: A tuple including precision, recall, F1-score, chunk number detected, \
            chunk number in ground-truth, chunk number correctly detected. Each \
            is a Tensor with shape `[1]`. The data type of precision, recall and \
            F1-score all is float32, and the others' data type all is int64.

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid

            dict_size = 10000
            label_dict_len = 7
            sequence = fluid.data(
                name='id', shape=[None, 1], lod_level=1, dtype='int64')
            embedding = fluid.embedding(
                input=sequence, size=[dict_size, 512])
            hidden = fluid.layers.fc(input=embedding, size=512)
            label = fluid.data(
                name='label', shape=[None, 1], lod_level=1, dtype='int64')
            crf = fluid.layers.linear_chain_crf(
                input=hidden, label=label, param_attr=fluid.ParamAttr(name="crfw"))
            crf_decode = fluid.layers.crf_decoding(
                input=hidden, param_attr=fluid.ParamAttr(name="crfw"))
            fluid.layers.chunk_eval(
                input=crf_decode,
                label=label,
                chunk_scheme="IOB",
                num_chunk_types=int((label_dict_len - 1) / 2))
    """
    # Must stay the first statement: **locals() forwards exactly the call
    # arguments, before any other local name is bound.
    helper = LayerHelper("chunk_eval", **locals())

    # Only int64 is accepted for both input and label.
    check_variable_and_dtype(input, 'input', ['int64'], 'chunk_eval')
    check_variable_and_dtype(label, 'label', ['int64'], 'chunk_eval')

    # prepare output
    precision = helper.create_variable_for_type_inference(dtype="float32")
    recall = helper.create_variable_for_type_inference(dtype="float32")
    f1_score = helper.create_variable_for_type_inference(dtype="float32")
    num_infer_chunks = helper.create_variable_for_type_inference(dtype="int64")
    num_label_chunks = helper.create_variable_for_type_inference(dtype="int64")
    num_correct_chunks = helper.create_variable_for_type_inference(
        dtype="int64"
    )

    this_input = {"Inference": [input], "Label": [label]}

    # SeqLength is only passed to the op when the caller supplies it.
    if seq_length is not None:
        this_input["SeqLength"] = [seq_length]

    helper.append_op(
        type="chunk_eval",
        inputs=this_input,
        outputs={
            "Precision": [precision],
            "Recall": [recall],
            "F1-Score": [f1_score],
            "NumInferChunks": [num_infer_chunks],
            "NumLabelChunks": [num_label_chunks],
            "NumCorrectChunks": [num_correct_chunks],
        },
        attrs={
            "num_chunk_types": num_chunk_types,
            "chunk_scheme": chunk_scheme,
            # The attribute is always a list; None becomes an empty list.
            "excluded_chunk_types": excluded_chunk_types or [],
        },
    )
    return (
        precision,
        recall,
        f1_score,
        num_infer_chunks,
        num_label_chunks,
        num_correct_chunks,
    )
@deprecated(since="2.0.0", update_to="paddle.nn.functional.softmax")
def softmax(input, use_cudnn=True, name=None, axis=-1):
r"""
...
...
python/paddle/fluid/tests/unittests/test_chunk_eval_op.py
浏览文件 @
fd15390a
...
...
@@ -16,8 +16,6 @@ import unittest
import
numpy
as
np
from
op_test
import
OpTest
import
numpy
as
np
from
paddle.fluid
import
Program
,
program_guard
from
paddle
import
fluid
class
Segment
:
...
...
@@ -283,50 +281,5 @@ class TestChunkEvalOpWithTensorInput(TestChunkEvalOp):
}
class TestChunkEvalOpError(unittest.TestCase):
    """Check that fluid.layers.chunk_eval raises TypeError on invalid arguments."""

    def test_errors(self):
        with program_guard(Program(), Program()):

            def test_input():
                # ``input`` is a numpy array here.
                # The shape must be passed as one tuple: np.random.random(1, 1)
                # raises TypeError inside numpy itself, which would satisfy
                # assertRaises without chunk_eval ever being called.
                input_data = np.random.random((1, 1)).astype("int64")
                label_data = np.random.random(1).astype("int64")
                fluid.layers.chunk_eval(
                    input=input_data,
                    label=label_data,
                    chunk_scheme="IOB",
                    num_chunk_types=3,
                )

            self.assertRaises(TypeError, test_input)

            def test_label():
                # ``input`` comes from fluid.data; ``label`` is a numpy array.
                input_ = fluid.data(
                    name="input", shape=[None, 1], dtype="int64"
                )
                label_data = np.random.random(1).astype("int64")
                fluid.layers.chunk_eval(
                    input=input_,
                    label=label_data,
                    chunk_scheme="IOB",
                    num_chunk_types=3,
                )

            self.assertRaises(TypeError, test_label)

            def test_type():
                # Both arguments come from fluid.data, but ``input`` is
                # declared int32 instead of int64.
                in_data = fluid.data(
                    name="input_", shape=[None, 1], dtype="int32"
                )
                label = fluid.data(name="label_", shape=[1], dtype="int64")
                fluid.layers.chunk_eval(
                    input=in_data,
                    label=label,
                    chunk_scheme="IOB",
                    num_chunk_types=3,
                )

            self.assertRaises(TypeError, test_type)
# Run the unit tests in this module when the file is executed directly.
if __name__ == '__main__':
    unittest.main()
python/paddle/fluid/tests/unittests/test_layers.py
浏览文件 @
fd15390a
...
...
@@ -4169,55 +4169,6 @@ class TestBook(LayerTest):
)
)
def test_linear_chain_crf(self):
    # Build linear_chain_crf and crf_decoding on top of an fc layer inside a
    # static graph, check both results are not None, and return the outputs
    # of chunk_eval applied to the decoded result.
    with self.static_graph():
        label_dict_len = 10
        chunk_type_count = (label_dict_len - 1) // 2

        features = layers.data(name='feature', shape=[784], dtype='float32')
        tags = layers.data(name='label', shape=[1], dtype='int64')
        emissions = layers.fc(input=features, size=10)

        # Both layers name their parameter "crfw".
        crf_out = layers.linear_chain_crf(
            input=emissions,
            label=tags,
            param_attr=ParamAttr(name="crfw"),
        )
        decoded = layers.crf_decoding(
            input=emissions,
            param_attr=ParamAttr(name="crfw"),
        )

        self.assertIsNotNone(crf_out)
        self.assertIsNotNone(decoded)

        return layers.chunk_eval(
            input=decoded,
            label=tags,
            chunk_scheme="IOB",
            num_chunk_types=chunk_type_count,
        )
def test_linear_chain_crf_padding(self):
    # Same flow as the non-padded CRF test, but with an explicit ``length``
    # input that is forwarded to linear_chain_crf, crf_decoding and (as
    # seq_length) chunk_eval.
    with self.static_graph():
        label_dict_len = 10
        max_len = 20
        chunk_type_count = (label_dict_len - 1) // 2

        features = layers.data(
            name='feature', shape=[max_len, 784], dtype='float32'
        )
        tags = layers.data(name='label', shape=[max_len], dtype='int64')
        seq_len = layers.data(name='length', shape=[1], dtype='int64')
        emissions = layers.fc(input=features, size=10, num_flatten_dims=2)

        # Both layers name their parameter "crfw".
        crf_out = layers.linear_chain_crf(
            input=emissions,
            label=tags,
            length=seq_len,
            param_attr=ParamAttr(name="crfw"),
        )
        decoded = layers.crf_decoding(
            input=emissions,
            length=seq_len,
            param_attr=ParamAttr(name="crfw"),
        )

        self.assertIsNotNone(crf_out)
        self.assertIsNotNone(decoded)

        return layers.chunk_eval(
            input=decoded,
            label=tags,
            seq_length=seq_len,
            chunk_scheme="IOB",
            num_chunk_types=chunk_type_count,
        )
def
test_im2sequence
(
self
):
# TODO(minqiyang): dygraph do not support lod now
with
self
.
static_graph
():
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录