Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
fd15390a
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
fd15390a
编写于
11月 16, 2022
作者:
C
ccrrong
提交者:
GitHub
11月 16, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
remove chunk_eval in nn.py under fluid (#47948)
* remove chunk_eval
上级
9cf3aa61
变更
4
隐藏空白更改
内联
并排
Showing
4 changed files
with
0 additions
and
377 deletions
+0
-377
python/paddle/fluid/evaluator.py
python/paddle/fluid/evaluator.py
+0
-118
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+0
-163
python/paddle/fluid/tests/unittests/test_chunk_eval_op.py
python/paddle/fluid/tests/unittests/test_chunk_eval_op.py
+0
-47
python/paddle/fluid/tests/unittests/test_layers.py
python/paddle/fluid/tests/unittests/test_layers.py
+0
-49
未找到文件。
python/paddle/fluid/evaluator.py
浏览文件 @
fd15390a
...
...
@@ -23,7 +23,6 @@ from .initializer import Constant
from
.layers
import
detection
# Evaluator classes that make up this module's public interface.
__all__ = ['ChunkEvaluator', 'EditDistance', 'DetectionMAP']
...
...
@@ -127,123 +126,6 @@ class Evaluator:
return
state
class ChunkEvaluator(Evaluator):
    """
    Warning: This would be deprecated in the future. Please use
    fluid.metrics.ChunkEvaluator instead.

    Sums the chunk counters produced by the chunk_eval op over mini-batches
    and derives precision, recall and F1-score from the accumulated totals.
    For some basics of chunking, please refer to
    'Chunking with Support Vector Machines <https://aclanthology.info/pdf/N/N01/N01-1025.pdf>'.

    Args:
        input (Variable): prediction output of the network.
        label (Variable): label of the test data set.
        chunk_scheme (str): can be IOB/IOE/IOBES and IO. See the chunk_eval
            op for details.
        num_chunk_types (int): the number of chunk type.
        excluded_chunk_types (list): A list including chunk type ids,
            indicating chunk types that are not counted.

    Returns:
        tuple: tuple containing: precision, recall, f1_score

    Examples:
        .. code-block:: python

            exe = fluid.Executor(place)
            evaluator = fluid.evaluator.ChunkEvaluator(
                input, label, chunk_scheme="IOB", num_chunk_types=3)
            for epoch in range(PASS_NUM):
                evaluator.reset(exe)
                for data in batches:
                    loss = exe.run(fetch_list=[cost])
                precision, recall, f1_score = evaluator.eval(exe)
    """

    def __init__(
        self,
        input,
        label,
        chunk_scheme,
        num_chunk_types,
        excluded_chunk_types=None,
    ):
        super().__init__("chunk_eval")
        if self.helper.main_program.current_block().idx != 0:
            raise ValueError("You can only invoke Evaluator in root block")

        # One int64 state per counter. The creation order matters: eval()
        # unpacks self.states positionally in exactly this order.
        for counter_name in (
            'num_infer_chunks',
            'num_label_chunks',
            'num_correct_chunks',
        ):
            setattr(
                self,
                counter_name,
                self._create_state(
                    dtype='int64', shape=[1], suffix=counter_name
                ),
            )

        (
            precision,
            recall,
            f1_score,
            batch_infer,
            batch_label,
            batch_correct,
        ) = layers.chunk_eval(
            input=input,
            label=label,
            chunk_scheme=chunk_scheme,
            num_chunk_types=num_chunk_types,
            excluded_chunk_types=excluded_chunk_types,
        )

        # Add each per-batch count onto its running total, writing the sum
        # back into the state variable.
        for running_total, batch_count in (
            (self.num_infer_chunks, batch_infer),
            (self.num_label_chunks, batch_label),
            (self.num_correct_chunks, batch_correct),
        ):
            layers.sums(input=[running_total, batch_count], out=running_total)

        self.metrics.extend([precision, recall, f1_score])

    def eval(self, executor, eval_program=None):
        """Fetch the accumulated counters and return (precision, recall, f1).

        Each returned element is a one-element float32 numpy array. A ratio
        whose denominator counter is zero is reported as 0.
        """
        program = Program() if eval_program is None else eval_program
        block = program.current_block()
        fetch_list = [_clone_var_(block, state) for state in self.states]
        raw_infer, raw_label, raw_correct = executor.run(
            program, fetch_list=fetch_list
        )
        infer_count = raw_infer[0]
        label_count = raw_label[0]
        correct_count = raw_correct[0]

        if infer_count:
            precision = float(correct_count) / infer_count
        else:
            precision = 0

        if label_count:
            recall = float(correct_count) / label_count
        else:
            recall = 0

        if correct_count:
            f1_score = float(2 * precision * recall) / (precision + recall)
        else:
            f1_score = 0

        return tuple(
            np.array([value], dtype='float32')
            for value in (precision, recall, f1_score)
        )
class
EditDistance
(
Evaluator
):
"""
Warning: This would be deprecated in the future. Please use fluid.metrics.EditDistance
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
fd15390a
...
...
@@ -68,7 +68,6 @@ __all__ = [
'linear_chain_crf',
'crf_decoding',
'cos_sim',
'chunk_eval',
'conv2d',
'conv3d',
'softmax',
...
...
@@ -1254,168 +1253,6 @@ def dropout(
return out
@templatedoc()
def chunk_eval(
    input,
    label,
    chunk_scheme,
    num_chunk_types,
    excluded_chunk_types=None,
    seq_length=None,
):
    r"""
    Compute precision, recall and F1-score for chunk detection, as commonly
    needed by sequence tagging tasks such as Named Entity Recognition (NER).

    For some basics of chunking, please refer to
    `Chunking with Support Vector Machines <https://aclanthology.info/pdf/N/N01/N01-1025.pdf>`_ .

    The IOB, IOE, IOBES and IO (also known as plain) tagging schemes are
    supported. A NER example showing how each scheme tags a sentence:

    .. code-block:: python

        ====== ====== ====== ===== == ============ ===== ===== ===== == =========
               Li     Ming   works at Agricultural Bank  of    China in Beijing.
        ====== ====== ====== ===== == ============ ===== ===== ===== == =========
        IO     I-PER  I-PER  O     O  I-ORG        I-ORG I-ORG I-ORG O  I-LOC
        IOB    B-PER  I-PER  O     O  B-ORG        I-ORG I-ORG I-ORG O  B-LOC
        IOE    I-PER  E-PER  O     O  I-ORG        I-ORG I-ORG E-ORG O  E-LOC
        IOBES  B-PER  E-PER  O     O  B-ORG        I-ORG I-ORG E-ORG O  S-LOC
        ====== ====== ====== ===== == ============ ===== ===== ===== == =========

    There are three chunk types (named entity types) here: PER (person),
    ORG (organization) and LOC (location); every label has the form
    `<tag type>-<chunk type>` .

    The operator works on label ids rather than label strings, so ids are
    mapped to tag types and chunk types as follows:

    .. code-block:: python

        tag_type = label % num_tag_type
        chunk_type = label / num_tag_type

    where `num_tag_type` is the number of tag types in the tagging scheme,
    `num_chunk_type` is the number of chunk types, and `tag_type` takes its
    value from this table:

    .. code-block:: python

        Scheme Begin Inside End   Single
        plain   0     -      -     -
        IOB     0     1      -     -
        IOE     -     0      1     -
        IOBES   0     1      2     3

    Consequently, for the NER example above with the IOB scheme and chunk
    types ORG, PER and LOC, the label ids are:

    .. code-block:: python

        B-ORG  0
        I-ORG  1
        B-PER  2
        I-PER  3
        B-LOC  4
        I-LOC  5
        O      6

    which lets every label id be mapped to its tag type and chunk type.

    Args:
        input (Tensor): A Tensor representing the predicted labels
            from the network. Its shape would be `[N, M, 1]`,
            where `N` stands for batch size, `M` for sequence length.
            The data type should be int64.
        label (Tensor): A Tensor representing the ground-truth labels.
            It should have the same shape, lod and data type as ``input`` .
        chunk_scheme (str): Indicate the tagging schemes used here. The value
            must be IOB, IOE, IOBES or plain.
        num_chunk_types (int): The number of chunk types.
        excluded_chunk_types (list, optional): Indicate the chunk types
            shouldn't be taken into account. It should be a list of chunk
            type ids(integer). Default None.
        seq_length(Tensor, optional): A 1D Tensor containing the length of
            each sequence when ``input`` and ``label`` are Tensor.
            Default None.

    Returns:
        tuple: A tuple including precision, recall, F1-score, chunk number
        detected, chunk number in ground-truth, chunk number correctly
        detected. Each is a Tensor with shape `[1]`. The data type of
        precision, recall and F1-score all is float32, and the others' data
        type all is int64.

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid

            dict_size = 10000
            label_dict_len = 7
            sequence = fluid.data(
                name='id', shape=[None, 1], lod_level=1, dtype='int64')
            embedding = fluid.embedding(
                input=sequence, size=[dict_size, 512])
            hidden = fluid.layers.fc(input=embedding, size=512)
            label = fluid.data(
                name='label', shape=[None, 1], lod_level=1, dtype='int64')
            crf = fluid.layers.linear_chain_crf(
                input=hidden, label=label, param_attr=fluid.ParamAttr(name="crfw"))
            crf_decode = fluid.layers.crf_decoding(
                input=hidden, param_attr=fluid.ParamAttr(name="crfw"))
            fluid.layers.chunk_eval(
                input=crf_decode,
                label=label,
                chunk_scheme="IOB",
                num_chunk_types=int((label_dict_len - 1) / 2))
    """
    # Must stay the first statement: locals() has to hold only the arguments.
    helper = LayerHelper("chunk_eval", **locals())

    for tensor, arg_name in ((input, 'input'), (label, 'label')):
        check_variable_and_dtype(tensor, arg_name, ['int64'], 'chunk_eval')

    def _new_output(dtype):
        # Thin wrapper so the six output declarations below read uniformly.
        return helper.create_variable_for_type_inference(dtype=dtype)

    # Three float metrics followed by three integer counters; the creation
    # order is kept as-is.
    precision = _new_output("float32")
    recall = _new_output("float32")
    f1_score = _new_output("float32")
    num_infer_chunks = _new_output("int64")
    num_label_chunks = _new_output("int64")
    num_correct_chunks = _new_output("int64")

    op_inputs = {"Inference": [input], "Label": [label]}
    if seq_length is not None:
        op_inputs["SeqLength"] = [seq_length]

    op_outputs = {
        "Precision": [precision],
        "Recall": [recall],
        "F1-Score": [f1_score],
        "NumInferChunks": [num_infer_chunks],
        "NumLabelChunks": [num_label_chunks],
        "NumCorrectChunks": [num_correct_chunks],
    }
    op_attrs = {
        "num_chunk_types": num_chunk_types,
        "chunk_scheme": chunk_scheme,
        # The attribute is always passed as a list, so None becomes [].
        "excluded_chunk_types": excluded_chunk_types or [],
    }
    helper.append_op(
        type="chunk_eval",
        inputs=op_inputs,
        outputs=op_outputs,
        attrs=op_attrs,
    )

    return (
        precision,
        recall,
        f1_score,
        num_infer_chunks,
        num_label_chunks,
        num_correct_chunks,
    )
@deprecated(since="2.0.0", update_to="paddle.nn.functional.softmax")
def softmax(input, use_cudnn=True, name=None, axis=-1):
r"""
...
...
python/paddle/fluid/tests/unittests/test_chunk_eval_op.py
浏览文件 @
fd15390a
...
...
@@ -16,8 +16,6 @@ import unittest
import
numpy
as
np
from
op_test
import
OpTest
import
numpy
as
np
from
paddle.fluid
import
Program
,
program_guard
from
paddle
import
fluid
class
Segment
:
...
...
@@ -283,50 +281,5 @@ class TestChunkEvalOpWithTensorInput(TestChunkEvalOp):
}
class TestChunkEvalOpError(unittest.TestCase):
    """Checks that ``fluid.layers.chunk_eval`` rejects invalid arguments."""

    def test_errors(self):
        """chunk_eval is expected to raise TypeError for each bad input below.

        All fixtures are built outside the ``assertRaises`` scope so that only
        an error raised by ``chunk_eval`` itself can satisfy the assertion.
        """
        chunk_kwargs = {"chunk_scheme": "IOB", "num_chunk_types": 3}

        with program_guard(Program(), Program()):
            # Case 1: ``input`` is a numpy array.
            # np.random.random takes the shape as a single ``size`` argument;
            # the previous ``random(1, 1)`` call raised TypeError on its own,
            # so the assertion passed without ever reaching chunk_eval.
            input_data = np.random.random((1, 1)).astype("int64")
            label_data = np.random.random(1).astype("int64")
            with self.assertRaises(TypeError):
                fluid.layers.chunk_eval(
                    input=input_data, label=label_data, **chunk_kwargs
                )

            # Case 2: ``input`` is a graph variable but ``label`` is a
            # numpy array.
            input_ = fluid.data(name="input", shape=[None, 1], dtype="int64")
            label_data = np.random.random(1).astype("int64")
            with self.assertRaises(TypeError):
                fluid.layers.chunk_eval(
                    input=input_, label=label_data, **chunk_kwargs
                )

            # Case 3: both are graph variables but ``input`` is declared as
            # int32 instead of int64.
            in_data = fluid.data(
                name="input_", shape=[None, 1], dtype="int32"
            )
            label = fluid.data(name="label_", shape=[1], dtype="int64")
            with self.assertRaises(TypeError):
                fluid.layers.chunk_eval(
                    input=in_data, label=label, **chunk_kwargs
                )
# Run every test case in this module when the file is executed directly.
if __name__ == "__main__":
    unittest.main()
python/paddle/fluid/tests/unittests/test_layers.py
浏览文件 @
fd15390a
...
...
@@ -4169,55 +4169,6 @@ class TestBook(LayerTest):
)
)
def test_linear_chain_crf(self):
    """Build fc -> linear_chain_crf / crf_decoding -> chunk_eval in a static graph.

    Returns the result of ``layers.chunk_eval``.
    """
    with self.static_graph():
        label_dict_len = 10
        chunk_type_count = (label_dict_len - 1) // 2

        features = layers.data(name='feature', shape=[784], dtype='float32')
        tags = layers.data(name='label', shape=[1], dtype='int64')
        scores = layers.fc(input=features, size=10)

        # Both CRF layers are given a ParamAttr with the same name, "crfw".
        crf_cost = layers.linear_chain_crf(
            input=scores,
            label=tags,
            param_attr=ParamAttr(name="crfw"),
        )
        decoded = layers.crf_decoding(
            input=scores,
            param_attr=ParamAttr(name="crfw"),
        )

        for layer_output in (crf_cost, decoded):
            self.assertIsNotNone(layer_output)

        return layers.chunk_eval(
            input=decoded,
            label=tags,
            chunk_scheme="IOB",
            num_chunk_types=chunk_type_count,
        )
def test_linear_chain_crf_padding(self):
    """Same CRF pipeline as the unpadded test, with an explicit length input.

    Returns the result of ``layers.chunk_eval``.
    """
    with self.static_graph():
        label_dict_len = 10
        max_len = 20
        chunk_type_count = (label_dict_len - 1) // 2

        features = layers.data(
            name='feature', shape=[max_len, 784], dtype='float32'
        )
        tags = layers.data(name='label', shape=[max_len], dtype='int64')
        seq_lens = layers.data(name='length', shape=[1], dtype='int64')
        scores = layers.fc(input=features, size=10, num_flatten_dims=2)

        # Both CRF layers are given a ParamAttr with the same name, "crfw".
        crf_cost = layers.linear_chain_crf(
            input=scores,
            label=tags,
            length=seq_lens,
            param_attr=ParamAttr(name="crfw"),
        )
        decoded = layers.crf_decoding(
            input=scores,
            length=seq_lens,
            param_attr=ParamAttr(name="crfw"),
        )

        for layer_output in (crf_cost, decoded):
            self.assertIsNotNone(layer_output)

        return layers.chunk_eval(
            input=decoded,
            label=tags,
            seq_length=seq_lens,
            chunk_scheme="IOB",
            num_chunk_types=chunk_type_count,
        )
def
test_im2sequence
(
self
):
# TODO(minqiyang): dygraph do not support lod now
with
self
.
static_graph
():
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录