提交 d6a9f005 编写于 作者: D dzhwinter 提交者: Yu Yang

"module document non-layer doc" (#11598)

* "add some api reference"

* "fix based on preview"

* "fix evaluator"

* "remove template doc"
上级 15130fc8
......@@ -41,7 +41,12 @@ def _clone_var_(block, var):
class Evaluator(object):
"""
Base Class for all evaluators
Warning: better to use the fluid.metrics.* things, more
flexible support via pure Python and Operator, and decoupled
with executor. Short doc are intended to urge new user
start from Metrics.
Base Class for all evaluators.
Args:
name(str): The name of evaluator. such as, "accuracy". Used for generate
......@@ -69,6 +74,10 @@ class Evaluator(object):
def reset(self, executor, reset_program=None):
"""
reset metric states at the begin of each pass/user specified batch
Args:
executor(Executor|ParallelExecutor): an executor for executing the reset_program
reset_program(Program): a single Program for reset process
"""
if reset_program is None:
reset_program = Program()
......@@ -85,15 +94,16 @@ class Evaluator(object):
def eval(self, executor, eval_program=None):
"""
Evaluate the statistics merged by multiple mini-batches.
Args:
executor(Executor|ParallelExecutor): an executor for executing the eval_program
eval_program(Program): a single Program for eval process
"""
raise NotImplementedError()
def create_state(self, suffix, dtype, shape):
def _create_state(self, suffix, dtype, shape):
"""
Create state variable.
NOTE: It is not a public API.
Args:
suffix(str): the state suffix.
dtype(str|core.VarDesc.VarType): the state data type
......@@ -113,9 +123,35 @@ class Evaluator(object):
class ChunkEvaluator(Evaluator):
"""
Warning: This would be deprecated in the future. Please use fluid.metrics.ChunkEvaluator
instead.
Accumulate counter numbers output by chunk_eval from mini-batches and
compute the precision recall and F1-score using the accumulated counter
numbers.
For some basics of chunking, please refer to
'Chunking with Support Vector Machines <https://aclanthology.info/pdf/N/N01/N01-1025.pdf>'.
Args:
input (Variable): prediction output of the network.
label (Variable): label of the test data set.
chunk_scheme (str): can be IOB/IOE/IOBES and IO. See the chunk_eval op for details.
num_chunk_types (int): the number of chunk type.
excluded_chunk_types (list): A list including chunk type ids, indicating chunk types that are not counted.
Returns:
tuple: tuple containing: precision, recall, f1_score
Examples:
.. code-block:: python
exe = fluid.executor(place)
evaluator = fluid.Evaluator.ChunkEvaluator(input, label)
for epoch in PASS_NUM:
evaluator.reset(exe)
for data in batches:
loss = exe.run(fetch_list=[cost])
precision, recall, f1_score = evaluator.eval(exe)
"""
def __init__(
......@@ -130,11 +166,11 @@ class ChunkEvaluator(Evaluator):
if main_program.current_block().idx != 0:
raise ValueError("You can only invoke Evaluator in root block")
self.num_infer_chunks = self.create_state(
self.num_infer_chunks = self._create_state(
dtype='int64', shape=[1], suffix='num_infer_chunks')
self.num_label_chunks = self.create_state(
self.num_label_chunks = self._create_state(
dtype='int64', shape=[1], suffix='num_label_chunks')
self.num_correct_chunks = self.create_state(
self.num_correct_chunks = self._create_state(
dtype='int64', shape=[1], suffix='num_correct_chunks')
precision, recall, f1_score, num_infer_chunks, num_label_chunks, num_correct_chunks = layers.chunk_eval(
input=input,
......@@ -178,6 +214,8 @@ class ChunkEvaluator(Evaluator):
class EditDistance(Evaluator):
"""
Warning: This would be deprecated in the future. Please use fluid.metrics.EditDistance
instead.
Accumulate edit distance sum and sequence number from mini-batches and
compute the average edit_distance and instance error of all batches.
......@@ -188,15 +226,16 @@ class EditDistance(Evaluator):
ignored_tokens(list of int): Tokens that should be removed before
calculating edit distance.
Example:
Examples:
.. code-block:: python
exe = fluid.executor(place)
distance_evaluator = fluid.Evaluator.EditDistance(input, label)
for epoch in PASS_NUM:
distance_evaluator.reset(exe)
for data in batches:
loss = exe.run(fetch_list=[cost])
distance, instance_error = distance_evaluator.eval(exe)
exe = fluid.executor(place)
distance_evaluator = fluid.Evaluator.EditDistance(input, label)
for epoch in PASS_NUM:
distance_evaluator.reset(exe)
for data in batches:
loss = exe.run(fetch_list=[cost])
distance, instance_error = distance_evaluator.eval(exe)
In the above example:
'distance' is the average of the edit distance in a pass.
......@@ -210,11 +249,11 @@ class EditDistance(Evaluator):
if main_program.current_block().idx != 0:
raise ValueError("You can only invoke Evaluator in root block")
self.total_distance = self.create_state(
self.total_distance = self._create_state(
dtype='float32', shape=[1], suffix='total_distance')
self.seq_num = self.create_state(
self.seq_num = self._create_state(
dtype='int64', shape=[1], suffix='seq_num')
self.instance_error = self.create_state(
self.instance_error = self._create_state(
dtype='int64', shape=[1], suffix='instance_error')
distances, seq_num = layers.edit_distance(
input=input, label=label, ignored_tokens=ignored_tokens)
......@@ -256,9 +295,10 @@ class EditDistance(Evaluator):
class DetectionMAP(Evaluator):
"""
Warning: This would be deprecated in the future. Please use fluid.metrics.DetectionMAP
instead.
Calculate the detection mean average precision (mAP).
TODO (Dang Qingqing): update the following doc.
The general steps are as follows:
1. calculate the true positive and false positive according to the input
of detection and labels.
......@@ -293,17 +333,18 @@ class DetectionMAP(Evaluator):
- 11point: the 11-point interpolated average precision.
- integral: the natural integral of the precision-recall curve.
Example:
Examples:
.. code-block:: python
exe = fluid.executor(place)
map_evaluator = fluid.Evaluator.DetectionMAP(input,
gt_label, gt_box, gt_difficult)
cur_map, accum_map = map_evaluator.get_map_var()
fetch = [cost, cur_map, accum_map]
for epoch in PASS_NUM:
map_evaluator.reset(exe)
for data in batches:
loss, cur_map_v, accum_map_v = exe.run(fetch_list=fetch)
exe = fluid.executor(place)
map_evaluator = fluid.Evaluator.DetectionMAP(input,
gt_label, gt_box, gt_difficult)
cur_map, accum_map = map_evaluator.get_map_var()
fetch = [cost, cur_map, accum_map]
for epoch in PASS_NUM:
map_evaluator.reset(exe)
for data in batches:
loss, cur_map_v, accum_map_v = exe.run(fetch_list=fetch)
In the above example:
......@@ -340,9 +381,10 @@ class DetectionMAP(Evaluator):
evaluate_difficult=evaluate_difficult,
ap_version=ap_version)
self.create_state(dtype='int32', shape=None, suffix='accum_pos_count')
self.create_state(dtype='float32', shape=None, suffix='accum_true_pos')
self.create_state(dtype='float32', shape=None, suffix='accum_false_pos')
self._create_state(dtype='int32', shape=None, suffix='accum_pos_count')
self._create_state(dtype='float32', shape=None, suffix='accum_true_pos')
self._create_state(
dtype='float32', shape=None, suffix='accum_false_pos')
self.has_state = None
var = self.helper.create_variable(
......
......@@ -28,8 +28,8 @@ import math_op_patch
from math_op_patch import *
import detection
from detection import *
import metric
from metric import *
import metric_op
from metric_op import *
from learning_rate_scheduler import *
__all__ = []
......@@ -41,5 +41,5 @@ __all__ += control_flow.__all__
__all__ += ops.__all__
__all__ += device.__all__
__all__ += detection.__all__
__all__ += metric.__all__
__all__ += metric_op.__all__
__all__ += learning_rate_scheduler.__all__
......@@ -126,7 +126,7 @@ def auc(input, label, curve='ROC', num_thresholds=200):
topk_out, topk_indices = nn.topk(input, k=k)
auc_out = helper.create_tmp_variable(dtype="float32")
helper.append_op(
type="accuracy",
type="auc",
inputs={
"Out": [topk_out],
"Indices": [topk_indices],
......
......@@ -23,6 +23,8 @@ import warnings
__all__ = [
'MetricBase',
'CompositeMetric',
'Precision',
'Recall',
'Accuracy',
'ChunkEvaluator',
'EditDistance',
......@@ -46,33 +48,34 @@ def _is_number_or_matrix_(var):
class MetricBase(object):
"""
Base Class for all evaluators
Base Class for all Metrics.
MetricBase define a group of interfaces for the
model evaluation methods. Metrics accumulate metric states between
consecutive minibatches, at every minibatch, use update
interface to add current minibatch value to global states.
Use eval to compute the accumulated metric value since the last reset()
or from scratch.
If you need to custom a new metric, please inherit from MetricBase and
custom implementation.
Args:
name(str): The name of evaluator. such as, "accuracy". Used for generate
temporary variable name.
Interface:
Note(*) : the states is the attributes who not has _ prefix.
get_config(): print current states and configuration
reset(): clear the states. If the Metrics states type is not (int, float, np.ndarray),
Please override this method.
update(): update states at every minibatch
eval(): get metric evaluation in numpy type.
name(str): The name of metric instance. such as, "accuracy".
It is needed if you want to distinguish different metrics in a model.
"""
def __init__(self, name, **kwargs):
def __init__(self, name):
self._name = str(name) if name != None else self.__class__.__name__
self._kwargs = kwargs if kwargs != None else dict()
self.reset()
def __str__(self):
return self._name
def reset(self):
"""
states is the attributes who not has _ prefix.
reset the states of metrics.
reset clears the states of metrics. By default, the states
are the members that do not have a _ prefix; reset sets them to their initial states.
If you violate this implicit naming rule, please also customize the reset
interface.
"""
states = {
attr: value
......@@ -90,61 +93,231 @@ class MetricBase(object):
setattr(self, attr, None)
def get_config(self):
"""
Get the metric and current states.
The states are the members who do not has "_" prefix.
Args:
None
Returns:
dict: a dict of metric and states
"""
states = {
attr: value
for attr, value in self.__dict__.iteritems()
if not attr.startswith("_")
}
config = copy.deepcopy(self._kwargs)
config = {}
config.update({"name": self._name, "states": copy.deepcopy(states)})
return config
def update(self):
raise NotImplementedError()
def update(self, preds, labels):
"""
Updates the metric states at every minibatch.
One user can compute the minibatch metric via pure Python, or
via a c++ operator.
Args:
preds(numpy.array): the predictions of current minibatch
labels(numpy.array): the labels of current minibatch, if the label is one-hot
or soft-label, should custom the corresponding update rule.
"""
raise NotImplementedError(
"Should not use it directly, please extend it.")
def eval(self):
raise NotImplementedError()
"""
Evaluate the current metrics based on the accumulated states.
Returns:
float|list(float)|numpy.array: the metrics via Python.
"""
raise NotImplementedError(
"Should not use it directly, please extend it.")
class CompositeMetric(MetricBase):
"""
Compute multiple metrics in each minibatch.
Composite multiple metrics in one instance.
for example, merge F1, accuracy, recall into one Metric.
Examples:
.. code-block:: python
labels = fluid.layers.data(name="data", shape=[1], dtype="int32")
data = fluid.layers.data(name="data", shape=[32, 32], dtype="int32")
pred = fluid.layers.fc(input=data, size=1000, act="tanh")
comp = fluid.metrics.CompositeMetric()
acc = fluid.metrics.Precision()
recall = fluid.metrics.Recall()
comp.add_metric(acc)
comp.add_metric(recall)
for pass_id in range(PASSES):
comp.reset()
for data in train_reader():
loss, preds, labels = exe.run(fetch_list=[cost, preds, labels])
comp.update(preds=preds, labels=labels)
numpy_acc, numpy_recall = comp.eval()
"""
def __init__(self, name=None, **kwargs):
super(CompositeMetric, self).__init__(name, kwargs)
def __init__(self, name=None):
super(CompositeMetric, self).__init__(name)
self._metrics = []
def add_metric(self, metric):
    """
    Register one more sub-metric with this CompositeMetric.

    Args:
        metric(MetricBase): the metric instance to append; it is stored
            in registration order.

    Raises:
        ValueError: if metric is not an instance of MetricBase.
    """
    if isinstance(metric, MetricBase):
        self._metrics.append(metric)
        return
    raise ValueError("SubMetric should be inherit from MetricBase.")
def update(self, preds, labels):
    """
    Update every registered metric in sequence with the same minibatch.

    Args:
        preds(numpy.array): the predictions of current minibatch
        labels(numpy.array): the labels of current minibatch, if the label is one-hot
            or soft-label, should custom the corresponding update rule.

    Returns:
        None. The sub-metrics accumulate their own states; use eval() to
        read the results.
    """
    # The previous body appended to an undefined `ans` list, which raised
    # NameError on the first call. update() only needs to forward the batch.
    for m in self._metrics:
        m.update(preds, labels)
def eval(self):
    """
    Evaluate each registered metric, in registration order.

    Returns:
        list(float|numpy.array): one entry per sub-metric, holding
        whatever that sub-metric's eval() returned.
    """
    return [metric.eval() for metric in self._metrics]
class Precision(MetricBase):
    """
    Precision (also called positive predictive value) is the fraction of
    relevant instances among the retrieved instances.
    https://en.wikipedia.org/wiki/Evaluation_of_binary_classifiers

    Note that Precision is different from Accuracy in binary classifiers:

        accuracy = correctly classified instances / total instances
        precision = true positives / all instances predicted as positive

    Args:
        name(str|None): the metric name. When None, the base class falls
            back to the class name.

    Examples:
        .. code-block:: python

            metric = fluid.metrics.Precision()
            for pass_id in range(PASSES):
                metric.reset()
                for data in train_reader():
                    loss, preds, labels = exe.run(fetch_list=[cost, preds, labels])
                    metric.update(preds=preds, labels=labels)
                numpy_precision = metric.eval()
    """

    def __init__(self, name=None):
        super(Precision, self).__init__(name)
        self.tp = 0  # true positive
        self.fp = 0  # false positive

    def update(self, preds, labels):
        """
        Accumulate true/false positive counts from one minibatch.

        Args:
            preds(numpy.array): predictions of the current minibatch. Each
                entry is cast to int32 and the value 1 means "positive".
            labels(numpy.array): labels of the current minibatch; the
                value 1 means "positive".

        Raises:
            ValueError: if preds or labels is rejected by _is_numpy_.
        """
        if not _is_numpy_(preds):
            raise ValueError("The 'preds' must be a numpy ndarray.")
        if not _is_numpy_(labels):
            raise ValueError("The 'labels' must be a numpy ndarray.")
        # The sample count is the length of the first axis, not the value of
        # the first label (the previous `labels[0]`).
        # NOTE(review): assumes one binary entry per sample along axis 0 -
        # TODO confirm against callers.
        sample_num = labels.shape[0]
        for i in range(sample_num):
            # NOTE(review): astype truncates, so probabilistic outputs are
            # presumably thresholded by the caller - confirm.
            pred = preds[i].astype("int32")
            label = labels[i]
            # Precision only looks at samples *predicted* positive: a
            # matching label is a true positive, anything else is a false
            # positive. Gating on the label (as before) could never count
            # a real false positive.
            if pred == 1:
                if pred == label:
                    self.tp += 1
                else:
                    self.fp += 1

    def eval(self):
        """
        Compute precision from the accumulated counts.

        Returns:
            float: tp / (tp + fp), or 0.0 when nothing has been predicted
            positive yet.
        """
        ap = self.tp + self.fp
        return float(self.tp) / ap if ap != 0 else .0
class Recall(MetricBase):
    """
    Recall (also known as sensitivity) is the fraction of
    relevant instances that have been retrieved over the
    total amount of relevant instances.
    https://en.wikipedia.org/wiki/Precision_and_recall

        recall = true positives / (true positives + false negatives)

    Args:
        name(str|None): the metric name. When None, the base class falls
            back to the class name.

    Examples:
        .. code-block:: python

            metric = fluid.metrics.Recall()
            for pass_id in range(PASSES):
                metric.reset()
                for data in train_reader():
                    loss, preds, labels = exe.run(fetch_list=[cost, preds, labels])
                    metric.update(preds=preds, labels=labels)
                numpy_recall = metric.eval()
    """

    def __init__(self, name=None):
        super(Recall, self).__init__(name)
        self.tp = 0  # true positive
        self.fn = 0  # false negative

    def update(self, preds, labels):
        """
        Accumulate true positive / false negative counts from one minibatch.

        Args:
            preds(numpy.array): predictions of the current minibatch. Each
                entry is cast to int32 and the value 1 means "positive".
            labels(numpy.array): labels of the current minibatch; the
                value 1 means "positive".

        Raises:
            ValueError: if preds or labels is rejected by _is_numpy_.
        """
        if not _is_numpy_(preds):
            raise ValueError("The 'preds' must be a numpy ndarray.")
        if not _is_numpy_(labels):
            raise ValueError("The 'labels' must be a numpy ndarray.")
        # The sample count is the length of the first axis, not the value of
        # the first label (the previous `labels[0]`).
        # NOTE(review): assumes one binary entry per sample along axis 0 -
        # TODO confirm against callers.
        sample_num = labels.shape[0]
        for i in range(sample_num):
            # NOTE(review): astype truncates, so probabilistic outputs are
            # presumably thresholded by the caller - confirm.
            pred = preds[i].astype("int32")
            label = labels[i]
            # Recall only looks at samples whose *label* is positive: a
            # matching prediction is a true positive, a miss is a false
            # negative. Samples with a non-positive label never contribute.
            if label == 1:
                if pred == label:
                    self.tp += 1
                else:
                    self.fn += 1

    def eval(self):
        """
        Compute recall from the accumulated counts.

        Returns:
            float: tp / (tp + fn), or 0.0 when no positive label has been
            seen yet.
        """
        recall = self.tp + self.fn
        return float(self.tp) / recall if recall != 0 else .0
class Accuracy(MetricBase):
"""
Accumulate the accuracy from minibatches and compute the average accuracy
for every pass.
https://en.wikipedia.org/wiki/Accuracy_and_precision
Args:
name: the metrics name
Example:
minibatch_accuracy = fluid.layers.accuracy(pred, label)
accuracy_evaluator = fluid.metrics.Accuracy()
for epoch in PASS_NUM:
accuracy_evaluator.reset()
for data in batches:
loss = exe.run(fetch_list=[cost, minibatch_accuracy])
accuracy_evaluator.update(value=minibatch_accuracy, weight=batches)
accuracy = accuracy_evaluator.eval()
Examples:
.. code-block:: python
labels = fluid.layers.data(name="label", shape=[1], dtype="int32")
data = fluid.layers.data(name="data", shape=[32, 32], dtype="int32")
pred = fluid.layers.fc(input=data, size=1000, act="tanh")
minibatch_accuracy = fluid.layers.accuracy(pred, labels)
accuracy_evaluator = fluid.metrics.Accuracy()
for pass_id in range(PASSES):
accuracy_evaluator.reset()
for data in train_reader():
batch_size = data[0]
loss, acc = exe.run(fetch_list=[cost, minibatch_accuracy])
accuracy_evaluator.update(value=acc, weight=batch_size)
numpy_acc = accuracy_evaluator.eval()
"""
def __init__(self, name=None):
......@@ -153,6 +326,13 @@ class Accuracy(MetricBase):
self.weight = .0
def update(self, value, weight):
"""
Update minibatch states.
Args:
value(float|numpy.array): accuracy of one minibatch.
weight(int|float): batch size.
"""
if not _is_number_or_matrix_(value):
raise ValueError(
"The 'value' must be a number(int, float) or a numpy ndarray.")
......@@ -163,9 +343,8 @@ class Accuracy(MetricBase):
def eval(self):
if self.weight == 0:
raise ValueError(
"There is no data in Accuracy Metrics. Please check layers.accuracy output has added to Accuracy."
)
raise ValueError("There is no data in Accuracy Metrics. \
Please check layers.accuracy output has added to Accuracy.")
return self.value / self.weight
......@@ -174,6 +353,25 @@ class ChunkEvaluator(MetricBase):
Accumulate counter numbers output by chunk_eval from mini-batches and
compute the precision recall and F1-score using the accumulated counter
numbers.
For some basics of chunking, please refer to
'Chunking with Support Vector Machines <https://aclanthology.info/pdf/N/N01/N01-1025.pdf>'.
ChunkEvalEvaluator computes the precision, recall, and F1-score of chunk detection,
and supports IOB, IOE, IOBES and IO (also known as plain) tagging schemes.
Examples:
.. code-block:: python
labels = fluid.layers.data(name="data", shape=[1], dtype="int32")
data = fluid.layers.data(name="data", shape=[32, 32], dtype="int32")
pred = fluid.layers.fc(input=data, size=1000, act="tanh")
precision, recall, f1_score, num_infer_chunks, num_label_chunks, num_correct_chunks = layers.chunk_eval(
input=pred,
label=label)
metric = fluid.metrics.ChunkEvaluator()
for data in train_reader():
loss, preds, labels = exe.run(fetch_list=[cost, preds, labels])
metric.update(num_infer_chunks, num_label_chunks, num_correct_chunks)
numpy_precision, numpy_recall, numpy_f1 = metric.eval()
"""
def __init__(self, name=None):
......@@ -183,9 +381,17 @@ class ChunkEvaluator(MetricBase):
self.num_correct_chunks = 0
def update(self, num_infer_chunks, num_label_chunks, num_correct_chunks):
"""
Update the states based on the layers.chunk_eval() outputs.
Args:
num_infer_chunks(int|numpy.array): The number of chunks in Inference on the given minibatch.
num_label_chunks(int|numpy.array): The number of chunks in Label on the given mini-batch.
num_correct_chunks(int|float|numpy.array): The number of chunks both in Inference and Label on the
given mini-batch.
"""
if not _is_number_or_matrix_(num_infer_chunks):
raise ValueError(
"The 'num_infer_chunks' must be a number(int, float) or a numpy ndarray."
"The 'num_infer_chunks' must be a number(int) or a numpy ndarray."
)
if not _is_number_or_matrix_(num_label_chunks):
raise ValueError(
......@@ -212,21 +418,28 @@ class ChunkEvaluator(MetricBase):
class EditDistance(MetricBase):
"""
Edit distance is a way of quantifying how dissimilar two strings
(e.g., words) are to one another by counting the minimum number
of operations required to transform one string into the other.
Refer to https://en.wikipedia.org/wiki/Edit_distance
Accumulate edit distance sum and sequence number from mini-batches and
compute the average edit_distance and instance error of all batches.
Args:
name: the metrics name
Example:
edit_distance_metrics = fluid.layers.edit_distance(input, label)
distance_evaluator = fluid.metrics.EditDistance()
for epoch in PASS_NUM:
distance_evaluator.reset()
for data in batches:
loss = exe.run(fetch_list=[cost] + list(edit_distance_metrics))
distance_evaluator.update(*edit_distance_metrics)
distance, instance_error = distance_evaluator.eval()
Examples:
.. code-block:: python
distances, seq_num = fluid.layers.edit_distance(input, label)
distance_evaluator = fluid.metrics.EditDistance()
for epoch in PASS_NUM:
distance_evaluator.reset()
for data in batches:
loss, batch_distances, batch_seq_num = exe.run(fetch_list=[cost, distances, seq_num])
distance_evaluator.update(batch_distances, batch_seq_num)
distance, instance_error = distance_evaluator.eval()
In the above example:
'distance' is the average of the edit distance in a pass.
......@@ -264,16 +477,38 @@ class EditDistance(MetricBase):
class DetectionMAP(MetricBase):
"""
Calculate the detection mean average precision (mAP).
TODO (Dang Qingqing): update the following doc.
The general steps are as follows:
1. calculate the true positive and false positive according to the input
of detection and labels.
2. calculate mAP value, support two versions: '11 point' and 'integral'.
mAP is the metric to measure the accuracy of object detectors
like Faster R-CNN, SSD, etc.
It is the average of the maximum precisions at different recall values.
Please get more information from the following articles:
https://sanchom.wordpress.com/tag/average-precision/
https://arxiv.org/abs/1512.02325
The general steps are as follows:
1. calculate the true positive and false positive according to the input
of detection and labels.
2. calculate mAP value, support two versions: '11 point' and 'integral'.
Examples:
.. code-block:: python
pred = fluid.layers.fc(input=data, size=1000, act="tanh")
batch_map = layers.detection_map(
input,
label,
class_num,
background_label,
overlap_threshold=overlap_threshold,
evaluate_difficult=evaluate_difficult,
ap_version=ap_version)
metric = fluid.metrics.DetectionMAP()
for data in train_reader():
loss, preds, labels = exe.run(fetch_list=[cost, batch_map])
batch_size = data[0]
metric.update(value=batch_map, weight=batch_size)
numpy_map = metric.eval()
"""
def __init__(self, name=None):
......@@ -302,17 +537,18 @@ class DetectionMAP(MetricBase):
class Auc(MetricBase):
"""
Auc Metrics which adapts to binary classification.
Need to note that auc metrics compute the value via Python natively.
Auc metric adapts to the binary classification.
Refer to https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve
Need to note that auc metric compute the value via Python natively.
If you concern the speed, please use the fluid.layers.auc instead.
The `auc` function creates four local variables, `true_positives`,
`true_negatives`, `false_positives` and `false_negatives` that are used to
compute the AUC. To discretize the AUC curve, a linearly spaced set of
thresholds is used to compute pairs of recall and precision values. The area
under the ROC-curve is therefore computed using the height of the recall
values by the false positive rate, while the area under the PR-curve is the
computed using the height of the precision values by the recall.
`true_negatives`, `false_positives` and `false_negatives` that are used to
compute the AUC. To discretize the AUC curve, a linearly spaced set of
thresholds is used to compute pairs of recall and precision values. The area
under the ROC-curve is therefore computed using the height of the recall
values by the false positive rate, while the area under the PR-curve is the
computed using the height of the precision values by the recall.
Args:
name: metric name
......@@ -322,6 +558,16 @@ class Auc(MetricBase):
curve.
"NOTE: only implement the ROC curve type via Python now."
Examples:
.. code-block:: python
pred = fluid.layers.fc(input=data, size=1000, act="tanh")
metric = fluid.metrics.Auc()
for data in train_reader():
loss, preds, labels = exe.run(fetch_list=[cost, preds, labels])
metric.update(preds, labels)
numpy_auc = metric.eval()
"""
def __init__(self, name, curve='ROC', num_thresholds=200):
......@@ -334,10 +580,10 @@ class Auc(MetricBase):
self.tn_list = np.zeros((num_thresholds, ))
self.fp_list = np.zeros((num_thresholds, ))
def update(self, labels, predictions, axis=1):
def update(self, preds, labels):
if not _is_numpy_(labels):
raise ValueError("The 'labels' must be a numpy ndarray.")
if not _is_numpy_(predictions):
if not _is_numpy_(preds):
raise ValueError("The 'predictions' must be a numpy ndarray.")
kepsilon = 1e-7 # to account for floating point imprecisions
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册