metric_op.py 11.2 KB
Newer Older
F
fengjiayi 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
All layers just related to metric.
"""

18 19
from __future__ import print_function

D
dzhwinter 已提交
20
import warnings
F
fengjiayi 已提交
21 22
from ..layer_helper import LayerHelper
from ..initializer import Normal, Constant
H
hong 已提交
23
from ..framework import Variable, _non_static_mode, _varbase_creator, _in_legacy_dygraph, in_dygraph_mode
24
from .. import core
F
fengjiayi 已提交
25
from ..param_attr import ParamAttr
26
from . import nn
27
from ..data_feeder import check_variable_and_dtype
W
wanghuancoder 已提交
28
from paddle import _C_ops
F
fengjiayi 已提交
29

D
dzhwinter 已提交
30
# Names exported by this module: the two metric layers defined below.
__all__ = ['accuracy', 'auc']
F
fengjiayi 已提交
31 32 33 34


def accuracy(input, label, k=1, correct=None, total=None):
    """
    accuracy layer.
    Refer to the https://en.wikipedia.org/wiki/Precision_and_recall

    This function computes the top-k accuracy using the input and label.
    If the correct label occurs in top k predictions, then correct will increment by one.
    Note: the input and label dtype can be different.

    Args:
        input(Variable): The input of accuracy layer, which is the predictions of network. A LoDTensor or Tensor with type float16,float32,float64.
            The shape is ``[sample_number, class_dim]`` .
        label(Variable): The label of dataset.  LoDTensor or Tensor with type int32,int64. The shape is ``[sample_number, 1]`` .
        k(int|Variable): The number of top predictions that will be checked for
            each sample. May be a Python int or a Variable holding the value.
            Default 1.
        correct(Variable, optional): Output variable receiving the correct
            predictions count. When None, an int32 variable is created
            internally. Default None.
        total(Variable, optional): Output variable receiving the total entries
            count. When None, an int32 variable is created internally.
            Default None.

    Returns:
        Variable: The correct rate. In static graph mode the output is created
        as a Tensor with type float32.

    Examples:
        .. code-block:: python

            import numpy as np

            import paddle
            import paddle.static as static
            import paddle.nn.functional as F

            paddle.enable_static()
            data = static.data(name="input", shape=[-1, 32, 32], dtype="float32")
            label = static.data(name="label", shape=[-1,1], dtype="int")
            fc_out = static.nn.fc(x=data, size=10)
            predict = F.softmax(x=fc_out)
            result = static.accuracy(input=predict, label=label, k=5)

            place = paddle.CPUPlace()
            exe = static.Executor(place)

            exe.run(static.default_startup_program())
            x = np.random.rand(3, 32, 32).astype("float32")
            y = np.array([[1],[0],[1]])
            output= exe.run(feed={"input": x,"label": y},
                        fetch_list=[result[0]])
            print(output)

            #[array([0.], dtype=float32)]
    """
    if _non_static_mode():
        # Imperative path: run the ops directly and return the accuracy value.
        if correct is None:
            correct = _varbase_creator(dtype="int32")
        if total is None:
            total = _varbase_creator(dtype="int32")

        # The op call takes ``k`` as a plain attribute, so unwrap a Variable.
        _k = k.numpy().item(0) if isinstance(k, Variable) else k
        topk_out, topk_indices = _C_ops.top_k_v2(input, 'k', _k, 'sorted',
                                                 False)
        _acc, _, _ = _C_ops.accuracy(topk_out, topk_indices, label, correct,
                                     total)
        return _acc

    # Static graph path: append ``top_k_v2`` followed by ``accuracy``.
    # NOTE: ``locals()`` is forwarded to LayerHelper, so no additional local
    # names may be introduced above this line.
    helper = LayerHelper("accuracy", **locals())
    check_variable_and_dtype(input, 'input', ['float16', 'float32', 'float64'],
                             'accuracy')
    topk_out = helper.create_variable_for_type_inference(dtype=input.dtype)
    topk_indices = helper.create_variable_for_type_inference(dtype="int64")

    inputs = {"X": [input]}
    # ``attrs`` must exist on both branches: previously it was only created
    # when ``k`` was a plain int, so passing a Variable for ``k`` failed on
    # the ``attrs['sorted']`` assignment with an undefined-name error.
    attrs = {}
    if isinstance(k, Variable):
        # A Variable ``k`` is fed to the op as an input instead of an attribute.
        inputs['K'] = [k]
    else:
        attrs['k'] = k
    attrs['sorted'] = False
    helper.append_op(type="top_k_v2",
                     inputs=inputs,
                     attrs=attrs,
                     outputs={
                         "Out": [topk_out],
                         "Indices": [topk_indices]
                     })

    acc_out = helper.create_variable_for_type_inference(dtype="float32")
    if correct is None:
        correct = helper.create_variable_for_type_inference(dtype="int32")
    if total is None:
        total = helper.create_variable_for_type_inference(dtype="int32")
    helper.append_op(type="accuracy",
                     inputs={
                         "Out": [topk_out],
                         "Indices": [topk_indices],
                         "Label": [label]
                     },
                     outputs={
                         "Accuracy": [acc_out],
                         "Correct": [correct],
                         "Total": [total],
                     })
    return acc_out
D
dzhwinter 已提交
129 130


T
tangwei12 已提交
131 132 133 134 135 136
def auc(input,
        label,
        curve='ROC',
        num_thresholds=2**12 - 1,
        topk=1,
        slide_steps=1):
    r"""
    **Area Under the Curve (AUC) Layer**

    This implementation computes the AUC according to forward output and label.
    It is used very widely in binary classification evaluation.

    Note: If input label contains values other than 0 and 1, it will be cast
    to `bool`. Find the relevant definitions `here <https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve>`_.

    There are two types of possible curves:

        1. ROC: Receiver operating characteristic;
        2. PR: Precision Recall

    Args:
        input(Variable): A floating-point 2D Variable, values are in the range
                         [0, 1]. Each row is sorted in descending order. This
                         input should be the output of topk. Typically, this
                         Variable indicates the probability of each label.
                         A LoDTensor or Tensor with type float32,float64.
        label(Variable): A 2D int Variable indicating the label of the training
                         data. The height is batch size and width is always 1.
                         A LoDTensor or Tensor with type int32,int64.
        curve(str): Curve type, can be 'ROC' or 'PR'. Default 'ROC'.
        num_thresholds(int): The number of thresholds to use when discretizing
                             the roc curve. Default 4095 (``2**12 - 1``).
        topk(int): Kept in the signature for backward compatibility. It is not
                   passed to the ``auc`` ops appended by this function.
                   Default 1.
        slide_steps(int): The number of steps taken into account when computing
                          the batch AUC. ``slide_steps=1`` uses only the current
                          step, ``slide_steps=3`` uses the current step and the
                          two previous steps, and ``slide_steps=0`` uses all
                          steps. Default 1.

    Returns:
        tuple: ``(auc_out, batch_auc_out, [batch_stat_pos, batch_stat_neg,
        stat_pos, stat_neg])``. ``auc_out`` is the AUC accumulated over all
        steps and ``batch_auc_out`` is the AUC over the last ``slide_steps``
        steps; both are created as Tensors with type float64. The list holds
        the persistable int64 statistic variables backing the two values.

    Examples:
        .. code-block:: python

            import numpy as np

            import paddle
            import paddle.static as static
            import paddle.nn.functional as F

            paddle.enable_static()
            data = static.data(name="input", shape=[-1, 32,32], dtype="float32")
            label = static.data(name="label", shape=[-1], dtype="int")
            fc_out = static.nn.fc(x=data, size=2)
            predict = F.softmax(x=fc_out)
            result = static.auc(input=predict, label=label)

            place = paddle.CPUPlace()
            exe = static.Executor(place)

            exe.run(static.default_startup_program())
            x = np.random.rand(3,32,32).astype("float32")
            y = np.array([1,0,1])
            output= exe.run(feed={"input": x,"label": y},
                        fetch_list=[result[0]])
            print(output)
            #[array([0.])]
    """
    # NOTE: ``locals()`` is forwarded to LayerHelper, so no additional local
    # names may be introduced above this line.
    helper = LayerHelper("auc", **locals())
    check_variable_and_dtype(input, 'input', ['float32', 'float64'], 'auc')
    check_variable_and_dtype(label, 'label', ['int32', 'int64'], 'auc')
    auc_out = helper.create_variable_for_type_inference(dtype="float64")
    batch_auc_out = helper.create_variable_for_type_inference(dtype="float64")

    # The statistic variables below are persistable so that they accumulate
    # across batches.

    # For batch AUC:
    # we create slide_steps + 1 buckets; the first slide_steps buckets store
    # historical batch-level values, and the last bucket stores the sum of the
    # previous slide_steps buckets.
    # The index of the bucket used by the newest batch is determined by
    # batch_id mod slide_steps, and batch_id is stored in the last position of
    # each of the following variables (hence the trailing "+ 1").
    batch_stat_size = (1 + slide_steps) * (num_thresholds + 1) + 1
    batch_stat_pos = helper.create_global_variable(
        persistable=True,
        dtype='int64',
        shape=[batch_stat_size])
    batch_stat_neg = helper.create_global_variable(
        persistable=True,
        dtype='int64',
        shape=[batch_stat_size])

    # For global AUC:
    # a single bucket is enough and the batch id does not need to be tracked.
    stat_pos = helper.create_global_variable(persistable=True,
                                             dtype='int64',
                                             shape=[1, num_thresholds + 1])
    stat_neg = helper.create_global_variable(persistable=True,
                                             dtype='int64',
                                             shape=[1, num_thresholds + 1])

    # All statistics start from zero.
    for var in [batch_stat_pos, batch_stat_neg, stat_pos, stat_neg]:
        helper.set_variable_initializer(var, Constant(value=0.0,
                                                      force_cpu=False))

    # Batch AUC: the op reads and updates the batch statistics in place.
    helper.append_op(type="auc",
                     inputs={
                         "Predict": [input],
                         "Label": [label],
                         "StatPos": [batch_stat_pos],
                         "StatNeg": [batch_stat_neg]
                     },
                     attrs={
                         "curve": curve,
                         "num_thresholds": num_thresholds,
                         "slide_steps": slide_steps
                     },
                     outputs={
                         "AUC": [batch_auc_out],
                         "StatPosOut": [batch_stat_pos],
                         "StatNegOut": [batch_stat_neg]
                     })
    # Global AUC: same op with slide_steps fixed to 0 (use all steps).
    helper.append_op(type="auc",
                     inputs={
                         "Predict": [input],
                         "Label": [label],
                         "StatPos": [stat_pos],
                         "StatNeg": [stat_neg]
                     },
                     attrs={
                         "curve": curve,
                         "num_thresholds": num_thresholds,
                         "slide_steps": 0
                     },
                     outputs={
                         "AUC": [auc_out],
                         "StatPosOut": [stat_pos],
                         "StatNegOut": [stat_neg]
                     })
    return auc_out, batch_auc_out, [
        batch_stat_pos, batch_stat_neg, stat_pos, stat_neg
    ]