metric_op.py 9.0 KB
Newer Older
F
fengjiayi 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
All layers just related to metric.
"""

18 19
from __future__ import print_function

D
dzhwinter 已提交
20
import warnings
F
fengjiayi 已提交
21 22 23 24
from ..layer_helper import LayerHelper
from ..initializer import Normal, Constant
from ..framework import Variable
from ..param_attr import ParamAttr
25
from . import nn
26
from ..data_feeder import convert_dtype
F
fengjiayi 已提交
27

D
dzhwinter 已提交
28
__all__ = ['accuracy', 'auc']
F
fengjiayi 已提交
29 30 31 32


def accuracy(input, label, k=1, correct=None, total=None):
    """
    accuracy layer.
    Refer to the https://en.wikipedia.org/wiki/Precision_and_recall

    This function computes the accuracy using the input and label.
    If the correct label occurs in top k predictions, then correct will increment by one.
    Note: the dtype of accuracy is determined by input. the input and label dtype can be different.

    Args:
        input(Variable): The input of accuracy layer, which is the predictions of network.
            A LoDTensor or Tensor with type float32, float64. float16 is also
            accepted, but a warning is issued because it is only supported on GPU.
        label(Variable): The label of dataset.  LoDTensor or Tensor with type int32,int64.
        k(int): The top k predictions for each class will be checked. Data type is int64 or int32.
        correct(Variable): The correct predictions count. A Tensor with type int64 or int32.
            If None, an int64 variable is created internally.
        total(Variable): The total entries count. A tensor with type int64 or int32.
            If None, an int64 variable is created internally.

    Returns:
        Variable: The correct rate. A Tensor with type float32.

    Raises:
        TypeError: If ``input`` is not a Variable, or if its data type is not
            float16, float32 or float64.

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid
            import numpy as np

            data = fluid.data(name="input", shape=[-1, 32, 32], dtype="float32")
            label = fluid.data(name="label", shape=[-1,1], dtype="int")
            fc_out = fluid.layers.fc(input=data, size=10)
            predict = fluid.layers.softmax(input=fc_out)
            result = fluid.layers.accuracy(input=predict, label=label, k=5)

            place = fluid.CPUPlace()
            exe = fluid.Executor(place)

            exe.run(fluid.default_startup_program())
            x = np.random.rand(3, 32, 32).astype("float32")
            y = np.array([[1],[0],[1]])
            output= exe.run(feed={"input": x,"label": y},
                             fetch_list=[result[0]])
            print(output)

            #[array([0.6666667], dtype=float32)]
    """
    # Must stay the first statement: locals() has to contain only the
    # function arguments when it is forwarded to LayerHelper.
    helper = LayerHelper("accuracy", **locals())

    if not isinstance(input, Variable):
        raise TypeError(
            "The type of 'input' in accuracy must be Variable, but received %s"
            % (type(input)))

    # Resolve the dtype name once and reuse it for both checks below.
    input_dtype = convert_dtype(input.dtype)
    if input_dtype == 'float16':
        warnings.warn(
            "The data type of 'input' in accuracy only support float16 in GPU now."
        )
    if input_dtype not in ['float16', 'float32', 'float64']:
        raise TypeError(
            "The data type of 'input' in accuracy must be float16 or float32 or float64, but received %s."
            % (input_dtype))

    # The accuracy op consumes the top-k values and indices of the predictions.
    topk_out, topk_indices = nn.topk(input, k=k)

    acc_out = helper.create_variable_for_type_inference(dtype="float32")
    # Allocate the counters only when the caller did not supply them.
    if correct is None:
        correct = helper.create_variable_for_type_inference(dtype="int64")
    if total is None:
        total = helper.create_variable_for_type_inference(dtype="int64")

    helper.append_op(
        type="accuracy",
        inputs={
            "Out": [topk_out],
            "Indices": [topk_indices],
            "Label": [label]
        },
        outputs={
            "Accuracy": [acc_out],
            "Correct": [correct],
            "Total": [total],
        })
    return acc_out
D
dzhwinter 已提交
106 107


T
tangwei12 已提交
108 109 110 111 112 113
def auc(input,
        label,
        curve='ROC',
        num_thresholds=2**12 - 1,
        topk=1,
        slide_steps=1):
    """
    **Area Under the Curve (AUC) Layer**

    This implementation computes the AUC according to forward output and label.
    It is used very widely in binary classification evaluation.

    Note: If input label contains values other than 0 and 1, it will be cast
    to `bool`. Find the relevant definitions
    `here <https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve>`_.

    There are two types of possible curves:

        1. ROC: Receiver operating characteristic;
        2. PR: Precision Recall

    Args:
        input(Variable): A floating-point 2D Variable, values are in the range
                         [0, 1]. Each row is sorted in descending order. This
                         input should be the output of topk. Typically, this
                         Variable indicates the probability of each label.
                         A LoDTensor or Tensor with type float32,float64.
        label(Variable): A 2D int Variable indicating the label of the training
                         data. The height is batch size and width is always 1.
                         A LoDTensor or Tensor with type int32,int64.
        curve(str): Curve type, can be 'ROC' or 'PR'. Default 'ROC'.
        num_thresholds(int): The number of thresholds to use when discretizing
                             the roc curve. Default 4095 (``2**12 - 1``).
        topk(int): only topk number of prediction output will be used for auc.
                   Note: this value is currently not forwarded to the
                   attributes of the ``auc`` operators appended below.
        slide_steps(int): when calculating the batch auc, not only the current
                          step but also previous steps can be used.
                          slide_steps=1 means use the current step,
                          slide_steps=3 means use the current step and the
                          previous two steps, slide_steps=0 means use all of
                          the steps. Default 1.

    Returns:
        tuple: ``(auc_out, batch_auc_out, [batch_stat_pos, batch_stat_neg,
        stat_pos, stat_neg])``. ``auc_out`` and ``batch_auc_out`` are created
        with dtype float64 and are the outputs of the global and the batch
        AUC operators respectively. The four statistic variables are
        persistable int64 global variables: the batch ones have shape
        ``[slide_steps, num_thresholds + 1]`` and the global ones have shape
        ``[1, num_thresholds + 1]``.

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid
            import numpy as np

            data = fluid.data(name="input", shape=[-1, 32,32], dtype="float32")
            label = fluid.data(name="label", shape=[-1], dtype="int")
            fc_out = fluid.layers.fc(input=data, size=2)
            predict = fluid.layers.softmax(input=fc_out)
            result=fluid.layers.auc(input=predict, label=label)

            place = fluid.CPUPlace()
            exe = fluid.Executor(place)

            exe.run(fluid.default_startup_program())
            x = np.random.rand(3,32,32).astype("float32")
            y = np.array([1,0,1])
            output= exe.run(feed={"input": x,"label": y},
                             fetch_list=[result[0]])
            print(output)
            #[array([0.5])]
    """
    # Must stay the first statement: locals() has to contain only the
    # function arguments when it is forwarded to LayerHelper.
    helper = LayerHelper("auc", **locals())
    auc_out = helper.create_variable_for_type_inference(dtype="float64")
    batch_auc_out = helper.create_variable_for_type_inference(dtype="float64")

    # The positive/negative statistic buffers are persistable so that they
    # can accumulate across batches.

    # for batch auc
    batch_stat_pos = helper.create_global_variable(
        persistable=True,
        dtype='int64',
        shape=[slide_steps, num_thresholds + 1])
    batch_stat_neg = helper.create_global_variable(
        persistable=True,
        dtype='int64',
        shape=[slide_steps, num_thresholds + 1])

    # for global auc
    stat_pos = helper.create_global_variable(
        persistable=True, dtype='int64', shape=[1, num_thresholds + 1])
    stat_neg = helper.create_global_variable(
        persistable=True, dtype='int64', shape=[1, num_thresholds + 1])

    # Every statistic buffer starts from zero.
    for var in [batch_stat_pos, batch_stat_neg, stat_pos, stat_neg]:
        helper.set_variable_initializer(
            var, Constant(
                value=0.0, force_cpu=True))

    # Batch AUC: the statistic buffers are both inputs and outputs of the op.
    helper.append_op(
        type="auc",
        inputs={
            "Predict": [input],
            "Label": [label],
            "StatPos": [batch_stat_pos],
            "StatNeg": [batch_stat_neg]
        },
        attrs={
            "curve": curve,
            "num_thresholds": num_thresholds,
            "slide_steps": slide_steps
        },
        outputs={
            "AUC": [batch_auc_out],
            "StatPosOut": [batch_stat_pos],
            "StatNegOut": [batch_stat_neg]
        })
    # Global AUC: slide_steps is fixed to 0, i.e. all of the steps are used.
    helper.append_op(
        type="auc",
        inputs={
            "Predict": [input],
            "Label": [label],
            "StatPos": [stat_pos],
            "StatNeg": [stat_neg]
        },
        attrs={
            "curve": curve,
            "num_thresholds": num_thresholds,
            "slide_steps": 0
        },
        outputs={
            "AUC": [auc_out],
            "StatPosOut": [stat_pos],
            "StatNegOut": [stat_neg]
        })
    return auc_out, batch_auc_out, [
        batch_stat_pos, batch_stat_neg, stat_pos, stat_neg
    ]