#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
All layers related to metrics.
"""

from paddle import _legacy_C_ops
from paddle.fluid.data_feeder import check_variable_and_dtype
from paddle.fluid.framework import Variable, _non_static_mode, _varbase_creator
from paddle.fluid.initializer import Constant
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.layers import tensor

__all__ = ['accuracy', 'auc']


def accuracy(input, label, k=1, correct=None, total=None):
    """

    Accuracy layer.
    Refer to https://en.wikipedia.org/wiki/Precision_and_recall
    This function computes the accuracy using the input and label.
    If the correct label occurs in the top k predictions, then correct will increment by one.

    Note:
        The dtype of accuracy is determined by the input. The input and label dtypes can be different.

    Args:
        input(Tensor): The input of the accuracy layer, which is the predictions of the network. A Tensor with type float32 or float64.
            The shape is ``[sample_number, class_dim]`` .
        label(Tensor): The label of the dataset. A Tensor with type int32 or int64. The shape is ``[sample_number, 1]`` .
        k(int, optional): The top k predictions for each sample will be checked. Data type is int64 or int32. Default is 1.
        correct(Tensor, optional): The correct predictions count. A Tensor with type int64 or int32. Default is None.
        total(Tensor, optional): The total entries count. A Tensor with type int64 or int32. Default is None.

    Returns:
        Tensor, the correct rate. A Tensor with type float32.

    Examples:
        .. code-block:: python

            import numpy as np
            import paddle
            import paddle.static as static
            import paddle.nn.functional as F
            paddle.enable_static()
            data = static.data(name="input", shape=[-1, 32, 32], dtype="float32")
            label = static.data(name="label", shape=[-1,1], dtype="int")
            fc_out = static.nn.fc(x=data, size=10)
            predict = F.softmax(x=fc_out)
            result = static.accuracy(input=predict, label=label, k=5)
            place = paddle.CPUPlace()
            exe = static.Executor(place)
            exe.run(static.default_startup_program())
            x = np.random.rand(3, 32, 32).astype("float32")
            y = np.array([[1],[0],[1]])
            output = exe.run(feed={"input": x, "label": y},
                        fetch_list=[result[0]])
            print(output)
            #[array([0.], dtype=float32)]

    """
    if _non_static_mode():
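        # Eager mode: allocate int32 counters if the caller did not supply them,
        # then invoke the legacy C++ ops directly instead of building graph ops.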
        if correct is None:
            correct = _varbase_creator(dtype="int32")
        if total is None:
            total = _varbase_creator(dtype="int32")

        _k = k.numpy().item(0) if isinstance(k, Variable) else k
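        # top_k_v2 returns the k largest scores and their indices per row; the result
        # need not be sorted because only membership of the label among them matters.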
        topk_out, topk_indices = _legacy_C_ops.top_k_v2(
            input, 'k', _k, 'sorted', False
        )
        _acc, _, _ = _legacy_C_ops.accuracy(
            topk_out, topk_indices, label, correct, total
        )
        return _acc

    helper = LayerHelper("accuracy", **locals())
    check_variable_and_dtype(
        input, 'input', ['float16', 'float32', 'float64'], 'accuracy'
    )
    topk_out = helper.create_variable_for_type_inference(dtype=input.dtype)
    topk_indices = helper.create_variable_for_type_inference(dtype="int64")
    inputs = {"X": [input]}
    attrs = {'sorted': False}
    if isinstance(k, Variable):
        inputs['K'] = [k]
    else:
        attrs['k'] = k
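    # K may be supplied either as a plain int attribute or as an input tensor; the
    # top_k_v2 op below selects the k highest scores of each row.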
    helper.append_op(
        type="top_k_v2",
        inputs=inputs,
        attrs=attrs,
        outputs={"Out": [topk_out], "Indices": [topk_indices]},
    )
    acc_out = helper.create_variable_for_type_inference(dtype="float32")
    if correct is None:
        correct = helper.create_variable_for_type_inference(dtype="int32")
    if total is None:
        total = helper.create_variable_for_type_inference(dtype="int32")
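    # The accuracy op compares the top-k indices with the labels and writes the batch
    # accuracy together with the correct and total counts.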
    helper.append_op(
        type="accuracy",
        inputs={"Out": [topk_out], "Indices": [topk_indices], "Label": [label]},
        outputs={
            "Accuracy": [acc_out],
            "Correct": [correct],
            "Total": [total],
        },
    )
    return acc_out


def auc(
    input,
    label,
    curve='ROC',
    num_thresholds=2**12 - 1,
    topk=1,
    slide_steps=1,
    ins_tag_weight=None,
):
    """
    **Area Under the Curve (AUC) Layer**

    This implementation computes the AUC according to the forward output and label.
    It is widely used in binary classification evaluation.

    Note: If input label contains values other than 0 and 1, it will be cast
    to `bool`. Find the relevant definitions `here <https://en.wikipedia.org\
    /wiki/Receiver_operating_characteristic#Area_under_the_curve>`_.

    There are two types of possible curves:

        1. ROC: Receiver operating characteristic;
        2. PR: Precision Recall

    Args:
        input(Tensor): A floating-point 2D Tensor, values are in the range
                         [0, 1]. Each row is sorted in descending order. This
                         input should be the output of topk. Typically, this
                         Tensor indicates the probability of each label.
                         A Tensor with type float32 or float64.
        label(Tensor): A 2D int Tensor indicating the label of the training
                         data. The height is batch size and width is always 1.
                         A Tensor with type int32 or int64.
        curve(str): Curve type, can be 'ROC' or 'PR'. Default 'ROC'.
        num_thresholds(int): The number of thresholds to use when discretizing
                             the ROC curve. Default 4095.
        topk(int): Only the top k prediction outputs will be used for AUC. Default 1.
        slide_steps(int): When computing the batch AUC, previous steps can be used in addition to the current one. slide_steps=1 means using only the current step, slide_steps=3 means using the current step and the previous two steps, and slide_steps=0 means using all steps. Default 1.
        ins_tag_weight(Tensor): A 2D Tensor indicating the instance tag weight, where 1 means real data and 0 means fake data. Default None, in which case it is assigned a tensor of value 1.
                         A Tensor with type float32 or float64.

    Returns:
        tuple: A tuple representing the current AUC. The return tuple is auc_out,
        batch_auc_out, [batch_stat_pos, batch_stat_neg, stat_pos, stat_neg].
        Data type is float32 or float64.

    Examples:
        .. code-block:: python

            import paddle
            import numpy as np
            paddle.enable_static()

            data = paddle.static.data(name="input", shape=[-1, 32,32], dtype="float32")
            label = paddle.static.data(name="label", shape=[-1], dtype="int")
            fc_out = paddle.static.nn.fc(x=data, size=2)
            predict = paddle.nn.functional.softmax(x=fc_out)
            result = paddle.static.auc(input=predict, label=label)

            place = paddle.CPUPlace()
            exe = paddle.static.Executor(place)

            exe.run(paddle.static.default_startup_program())
            x = np.random.rand(3,32,32).astype("float32")
            y = np.array([1,0,1])
            output = exe.run(feed={"input": x, "label": y},
                             fetch_list=[result[0]])
            print(output)

            # You can learn the usage of ins_tag_weight from the following code.
            '''
            import paddle
            import numpy as np
            paddle.enable_static()

            data = paddle.static.data(name="input", shape=[-1, 32,32], dtype="float32")
            label = paddle.static.data(name="label", shape=[-1], dtype="int")
            ins_tag_weight = paddle.static.data(name='ins_tag', shape=[-1,16], lod_level=0, dtype='float64')
            fc_out = paddle.static.nn.fc(x=data, size=2)
            predict = paddle.nn.functional.softmax(x=fc_out)
            result = paddle.static.auc(input=predict, label=label, ins_tag_weight=ins_tag_weight)

            place = paddle.CPUPlace()
            exe = paddle.static.Executor(place)

            exe.run(paddle.static.default_startup_program())
            x = np.random.rand(3,32,32).astype("float32")
            y = np.array([1,0,1])
            z = np.array([1,0,1])
            output = exe.run(feed={"input": x, "label": y, "ins_tag_weight": z},
                             fetch_list=[result[0]])
            print(output)
            '''

    """
    helper = LayerHelper("auc", **locals())

    if ins_tag_weight is None:
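        # Default to a constant weight of 1.0, i.e. treat every instance as real data.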
        ins_tag_weight = tensor.fill_constant(
            shape=[1, 1], dtype="float32", value=1.0
        )
    check_variable_and_dtype(input, 'input', ['float32', 'float64'], 'auc')
    check_variable_and_dtype(label, 'label', ['int32', 'int64'], 'auc')
    check_variable_and_dtype(
        ins_tag_weight, 'ins_tag_weight', ['float32', 'float64'], 'auc'
    )
    auc_out = helper.create_variable_for_type_inference(dtype="float64")
    batch_auc_out = helper.create_variable_for_type_inference(dtype="float64")
    # make tp, tn, fp, fn persistable so that they can accumulate across all batches.

    # for batch auc
    # We create slide_steps + 1 buckets: the first slide_steps buckets store
    # historical batch-level values, and the last bucket stores the sum of the
    # previous slide_steps buckets.
    # The index of the bucket that the newest batch will use is determined by
    # batch_id mod slide_steps, and batch_id is stored in the last position of
    # the following variables.
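    # For example, with the defaults num_thresholds = 2**12 - 1 and slide_steps = 1,
    # each batch stat variable holds (1 + 1) * 4096 + 1 = 8193 int64 entries.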
    batch_stat_pos = helper.create_global_variable(
        persistable=True,
        dtype='int64',
        shape=[(1 + slide_steps) * (num_thresholds + 1) + 1],
    )
    batch_stat_neg = helper.create_global_variable(
        persistable=True,
        dtype='int64',
        shape=[(1 + slide_steps) * (num_thresholds + 1) + 1],
    )

    # for global auc
    # No need to maintain the batch id
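    # Each one needs only a single bucket per threshold, hence the
    # [1, num_thresholds + 1] shape.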
    stat_pos = helper.create_global_variable(
        persistable=True, dtype='int64', shape=[1, num_thresholds + 1]
    )
    stat_neg = helper.create_global_variable(
        persistable=True, dtype='int64', shape=[1, num_thresholds + 1]
    )

    for var in [batch_stat_pos, batch_stat_neg, stat_pos, stat_neg]:
        helper.set_variable_initializer(
            var, Constant(value=0.0, force_cpu=False)
        )

    # "InsTagWeight": [ins_tag_weight]
    # Batch AUC
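    # Uses the sliding-window statistics, so batch_auc_out reflects roughly the most
    # recent slide_steps batches rather than the whole history.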
    helper.append_op(
        type="auc",
        inputs={
            "Predict": [input],
            "Label": [label],
            "StatPos": [batch_stat_pos],
            "StatNeg": [batch_stat_neg],
        },
        attrs={
            "curve": curve,
            "num_thresholds": num_thresholds,
            "slide_steps": slide_steps,
        },
        outputs={
            "AUC": [batch_auc_out],
            "StatPosOut": [batch_stat_pos],
            "StatNegOut": [batch_stat_neg],
        },
    )
    # Global AUC
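    # slide_steps is fixed to 0 here, so the statistics accumulate over every batch
    # seen so far and auc_out is the running global AUC.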
    helper.append_op(
        type="auc",
        inputs={
            "Predict": [input],
            "Label": [label],
            "StatPos": [stat_pos],
            "StatNeg": [stat_neg],
        },
        attrs={
            "curve": curve,
            "num_thresholds": num_thresholds,
            "slide_steps": 0,
        },
        outputs={
            "AUC": [auc_out],
            "StatPosOut": [stat_pos],
            "StatNegOut": [stat_neg],
        },
    )
    return (
        auc_out,
        batch_auc_out,
        [batch_stat_pos, batch_stat_neg, stat_pos, stat_neg],
    )