Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
2b58c62a
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
2b58c62a
编写于
7月 19, 2018
作者:
Q
Qiao Longfei
提交者:
GitHub
7月 19, 2018
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Update auc op (#12199)
fix AUC op optimize it's test
上级
37713f22
变更
5
显示空白变更内容
内联
并排
Showing
5 changed file
with
52 addition
and
108 deletion
+52
-108
paddle/fluid/operators/auc_op.cc
paddle/fluid/operators/auc_op.cc
+13
-17
paddle/fluid/operators/auc_op.h
paddle/fluid/operators/auc_op.h
+20
-21
python/paddle/fluid/layers/metric_op.py
python/paddle/fluid/layers/metric_op.py
+7
-18
python/paddle/fluid/metrics.py
python/paddle/fluid/metrics.py
+1
-1
python/paddle/fluid/tests/unittests/test_auc_op.py
python/paddle/fluid/tests/unittests/test_auc_op.py
+11
-51
未找到文件。
paddle/fluid/operators/auc_op.cc
浏览文件 @
2b58c62a
...
...
@@ -24,15 +24,16 @@ class AucOp : public framework::OperatorWithKernel {
protected:
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Out"
),
"Input of Out should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Indices"
),
"Input of Indices should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Predict"
),
"Input of Out should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Label"
),
"Input of Label should not be null."
);
auto
inference_height
=
ctx
->
GetInputDim
(
"Out"
)[
0
];
auto
predict_width
=
ctx
->
GetInputDim
(
"Predict"
)[
1
];
PADDLE_ENFORCE_EQ
(
predict_width
,
2
,
"Only support binary classification"
);
auto
predict_height
=
ctx
->
GetInputDim
(
"Predict"
)[
0
];
auto
label_height
=
ctx
->
GetInputDim
(
"Label"
)[
0
];
PADDLE_ENFORCE_EQ
(
inference
_height
,
label_height
,
PADDLE_ENFORCE_EQ
(
predict
_height
,
label_height
,
"Out and Label should have same height."
);
int
num_thres
=
ctx
->
Attrs
().
Get
<
int
>
(
"num_thresholds"
);
...
...
@@ -43,14 +44,14 @@ class AucOp : public framework::OperatorWithKernel {
ctx
->
SetOutputDim
(
"FPOut"
,
{
num_thres
});
ctx
->
SetOutputDim
(
"FNOut"
,
{
num_thres
});
ctx
->
ShareLoD
(
"
Ou
t"
,
/*->*/
"AUC"
);
ctx
->
ShareLoD
(
"
Predic
t"
,
/*->*/
"AUC"
);
}
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"
Ou
t"
)
->
type
()),
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"
Predic
t"
)
->
type
()),
ctx
.
device_context
());
}
};
...
...
@@ -58,18 +59,13 @@ class AucOp : public framework::OperatorWithKernel {
class
AucOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
void
Make
()
override
{
AddInput
(
"Out"
,
"A floating point 2D tensor, values are in the range [0, 1]."
"Each row is sorted in descending order. This input should be the"
"output of topk."
AddInput
(
"Predict"
,
"A floating point 2D tensor with shape [batch_size, 2], values "
"are in the range [0, 1]."
"Typically, this tensor indicates the probability of each label"
);
AddInput
(
"Indices"
,
"An int 2D tensor, indicating the indices of original"
"tensor before sorting. Typically, this tensor indicates which "
"label the probability stands for."
);
AddInput
(
"Label"
,
"A 2D int tensor indicating the label of the training data."
"
The height is batch size and width is always 1.
"
);
"A 2D int tensor indicating the label of the training data.
"
"
shape: [batch_size, 1]
"
);
AddInput
(
"TP"
,
"True-Positive value."
);
AddInput
(
"FP"
,
"False-Positive value."
);
AddInput
(
"TN"
,
"True-Negative value."
);
...
...
paddle/fluid/operators/auc_op.h
浏览文件 @
2b58c62a
...
...
@@ -31,7 +31,7 @@ template <typename DeviceContext, typename T>
class
AucKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
inference
=
ctx
.
Input
<
Tensor
>
(
"Ou
t"
);
auto
*
predict
=
ctx
.
Input
<
Tensor
>
(
"Predic
t"
);
auto
*
label
=
ctx
.
Input
<
Tensor
>
(
"Label"
);
auto
*
auc
=
ctx
.
Output
<
Tensor
>
(
"AUC"
);
// Only use output var for now, make sure it's persistable and
...
...
@@ -41,24 +41,24 @@ class AucKernel : public framework::OpKernel<T> {
auto
*
true_negative
=
ctx
.
Output
<
Tensor
>
(
"TNOut"
);
auto
*
false_negative
=
ctx
.
Output
<
Tensor
>
(
"FNOut"
);
float
*
auc_data
=
auc
->
mutable_data
<
float
>
(
ctx
.
GetPlace
());
auto
*
auc_data
=
auc
->
mutable_data
<
double
>
(
ctx
.
GetPlace
());
std
::
string
curve
=
ctx
.
Attr
<
std
::
string
>
(
"curve"
);
int
num_thresholds
=
ctx
.
Attr
<
int
>
(
"num_thresholds"
);
std
::
vector
<
float
>
thresholds_list
;
std
::
vector
<
double
>
thresholds_list
;
thresholds_list
.
reserve
(
num_thresholds
);
for
(
int
i
=
1
;
i
<
num_thresholds
-
1
;
i
++
)
{
thresholds_list
[
i
]
=
static_cast
<
float
>
(
i
)
/
(
num_thresholds
-
1
);
thresholds_list
[
i
]
=
static_cast
<
double
>
(
i
)
/
(
num_thresholds
-
1
);
}
const
float
kEpsilon
=
1e-7
;
const
double
kEpsilon
=
1e-7
;
thresholds_list
[
0
]
=
0.0
f
-
kEpsilon
;
thresholds_list
[
num_thresholds
-
1
]
=
1.0
f
+
kEpsilon
;
size_t
batch_size
=
inference
->
dims
()[
0
];
size_t
inference_width
=
inference
->
dims
()[
1
];
size_t
batch_size
=
predict
->
dims
()[
0
];
size_t
inference_width
=
predict
->
dims
()[
1
];
const
T
*
inference_data
=
inference
->
data
<
T
>
();
const
int64_t
*
label_data
=
label
->
data
<
int64_t
>
();
const
T
*
inference_data
=
predict
->
data
<
T
>
();
const
auto
*
label_data
=
label
->
data
<
int64_t
>
();
auto
*
tp_data
=
true_positive
->
mutable_data
<
int64_t
>
(
ctx
.
GetPlace
());
auto
*
fn_data
=
false_negative
->
mutable_data
<
int64_t
>
(
ctx
.
GetPlace
());
...
...
@@ -66,20 +66,19 @@ class AucKernel : public framework::OpKernel<T> {
auto
*
fp_data
=
false_positive
->
mutable_data
<
int64_t
>
(
ctx
.
GetPlace
());
for
(
int
idx_thresh
=
0
;
idx_thresh
<
num_thresholds
;
idx_thresh
++
)
{
// caculate TP, FN, TN, FP for current thresh
// ca
l
culate TP, FN, TN, FP for current thresh
int64_t
tp
=
0
,
fn
=
0
,
tn
=
0
,
fp
=
0
;
for
(
size_t
i
=
0
;
i
<
batch_size
;
i
++
)
{
// NOTE: label_data used as bool, labels >0 will be treated as true.
// NOTE: label_data used as bool, labels >
0 will be treated as true.
if
(
label_data
[
i
])
{
// use first(max) data in each row
if
(
inference_data
[
i
*
inference_width
]
>=
if
(
inference_data
[
i
*
inference_width
+
1
]
>=
(
thresholds_list
[
idx_thresh
]))
{
tp
++
;
}
else
{
fn
++
;
}
}
else
{
if
(
inference_data
[
i
*
inference_width
]
>=
if
(
inference_data
[
i
*
inference_width
+
1
]
>=
(
thresholds_list
[
idx_thresh
]))
{
fp
++
;
}
else
{
...
...
@@ -94,21 +93,21 @@ class AucKernel : public framework::OpKernel<T> {
fp_data
[
idx_thresh
]
+=
fp
;
}
// epsilon to avoid divide by zero.
float
epsilon
=
1e-6
;
double
epsilon
=
1e-6
;
// Riemann sum to caculate auc.
Tensor
tp_rate
,
fp_rate
,
rec_rate
;
tp_rate
.
Resize
({
num_thresholds
});
fp_rate
.
Resize
({
num_thresholds
});
rec_rate
.
Resize
({
num_thresholds
});
float
*
tp_rate_data
=
tp_rate
.
mutable_data
<
float
>
(
ctx
.
GetPlace
());
float
*
fp_rate_data
=
fp_rate
.
mutable_data
<
float
>
(
ctx
.
GetPlace
());
float
*
rec_rate_data
=
rec_rate
.
mutable_data
<
float
>
(
ctx
.
GetPlace
());
auto
*
tp_rate_data
=
tp_rate
.
mutable_data
<
double
>
(
ctx
.
GetPlace
());
auto
*
fp_rate_data
=
fp_rate
.
mutable_data
<
double
>
(
ctx
.
GetPlace
());
auto
*
rec_rate_data
=
rec_rate
.
mutable_data
<
double
>
(
ctx
.
GetPlace
());
for
(
int
i
=
0
;
i
<
num_thresholds
;
i
++
)
{
tp_rate_data
[
i
]
=
(
static_cast
<
float
>
(
tp_data
[
i
])
+
epsilon
)
/
tp_rate_data
[
i
]
=
(
static_cast
<
double
>
(
tp_data
[
i
])
+
epsilon
)
/
(
tp_data
[
i
]
+
fn_data
[
i
]
+
epsilon
);
fp_rate_data
[
i
]
=
static_cast
<
float
>
(
fp_data
[
i
])
/
(
fp_data
[
i
]
+
tn_data
[
i
]
+
epsilon
);
rec_rate_data
[
i
]
=
(
static_cast
<
float
>
(
tp_data
[
i
])
+
epsilon
)
/
static_cast
<
double
>
(
fp_data
[
i
])
/
(
fp_data
[
i
]
+
tn_data
[
i
]
+
epsilon
);
rec_rate_data
[
i
]
=
(
static_cast
<
double
>
(
tp_data
[
i
])
+
epsilon
)
/
(
tp_data
[
i
]
+
fp_data
[
i
]
+
epsilon
);
}
*
auc_data
=
0.0
f
;
...
...
python/paddle/fluid/layers/metric_op.py
浏览文件 @
2b58c62a
...
...
@@ -114,23 +114,13 @@ def auc(input, label, curve='ROC', num_thresholds=200, topk=1):
prediction = network(image, is_infer=True)
auc_out=fluid.layers.auc(input=prediction, label=label)
"""
warnings
.
warn
(
"This interface is not recommended, fluid.layers.auc compute the auc at every minibatch,
\
but can not aggregate them and get the pass AUC, because pass
\
auc can not be averaged with weighted from the minibatch auc value.
\
Please use fluid.metrics.Auc, it can compute the auc value via Python natively,
\
which can get every minibatch and every pass auc value."
,
Warning
)
helper
=
LayerHelper
(
"auc"
,
**
locals
())
topk_out
=
helper
.
create_tmp_variable
(
dtype
=
input
.
dtype
)
topk_indices
=
helper
.
create_tmp_variable
(
dtype
=
"int64"
)
topk_out
,
topk_indices
=
nn
.
topk
(
input
,
k
=
k
)
auc_out
=
helper
.
create_tmp_variable
(
dtype
=
"float32"
)
auc_out
=
helper
.
create_tmp_variable
(
dtype
=
"float64"
)
# make tp, tn, fp, fn persistable, so that can accumulate all batches.
tp
=
helper
.
create_global_variable
(
persistable
=
True
)
tn
=
helper
.
create_global_variable
(
persistable
=
True
)
fp
=
helper
.
create_global_variable
(
persistable
=
True
)
fn
=
helper
.
create_global_variable
(
persistable
=
True
)
tp
=
helper
.
create_global_variable
(
persistable
=
True
,
dtype
=
'int64'
)
tn
=
helper
.
create_global_variable
(
persistable
=
True
,
dtype
=
'int64'
)
fp
=
helper
.
create_global_variable
(
persistable
=
True
,
dtype
=
'int64'
)
fn
=
helper
.
create_global_variable
(
persistable
=
True
,
dtype
=
'int64'
)
for
var
in
[
tp
,
tn
,
fp
,
fn
]:
helper
.
set_variable_initializer
(
var
,
Constant
(
...
...
@@ -139,8 +129,7 @@ def auc(input, label, curve='ROC', num_thresholds=200, topk=1):
helper
.
append_op
(
type
=
"auc"
,
inputs
=
{
"Out"
:
[
topk_out
],
"Indices"
:
[
topk_indices
],
"Predict"
:
[
input
],
"Label"
:
[
label
],
"TP"
:
[
tp
],
"TN"
:
[
tn
],
...
...
@@ -156,4 +145,4 @@ def auc(input, label, curve='ROC', num_thresholds=200, topk=1):
"FPOut"
:
[
fp
],
"FNOut"
:
[
fn
]
})
return
auc_out
return
auc_out
,
[
tp
,
tn
,
fp
,
fn
]
python/paddle/fluid/metrics.py
浏览文件 @
2b58c62a
...
...
@@ -591,7 +591,7 @@ class Auc(MetricBase):
for
i
in
range
(
self
.
_num_thresholds
-
2
)]
thresholds
=
[
0.0
-
kepsilon
]
+
thresholds
+
[
1.0
+
kepsilon
]
# caculate TP, FN, TN, FP count
# ca
l
culate TP, FN, TN, FP count
for
idx_thresh
,
thresh
in
enumerate
(
thresholds
):
tp
,
fn
,
tn
,
fp
=
0
,
0
,
0
,
0
for
i
,
lbl
in
enumerate
(
labels
):
...
...
python/paddle/fluid/tests/unittests/test_auc_op.py
浏览文件 @
2b58c62a
...
...
@@ -15,13 +15,13 @@
import
unittest
import
numpy
as
np
from
op_test
import
OpTest
from
paddle.fluid
import
metrics
class
TestAucOp
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"auc"
pred
=
np
.
random
.
random
((
128
,
2
)).
astype
(
"float32"
)
indices
=
np
.
random
.
randint
(
0
,
2
,
(
128
,
2
))
labels
=
np
.
random
.
randint
(
0
,
2
,
(
128
,
1
))
num_thresholds
=
200
tp
=
np
.
zeros
((
num_thresholds
,
)).
astype
(
"int64"
)
...
...
@@ -30,8 +30,7 @@ class TestAucOp(OpTest):
fn
=
np
.
zeros
((
num_thresholds
,
)).
astype
(
"int64"
)
self
.
inputs
=
{
'Out'
:
pred
,
'Indices'
:
indices
,
'Predict'
:
pred
,
'Label'
:
labels
,
'TP'
:
tp
,
'TN'
:
tn
,
...
...
@@ -39,57 +38,18 @@ class TestAucOp(OpTest):
'FN'
:
fn
}
self
.
attrs
=
{
'curve'
:
'ROC'
,
'num_thresholds'
:
num_thresholds
}
# NOTE: sklearn use a different way to generate thresholds
# which will cause the result differs slightly:
# from sklearn.metrics import roc_curve, auc
# fpr, tpr, thresholds = roc_curve(labels, pred)
# auc_value = auc(fpr, tpr)
# we caculate AUC again using numpy for testing
kepsilon
=
1e-7
# to account for floating point imprecisions
thresholds
=
[(
i
+
1
)
*
1.0
/
(
num_thresholds
-
1
)
for
i
in
range
(
num_thresholds
-
2
)]
thresholds
=
[
0.0
-
kepsilon
]
+
thresholds
+
[
1.0
+
kepsilon
]
# caculate TP, FN, TN, FP count
tp_list
=
np
.
ndarray
((
num_thresholds
,
))
fn_list
=
np
.
ndarray
((
num_thresholds
,
))
tn_list
=
np
.
ndarray
((
num_thresholds
,
))
fp_list
=
np
.
ndarray
((
num_thresholds
,
))
for
idx_thresh
,
thresh
in
enumerate
(
thresholds
):
tp
,
fn
,
tn
,
fp
=
0
,
0
,
0
,
0
for
i
,
lbl
in
enumerate
(
labels
):
if
lbl
:
if
pred
[
i
,
0
]
>=
thresh
:
tp
+=
1
else
:
fn
+=
1
else
:
if
pred
[
i
,
0
]
>=
thresh
:
fp
+=
1
else
:
tn
+=
1
tp_list
[
idx_thresh
]
=
tp
fn_list
[
idx_thresh
]
=
fn
tn_list
[
idx_thresh
]
=
tn
fp_list
[
idx_thresh
]
=
fp
epsilon
=
1e-6
tpr
=
(
tp_list
.
astype
(
"float32"
)
+
epsilon
)
/
(
tp_list
+
fn_list
+
epsilon
)
fpr
=
fp_list
.
astype
(
"float32"
)
/
(
fp_list
+
tn_list
+
epsilon
)
rec
=
(
tp_list
.
astype
(
"float32"
)
+
epsilon
)
/
(
tp_list
+
fp_list
+
epsilon
)
x
=
fpr
[:
num_thresholds
-
1
]
-
fpr
[
1
:]
y
=
(
tpr
[:
num_thresholds
-
1
]
+
tpr
[
1
:])
/
2.0
auc_value
=
np
.
sum
(
x
*
y
)
python_auc
=
metrics
.
Auc
(
name
=
"auc"
,
curve
=
'ROC'
,
num_thresholds
=
num_thresholds
)
python_auc
.
update
(
pred
,
labels
)
self
.
outputs
=
{
'AUC'
:
auc_value
,
'TPOut'
:
tp_list
,
'FNOut'
:
fn_list
,
'TNOut'
:
tn_list
,
'FPOut'
:
fp_list
'AUC'
:
python_auc
.
eval
()
,
'TPOut'
:
python_auc
.
tp_list
,
'FNOut'
:
python_auc
.
fn_list
,
'TNOut'
:
python_auc
.
tn_list
,
'FPOut'
:
python_auc
.
fp_list
}
def
test_check_output
(
self
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录