Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleClas
提交
4e6c36e2
P
PaddleClas
项目概览
PaddlePaddle
/
PaddleClas
1 年多 前同步成功
通知
115
Star
4999
Fork
1114
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
19
列表
看板
标记
里程碑
合并请求
6
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleClas
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
19
Issue
19
列表
看板
标记
里程碑
合并请求
6
合并请求
6
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
4e6c36e2
编写于
4月 22, 2022
作者:
C
cuicheng01
提交者:
GitHub
4月 22, 2022
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1833 from TingquanGao/dev/fix_dist_loss
fix calc metric error and calc loss error in distributed.
上级
b86ddf47
b5a7eea5
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
80 addition
and
70 deletion
+80
-70
ppcls/engine/evaluation/classification.py
ppcls/engine/evaluation/classification.py
+51
-48
ppcls/engine/evaluation/retrieval.py
ppcls/engine/evaluation/retrieval.py
+29
-22
未找到文件。
ppcls/engine/evaluation/classification.py
浏览文件 @
4e6c36e2
...
...
@@ -73,68 +73,71 @@ def classification_eval(engine, epoch_id=0):
},
level
=
amp_level
):
out
=
engine
.
model
(
batch
[
0
])
# calc loss
if
engine
.
eval_loss_func
is
not
None
:
loss_dict
=
engine
.
eval_loss_func
(
out
,
batch
[
1
])
for
key
in
loss_dict
:
if
key
not
in
output_info
:
output_info
[
key
]
=
AverageMeter
(
key
,
'7.5f'
)
output_info
[
key
].
update
(
loss_dict
[
key
].
numpy
()[
0
],
batch_size
)
else
:
out
=
engine
.
model
(
batch
[
0
])
# calc loss
if
engine
.
eval_loss_func
is
not
None
:
loss_dict
=
engine
.
eval_loss_func
(
out
,
batch
[
1
])
for
key
in
loss_dict
:
if
key
not
in
output_info
:
output_info
[
key
]
=
AverageMeter
(
key
,
'7.5f'
)
output_info
[
key
].
update
(
loss_dict
[
key
].
numpy
()[
0
],
batch_size
)
# just for DistributedBatchSampler issue: repeat sampling
current_samples
=
batch_size
*
paddle
.
distributed
.
get_world_size
()
accum_samples
+=
current_samples
# calc metric
if
engine
.
eval_metric_func
is
not
None
:
if
paddle
.
distributed
.
get_world_size
()
>
1
:
label_list
=
[]
paddle
.
distributed
.
all_gather
(
label_list
,
batch
[
1
])
labels
=
paddle
.
concat
(
label_list
,
0
)
if
isinstance
(
out
,
dict
):
if
"Student"
in
out
:
out
=
out
[
"Student"
]
if
isinstance
(
out
,
dict
):
out
=
out
[
"logits"
]
elif
"logits"
in
out
:
# gather Tensor when distributed
if
paddle
.
distributed
.
get_world_size
()
>
1
:
label_list
=
[]
paddle
.
distributed
.
all_gather
(
label_list
,
batch
[
1
])
labels
=
paddle
.
concat
(
label_list
,
0
)
if
isinstance
(
out
,
dict
):
if
"Student"
in
out
:
out
=
out
[
"Student"
]
if
isinstance
(
out
,
dict
):
out
=
out
[
"logits"
]
else
:
msg
=
"Error: Wrong key in out!"
raise
Exception
(
msg
)
if
isinstance
(
out
,
list
):
pred
=
[]
for
x
in
out
:
pred_list
=
[]
paddle
.
distributed
.
all_gather
(
pred_list
,
x
)
pred_x
=
paddle
.
concat
(
pred_list
,
0
)
pred
.
append
(
pred_x
)
elif
"logits"
in
out
:
out
=
out
[
"logits"
]
else
:
msg
=
"Error: Wrong key in out!"
raise
Exception
(
msg
)
if
isinstance
(
out
,
list
):
preds
=
[]
for
x
in
out
:
pred_list
=
[]
paddle
.
distributed
.
all_gather
(
pred_list
,
out
)
pred
=
paddle
.
concat
(
pred_list
,
0
)
paddle
.
distributed
.
all_gather
(
pred_list
,
x
)
pred_x
=
paddle
.
concat
(
pred_list
,
0
)
preds
.
append
(
pred_x
)
else
:
pred_list
=
[]
paddle
.
distributed
.
all_gather
(
pred_list
,
out
)
preds
=
paddle
.
concat
(
pred_list
,
0
)
if
accum_samples
>
total_samples
and
not
engine
.
use_dali
:
pred
=
pred
[:
total_samples
+
current_samples
-
if
accum_samples
>
total_samples
and
not
engine
.
use_dali
:
preds
=
preds
[:
total_samples
+
current_samples
-
accum_samples
]
labels
=
labels
[:
total_samples
+
current_samples
-
accum_samples
]
labels
=
labels
[:
total_samples
+
current_samples
-
accum_samples
]
current_samples
=
total_samples
+
current_samples
-
accum_samples
metric_dict
=
engine
.
eval_metric_func
(
pred
,
labels
)
current_samples
=
total_samples
+
current_samples
-
accum_samples
else
:
labels
=
batch
[
1
]
preds
=
out
# calc loss
if
engine
.
eval_loss_func
is
not
None
:
if
engine
.
amp
and
engine
.
config
[
"AMP"
].
get
(
"use_fp16_test"
,
False
):
amp_level
=
engine
.
config
[
'AMP'
].
get
(
"level"
,
"O1"
).
upper
()
with
paddle
.
amp
.
auto_cast
(
custom_black_list
=
{
"flatten_contiguous_range"
,
"greater_than"
},
level
=
amp_level
):
loss_dict
=
engine
.
eval_loss_func
(
preds
,
labels
)
else
:
metric_dict
=
engine
.
eval_metric_func
(
out
,
batch
[
1
]
)
loss_dict
=
engine
.
eval_loss_func
(
preds
,
labels
)
for
key
in
loss_dict
:
if
key
not
in
output_info
:
output_info
[
key
]
=
AverageMeter
(
key
,
'7.5f'
)
output_info
[
key
].
update
(
loss_dict
[
key
].
numpy
()[
0
],
current_samples
)
# calc metric
if
engine
.
eval_metric_func
is
not
None
:
metric_dict
=
engine
.
eval_metric_func
(
preds
,
labels
)
for
key
in
metric_dict
:
if
metric_key
is
None
:
metric_key
=
key
...
...
ppcls/engine/evaluation/retrieval.py
浏览文件 @
4e6c36e2
...
...
@@ -89,9 +89,6 @@ def retrieval_eval(engine, epoch_id=0):
def
cal_feature
(
engine
,
name
=
'gallery'
):
all_feas
=
None
all_image_id
=
None
all_unique_id
=
None
has_unique_id
=
False
if
name
==
'gallery'
:
...
...
@@ -103,6 +100,9 @@ def cal_feature(engine, name='gallery'):
else
:
raise
RuntimeError
(
"Only support gallery or query dataset"
)
batch_feas_list
=
[]
img_id_list
=
[]
unique_id_list
=
[]
max_iter
=
len
(
dataloader
)
-
1
if
platform
.
system
()
==
"Windows"
else
len
(
dataloader
)
for
idx
,
batch
in
enumerate
(
dataloader
):
# load is very time-consuming
...
...
@@ -140,32 +140,39 @@ def cal_feature(engine, name='gallery'):
if
engine
.
config
[
"Global"
].
get
(
"feature_binarize"
)
==
"sign"
:
batch_feas
=
paddle
.
sign
(
batch_feas
).
astype
(
"float32"
)
if
all_feas
is
None
:
all_feas
=
batch_feas
if
paddle
.
distributed
.
get_world_size
()
>
1
:
batch_feas_gather
=
[]
img_id_gather
=
[]
unique_id_gather
=
[]
paddle
.
distributed
.
all_gather
(
batch_feas_gather
,
batch_feas
)
paddle
.
distributed
.
all_gather
(
img_id_gather
,
batch
[
1
])
batch_feas_list
.
append
(
paddle
.
concat
(
batch_feas_gather
))
img_id_list
.
append
(
paddle
.
concat
(
img_id_gather
))
if
has_unique_id
:
all_unique_id
=
batch
[
2
]
all_image_id
=
batch
[
1
]
paddle
.
distributed
.
all_gather
(
unique_id_gather
,
batch
[
2
])
unique_id_list
.
append
(
paddle
.
concat
(
unique_id_gather
))
else
:
all_feas
=
paddle
.
concat
([
all_feas
,
batch_feas
]
)
all_image_id
=
paddle
.
concat
([
all_image_id
,
batch
[
1
]
])
batch_feas_list
.
append
(
batch_feas
)
img_id_list
.
append
(
batch
[
1
])
if
has_unique_id
:
all_unique_id
=
paddle
.
concat
([
all_unique_id
,
batch
[
2
]
])
unique_id_list
.
append
(
batch
[
2
])
if
engine
.
use_dali
:
dataloader
.
reset
()
if
paddle
.
distributed
.
get_world_size
()
>
1
:
feat_list
=
[]
img_id_list
=
[]
unique_id_list
=
[]
paddle
.
distributed
.
all_gather
(
feat_list
,
all_feas
)
paddle
.
distributed
.
all_gather
(
img_id_list
,
all_image_id
)
all_feas
=
paddle
.
concat
(
feat_list
,
axis
=
0
)
all_image_id
=
paddle
.
concat
(
img_id_list
,
axis
=
0
)
if
has_unique_id
:
paddle
.
distributed
.
all_gather
(
unique_id_list
,
all_unique_id
)
all_unique_id
=
paddle
.
concat
(
unique_id_list
,
axis
=
0
)
all_feas
=
paddle
.
concat
(
batch_feas_list
)
all_img_id
=
paddle
.
concat
(
img_id_list
)
if
has_unique_id
:
all_unique_id
=
paddle
.
concat
(
unique_id_list
)
# just for DistributedBatchSampler issue: repeat sampling
total_samples
=
len
(
dataloader
.
dataset
)
if
not
engine
.
use_dali
else
dataloader
.
size
all_feas
=
all_feas
[:
total_samples
]
all_img_id
=
all_img_id
[:
total_samples
]
if
has_unique_id
:
all_unique_id
=
all_unique_id
[:
total_samples
]
logger
.
info
(
"Build {} done, all feat shape: {}, begin to eval.."
.
format
(
name
,
all_feas
.
shape
))
return
all_feas
,
all_im
age
_id
,
all_unique_id
return
all_feas
,
all_im
g
_id
,
all_unique_id
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录