Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
hapi
提交
47cd178a
H
hapi
项目概览
PaddlePaddle
/
hapi
通知
11
Star
2
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
4
列表
看板
标记
里程碑
合并请求
7
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
H
hapi
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
4
Issue
4
列表
看板
标记
里程碑
合并请求
7
合并请求
7
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
47cd178a
编写于
3月 23, 2020
作者:
L
LielinJiang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix eval samples
上级
abc1ecaa
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
25 addition
and
14 deletion
+25
-14
distributed.py
distributed.py
+5
-5
model.py
model.py
+20
-9
未找到文件。
distributed.py
浏览文件 @
47cd178a
...
...
@@ -110,6 +110,10 @@ class DistributedBatchSampler(BatchSampler):
return
num_samples
//
self
.
batch_size
def
_all_gather
(
x
,
nranks
,
ring_id
=
0
,
use_calc_stream
=
True
):
return
_c_allgather
(
x
,
nranks
,
ring_id
=
ring_id
,
use_calc_stream
=
use_calc_stream
)
def
get_local_rank
():
return
Env
().
local_rank
...
...
@@ -203,11 +207,7 @@ def prepare_distributed_context(place=None):
exe
.
run
(
communicator_prog
)
if
fluid
.
in_dygraph_mode
():
cnt
=
0
while
fluid
.
in_dygraph_mode
():
cnt
+=
1
print
(
'debug'
,
cnt
)
fluid
.
disable_dygraph
()
fluid
.
disable_dygraph
()
_init_context
()
fluid
.
enable_dygraph
(
place
)
else
:
...
...
model.py
浏览文件 @
47cd178a
...
...
@@ -143,7 +143,9 @@ class StaticGraphAdapter(object):
self
.
_progs
=
{}
self
.
_compiled_progs
=
{}
self
.
_merge_count
=
{
'eval'
:
0
,
'test'
:
0
}
self
.
_merge_count
=
{
'eval_total'
:
0
,
'test_total'
:
0
,
'eval_batch'
:
0
,
'test_batch'
:
0
}
self
.
_nranks
=
distributed
.
Env
().
nranks
self
.
_local_rank
=
distributed
.
Env
().
local_rank
...
...
@@ -354,12 +356,14 @@ class StaticGraphAdapter(object):
total_size
=
len
(
self
.
model
.
_test_dataloader
.
dataset
)
# TODO: fixme if have better way to get batch size
samples
=
state
[
0
].
shape
[
0
]
current_count
=
self
.
_merge_count
.
get
(
self
.
mode
,
0
)
current_count
=
self
.
_merge_count
.
get
(
self
.
mode
+
'_total'
,
0
)
if
current_count
+
samples
>
total_size
:
state
=
[
s
[:
total_size
-
current_count
,
...]
for
s
in
state
]
self
.
_merge_count
[
self
.
mode
]
=
0
self
.
_merge_count
[
self
.
mode
+
'_total'
]
=
0
self
.
_merge_count
[
self
.
mode
+
'_batch'
]
=
total_size
-
current_count
else
:
self
.
_merge_count
[
self
.
mode
]
+=
samples
self
.
_merge_count
[
self
.
mode
+
'_total'
]
+=
samples
self
.
_merge_count
[
self
.
mode
+
'_batch'
]
=
samples
metrics
.
append
(
metric
.
update
(
*
state
))
return
(
losses
,
metrics
)
if
len
(
metrics
)
>
0
else
losses
...
...
@@ -498,7 +502,8 @@ class DynamicGraphAdapter(object):
self
.
model
=
model
self
.
_nranks
=
distributed
.
Env
().
nranks
self
.
_local_rank
=
distributed
.
Env
().
local_rank
self
.
_merge_count
=
{
'eval'
:
0
,
'test'
:
0
}
self
.
_merge_count
=
{
'eval_total'
:
0
,
'test_total'
:
0
,
'eval_batch'
:
0
,
'test_batch'
:
0
}
if
self
.
_nranks
>
1
:
self
.
ddp_model
=
distributed
.
DistributedDataParallel
(
self
.
model
)
...
...
@@ -564,13 +569,16 @@ class DynamicGraphAdapter(object):
if
self
.
model
.
_test_dataloader
is
not
None
and
self
.
_nranks
>
1
:
total_size
=
len
(
self
.
model
.
_test_dataloader
.
dataset
)
samples
=
outputs
[
0
].
shape
[
0
]
current_count
=
self
.
_merge_count
.
get
(
self
.
mode
,
0
)
current_count
=
self
.
_merge_count
.
get
(
self
.
mode
+
'_total'
,
0
)
if
current_count
+
samples
>
total_size
:
outputs
=
[
o
[:
total_size
-
metric
.
count
[
0
]]
for
o
in
outputs
]
labels
=
[
l
[:
total_size
-
metric
.
count
[
0
]]
for
l
in
labels
]
self
.
_merge_count
[
self
.
mode
]
=
0
self
.
_merge_count
[
self
.
mode
+
'_total'
]
=
0
self
.
_merge_count
[
self
.
mode
+
'_batch'
]
=
total_size
-
current_count
else
:
self
.
_merge_count
[
self
.
mode
]
+=
samples
self
.
_merge_count
[
self
.
mode
+
'_total'
]
+=
samples
self
.
_merge_count
[
self
.
mode
+
'_batch'
]
=
samples
metric_outs
=
metric
.
add_metric_op
(
to_list
(
outputs
),
labels
)
m
=
metric
.
update
(
*
[
to_numpy
(
m
)
for
m
in
to_list
(
metric_outs
)])
...
...
@@ -966,7 +974,10 @@ class Model(fluid.dygraph.Layer):
logs
[
k
]
=
v
logs
[
'step'
]
=
step
logs
[
'batch_size'
]
=
batch_size
if
mode
==
'train'
or
self
.
_adapter
.
_merge_count
[
mode
+
'_batch'
]
<=
0
:
logs
[
'batch_size'
]
=
batch_size
*
distributed
.
Env
().
nranks
else
:
logs
[
'batch_size'
]
=
self
.
_adapter
.
_merge_count
[
mode
+
'_batch'
]
cbks
.
on_batch_end
(
mode
,
step
,
logs
)
self
.
_reset_metrics
()
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录