Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
45df9be8
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
45df9be8
编写于
9月 28, 2022
作者:
R
Ruibiao Chen
提交者:
GitHub
9月 28, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Print IPS in auto parallel Engine (#46554)
上级
3cbf0e93
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
32 addition
and
26 deletion
+32
-26
python/paddle/distributed/auto_parallel/engine.py
python/paddle/distributed/auto_parallel/engine.py
+32
-26
未找到文件。
python/paddle/distributed/auto_parallel/engine.py
浏览文件 @
45df9be8
...
...
@@ -23,7 +23,7 @@ from collections import defaultdict
import
paddle
import
paddle.utils
as
utils
from
paddle
import
fluid
,
static
from
paddle
import
fluid
,
profiler
,
static
from
paddle.jit
import
to_static
from
paddle.metric
import
Metric
from
paddle.static
import
InputSpec
...
...
@@ -570,7 +570,8 @@ class Engine:
step
=
None
,
lr
=
None
,
fetch_new_names
=
None
,
fetch_sections
=
None
):
fetch_sections
=
None
,
profiler_log
=
""
):
prefix
=
"[{}] "
.
format
(
mode
)
logs
=
{}
if
epoch
is
not
None
:
...
...
@@ -596,7 +597,7 @@ class Engine:
else
:
for
i
in
range
(
section_start
,
section_end
):
logs
[
fetch_new_names
[
i
]
+
": {} "
]
=
outs
[
i
]
string
=
prefix
+
''
.
join
(
list
(
logs
.
keys
()))
string
=
prefix
+
''
.
join
(
list
(
logs
.
keys
()))
+
profiler_log
self
.
_logger
.
info
(
string
.
format
(
*
list
(
logs
.
values
())))
def
fit
(
self
,
...
...
@@ -695,29 +696,34 @@ class Engine:
mode
=
self
.
mode
)
lr_scheduler
=
self
.
_get_lr_scheduler
(
self
.
main_program
)
for
epoch
in
range
(
epochs
):
for
step
,
_
in
enumerate
(
train_dataloader
):
try
:
outs
=
self
.
_executor
.
run
(
self
.
main_program
,
fetch_list
=
fetch_list
,
use_program_cache
=
self
.
_strategy
.
use_cache
,
return_numpy
=
self
.
_strategy
.
return_numpy
)
except
core
.
EOFException
:
break
if
lr_scheduler
and
step
%
self
.
_k_steps
==
0
:
lr_scheduler
.
step
()
lr
=
self
.
_get_lr
(
self
.
_lr_optimizer
)
self
.
_print_log
(
outs
,
self
.
mode
,
epoch
,
step
,
lr
,
fetch_new_names
,
fetch_sections
)
if
valid_data
and
epoch
%
valid_freq
==
0
:
self
.
evaluate
(
valid_data
,
valid_sample_split
,
batch_size
,
valid_steps
,
collate_fn
,
callbacks
)
self
.
_switch_mode
(
"train"
)
else
:
self
.
_reset_metrics
()
return
outs
with
profiler
.
Profiler
(
timer_only
=
True
)
as
prof
:
for
epoch
in
range
(
epochs
):
for
step
,
_
in
enumerate
(
train_dataloader
):
try
:
outs
=
self
.
_executor
.
run
(
self
.
main_program
,
fetch_list
=
fetch_list
,
use_program_cache
=
self
.
_strategy
.
use_cache
,
return_numpy
=
self
.
_strategy
.
return_numpy
)
except
core
.
EOFException
:
break
if
lr_scheduler
and
step
%
self
.
_k_steps
==
0
:
lr_scheduler
.
step
()
lr
=
self
.
_get_lr
(
self
.
_lr_optimizer
)
prof
.
step
()
self
.
_print_log
(
outs
,
self
.
mode
,
epoch
,
step
,
lr
,
fetch_new_names
,
fetch_sections
,
prof
.
step_info
())
if
valid_data
and
epoch
%
valid_freq
==
0
:
self
.
evaluate
(
valid_data
,
valid_sample_split
,
batch_size
,
valid_steps
,
collate_fn
,
callbacks
)
self
.
_switch_mode
(
"train"
)
else
:
self
.
_reset_metrics
()
return
outs
def
evaluate
(
self
,
valid_data
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录