Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PGL
提交
99f816d3
P
PGL
项目概览
PaddlePaddle
/
PGL
通知
76
Star
4
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
11
列表
看板
标记
里程碑
合并请求
1
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PGL
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
11
Issue
11
列表
看板
标记
里程碑
合并请求
1
合并请求
1
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
99f816d3
编写于
5月 27, 2020
作者:
D
DesmonDay
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Function add: add the VisualDL log writer for pgl
上级
5782fb81
变更
4
隐藏空白更改
内联
并排
Showing
4 changed files
with
29 additions
and
15 deletions
+29
-15
examples/erniesage/learner.py
examples/erniesage/learner.py
+2
-2
ogb_examples/graphproppred/mol/monitor/train_monitor.py
ogb_examples/graphproppred/mol/monitor/train_monitor.py
+8
-8
ogb_examples/linkproppred/ogbl-ppa/monitor/train_monitor.py
ogb_examples/linkproppred/ogbl-ppa/monitor/train_monitor.py
+5
-5
pgl/utils/log_writer.py
pgl/utils/log_writer.py
+14
-0
未找到文件。
examples/erniesage/learner.py
浏览文件 @
99f816d3
...
...
@@ -17,6 +17,7 @@ role = os.getenv("TRAINING_ROLE", "TRAINER")
import
numpy
as
np
from
pgl.utils.logger
import
log
from
pgl.utils.log_writer
import
LogWriter
import
paddle.fluid
as
F
import
paddle.fluid.layers
as
L
from
paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler
import
StrategyFactory
...
...
@@ -25,7 +26,6 @@ from paddle.fluid.transpiler.distribute_transpiler import DistributeTranspilerCo
from
paddle.fluid.incubate.fleet.collective
import
fleet
as
cfleet
from
paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler
import
fleet
as
tfleet
import
paddle.fluid.incubate.fleet.base.role_maker
as
role_maker
from
tensorboardX
import
SummaryWriter
from
paddle.fluid.transpiler.distribute_transpiler
import
DistributedMode
from
paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy
import
TrainerRuntimeConfig
...
...
@@ -77,7 +77,7 @@ class Learner(object):
start
=
time
.
time
()
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_TRAINER_ID"
,
"0"
))
if
trainer_id
==
0
:
writer
=
Summary
Writer
(
os
.
path
.
join
(
self
.
config
.
output_path
,
"train_history"
))
writer
=
Log
Writer
(
os
.
path
.
join
(
self
.
config
.
output_path
,
"train_history"
))
for
epoch_idx
in
range
(
self
.
config
.
epoch
):
for
idx
,
batch_feed_dict
in
enumerate
(
self
.
model
.
data_loader
()):
...
...
ogb_examples/graphproppred/mol/monitor/train_monitor.py
浏览文件 @
99f816d3
...
...
@@ -19,10 +19,10 @@ import os
from
datetime
import
datetime
import
logging
from
collections
import
defaultdict
from
tensorboardX
import
SummaryWriter
import
paddle.fluid
as
F
from
pgl.utils.logger
import
log
from
pgl.utils.log_writer
import
LogWriter
def
multi_device
(
reader
,
dev_count
):
...
...
@@ -79,10 +79,10 @@ def train_and_evaluate(exe,
global_step
=
0
timestamp
=
datetime
.
now
().
strftime
(
"%Hh%Mm%Ss"
)
log_path
=
os
.
path
.
join
(
args
.
log_dir
,
"
tensorboard_
log_%s"
%
timestamp
)
log_path
=
os
.
path
.
join
(
args
.
log_dir
,
"log_%s"
%
timestamp
)
_create_if_not_exist
(
log_path
)
writer
=
Summary
Writer
(
log_path
)
writer
=
Log
Writer
(
log_path
)
best_valid_score
=
0.0
for
e
in
range
(
args
.
epoch
):
...
...
@@ -99,7 +99,7 @@ def train_and_evaluate(exe,
ret
=
model
.
metrics
.
parse
(
ret
)
if
global_step
%
args
.
train_log_step
==
0
:
writer
.
add_scalar
(
"batch_loss"
,
ret
[
'loss'
],
global_step
=
global_step
)
"batch_loss"
,
ret
[
'loss'
],
global_step
)
log
.
info
(
"epoch: %d | step: %d | loss: %.4f "
%
(
e
,
global_step
,
ret
[
'loss'
]))
...
...
@@ -111,7 +111,7 @@ def train_and_evaluate(exe,
for
key
,
value
in
valid_ret
.
items
():
message
+=
"%s %.4f | "
%
(
key
,
value
)
writer
.
add_scalar
(
"eval_%s"
%
key
,
value
,
global_step
=
global_step
)
"eval_%s"
%
key
,
value
,
global_step
)
log
.
info
(
message
)
# testing
...
...
@@ -120,7 +120,7 @@ def train_and_evaluate(exe,
for
key
,
value
in
test_ret
.
items
():
message
+=
"%s %.4f | "
%
(
key
,
value
)
writer
.
add_scalar
(
"test_%s"
%
key
,
value
,
global_step
=
global_step
)
"test_%s"
%
key
,
value
,
global_step
)
log
.
info
(
message
)
# evaluate after one epoch
...
...
@@ -128,7 +128,7 @@ def train_and_evaluate(exe,
message
=
"epoch %s valid: "
%
e
for
key
,
value
in
valid_ret
.
items
():
message
+=
"%s %.4f | "
%
(
key
,
value
)
writer
.
add_scalar
(
"eval_%s"
%
key
,
value
,
global_step
=
global_step
)
writer
.
add_scalar
(
"eval_%s"
%
key
,
value
,
global_step
)
log
.
info
(
message
)
# testing
...
...
@@ -136,7 +136,7 @@ def train_and_evaluate(exe,
message
=
"epoch %s test: "
%
e
for
key
,
value
in
test_ret
.
items
():
message
+=
"%s %.4f | "
%
(
key
,
value
)
writer
.
add_scalar
(
"test_%s"
%
key
,
value
,
global_step
=
global_step
)
writer
.
add_scalar
(
"test_%s"
%
key
,
value
,
global_step
)
log
.
info
(
message
)
message
=
"epoch %s best %s result | "
%
(
e
,
args
.
eval_metrics
)
...
...
ogb_examples/linkproppred/ogbl-ppa/monitor/train_monitor.py
浏览文件 @
99f816d3
...
...
@@ -18,7 +18,7 @@ import numpy as np
import
sys
import
os
import
paddle.fluid
as
F
from
tensorboardX
import
Summary
Writer
from
pgl.utils.log_writer
import
Log
Writer
from
ogb.linkproppred
import
Evaluator
from
ogb.linkproppred
import
LinkPropPredDataset
...
...
@@ -115,7 +115,7 @@ def train_and_evaluate(exe,
log_path
=
os
.
path
.
join
(
output_path
,
"log"
)
_create_if_not_exist
(
log_path
)
writer
=
Summary
Writer
(
log_path
)
writer
=
Log
Writer
(
log_path
)
best_model
=
0
for
e
in
range
(
epoch
):
...
...
@@ -134,7 +134,7 @@ def train_and_evaluate(exe,
if
global_step
%
train_log_step
==
0
:
for
key
,
value
in
ret
.
items
():
writer
.
add_scalar
(
'train_'
+
key
,
value
,
global_step
=
global_step
)
'train_'
+
key
,
value
,
global_step
)
global_step
+=
1
if
global_step
%
eval_step
==
0
:
...
...
@@ -149,7 +149,7 @@ def train_and_evaluate(exe,
sys
.
stderr
.
write
(
json
.
dumps
(
eval_ret
,
indent
=
4
)
+
"
\n
"
)
for
key
,
value
in
eval_ret
.
items
():
writer
.
add_scalar
(
key
,
value
,
global_step
=
global_step
)
writer
.
add_scalar
(
key
,
value
,
global_step
)
if
eval_ret
[
"valid_hits@100"
]
>
best_model
:
F
.
io
.
save_persistables
(
...
...
@@ -170,7 +170,7 @@ def train_and_evaluate(exe,
sys
.
stderr
.
write
(
json
.
dumps
(
eval_ret
,
indent
=
4
)
+
"
\n
"
)
for
key
,
value
in
eval_ret
.
items
():
writer
.
add_scalar
(
key
,
value
,
global_step
=
global_step
)
writer
.
add_scalar
(
key
,
value
,
global_step
)
if
eval_ret
[
"valid_hits@100"
]
>
best_model
:
F
.
io
.
save_persistables
(
exe
,
...
...
pgl/utils/log_writer.py
0 → 100644
浏览文件 @
99f816d3
""" log writer setup

Exposes a single ``LogWriter`` name so callers do not need to know which
logging backend is in use:

* on Python 3 and later, ``LogWriter`` is ``visualdl.LogWriter``;
* otherwise, ``LogWriter`` is ``tensorboardX.SummaryWriter``.

NOTE(review): the two backends are separate libraries, so their method
signatures may not share keyword names; passing arguments such as the step
positionally is presumably the safe choice -- confirm against both APIs.
"""
import sys

# Compare the structured version tuple rather than parsing the first
# character of the ``sys.version`` string, which breaks for any
# two-digit major version.
if sys.version_info[0] >= 3:
    from visualdl import LogWriter
else:
    # ``SummaryWriter`` stays bound at module level, as before, and is
    # re-exported under the backend-neutral name.
    from tensorboardX import SummaryWriter
    LogWriter = SummaryWriter
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录