提交 99f816d3 编写于 作者: D DesmonDay

Function add: add the VisualDL log writer for pgl

上级 5782fb81
......@@ -17,6 +17,7 @@ role = os.getenv("TRAINING_ROLE", "TRAINER")
import numpy as np
from pgl.utils.logger import log
from pgl.utils.log_writer import LogWriter
import paddle.fluid as F
import paddle.fluid.layers as L
from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import StrategyFactory
......@@ -25,7 +26,6 @@ from paddle.fluid.transpiler.distribute_transpiler import DistributeTranspilerCo
from paddle.fluid.incubate.fleet.collective import fleet as cfleet
from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet as tfleet
import paddle.fluid.incubate.fleet.base.role_maker as role_maker
from tensorboardX import SummaryWriter
from paddle.fluid.transpiler.distribute_transpiler import DistributedMode
from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import TrainerRuntimeConfig
......@@ -77,7 +77,7 @@ class Learner(object):
start = time.time()
trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
if trainer_id == 0:
writer = SummaryWriter(os.path.join(self.config.output_path, "train_history"))
writer = LogWriter(os.path.join(self.config.output_path, "train_history"))
for epoch_idx in range(self.config.epoch):
for idx, batch_feed_dict in enumerate(self.model.data_loader()):
......
......@@ -19,10 +19,10 @@ import os
from datetime import datetime
import logging
from collections import defaultdict
from tensorboardX import SummaryWriter
import paddle.fluid as F
from pgl.utils.logger import log
from pgl.utils.log_writer import LogWriter
def multi_device(reader, dev_count):
......@@ -79,10 +79,10 @@ def train_and_evaluate(exe,
global_step = 0
timestamp = datetime.now().strftime("%Hh%Mm%Ss")
log_path = os.path.join(args.log_dir, "tensorboard_log_%s" % timestamp)
log_path = os.path.join(args.log_dir, "log_%s" % timestamp)
_create_if_not_exist(log_path)
writer = SummaryWriter(log_path)
writer = LogWriter(log_path)
best_valid_score = 0.0
for e in range(args.epoch):
......@@ -99,7 +99,7 @@ def train_and_evaluate(exe,
ret = model.metrics.parse(ret)
if global_step % args.train_log_step == 0:
writer.add_scalar(
"batch_loss", ret['loss'], global_step=global_step)
"batch_loss", ret['loss'], global_step)
log.info("epoch: %d | step: %d | loss: %.4f " %
(e, global_step, ret['loss']))
......@@ -111,7 +111,7 @@ def train_and_evaluate(exe,
for key, value in valid_ret.items():
message += "%s %.4f | " % (key, value)
writer.add_scalar(
"eval_%s" % key, value, global_step=global_step)
"eval_%s" % key, value, global_step)
log.info(message)
# testing
......@@ -120,7 +120,7 @@ def train_and_evaluate(exe,
for key, value in test_ret.items():
message += "%s %.4f | " % (key, value)
writer.add_scalar(
"test_%s" % key, value, global_step=global_step)
"test_%s" % key, value, global_step)
log.info(message)
# evaluate after one epoch
......@@ -128,7 +128,7 @@ def train_and_evaluate(exe,
message = "epoch %s valid: " % e
for key, value in valid_ret.items():
message += "%s %.4f | " % (key, value)
writer.add_scalar("eval_%s" % key, value, global_step=global_step)
writer.add_scalar("eval_%s" % key, value, global_step)
log.info(message)
# testing
......@@ -136,7 +136,7 @@ def train_and_evaluate(exe,
message = "epoch %s test: " % e
for key, value in test_ret.items():
message += "%s %.4f | " % (key, value)
writer.add_scalar("test_%s" % key, value, global_step=global_step)
writer.add_scalar("test_%s" % key, value, global_step)
log.info(message)
message = "epoch %s best %s result | " % (e, args.eval_metrics)
......
......@@ -18,7 +18,7 @@ import numpy as np
import sys
import os
import paddle.fluid as F
from tensorboardX import SummaryWriter
from pgl.utils.log_writer import LogWriter
from ogb.linkproppred import Evaluator
from ogb.linkproppred import LinkPropPredDataset
......@@ -115,7 +115,7 @@ def train_and_evaluate(exe,
log_path = os.path.join(output_path, "log")
_create_if_not_exist(log_path)
writer = SummaryWriter(log_path)
writer = LogWriter(log_path)
best_model = 0
for e in range(epoch):
......@@ -134,7 +134,7 @@ def train_and_evaluate(exe,
if global_step % train_log_step == 0:
for key, value in ret.items():
writer.add_scalar(
'train_' + key, value, global_step=global_step)
'train_' + key, value, global_step)
global_step += 1
if global_step % eval_step == 0:
......@@ -149,7 +149,7 @@ def train_and_evaluate(exe,
sys.stderr.write(json.dumps(eval_ret, indent=4) + "\n")
for key, value in eval_ret.items():
writer.add_scalar(key, value, global_step=global_step)
writer.add_scalar(key, value, global_step)
if eval_ret["valid_hits@100"] > best_model:
F.io.save_persistables(
......@@ -170,7 +170,7 @@ def train_and_evaluate(exe,
sys.stderr.write(json.dumps(eval_ret, indent=4) + "\n")
for key, value in eval_ret.items():
writer.add_scalar(key, value, global_step=global_step)
writer.add_scalar(key, value, global_step)
if eval_ret["valid_hits@100"] > best_model:
F.io.save_persistables(exe,
......
""" log writer setup
"""
import sys

# Expose one writer class under the name ``LogWriter`` so training scripts
# can import it from here without caring which logging backend is installed.
#
# The interpreter's major version is read from ``sys.version_info`` rather
# than by parsing the first character of the ``sys.version`` string, and
# ``>= 3`` keeps every non-Python-2 interpreter on the VisualDL branch.
if sys.version_info[0] >= 3:
    # Python 3 and later: use VisualDL's writer directly.
    from visualdl import LogWriter
else:
    # Python 2: fall back to tensorboardX and re-export its writer under
    # the shared name. ``SummaryWriter`` stays importable from this module.
    from tensorboardX import SummaryWriter
    LogWriter = SummaryWriter
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册