未验证 提交 cc6dcc7d 作者: Chitsing KUI 提交者: GitHub

[LAUNCH] add log overwrite flag (#53608) (#53757)

* add log overwrite flag

* use strtobool
上级 adaa2510
......@@ -14,6 +14,7 @@
import os
from argparse import REMAINDER, ArgumentParser
from distutils.util import strtobool
env_args_mapping = {
'POD_IP': 'host',
......@@ -22,6 +23,7 @@ env_args_mapping = {
'PADDLE_NNODES': 'nnodes',
'PADDLE_RUN_MODE': 'run_mode',
'PADDLE_LOG_LEVEL': 'log_level',
'PADDLE_LOG_OVERWRITE': 'log_overwrite',
'PADDLE_NPROC_PER_NODE': 'nproc_per_node',
'PADDLE_JOB_ID': 'job_id',
'PADDLE_RANK': 'rank',
......@@ -61,7 +63,7 @@ def parse_args():
)
base_group.add_argument(
"--legacy", type=bool, default=False, help="use legacy launch"
"--legacy", type=strtobool, default=False, help="use legacy launch"
)
base_group.add_argument(
......@@ -72,6 +74,13 @@ def parse_args():
"--log_level", type=str, default="INFO", help="log level. Default INFO"
)
base_group.add_argument(
"--log_overwrite",
type=strtobool,
default=False,
help="overwrite exits logfiles. Default False",
)
base_group.add_argument(
"--nnodes",
type=str,
......
......@@ -205,6 +205,7 @@ class Controller(ControllerBase):
c = Container(
entrypoint=(entrypoint or self._get_entrypoint()),
env=(self.ctx.get_envs() if use_ctx_env else {}),
overwrite_log=self.ctx.args.log_overwrite,
)
c.outfile, c.errfile = self._get_out_err_file(out, err)
c.update_env(envs)
......@@ -286,7 +287,7 @@ class Controller(ControllerBase):
)
try:
os.makedirs(os.path.dirname(f), exist_ok=True)
with open(f, 'w') as fd:
with open(f, container.log_mode) as fd:
for k, v in sorted(container.env.items()):
fd.write(str(f"{k}={v}\n"))
except Exception as e:
......
......@@ -25,7 +25,7 @@ class Container:
TODO(kuizhiqing) A container can be run by process/thread or just a callable function
'''
def __init__(self, entrypoint=[], rank=-1, env={}):
def __init__(self, entrypoint=[], rank=-1, env={}, overwrite_log=False):
self._entrypoint = entrypoint
self._rank = rank
self._out = None
......@@ -39,6 +39,8 @@ class Container:
self._log_handler = None
self._shell = False
self.log_mode = 'w' if overwrite_log else 'a'
@property
def env(self):
return self._env
......@@ -104,7 +106,7 @@ class Container:
d = os.path.dirname(pth)
if not os.path.isdir(d):
os.makedirs(d, exist_ok=True)
return open(pth, 'a')
return open(pth, self.log_mode)
except:
return None
......@@ -120,7 +122,7 @@ class Container:
elif self._err:
self._stderr = self._get_fd(self._err) or sys.stderr
if not self._log_handler:
if self._out and not self._log_handler:
self._log_handler = open(self._out)
self._log_handler.seek(0, 2)
self._log_start_offset = self._log_handler.tell()
......@@ -179,7 +181,7 @@ class Container:
def logs(self, fn=None, offset=0, whence=1, limit=1000):
if not self._log_handler:
self._log_handler = open(self._out)
return
if fn is None:
fn = sys.stdout
......@@ -201,7 +203,7 @@ class Container:
def tail(self, length=3000):
if not self._log_handler:
self._log_handler = open(self._out)
return
try:
self._log_handler.seek(0, 2)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册