未验证 提交 90b95bec 编写于 作者: K kuizhiqing 提交者: GitHub

[launch] fix: make log output more stable; default to stdout (#41314)

上级 0f6412c0
......@@ -44,6 +44,7 @@ class Node(object):
return self.free_ports
def get_free_port(self):
# for loop to avoid port conflict
for _ in range(100):
with closing(socket.socket(socket.AF_INET,
socket.SOCK_STREAM)) as s:
......
......@@ -75,8 +75,9 @@ class ControllerBase(object):
while not self.ctx.status.is_done():
status = self.pod.watch(timeout=2)
if self.ctx.continous_log():
self.pod.logs()
#if self.ctx.continous_log():
# default to print log
self.pod.logs()
# completed
if status == self.ctx.status.COMPLETED:
......
......@@ -145,31 +145,34 @@ class Container(object):
self.errfile,
self._env, )
def logs(self, fn=None, offset=0, whence=1, lines=1000):
def logs(self, fn=None, offset=0, whence=1, limit=1000):
if not self._log_handler:
self._log_handler = open(self._out)
if fn is None:
fn = sys.stdout
self._log_handler.seek(offset, whence)
try:
idx = 0
for line in self._log_handler:
fn.write(line)
idx += 1
if idx > lines:
if offset != 0 or whence != 1:
self._log_handler.seek(offset, whence)
for _ in range(limit):
line = self._log_handler.readline()
if not line:
break
finally:
fn.write(line)
except:
return
def tail(self, length=3000):
if not self._log_handler:
self._log_handler = open(self._out)
self._log_handler.seek(0, 2)
ed = self._log_handler.tell()
try:
self._log_handler.seek(0, 2)
ed = self._log_handler.tell()
except:
pass
if ed > length:
self.logs(offset=ed - length, whence=0)
......
......@@ -40,7 +40,7 @@ def launch():
- ``--rank``: The rank of the node, can be auto assigned by master. Default ``--rank=-1``.
- ``--log_level``: The log level to set for logging.setLevel which can be CRITICAL/ERROR/WARNING/INFO/DEBUG/NOTSET, case insensitive. The rank 0 log will not print in the terminal by default, while you can enable it by adding --log_level=debug. Default ``--log_level=INFO``.
- ``--log_level``: The log level to set for logging.setLevel which can be CRITICAL/ERROR/WARNING/INFO/DEBUG/NOTSET, case insensitive. Default ``--log_level=INFO``.
- ``--nnodes``: The number of nodes for a distributed job, it can be a range in elastic mode, e.g., ``--nnodes=2:3``. Default ``--nnodes=1``.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册