未验证 提交 90b95bec 编写于 作者: K kuizhiqing 提交者: GitHub

[launch] make logging more stable; default log output to stdout (#41314)

上级 0f6412c0
...@@ -44,6 +44,7 @@ class Node(object): ...@@ -44,6 +44,7 @@ class Node(object):
return self.free_ports return self.free_ports
def get_free_port(self): def get_free_port(self):
# for loop to avoid port conflict
for _ in range(100): for _ in range(100):
with closing(socket.socket(socket.AF_INET, with closing(socket.socket(socket.AF_INET,
socket.SOCK_STREAM)) as s: socket.SOCK_STREAM)) as s:
......
...@@ -75,8 +75,9 @@ class ControllerBase(object): ...@@ -75,8 +75,9 @@ class ControllerBase(object):
while not self.ctx.status.is_done(): while not self.ctx.status.is_done():
status = self.pod.watch(timeout=2) status = self.pod.watch(timeout=2)
if self.ctx.continous_log(): #if self.ctx.continous_log():
self.pod.logs() # default to print log
self.pod.logs()
# completed # completed
if status == self.ctx.status.COMPLETED: if status == self.ctx.status.COMPLETED:
......
...@@ -145,31 +145,34 @@ class Container(object): ...@@ -145,31 +145,34 @@ class Container(object):
self.errfile, self.errfile,
self._env, ) self._env, )
def logs(self, fn=None, offset=0, whence=1, lines=1000): def logs(self, fn=None, offset=0, whence=1, limit=1000):
if not self._log_handler: if not self._log_handler:
self._log_handler = open(self._out) self._log_handler = open(self._out)
if fn is None: if fn is None:
fn = sys.stdout fn = sys.stdout
self._log_handler.seek(offset, whence)
try: try:
idx = 0 if offset != 0 or whence != 1:
for line in self._log_handler: self._log_handler.seek(offset, whence)
fn.write(line)
idx += 1 for _ in range(limit):
if idx > lines: line = self._log_handler.readline()
if not line:
break break
finally: fn.write(line)
except:
return return
def tail(self, length=3000): def tail(self, length=3000):
if not self._log_handler: if not self._log_handler:
self._log_handler = open(self._out) self._log_handler = open(self._out)
self._log_handler.seek(0, 2) try:
ed = self._log_handler.tell() self._log_handler.seek(0, 2)
ed = self._log_handler.tell()
except:
pass
if ed > length: if ed > length:
self.logs(offset=ed - length, whence=0) self.logs(offset=ed - length, whence=0)
......
...@@ -40,7 +40,7 @@ def launch(): ...@@ -40,7 +40,7 @@ def launch():
- ``--rank``: The rank of the node, can be auto assigned by master. Default ``--rank=-1``. - ``--rank``: The rank of the node, can be auto assigned by master. Default ``--rank=-1``.
- ``--log_level``: The log level to set for logging.setLevel which can be CRITICAL/ERROR/WARNING/INFO/DEBUG/NOTSET, case insensitive. The rank 0 log will not print in the terminal by default, while you can enable it by adding --log_level=debug. Default ``--log_level=INFO``. - ``--log_level``: The log level to set for logging.setLevel which can be CRITICAL/ERROR/WARNING/INFO/DEBUG/NOTSET, case insensitive. Default ``--log_level=INFO``.
- ``--nnodes``: The number of nodes for a distributed job, it can be a range in elastic mode, e.g., ``--nnodes=2:3``. Default ``--nnodes=1``. - ``--nnodes``: The number of nodes for a distributed job, it can be a range in elastic mode, e.g., ``--nnodes=2:3``. Default ``--nnodes=1``.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册