diff --git a/python/paddle/distributed/launch/controllers/watcher.py b/python/paddle/distributed/launch/controllers/watcher.py index 131d915292e17bbceaa17354e1b65ea2011a50c6..6e8a2cc4e87818902dc100f5361e9c1873b88205 100644 --- a/python/paddle/distributed/launch/controllers/watcher.py +++ b/python/paddle/distributed/launch/controllers/watcher.py @@ -24,7 +24,7 @@ class Watcher(object): def __init__(self, ctx): self.ctx = ctx - self.interval = 10 + self.interval = 30 self.gpu_util = [] @@ -80,7 +80,7 @@ class Watcher(object): self.gpu_fd.flush() except: - self.ctx.log.error("save gpu info failed") + self.ctx.logger.warning("save gpu info failed") def _save_gpu_log(self, util_key): try: @@ -89,7 +89,7 @@ class Watcher(object): self.gpu_fd.write('\n') self.gpu_fd.flush() except: - self.ctx.log.error("save gpu log failed") + self.ctx.logger.warning("save gpu log failed") def stop(self): if hasattr(self, "proc"):