From ef445ec8f8c30894333506790f1c324bcb52ff77 Mon Sep 17 00:00:00 2001
From: Chitsing KUI
Date: Sun, 25 Jun 2023 16:30:24 +0800
Subject: [PATCH] add flag to disable gpu capture (#54762)

---
 python/paddle/distributed/launch/context/args_envs.py   | 7 +++++++
 python/paddle/distributed/launch/controllers/watcher.py | 3 +++
 2 files changed, 10 insertions(+)

diff --git a/python/paddle/distributed/launch/context/args_envs.py b/python/paddle/distributed/launch/context/args_envs.py
index 8026ef3d0fb..df81569be95 100644
--- a/python/paddle/distributed/launch/context/args_envs.py
+++ b/python/paddle/distributed/launch/context/args_envs.py
@@ -89,6 +89,13 @@ def parse_args():
         help="rank node by ip. Default False",
     )
 
+    base_group.add_argument(
+        "--enable_gpu_log",
+        type=strtobool,
+        default=True,
+        help="enable capture gpu log while running. Default True",
+    )
+
     base_group.add_argument(
         "--nnodes",
         type=str,
diff --git a/python/paddle/distributed/launch/controllers/watcher.py b/python/paddle/distributed/launch/controllers/watcher.py
index ad7bc5a84b8..25855572620 100644
--- a/python/paddle/distributed/launch/controllers/watcher.py
+++ b/python/paddle/distributed/launch/controllers/watcher.py
@@ -27,6 +27,9 @@ class Watcher:
 
         self.gpu_util = []
 
+        if not self.ctx.args.enable_gpu_log:
+            return
+
         # gpu log file
         self.gpus = self.ctx.args.devices or self.ctx.node.device.labels
         if len(self.gpus) > 0:
-- 
GitLab