From a2e43ae5ce691fef18c0e6600dde7c8c4e5d1c27 Mon Sep 17 00:00:00 2001 From: "yi.wu" Date: Mon, 25 Jun 2018 17:16:00 +0800 Subject: [PATCH] fix trainer nccl2 env --- python/paddle/fluid/trainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/fluid/trainer.py b/python/paddle/fluid/trainer.py index 45ab889bea..fc4d7cba71 100644 --- a/python/paddle/fluid/trainer.py +++ b/python/paddle/fluid/trainer.py @@ -315,7 +315,7 @@ class Trainer(object): for ip in worker_ips.split(","): worker_endpoints.append(':'.join([ip, port])) self.num_trainers = len(worker_endpoints) - current_endpoint = os.getenv("POD_IP") + ":" + port + current_endpoint = os.getenv("PADDLE_CURRENT_IP") + ":" + port worker_endpoints.remove(current_endpoint) # TODO(wuyi): use self.nccl_id_var, self.num_trainers and self.trainer_id # in ParallelExecutor to start -- GitLab