From 3667578ec2f820dddf5067bab5e918313d8bf383 Mon Sep 17 00:00:00 2001 From: typhoonzero Date: Sat, 5 May 2018 17:34:56 +0800 Subject: [PATCH] testing --- paddle/fluid/platform/nccl_helper.h | 4 ++-- python/paddle/fluid/parallel_executor.py | 8 ++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/platform/nccl_helper.h b/paddle/fluid/platform/nccl_helper.h index 3b52587a286..f3c4c92afaa 100644 --- a/paddle/fluid/platform/nccl_helper.h +++ b/paddle/fluid/platform/nccl_helper.h @@ -108,8 +108,8 @@ struct NCCLContextMap { for (auto &gpu_id : order_) { int rank = trainer_id * order_.size() + gpu_id; PADDLE_ENFORCE(cudaSetDevice(gpu_id)); - PADDLE_ENFORCE( - ncclCommInitRank(comms.get() + gpu_id, nranks, *nccl_id, rank)); + PADDLE_ENFORCE(platform::dynload::ncclCommInitRank( + comms.get() + gpu_id, nranks, *nccl_id, rank)); } } } diff --git a/python/paddle/fluid/parallel_executor.py b/python/paddle/fluid/parallel_executor.py index f4128dcbe93..34899a54b61 100644 --- a/python/paddle/fluid/parallel_executor.py +++ b/python/paddle/fluid/parallel_executor.py @@ -30,7 +30,9 @@ class ParallelExecutor(object): num_threads=None, allow_op_delay=False, share_vars_from=None, - use_default_grad_scale=True): + use_default_grad_scale=True, + num_nodes=0, + trainer_id=0): """ ParallelExecutor can run program in parallel. @@ -129,7 +131,9 @@ class ParallelExecutor(object): scope, local_scopes, allow_op_delay, - use_default_grad_scale) + use_default_grad_scale, + num_nodes, + trainer_id) self.scope = scope def run(self, fetch_list, feed=None, feed_dict=None): -- GitLab