提交 3667578e 编写于 作者: T typhoonzero

testing

上级 d9320dcd
......@@ -108,8 +108,8 @@ struct NCCLContextMap {
for (auto &gpu_id : order_) {
int rank = trainer_id * order_.size() + gpu_id;
PADDLE_ENFORCE(cudaSetDevice(gpu_id));
PADDLE_ENFORCE(
ncclCommInitRank(comms.get() + gpu_id, nranks, *nccl_id, rank));
PADDLE_ENFORCE(platform::dynload::ncclCommInitRank(
comms.get() + gpu_id, nranks, *nccl_id, rank));
}
}
}
......
......@@ -30,7 +30,9 @@ class ParallelExecutor(object):
num_threads=None,
allow_op_delay=False,
share_vars_from=None,
use_default_grad_scale=True):
use_default_grad_scale=True,
num_nodes=0,
trainer_id=0):
"""
ParallelExecutor can run program in parallel.
......@@ -129,7 +131,9 @@ class ParallelExecutor(object):
scope,
local_scopes,
allow_op_delay,
use_default_grad_scale)
use_default_grad_scale,
num_nodes,
trainer_id)
self.scope = scope
def run(self, fetch_list, feed=None, feed_dict=None):
......
Markdown is supported
0%
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请注册