From 392ae69650ea453accdbdd2b5ed84f3764b2d2c6 Mon Sep 17 00:00:00 2001 From: gongweibao Date: Tue, 11 Sep 2018 22:46:36 +0800 Subject: [PATCH] Set parallel executor thread num under nccl2 distributed env (#13207) --- python/paddle/fluid/parallel_executor.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/python/paddle/fluid/parallel_executor.py b/python/paddle/fluid/parallel_executor.py index 4790e0f6119..bd9f8b3c356 100644 --- a/python/paddle/fluid/parallel_executor.py +++ b/python/paddle/fluid/parallel_executor.py @@ -128,6 +128,13 @@ class ParallelExecutor(object): os.environ.get('CPU_NUM', multiprocessing.cpu_count())) exec_strategy.num_threads = cpu_num * 2 + # Use a single thread under the nccl2 distributed + # env to make sure all GPUs run ops in the same order. + if num_trainers > 1: + assert (use_cuda) + # FIXME(gongwb): avoid this setting. + exec_strategy.num_threads = 1 + if build_strategy is None: build_strategy = BuildStrategy() -- GitLab