diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_profiler.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_profiler.py index 62ecb2207cded5f1e77592f178c0af2b7b40de0d..0fac0610fd22d1b474a55b7bf3dacbf9da7ce2a5 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_profiler.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_profiler.py @@ -19,6 +19,14 @@ import numpy as np import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.tests.unittests.test_profiler import TestProfiler +import os + +# NCCL 2.7 chooses to use shared memory where NCCL 2.6 did not, which causes the following error: +# include/shm.h:28 NCCL WARN Call to posix_fallocate failed: No space left on device +# +# Set the environment variable NCCL_SHM_DISABLE=1 to disable the Shared Memory (SHM) transport +# and force the use of P2P, which is the default transport in NCCL 2.6. +os.environ['NCCL_SHM_DISABLE'] = str(1) class TestPEProfiler(TestProfiler):