diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index f5ab5d6ee5dc800632febc38184850b1fbb52284..ce37aa4855c8b370d00ab1897c41d84a20fd0bcc 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -328,6 +328,12 @@ ParallelExecutor::ParallelExecutor(const std::vector &places, "the number of places must be greater than 1."); } + LOG(WARNING) << string::Sprintf( + "The number of %s, which is used in ParallelExecutor, is %lu. And " + "the Program will be copied %lu copies", + (member_->use_cuda_ ? "CUDAPlace" : "CPUPlace"), places.size(), + places.size()); + // Step 1. Bcast the bcast_vars to devs. // Create local scopes if (local_scopes.empty()) { diff --git a/python/paddle/dataset/flowers.py b/python/paddle/dataset/flowers.py index e048639ae1e9e627ab7ae92f2492eed8f5c0b5da..969ad3c922f9c15b2e39f71ae4359cd3d2fcdcce 100644 --- a/python/paddle/dataset/flowers.py +++ b/python/paddle/dataset/flowers.py @@ -138,8 +138,7 @@ def reader_creator(data_file, break if use_xmap: - cpu_num = int(os.environ.get('CPU_NUM', cpu_count())) - return xmap_readers(mapper, reader, cpu_num, buffered_size) + return xmap_readers(mapper, reader, min(4, cpu_count()), buffered_size) else: return map_readers(mapper, reader) diff --git a/python/paddle/fluid/contrib/slim/tests/test_graph_wrapper.py b/python/paddle/fluid/contrib/slim/tests/test_graph_wrapper.py index 0ab8052d7ab16743bb6589dbb44203e70fa907d0..69080cf50ecaf8a290984f2792ec697a1edf3234 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_graph_wrapper.py +++ b/python/paddle/fluid/contrib/slim/tests/test_graph_wrapper.py @@ -19,6 +19,8 @@ import six import numpy as np from paddle.fluid.contrib.slim.graph import GraphWrapper from paddle.fluid import core +import os +os.environ['CPU_NUM'] = str(4) def residual_block(num): diff --git a/python/paddle/fluid/data_feeder.py b/python/paddle/fluid/data_feeder.py index 32b2c8014ca562d368842208ebf7737cd900341e..1090c781422045a2005ae1fb536a17d15005a8ad 100644 --- a/python/paddle/fluid/data_feeder.py +++ b/python/paddle/fluid/data_feeder.py @@ -22,7 +22,7 @@ from six.moves import zip, range, xrange import multiprocessing from .framework import Variable, default_main_program, _current_expected_place - +from .framework import _cpu_num, _cuda_ids __all__ = ['DataFeeder'] @@ -359,11 +359,9 @@ class DataFeeder(object): if num_places is not None: return int(num_places) elif isinstance(self.place, core.CUDAPlace): - return core.get_cuda_device_count() + return len(_cuda_ids()) else: - cpu_num = int( - os.environ.get('CPU_NUM', multiprocessing.cpu_count())) - return cpu_num + return _cpu_num() def decorate_reader(self, reader, diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 012d15f45a4a04d0262b113ffed77d3e4d810274..f7d487b640e52f04eb81b22d4ddd3dd1d16a8b91 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -82,7 +82,24 @@ def _current_expected_place(): def _cpu_num(): - return int(os.environ.get('CPU_NUM', multiprocessing.cpu_count())) + cpu_num = os.environ.get('CPU_NUM', None) + if cpu_num is None: + sys.stderr.write( + 'The CPU_NUM is not specified, you should set CPU_NUM in ' + 'the environment variable list, i.e export CPU_NUM=1. CPU_NUM ' + 'indicates that how many CPUPlace are used in the current task.\n' + '!!! The default number of CPUPlaces is 1.') + os.environ['CPU_NUM'] = str(1) + return int(cpu_num) + + +def _cuda_ids(): + gpus_env = os.getenv("FLAGS_selected_gpus") + if gpus_env: + device_ids = [int(s) for s in gpus_env.split(",")] + else: + device_ids = six.moves.range(core.get_cuda_device_count()) + return device_ids def cuda_places(device_ids=None): @@ -116,11 +133,7 @@ def cuda_places(device_ids=None): assert core.is_compiled_with_cuda(), \ "Not compiled with CUDA" if device_ids is None: - gpus_env = os.getenv("FLAGS_selected_gpus") - if gpus_env: - device_ids = [int(s) for s in gpus_env.split(",")] - else: - device_ids = six.moves.range(core.get_cuda_device_count()) + device_ids = _cuda_ids() elif not isinstance(device_ids, (list, tuple)): device_ids = [device_ids] return [core.CUDAPlace(dev_id) for dev_id in device_ids] diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py index d0eca7d6dfbdf03828125508c798a9bd31f8bbd6..328b3a4813eec261d39985ef80c47d0c827380ca 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py @@ -17,6 +17,8 @@ from paddle.fluid import compiler import unittest import logging import six +import os +os.environ['CPU_NUM'] = str(4) class TestBase(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py b/python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py index a3701f0808b98b80b62866ffe1250d065361025c..e4fb9b1970a8da4bfec5d48f1182e9552aa77ca8 100644 --- a/python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py +++ b/python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py @@ -22,6 +22,7 @@ import numpy as np import threading import multiprocessing import os +os.environ['CPU_NUM'] = str(4) def as_tensor(np_array_or_tensor, place=None):