diff --git a/python/paddle/dataset/flowers.py b/python/paddle/dataset/flowers.py index f082e33be3357fbe405ab1a1ef5e0e601108a363..0d1eaeed0971e514fc4368e2f58ba844bd8118ae 100644 --- a/python/paddle/dataset/flowers.py +++ b/python/paddle/dataset/flowers.py @@ -119,7 +119,7 @@ def reader_creator(data_file, yield sample, int(label) - 1 if use_xmap: - return xmap_readers(mapper, reader, cpu_count(), buffered_size) + return xmap_readers(mapper, reader, min(4, cpu_count()), buffered_size) else: return map_readers(mapper, reader) diff --git a/python/paddle/fluid/data_feeder.py b/python/paddle/fluid/data_feeder.py index 7940dabcfb03cc9eb46f678365685a6e99bcceec..0fd696510e5dcf57b95e92e430feb27a8aedd3f8 100644 --- a/python/paddle/fluid/data_feeder.py +++ b/python/paddle/fluid/data_feeder.py @@ -150,7 +150,7 @@ class DataFeeder(object): elif isinstance(self.place, core.CUDAPlace): return core.get_cuda_device_count() else: - return multiprocessing.cpu_count() + return min(4, multiprocessing.cpu_count()) def decorate_reader(self, reader, diff --git a/python/paddle/fluid/parallel_executor.py b/python/paddle/fluid/parallel_executor.py index 3117dfe00c7a3df1035c439dc31b81e67781d0cc..70437399d72c25043d1ed929c1dad1190d55cb83 100644 --- a/python/paddle/fluid/parallel_executor.py +++ b/python/paddle/fluid/parallel_executor.py @@ -101,7 +101,7 @@ class ParallelExecutor(object): p.set_place(self._act_places[-1]) self._places.append(p) else: - for i in xrange(multiprocessing.cpu_count()): + for i in xrange(min(4, multiprocessing.cpu_count())): p = core.Place() self._act_places.append(core.CPUPlace()) p.set_place(self._act_places[-1]) @@ -110,10 +110,7 @@ class ParallelExecutor(object): if exec_strategy is None: exec_strategy = ExecutionStrategy() - if use_cuda: - exec_strategy.use_event = True - else: - exec_strategy.use_event = False + exec_strategy.use_event = use_cuda if exec_strategy.num_threads == 0: if use_cuda: diff --git a/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py b/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py index c9c3c648717814c28c39a401487925824e885946..566b676777cc329dce02f1875abf0d72176c1c00 100644 --- a/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py +++ b/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py @@ -23,6 +23,7 @@ __all__ = ['TestParallelExecutorBase'] class TestParallelExecutorBase(unittest.TestCase): def check_network_convergence(self, method, + use_cuda=True, memory_opt=True, iter=50, batch_size=None, @@ -53,7 +54,7 @@ class TestParallelExecutorBase(unittest.TestCase): adam.minimize(loss) if memory_opt: fluid.memory_optimize(main) - place = fluid.CUDAPlace(0) + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() startup_exe = fluid.Executor(place) startup_exe.run(startup) exec_strategy = fluid.ExecutionStrategy() @@ -64,7 +65,7 @@ class TestParallelExecutorBase(unittest.TestCase): if use_parallel_executor: exe = fluid.ParallelExecutor( - True, + use_cuda, loss_name=loss.name, exec_strategy=exec_strategy, build_strategy=build_strategy) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py index 015703c3e25f4e11e64ab6a7de99da12bee608f6..52dfb9620f87306c18eb5149d49ab534e4db410c 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py @@ -99,7 +99,9 @@ class TestMNIST(TestParallelExecutorBase): fluid.recordio_writer.convert_reader_to_recordio_file( MNIST_RECORDIO_FILE, reader, feeder) - def check_simple_fc_convergence(self, balance_parameter_opt_between_cards): + def check_simple_fc_convergence(self, + balance_parameter_opt_between_cards, + use_cuda=True): self.check_network_convergence(simple_fc_net) self.check_network_convergence(simple_fc_net, allow_op_delay=True) @@ -109,17 +111,19 @@ class TestMNIST(TestParallelExecutorBase): simple_fc_net, feed_dict={"image": img, "label": label}, + use_cuda=use_cuda, balance_parameter_opt_between_cards=balance_parameter_opt_between_cards ) def test_simple_fc(self): - self.check_simple_fc_convergence(False) + self.check_simple_fc_convergence(False, use_cuda=True) def test_simple_fc_with_new_strategy(self): - self.check_simple_fc_convergence(True) + self.check_simple_fc_convergence(True, use_cuda=True) def check_simple_fc_parallel_accuracy(self, - balance_parameter_opt_between_cards): + balance_parameter_opt_between_cards, + use_cuda=True): img = np.zeros(shape=[32, 784], dtype='float32') label = np.ones(shape=[32, 1], dtype='int64') single_first_loss, single_last_loss = self.check_network_convergence( @@ -127,6 +131,7 @@ class TestMNIST(TestParallelExecutorBase): seed=1000, feed_dict={"image": img, "label": label}, + use_cuda=use_cuda, use_parallel_executor=False) parallel_first_loss, parallel_last_loss = self.check_network_convergence( method=simple_fc_net, @@ -143,13 +148,15 @@ class TestMNIST(TestParallelExecutorBase): self.assertAlmostEquals(p_l, single_last_loss[0], delta=1e-6) def test_simple_fc_parallel_accuracy(self): - self.check_simple_fc_parallel_accuracy(False) + self.check_simple_fc_parallel_accuracy(False, use_cuda=True) + self.check_simple_fc_parallel_accuracy(False, use_cuda=False) def test_simple_fc_parallel_accuracy_with_new_strategy(self): - self.check_simple_fc_parallel_accuracy(True) + self.check_simple_fc_parallel_accuracy(True, use_cuda=True) + self.check_simple_fc_parallel_accuracy(True, use_cuda=False) - def check_batchnorm_fc_convergence(self, - balance_parameter_opt_between_cards): + def check_batchnorm_fc_convergence( + self, balance_parameter_opt_between_cards, use_cuda): self.check_network_convergence(fc_with_batchnorm) img = np.zeros(shape=[32, 784], dtype='float32') label = np.ones(shape=[32, 1], dtype='int64') @@ -157,14 +164,17 @@ class TestMNIST(TestParallelExecutorBase): fc_with_batchnorm, feed_dict={"image": img, "label": label}, + use_cuda=use_cuda, balance_parameter_opt_between_cards=balance_parameter_opt_between_cards ) def test_batchnorm_fc(self): - self.check_batchnorm_fc_convergence(False) + self.check_batchnorm_fc_convergence(False, use_cuda=True) + self.check_batchnorm_fc_convergence(False, use_cuda=False) def test_batchnorm_fc_with_new_strategy(self): - self.check_batchnorm_fc_convergence(True) + self.check_batchnorm_fc_convergence(True, use_cuda=True) + self.check_batchnorm_fc_convergence(True, use_cuda=False) if __name__ == '__main__': diff --git a/python/paddle/v2/dataset/flowers.py b/python/paddle/v2/dataset/flowers.py index 7bdddeaabec733ef26b3f766c6437f5c53d65044..6c2c0d5cc50dad04123bf864488457fc91036ffc 100644 --- a/python/paddle/v2/dataset/flowers.py +++ b/python/paddle/v2/dataset/flowers.py @@ -119,7 +119,7 @@ def reader_creator(data_file, yield sample, int(label) - 1 if use_xmap: - return xmap_readers(mapper, reader, cpu_count(), buffered_size) + return xmap_readers(mapper, reader, min(4, cpu_count()), buffered_size) else: return map_readers(mapper, reader)