From 23eb8c4299ce9908d07505df413c4a2b79f14d32 Mon Sep 17 00:00:00 2001 From: Yancey1989 Date: Thu, 13 Dec 2018 14:02:15 +0800 Subject: [PATCH] fix ci test=develop --- .../framework/details/multi_devices_graph_pass.cc | 10 +++++++--- paddle/fluid/operators/reader/ctr_reader.h | 2 +- paddle/fluid/pybind/pybind.cc | 13 ++++++++++++- .../unittests/test_parallel_executor_dry_run.py | 10 ++++++---- 4 files changed, 26 insertions(+), 9 deletions(-) diff --git a/paddle/fluid/framework/details/multi_devices_graph_pass.cc b/paddle/fluid/framework/details/multi_devices_graph_pass.cc index e264906b57f..6c4e0e9168a 100644 --- a/paddle/fluid/framework/details/multi_devices_graph_pass.cc +++ b/paddle/fluid/framework/details/multi_devices_graph_pass.cc @@ -386,12 +386,16 @@ std::unique_ptr MultiDevSSAGraphBuilder::ApplyImpl( CreateComputationalOps(&result, node, places_.size()); } - // if (!is_forwarding && (places_.size() > 1 || num_trainers > 1)) { - // insert synchronous ops at the backpropagation; and - // insert synchronous ops if the graph contains mutilple places. +// insert synchronous ops at the backpropagation; and +// insert synchronous ops if the graph contains multiple places. + +#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32) if (!is_forwarding && (places_.size() > 1 || num_trainers > 1 || (nccl_ctxs_ && nccl_ctxs_->contexts_.size() > 1))) { +#else + if (!is_forwarding && (places_.size() > 1 || num_trainers > 1)) { +#endif // Currently, we assume that once gradient is generated, it can be // broadcast, and each gradient is only broadcast once. 
if (static_cast(boost::get(node->Op()->GetAttr( diff --git a/paddle/fluid/operators/reader/ctr_reader.h b/paddle/fluid/operators/reader/ctr_reader.h index 517d6697443..635483158fc 100644 --- a/paddle/fluid/operators/reader/ctr_reader.h +++ b/paddle/fluid/operators/reader/ctr_reader.h @@ -95,7 +95,7 @@ class CTRReader : public framework::FileReader { queue_->ReOpen(); VLOG(3) << "reopen success"; VLOG(3) << "thread_num " << thread_num_; - for (int thread_id = 0; thread_id < thread_num_; thread_id++) { + for (size_t thread_id = 0; thread_id < thread_num_; thread_id++) { read_threads_.emplace_back(new std::thread( std::bind(&ReadThread, file_groups_[thread_id], slots_, batch_size_, thread_id, &read_thread_status_, queue_))); diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 9cebdda6938..3beb93e7b3e 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -789,7 +789,18 @@ All parameter, weight, gradient are variables in Paddle. [](ExecutionStrategy &self, ExecutionStrategy::ExecutorType type) { self.type_ = type; }, - R"DOC()DOC"); + R"DOC(The type is ExecutorType which is the enum ranging from Default, +ParallelGraph and Experimental: + +Default: Compile the main_program into a multi-devices graph, + and execute this graph on multi-devices with multiple threads which + are specified by build_strategy.num_threads. +ParallelGraph: Compile the main_program into multiple graphs, and execute each of the graphs on one + device with one thread. Please note, this mode only supports all-reduce mode and use_cuda=True. + This approach can achieve better performance in some scenarios. 
+Experimental: Compile the main_program into a multi-devices graph, + and execute this graph with a faster execution mode than the Default, + this approach is still experimental.)DOC"); py::class_ build_strategy(pe, "BuildStrategy", R"DOC( BuildStrategy allows the user to more preciously control how to diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py index 18d95c94ad3..eff76ce0d49 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py @@ -17,6 +17,8 @@ import unittest import logging import six +ExecutorType = fluid.ExecutionStrategy().ExecutorType + class TestBase(unittest.TestCase): def main(self, @@ -24,7 +26,7 @@ class TestBase(unittest.TestCase): iter=10, iter_per_pe=10, use_gpu=True, - use_experimental_executor=False): + exec_type=ExecutorType.Default): if use_gpu and not fluid.core.is_compiled_with_cuda(): logging.warning( "Paddle is not compiled with CUDA, skip GPU unittests") @@ -43,7 +45,7 @@ class TestBase(unittest.TestCase): for _ in six.moves.xrange(iter): exe_strategy = fluid.ExecutionStrategy() exe_strategy._dry_run = True - exe_strategy.use_experimental_executor = use_experimental_executor + exe_strategy.executor_type = exec_type pe = fluid.ParallelExecutor( use_cuda=use_gpu, loss_name=loss.name, @@ -56,11 +58,11 @@ class TestBase(unittest.TestCase): class TestMNISTDryRun(TestBase): def test_mnist_dry_run(self): for use_gpu in (False, True): - for use_experimental_executor in (False, True): + for exec_type in (ExecutorType.Default, ExecutorType.Experimental): self.main( network_func=TestMNISTDryRun.network_func, use_gpu=use_gpu, - use_experimental_executor=use_experimental_executor) + exec_type=exec_type) @staticmethod def network_func(): -- GitLab