Commit 23eb8c42 authored by Yancey1989

fix ci test=develop

Parent 106e2852
@@ -386,12 +386,16 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl(
       CreateComputationalOps(&result, node, places_.size());
     }
 
-    // if (!is_forwarding && (places_.size() > 1 || num_trainers > 1)) {
-    // insert synchronous ops at the backpropagation; and
-    // insert synchronous ops if the graph contains mutilple places.
+    // insert synchronous ops at the backpropagation; and
+    // insert synchronous ops if the graph contains mutilple places.
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
     if (!is_forwarding &&
         (places_.size() > 1 || num_trainers > 1 ||
          (nccl_ctxs_ && nccl_ctxs_->contexts_.size() > 1))) {
+#else
+    if (!is_forwarding && (places_.size() > 1 || num_trainers > 1)) {
+#endif
       // Currently, we assume that once gradient is generated, it can be
       // broadcast, and each gradient is only broadcast once.
       if (static_cast<bool>(boost::get<int>(node->Op()->GetAttr(
...
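A plausible reading of this hunk: `nccl_ctxs_` is a member that exists only in CUDA/NCCL builds, so referencing it unconditionally broke CPU-only and Windows CI builds, hence the `#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)` split. A minimal standalone C++ sketch of the same pattern (all names below are illustrative, not Paddle's):

#include <cstddef>
#include <cstdio>

// Comment this out to simulate a CPU-only / Windows build.
#define PADDLE_WITH_CUDA

class Builder {
 public:
  bool NeedSyncOps(bool is_forwarding, size_t num_places,
                   size_t num_trainers) const {
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
    // CUDA build: NCCL may provide extra contexts that also require sync.
    return !is_forwarding &&
           (num_places > 1 || num_trainers > 1 || nccl_ctx_count_ > 1);
#else
    // CPU/Windows build: the NCCL member does not exist, so never touch it.
    return !is_forwarding && (num_places > 1 || num_trainers > 1);
#endif
  }

 private:
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
  size_t nccl_ctx_count_ = 1;  // stand-in for nccl_ctxs_->contexts_.size()
#endif
};

int main() {
  Builder b;
  std::printf("need sync ops: %d\n", b.NeedSyncOps(false, 2, 1));
  return 0;
}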
@@ -95,7 +95,7 @@ class CTRReader : public framework::FileReader {
     queue_->ReOpen();
     VLOG(3) << "reopen success";
     VLOG(3) << "thread_num " << thread_num_;
-    for (int thread_id = 0; thread_id < thread_num_; thread_id++) {
+    for (size_t thread_id = 0; thread_id < thread_num_; thread_id++) {
       read_threads_.emplace_back(new std::thread(
           std::bind(&ReadThread, file_groups_[thread_id], slots_, batch_size_,
                     thread_id, &read_thread_status_, queue_)));
...
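The `int` to `size_t` change above is the usual fix for a signed/unsigned comparison warning: presumably `thread_num_` is an unsigned type such as `size_t`, so an `int` loop counter trips `-Wsign-compare`, which a CI build with `-Werror` turns into a hard error. A hedged sketch of the pattern (hypothetical names):

#include <cstddef>
#include <cstdio>

int main() {
  const size_t thread_num = 4;  // stand-in for an unsigned thread_num_ member
  // With -Wall -Werror, a signed counter fails the build:
  //   for (int thread_id = 0; thread_id < thread_num; thread_id++) {}
  //   error: comparison of integer expressions of different signedness
  for (size_t thread_id = 0; thread_id < thread_num; thread_id++) {
    std::printf("starting read thread %zu\n", thread_id);  // counter matches
  }
  return 0;
}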
@@ -789,7 +789,18 @@ All parameter, weight, gradient are variables in Paddle.
           [](ExecutionStrategy &self, ExecutionStrategy::ExecutorType type) {
             self.type_ = type;
           },
-          R"DOC()DOC");
+          R"DOC(The type is ExecutorType which is the enum ranging from Default,
+        ParallelGraph and Experiment:
+
+        Default: Compile the main_program into a multi-devices graph,
+              and execute this graph on multi-devices with multiple threads which
+              specified by build_strategy.num_threads.
+        ParallelGraph: Compile the main_program into multiple graphs, and execute each of the graphs on one
+              device with one thread. Please note, this mode only supports all-reduce mode and use_cuda=True.
+              This approach can achieve better performance in some scenarios.
+        Experimental: Compile the main_program into a multi-devices graph,
+              and executor this graph with a faster execution mode than the Default,
+              this approach is on the experiments.)DOC");
 
       py::class_<BuildStrategy> build_strategy(pe, "BuildStrategy", R"DOC(
     BuildStrategy allows the user to more preciously control how to
...
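The hunk above fills in the docstring that pybind11 attaches to the `executor_type` property. For context, here is a self-contained, simplified sketch of how such an enum and documented property are typically bound (illustrative code, not Paddle's actual source):

#include <pybind11/pybind11.h>
namespace py = pybind11;

// Simplified stand-in for Paddle's ExecutionStrategy.
struct ExecutionStrategy {
  enum class ExecutorType { Default = 0, ParallelGraph = 1, Experimental = 2 };
  ExecutorType type_{ExecutorType::Default};
};

PYBIND11_MODULE(core, m) {
  py::class_<ExecutionStrategy> exec_strategy(m, "ExecutionStrategy");
  exec_strategy.def(py::init<>());

  // Nesting the enum in the class scope exposes it as
  // ExecutionStrategy.ExecutorType on the Python side.
  py::enum_<ExecutionStrategy::ExecutorType>(exec_strategy, "ExecutorType")
      .value("Default", ExecutionStrategy::ExecutorType::Default)
      .value("ParallelGraph", ExecutionStrategy::ExecutorType::ParallelGraph)
      .value("Experimental", ExecutionStrategy::ExecutorType::Experimental);

  exec_strategy.def_property(
      "executor_type",
      [](const ExecutionStrategy &self) { return self.type_; },
      [](ExecutionStrategy &self, ExecutionStrategy::ExecutorType type) {
        self.type_ = type;
      },
      "Which executor runs the compiled graph; see ExecutorType.");
}

A binding like this is what makes `fluid.ExecutionStrategy().ExecutorType` and assignments such as `exe_strategy.executor_type = ExecutorType.Experimental` possible in the Python test below.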
@@ -17,6 +17,8 @@ import unittest
 import logging
 import six
 
+ExecutorType = fluid.ExecutionStrategy().ExecutorType
+
 
 class TestBase(unittest.TestCase):
     def main(self,
...
@@ -24,7 +26,7 @@ class TestBase(unittest.TestCase):
              iter=10,
              iter_per_pe=10,
              use_gpu=True,
-             use_experimental_executor=False):
+             exec_type=ExecutorType.Default):
         if use_gpu and not fluid.core.is_compiled_with_cuda():
             logging.warning(
                 "Paddle is not compiled with CUDA, skip GPU unittests")
...
@@ -43,7 +45,7 @@ class TestBase(unittest.TestCase):
         for _ in six.moves.xrange(iter):
             exe_strategy = fluid.ExecutionStrategy()
             exe_strategy._dry_run = True
-            exe_strategy.use_experimental_executor = use_experimental_executor
+            exe_strategy.executor_type = exec_type
             pe = fluid.ParallelExecutor(
                 use_cuda=use_gpu,
                 loss_name=loss.name,
...
@@ -56,11 +58,11 @@ class TestBase(unittest.TestCase):
 class TestMNISTDryRun(TestBase):
     def test_mnist_dry_run(self):
         for use_gpu in (False, True):
-            for use_experimental_executor in (False, True):
+            for exec_type in (ExecutorType.Default, ExecutorType.Experimental):
                 self.main(
                     network_func=TestMNISTDryRun.network_func,
                     use_gpu=use_gpu,
-                    use_experimental_executor=use_experimental_executor)
+                    exec_type=exec_type)
 
     @staticmethod
     def network_func():
...