Commit 23eb8c42 authored by Yancey1989

fix ci test=develop

Parent 106e2852
@@ -386,12 +386,16 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl(
CreateComputationalOps(&result, node, places_.size());
}
// if (!is_forwarding && (places_.size() > 1 || num_trainers > 1)) {
// insert synchronous ops at the backpropagation; and
// insert synchronous ops if the graph contains multiple places.
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
if (!is_forwarding &&
(places_.size() > 1 || num_trainers > 1 ||
(nccl_ctxs_ && nccl_ctxs_->contexts_.size() > 1))) {
#else
if (!is_forwarding && (places_.size() > 1 || num_trainers > 1)) {
#endif
// Currently, we assume that once gradient is generated, it can be
// broadcast, and each gradient is only broadcast once.
if (static_cast<bool>(boost::get<int>(node->Op()->GetAttr(
......
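For readers skimming the hunk above: the new #if branch only widens the condition under which synchronization (all-reduce/broadcast) ops are inserted into the backward part of the graph. On CUDA builds, having more than one NCCL context is now enough, even with a single place and a single trainer; otherwise the old places/trainers check still applies. A minimal fluid-1.x style sketch of a setup that reaches this graph pass (the tiny fc network and hyperparameters are illustrative, not from this commit):

    import paddle.fluid as fluid

    # Tiny network so the sketch is self-contained.
    x = fluid.layers.data(name='x', shape=[4], dtype='float32')
    y = fluid.layers.fc(input=x, size=1)
    loss = fluid.layers.mean(y)
    fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)

    place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda() \
        else fluid.CPUPlace()
    fluid.Executor(place).run(fluid.default_startup_program())

    build_strategy = fluid.BuildStrategy()
    # All-reduce gradient aggregation; with more than one place, trainer, or
    # NCCL context, the pass above inserts synchronization ops right after
    # each gradient is generated in the backward pass.
    build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.AllReduce

    pe = fluid.ParallelExecutor(
        use_cuda=fluid.core.is_compiled_with_cuda(),
        loss_name=loss.name,
        build_strategy=build_strategy)

With one GPU, one trainer, and a single NCCL context the condition stays false and no synchronization ops are added, which is exactly the case this commit guards on non-CUDA and Windows builds.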
@@ -95,7 +95,7 @@ class CTRReader : public framework::FileReader {
queue_->ReOpen();
VLOG(3) << "reopen success";
VLOG(3) << "thread_num " << thread_num_;
for (int thread_id = 0; thread_id < thread_num_; thread_id++) {
for (size_t thread_id = 0; thread_id < thread_num_; thread_id++) {
read_threads_.emplace_back(new std::thread(
std::bind(&ReadThread, file_groups_[thread_id], slots_, batch_size_,
thread_id, &read_thread_status_, queue_)));
......
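The CTRReader hunk only changes the loop index type to match thread_num_, but the surrounding code shows the thread-per-file-group reader pattern: each thread reads its own group of files and feeds a shared blocking queue. A rough, self-contained Python analogue of that pattern (illustrative only, not Paddle's CTRReader API; the file names and batch size are made up):

    import threading
    import queue

    def read_thread(files, batch_size, thread_id, out_queue):
        # Stand-in for ReadThread: parse each file and enqueue batches.
        for f in files:
            out_queue.put((thread_id, f, batch_size))

    file_groups = [['part-0', 'part-2'], ['part-1', 'part-3']]  # hypothetical
    batch_queue = queue.Queue(maxsize=64)

    read_threads = []
    for thread_id in range(len(file_groups)):  # thread_num == len(file_groups)
        t = threading.Thread(
            target=read_thread,
            args=(file_groups[thread_id], 32, thread_id, batch_queue))
        t.start()
        read_threads.append(t)

    for t in read_threads:
        t.join()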
@@ -789,7 +789,18 @@ All parameter, weight, gradient are variables in Paddle.
[](ExecutionStrategy &self, ExecutionStrategy::ExecutorType type) {
self.type_ = type;
},
R"DOC()DOC");
R"DOC(The type is ExecutorType which is the enum ranging from Default,
ParallelGraph and Experiment:
Default: Compile the main_program into a multi-devices graph,
and execute this graph on multi-devices with multiple threads which
specified by build_strategy.num_threads.
ParallelGraph: Compile the main_program into multiple graphs, and execute each of the graphs on one
device with one thread. Please note, this mode only supports all-reduce mode and use_cuda=True.
This approach can achieve better performance in some scenarios.
Experimental: Compile the main_program into a multi-devices graph,
and executor this graph with a faster execution mode than the Default,
this approach is on the experiments.)DOC");
py::class_<BuildStrategy> build_strategy(pe, "BuildStrategy", R"DOC(
BuildStrategy allows the user to more precisely control how to
......
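The docstring above documents the executor_type property that the updated unit test below exercises. A short usage sketch based on what the test does (anything beyond setting the property is the usual fluid wiring and is assumed here):

    import paddle.fluid as fluid

    ExecutorType = fluid.ExecutionStrategy().ExecutorType

    exec_strategy = fluid.ExecutionStrategy()
    # One of Default / ParallelGraph / Experimental, as described above.
    exec_strategy.executor_type = ExecutorType.Experimental

The strategy is then handed to fluid.ParallelExecutor through its exec_strategy argument, which is exactly how the dry-run test below drives the different executor types.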
@@ -17,6 +17,8 @@ import unittest
import logging
import six
ExecutorType = fluid.ExecutionStrategy().ExecutorType
class TestBase(unittest.TestCase):
def main(self,
@@ -24,7 +26,7 @@ class TestBase(unittest.TestCase):
iter=10,
iter_per_pe=10,
use_gpu=True,
use_experimental_executor=False):
exec_type=ExecutorType.Default):
if use_gpu and not fluid.core.is_compiled_with_cuda():
logging.warning(
"Paddle is not compiled with CUDA, skip GPU unittests")
@@ -43,7 +45,7 @@ class TestBase(unittest.TestCase):
for _ in six.moves.xrange(iter):
exe_strategy = fluid.ExecutionStrategy()
exe_strategy._dry_run = True
exe_strategy.use_experimental_executor = use_experimental_executor
exe_strategy.executor_type = exec_type
pe = fluid.ParallelExecutor(
use_cuda=use_gpu,
loss_name=loss.name,
@@ -56,11 +58,11 @@ class TestBase(unittest.TestCase):
class TestMNISTDryRun(TestBase):
def test_mnist_dry_run(self):
for use_gpu in (False, True):
for use_experimental_executor in (False, True):
for exec_type in (ExecutorType.Default, ExecutorType.Experimental):
self.main(
network_func=TestMNISTDryRun.network_func,
use_gpu=use_gpu,
use_experimental_executor=use_experimental_executor)
exec_type=exec_type)
@staticmethod
def network_func():
......