diff --git a/paddle/fluid/framework/details/computation_op_handle.h b/paddle/fluid/framework/details/computation_op_handle.h
index 5b8b70c5641672f3904f657c9a087dc3156ee525..601ae4f8c6de11b0bf25d4f9a92ef8eada67be3d 100644
--- a/paddle/fluid/framework/details/computation_op_handle.h
+++ b/paddle/fluid/framework/details/computation_op_handle.h
@@ -17,7 +17,6 @@
 #include <string>
 #include <vector>
 
-#include "paddle/fluid/framework/details/container_cast.h"
 #include "paddle/fluid/framework/details/op_handle_base.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/operator.h"
diff --git a/paddle/fluid/framework/details/multi_devices_graph_pass.cc b/paddle/fluid/framework/details/multi_devices_graph_pass.cc
index 5b82805ad9391d82fd9b6cf020658cb0a2801c1d..2ab7da2d57c7a55ec496390b7364c55b56e8d65e 100644
--- a/paddle/fluid/framework/details/multi_devices_graph_pass.cc
+++ b/paddle/fluid/framework/details/multi_devices_graph_pass.cc
@@ -134,6 +134,7 @@ static const char kParams[] = "params";
 static const char kLocalScopes[] = "local_scopes";
 static const char kStrategy[] = "strategy";
 static const char kNumTrainers[] = "num_trainers";
+static const char kNumLossScaled[] = "num_loss_scaled";
 
 void MultiDevSSAGraphBuilder::Init() const {
   all_vars_.clear();
diff --git a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc
index edb7b5e70ac0a082fac9906bb2fa6bc2064ffde9..f43207908762de1f0ab1e2a55ce06fadb1a67e63 100644
--- a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc
+++ b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc
@@ -41,10 +41,12 @@ FeedFetchList ScopeBufferedSSAGraphExecutor::Run(
       Scope &local_scope = scope->NewScope();
       *scope->Var(details::kLocalExecScopeName)->GetMutable<Scope *>() =
           &local_scope;
+
       for (auto &info : var_infos_) {
         if (scope->FindVar(info.name_) != nullptr) {
           continue;
         }
+
         if (info.persistable_) {  // Persistable
           InitializeVariable(scope->Var(info.name_), info.type_);
         } else {
diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h
index b45afbc0461d212095f422a71e0eee27572a2f39..24da56c09e3e0f3894d58e5af8838c98e3e1e67c 100644
--- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h
+++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h
@@ -24,7 +24,6 @@
 #include
 #include "ThreadPool.h"  // ThreadPool in thrird party
 #include "paddle/fluid/framework/blocking_queue.h"
-#include "paddle/fluid/framework/details/computation_op_handle.h"
 #include "paddle/fluid/framework/details/exception_holder.h"
 #include "paddle/fluid/framework/details/execution_strategy.h"
 #include "paddle/fluid/framework/details/fetch_op_handle.h"
diff --git a/paddle/fluid/framework/details/var_handle.cc b/paddle/fluid/framework/details/var_handle.cc
index 7de6025a28a1992786b73d53d456984e0cf418c5..30da029ca2a90e7faa6288557ff2f1aeb21cc1c6 100644
--- a/paddle/fluid/framework/details/var_handle.cc
+++ b/paddle/fluid/framework/details/var_handle.cc
@@ -20,7 +20,7 @@ namespace details {
 
 VarHandleBase::~VarHandleBase() {}
 
-VarHandle::~VarHandle() { VLOG(5) << "deleting var handle " << DebugString(); }
+VarHandle::~VarHandle() { VLOG(4) << "deleting var handle " << DebugString(); }
 
 std::string VarHandle::DebugString() const {
   std::stringstream ss;
diff --git a/paddle/fluid/framework/ir/node.h b/paddle/fluid/framework/ir/node.h
index 10ae3a1c74842ca02002d40dac1c1f54627479c6..d2a393b3f19e9aab79098757dae663d030b0fa2b 100644
--- a/paddle/fluid/framework/ir/node.h
+++ b/paddle/fluid/framework/ir/node.h
@@ -49,6 +49,7 @@ class Node {
  public:
   virtual ~Node() {
     if (!wrapper_.empty()) {
+      VLOG(4) << "ir::Node deleting a wrapper node " << Name();
       wrapper_deleter_();
     }
   }
diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py
index eff76ce0d49df52b0219ba920103a3252e6cc026..18d95c94ad36316b7149eb5412260b40a57ac002 100644
--- a/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py
+++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py
@@ -17,8 +17,6 @@ import unittest
 import logging
 import six
 
-ExecutorType = fluid.ExecutionStrategy().ExecutorType
-
 
 class TestBase(unittest.TestCase):
     def main(self,
@@ -26,7 +24,7 @@
              iter=10,
              iter_per_pe=10,
              use_gpu=True,
-             exec_type=ExecutorType.Default):
+             use_experimental_executor=False):
         if use_gpu and not fluid.core.is_compiled_with_cuda():
             logging.warning(
                 "Paddle is not compiled with CUDA, skip GPU unittests")
@@ -45,7 +43,7 @@ class TestBase(unittest.TestCase):
         for _ in six.moves.xrange(iter):
             exe_strategy = fluid.ExecutionStrategy()
             exe_strategy._dry_run = True
-            exe_strategy.executor_type = exec_type
+            exe_strategy.use_experimental_executor = use_experimental_executor
             pe = fluid.ParallelExecutor(
                 use_cuda=use_gpu,
                 loss_name=loss.name,
@@ -58,11 +56,11 @@ class TestBase(unittest.TestCase):
 
 class TestMNISTDryRun(TestBase):
     def test_mnist_dry_run(self):
         for use_gpu in (False, True):
-            for exec_type in (ExecutorType.Default, ExecutorType.Experimental):
+            for use_experimental_executor in (False, True):
                 self.main(
                     network_func=TestMNISTDryRun.network_func,
                     use_gpu=use_gpu,
-                    exec_type=exec_type)
+                    use_experimental_executor=use_experimental_executor)
 
     @staticmethod
     def network_func():
diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py
index c8ac6a90c1b795c0f353c77b28ad3abf712a4b2b..7d2349fad4c84923589708f4c70848f535fd61db 100644
--- a/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py
+++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py
@@ -79,26 +79,25 @@ class TestMNIST(TestParallelExecutorBase):
             return
 
         img, label = self._init_data()
-        """
+
         all_reduce_first_loss, all_reduce_last_loss = self.check_network_convergence(
             model,
             feed_dict={"image": img,
                        "label": label},
             use_cuda=use_cuda,
             use_reduce=False)
-        """
+
         reduce_first_loss, reduce_last_loss = self.check_network_convergence(
             model,
             feed_dict={"image": img,
                        "label": label},
             use_cuda=use_cuda,
             use_reduce=True)
-        """
+
         for loss in zip(all_reduce_first_loss, reduce_first_loss):
             self.assertAlmostEqual(loss[0], loss[1], delta=1e-6)
         for loss in zip(all_reduce_last_loss, reduce_last_loss):
             self.assertAlmostEqual(loss[0], loss[1], delta=1e-4)
-        """
 
     # simple_fc
     def check_simple_fc_convergence(self,
@@ -118,7 +117,7 @@ class TestMNIST(TestParallelExecutorBase):
             use_reduce=use_reduce,
             use_parallel_graph=use_parallel_graph)
 
-    def notest_simple_fc(self):
+    def test_simple_fc(self):
         # use_cuda
         if core.is_compiled_with_cuda():
             self.check_simple_fc_convergence(True)
@@ -126,7 +125,7 @@ class TestMNIST(TestParallelExecutorBase):
                 True, use_reduce=False, use_parallel_graph=True)
         self.check_simple_fc_convergence(False)
 
-    def notest_simple_fc_with_new_strategy(self):
+    def test_simple_fc_with_new_strategy(self):
         # use_cuda, use_reduce
         self._compare_reduce_and_allreduce(simple_fc_net, True)
         self._compare_reduce_and_allreduce(simple_fc_net, False)
@@ -163,7 +162,7 @@ class TestMNIST(TestParallelExecutorBase):
         self.assertAlmostEquals(
             np.mean(parallel_last_loss), single_last_loss, delta=1e-6)
 
-    def notest_simple_fc_parallel_accuracy(self):
+    def test_simple_fc_parallel_accuracy(self):
         if core.is_compiled_with_cuda():
             self.check_simple_fc_parallel_accuracy(True)
             self.check_simple_fc_parallel_accuracy(
@@ -192,7 +191,9 @@ class TestMNIST(TestParallelExecutorBase):
         for use_cuda in (False, True):
             for use_fast_executor in (False, True):
                 self.check_batchnorm_fc_convergence(use_cuda, use_fast_executor)
-        self.check_batchnorm_fc_convergence(use_cuda, False, True)
+
+        self.check_batchnorm_fc_convergence(
+            use_cuda=True, use_fast_executor=False, use_parallel_graph=True)
 
     def test_batchnorm_fc_with_new_strategy(self):
         # FIXME(zcd): close this test temporally.