From 8149a07a418029dcb87280e74a598d8c719e7789 Mon Sep 17 00:00:00 2001 From: minqiyang Date: Fri, 21 Dec 2018 17:11:42 +0800 Subject: [PATCH] Fix wait stream two times bug test=develop --- paddle/fluid/framework/details/execution_strategy.h | 2 +- .../details/scope_buffered_ssa_graph_executor.cc | 12 +++++------- .../details/scope_buffered_ssa_graph_executor.h | 8 ++++++++ paddle/fluid/pybind/pybind.cc | 2 +- 4 files changed, 15 insertions(+), 9 deletions(-) diff --git a/paddle/fluid/framework/details/execution_strategy.h b/paddle/fluid/framework/details/execution_strategy.h index 37b07e5736..15c496130c 100644 --- a/paddle/fluid/framework/details/execution_strategy.h +++ b/paddle/fluid/framework/details/execution_strategy.h @@ -25,7 +25,7 @@ struct ExecutionStrategy { size_t num_threads_{0}; bool use_cuda_{true}; bool allow_op_delay_{false}; - size_t num_iteration_per_drop_scope_{1}; + size_t num_iteration_per_drop_scope_{100}; ExecutorType type_{kDefault}; bool dry_run_{false}; }; diff --git a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc index 22bf0d308b..00b8136dc2 100644 --- a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc @@ -66,17 +66,15 @@ FeedFetchList ScopeBufferedSSAGraphExecutor::Run( platform::RecordEvent e("ScopeBufferedSSAGraphExecutorAfterRun", nullptr); ++drop_scope_counter_; + bool stream_end = false; if (!fetch_tensors.empty()) { - // Wait All computational streams - for (auto p : places_) { - platform::DeviceContextPool::Instance().Get(p)->Wait(); - } + WaitComputationalStreams(); + stream_end = true; } if (drop_scope_counter_ == strategy_.num_iteration_per_drop_scope_) { - // Wait All computational streams - for (auto p : places_) { - platform::DeviceContextPool::Instance().Get(p)->Wait(); + if (!stream_end) { + WaitComputationalStreams(); } for (auto &scope : local_scopes_) { diff --git a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h index 5e87e0bf50..0f6340213d 100644 --- a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h +++ b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h @@ -47,6 +47,14 @@ class ScopeBufferedSSAGraphExecutor : public SSAGraphExecutor { FeedFetchList Run(const std::vector& fetch_tensors) override; + private: + inline void WaitComputationalStreams() { + // Wait All computational streams + for (auto p : places_) { + platform::DeviceContextPool::Instance().Get(p)->Wait(); + } + } + private: size_t drop_scope_counter_{0}; diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index a63c71aad2..d590c3a3c6 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -815,7 +815,7 @@ All parameter, weight, gradient are variables in Paddle. R"DOC(The type is INT, num_iteration_per_drop_scope indicates how many iterations to clean up the temp variables which is generated during execution. It may make the execution faster, - because the temp variable's shape maybe the same between two iterations. Default 1. + because the temp variable's shape maybe the same between two iterations. Default 100. NOTES: 1. If you fetch data when calling the 'run', the ParallelExecutor -- GitLab