From 0023c3bcf52c7bde221a32fb898f52a9aac635c2 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 16:29:41 +0800 Subject: [PATCH] Use atomic bool --- paddle/fluid/framework/parallel_executor.cc | 6 +++--- paddle/fluid/framework/parallel_executor.h | 5 +++-- paddle/fluid/platform/profiler_test.cc | 9 +++++++++ 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 178243092..c8dd3f915 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -633,7 +633,7 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, auto fetched_data = std::make_shared(fetch_tensors.size()); // Version --> VarHandle member_->exception_.reset(); - std::unordered_map pending_vars; + std::unordered_map> pending_vars; std::unordered_map pending_ops; for (auto &place_pair : member_->vars_) { @@ -737,9 +737,9 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, } void ParallelExecutor::RunOp( - std::unordered_map &pending_vars, + std::unordered_map> &pending_vars, OpHandle *op) const { - std::vector ready_buffer; + std::vector *> ready_buffer; for (auto *var : op->outputs_) { ready_buffer.emplace_back(&pending_vars[var]); } diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h index e4857f0ee..c3cebcfc5 100644 --- a/paddle/fluid/framework/parallel_executor.h +++ b/paddle/fluid/framework/parallel_executor.h @@ -60,8 +60,9 @@ class ParallelExecutor { void BuildNCCLCommunicator() const; - void RunOp(std::unordered_map& pending_vars, - OpHandle* op) const; + void RunOp( + std::unordered_map>& pending_vars, + OpHandle* op) const; void PolishGraphToSupportDataHarzaeds() const; }; diff --git a/paddle/fluid/platform/profiler_test.cc b/paddle/fluid/platform/profiler_test.cc index fc77e0f32..366c82bf9 100644 --- a/paddle/fluid/platform/profiler_test.cc +++ b/paddle/fluid/platform/profiler_test.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/platform/profiler.h" +#include "cuda_runtime.h" #include "gtest/gtest.h" TEST(Event, CpuElapsedTime) { @@ -157,3 +158,11 @@ TEST(RecordEvent, RecordEvent) { // Will remove parsing-related code from test later DisableProfiler(EventSortingKey::kTotal, "/tmp/profiler"); } + +TEST(TMP, stream_wait) { + cudaStream_t stream; + cudaStreamCreate(&stream); + cudaStreamSynchronize(stream); + cudaStreamSynchronize(stream); + cudaStreamSynchronize(stream); +} -- GitLab