diff --git a/paddle/fluid/framework/details/var_handle.h b/paddle/fluid/framework/details/var_handle.h index d8c2bc40b9458a1d5a7dd8a32277d04f69295f09..a1f458c660ce9f73bc9ac2ed194091ad0b8f8400 100644 --- a/paddle/fluid/framework/details/var_handle.h +++ b/paddle/fluid/framework/details/var_handle.h @@ -49,6 +49,8 @@ struct VarHandleBase { void AddOutput(OpHandleBase* out, ir::Node* node) { if (pending_ops_.find(out) == pending_ops_.end()) { + PADDLE_ENFORCE(out != nullptr, "The output of %s should not be nullptr", + this->Node()->Name()); pending_ops_.insert(out); node_->outputs.push_back(node); } diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index e8adabd26540754d5b9206294eeeed79757220bf..093108cb54779eb0cf35dd83e63eb0b1abb66dcd 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -299,6 +299,12 @@ void ParallelExecutor::FeedAndSplitTensorIntoLocalScopes( } ParallelExecutor::~ParallelExecutor() { + const auto dev_ctxs = + platform::DeviceContextPool::Instance().GetAllDeviceContexts(); + for (auto &dev_ctx : dev_ctxs) { + dev_ctx->Wait(); + } + if (member_->own_local_scope_) { for (size_t i = 1; i < member_->local_scopes_.size(); ++i) { Scope *local_scope = member_->local_scopes_[i]; diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index 4286242b2a93d7046e7349a99d1d1a09dca09113..7d1cf57253819b34fedfb292ad1635650f53f20f 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -35,6 +35,16 @@ platform::DeviceContext* DeviceContextPool::Get(const platform::Place& place) { return it->second.get(); } +const std::vector +DeviceContextPool::GetAllDeviceContexts() const { + std::vector all_device_ctx; + all_device_ctx.reserve(device_contexts_.size()); + for (auto& dev_ctx : device_contexts_) { + all_device_ctx.emplace_back(dev_ctx.second.get()); + } + return all_device_ctx; +} + DeviceContextPool::DeviceContextPool( const std::vector& places) { PADDLE_ENFORCE_GT(places.size(), 0); diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index e1ff1a1746952de5aa4bead361b50af4e99bc9bc..999bbe00f1659881050cb0dc89570b74b201aca7 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -217,6 +217,9 @@ class DeviceContextPool { /*! \brief Return handle of single device context. */ platform::DeviceContext* Get(const platform::Place& place); + /*! \brief Return all the device contexts. */ + const std::vector GetAllDeviceContexts() const; + template const typename DefaultDeviceContextType::TYPE* GetByPlace( const Place& place) {