diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc
index 78963fd5684e5083d03690c1a42d6cdd0ed3f6f2..dc17f6a21fab23e77de30f473afd128b8748c828 100644
--- a/paddle/fluid/framework/parallel_executor.cc
+++ b/paddle/fluid/framework/parallel_executor.cc
@@ -27,15 +27,16 @@ namespace framework {
 class ParallelExecutorPrivate {
  public:
   explicit ParallelExecutorPrivate(const std::vector<platform::Place> &places)
-      : places_(places), fetch_dev_ctxs_(places) {}
+      : places_(places) {}
 
   std::vector<platform::Place> places_;
-  platform::DeviceContextPool fetch_dev_ctxs_;
   std::vector<Scope *> local_scopes_;
   Scope *global_scope_;
 
+  std::unique_ptr<details::SSAGraphExecutor> executor_;
+#ifdef PADDLE_WITH_CUDA
   std::unique_ptr<platform::NCCLContextMap> nccl_ctxs_;
-  std::unique_ptr<details::SSAGraphExecutor> executor_;
+#endif
 };
 
 ParallelExecutor::ParallelExecutor(
@@ -54,8 +55,10 @@ ParallelExecutor::ParallelExecutor(
     member_->local_scopes_.push_back(&scope->NewScope());
   }
 
-  // Bcast Parameters to all GPUs
-  BuildNCCLCommunicator();
+// Bcast Parameters to all GPUs
+#ifdef PADDLE_WITH_CUDA
+  member_->nccl_ctxs_.reset(new platform::NCCLContextMap(member_->places_));
+#endif
   if (platform::is_gpu_place(places[0]) &&
       member_->local_scopes_.size() != 1) {  // Is CUDA
     BCastParamsToGPUs(startup_program);
@@ -123,12 +126,6 @@ void ParallelExecutor::BCastParamsToGPUs(
 #endif
 }
 
-void ParallelExecutor::BuildNCCLCommunicator() const {
-#ifdef PADDLE_WITH_CUDA
-  member_->nccl_ctxs_.reset(new platform::NCCLContextMap(member_->places_));
-#endif
-}
-
 void ParallelExecutor::Run(const std::vector<std::string> &fetch_tensors,
                            const std::string &fetched_var_name) {
   auto fetch_data = member_->executor_->Run(fetch_tensors);
diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h
index 39a1c51b9e76ee8d1244e44340f39868ebc4bb74..14489a18c3afb67e663ffe568df54375bbfa0843 100644
--- a/paddle/fluid/framework/parallel_executor.h
+++ b/paddle/fluid/framework/parallel_executor.h
@@ -31,6 +31,8 @@ namespace framework {
 class ParallelExecutorPrivate;
 
 class ParallelExecutor {
+  DISABLE_COPY_AND_ASSIGN(ParallelExecutor);
+
  public:
   explicit ParallelExecutor(size_t num_threads,
                             const std::vector<platform::Place>& places,
@@ -46,8 +48,6 @@ class ParallelExecutor {
   ParallelExecutorPrivate* member_;
 
   void BCastParamsToGPUs(const ProgramDesc& startup_program) const;
-
-  void BuildNCCLCommunicator() const;
 };
 
 }  // namespace framework
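
Note (not part of the diff): the self-contained sketch below illustrates the two patterns this change relies on: guarding a CUDA-only member and its initialization with #ifdef PADDLE_WITH_CUDA so CPU-only builds still compile, and disabling copy/assignment on a class that owns a raw pimpl pointer (member_) to avoid a double delete. The names DISABLE_COPY_AND_ASSIGN_SKETCH, Executor, ExecutorPrivate, and NcclCtxMapStub are stand-ins invented for the example, and the macro body is only an approximation of what Paddle's DISABLE_COPY_AND_ASSIGN expands to.

#include <memory>
#include <vector>

// Approximation of a copy-disabling macro; Paddle's real DISABLE_COPY_AND_ASSIGN
// may differ in detail.
#define DISABLE_COPY_AND_ASSIGN_SKETCH(classname) \
  classname(const classname &) = delete;          \
  classname &operator=(const classname &) = delete

struct NcclCtxMapStub {};  // stand-in for platform::NCCLContextMap

class ExecutorPrivate {
 public:
  std::vector<int> places_;  // stand-in for std::vector<platform::Place>
#ifdef PADDLE_WITH_CUDA
  // Only exists in CUDA builds, mirroring nccl_ctxs_ in the diff.
  std::unique_ptr<NcclCtxMapStub> nccl_ctxs_;
#endif
};

class Executor {
  // Copying Executor would double-delete member_, so forbid copy and
  // assignment, as the diff does for ParallelExecutor.
  DISABLE_COPY_AND_ASSIGN_SKETCH(Executor);

 public:
  Executor() : member_(new ExecutorPrivate()) {
#ifdef PADDLE_WITH_CUDA
    // Create the NCCL contexts inline in the constructor, as the diff now
    // does, instead of calling a separate BuildNCCLCommunicator().
    member_->nccl_ctxs_.reset(new NcclCtxMapStub());
#endif
  }
  ~Executor() { delete member_; }

 private:
  ExecutorPrivate *member_;
};

int main() {
  Executor exec;  // builds with or without -DPADDLE_WITH_CUDA
  return 0;
}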