diff --git a/paddle/fluid/framework/fleet/CMakeLists.txt b/paddle/fluid/framework/fleet/CMakeLists.txt index 7d363d1afdc8ac72741e6e4fea02fb96fe9347fa..12fc454fd262cdcf30f64757a6199c6a9331e1a2 100644 --- a/paddle/fluid/framework/fleet/CMakeLists.txt +++ b/paddle/fluid/framework/fleet/CMakeLists.txt @@ -3,3 +3,5 @@ if(WITH_PSLIB) else() cc_library(fleet_wrapper SRCS fleet_wrapper.cc DEPS framework_proto variable_helper scope) endif(WITH_PSLIB) + +cc_library(nccl_wrapper SRCS nccl_wrapper.cc DEPS framework_proto variable_helper scope) diff --git a/paddle/fluid/framework/fleet/nccl_wrapper.cc b/paddle/fluid/framework/fleet/nccl_wrapper.cc new file mode 100644 index 0000000000000000000000000000000000000000..051f4b013c6eeb55f733910c14cae178f2f8b416 --- /dev/null +++ b/paddle/fluid/framework/fleet/nccl_wrapper.cc @@ -0,0 +1,78 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/framework/fleet/nccl_wrapper.h" +#include +#include "paddle/fluid/framework/data_feed.h" +#include "paddle/fluid/framework/scope.h" + +namespace paddle { +namespace framework { + +std::shared_ptr NCCLWrapper::s_instance_ = NULL; +bool NCCLWrapper::is_initialized_ = false; + +void NCCLWrapper::InitNCCL() { +#ifdef PADDLE_WITH_CUDA + PADDLE_ENFORCE(platform::dynload::ncclCommInitRank( + &(nccl_info_.comm_), nccl_info_.global_ranks_, nccl_info_.nccl_id_, + nccl_info_.my_global_rank_)); +#endif + return; +} + +void NCCLWrapper::SetNCCLId(const NCCLInfo& nccl_info) { +#ifdef PADDLE_WITH_CUDA + nccl_info_.nccl_id_ = nccl_info.nccl_id_; +#endif + return; +} + +NCCLInfo NCCLWrapper::GetNCCLId() { +#ifdef PADDLE_WITH_CUDA + PADDLE_ENFORCE(platform::dynload::ncclGetUniqueId(&(nccl_info_.nccl_id_))); +#endif + return nccl_info_; +} + +void NCCLWrapper::SetRankInfo(const int local_rank, const int global_rank, + const int ranks) { +#ifdef PADDLE_WITH_CUDA + nccl_info_.local_rank_ = local_rank; + nccl_info_.my_global_rank_ = global_rank; + nccl_info_.global_ranks_ = ranks; + PADDLE_ENFORCE(cudaSetDevice(local_rank)); + PADDLE_ENFORCE(cudaStreamCreate(&(nccl_info_.stream_))); +#endif + return; +} + +void NCCLWrapper::SyncVar(const int root_rank, const Scope& scope, + const std::vector& var_names) { +#ifdef PADDLE_WITH_CUDA + for (auto& name : var_names) { + auto var = scope.FindVar(name); + LoDTensor* tensor = var->GetMutable(); + int32_t total_size = tensor->numel(); + PADDLE_ENFORCE(platform::dynload::ncclBcast( + reinterpret_cast(tensor->data()), total_size, ncclFloat, + root_rank, nccl_info_.comm_, nccl_info_.stream_)); + cudaStreamSynchronize(nccl_info_.stream_); + } +#endif + return; +} + +} // end namespace framework +} // end namespace paddle diff --git a/paddle/fluid/framework/fleet/nccl_wrapper.h b/paddle/fluid/framework/fleet/nccl_wrapper.h new file mode 100644 index 0000000000000000000000000000000000000000..f29aa225419f4f62c302c8a93c8a9b89c29d15f5 --- /dev/null +++ b/paddle/fluid/framework/fleet/nccl_wrapper.h @@ -0,0 +1,83 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/framework/variable_helper.h" +#ifdef PADDLE_WITH_CUDA +#include "paddle/fluid/platform/dynload/nccl.h" +#endif +#include "paddle/fluid/platform/macros.h" // for DISABLE_COPY_AND_ASSIGN + +namespace paddle { +namespace framework { + +class NCCLInfo { + public: + NCCLInfo() {} + virtual ~NCCLInfo() {} + + public: + int local_rank_; + int global_ranks_; + int my_global_rank_; +#ifdef PADDLE_WITH_CUDA + ncclUniqueId nccl_id_; + ncclComm_t comm_; + cudaStream_t stream_; +#endif +}; + +class NCCLWrapper { + public: + virtual ~NCCLWrapper() {} + NCCLWrapper() {} + + void InitNCCL(); + void SetNCCLId(const NCCLInfo& nccl_info); + NCCLInfo GetNCCLId(); + void SetRankInfo(const int local_rank, const int global_rank, + const int ranks); + void SyncVar(const int root_rank, const Scope& scope, + const std::vector& var_names); + + static std::shared_ptr GetInstance() { + if (NULL == s_instance_) { + s_instance_.reset(new paddle::framework::NCCLWrapper()); + } + return s_instance_; + } + + public: + NCCLInfo nccl_info_; + + private: + static std::shared_ptr s_instance_; + + protected: + static bool is_initialized_; + DISABLE_COPY_AND_ASSIGN(NCCLWrapper); +}; + +} // end namespace framework +} // end namespace paddle diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index 16365c1fd0b0adb914cdfd08e3f6542fca952e06..900c1a0ca690378ab89568e7de503c660cd85b79 100644 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -1,11 +1,11 @@ -set(PYBIND_DEPS pybind python proto_desc memory executor async_executor fleet_wrapper prune +set(PYBIND_DEPS pybind python proto_desc memory executor async_executor fleet_wrapper nccl_wrapper prune feed_fetch_method pass_builder parallel_executor profiler layer scope_pool tracer analysis_predictor imperative_profiler nccl_context) if(WITH_PYTHON) list(APPEND PYBIND_DEPS py_func_op) endif() -set(PYBIND_SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc reader_py.cc async_executor_py.cc fleet_wrapper_py.cc data_set_py.cc imperative.cc ir.cc inference_api.cc) +set(PYBIND_SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc reader_py.cc async_executor_py.cc fleet_wrapper_py.cc nccl_wrapper_py.cc data_set_py.cc imperative.cc ir.cc inference_api.cc) if(WITH_PYTHON) if(WITH_AMD_GPU) diff --git a/paddle/fluid/pybind/fleet_wrapper_py.cc b/paddle/fluid/pybind/fleet_wrapper_py.cc index 77f15db8d68da131c892b1a65946c1994b90fd04..2f6a7d2480aedd5bd37d0dbd5ccf64447e4a21ff 100644 --- a/paddle/fluid/pybind/fleet_wrapper_py.cc +++ b/paddle/fluid/pybind/fleet_wrapper_py.cc @@ -36,7 +36,6 @@ limitations under the License. */ #include "paddle/fluid/pybind/fleet_wrapper_py.h" namespace py = pybind11; -namespace pd = paddle::framework; namespace paddle { namespace pybind { diff --git a/paddle/fluid/pybind/nccl_wrapper_py.cc b/paddle/fluid/pybind/nccl_wrapper_py.cc new file mode 100644 index 0000000000000000000000000000000000000000..bbba03f6660fe9ddb14764709ea81a9a82b1b386 --- /dev/null +++ b/paddle/fluid/pybind/nccl_wrapper_py.cc @@ -0,0 +1,53 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#include + +#ifdef _POSIX_C_SOURCE +#undef _POSIX_C_SOURCE +#endif + +#ifdef _XOPEN_SOURCE +#undef _XOPEN_SOURCE +#endif + +#include +#include + +#include "google/protobuf/io/zero_copy_stream_impl.h" +#include "google/protobuf/text_format.h" +#include "paddle/fluid/framework/async_executor.h" +#include "paddle/fluid/framework/data_feed.h" +#include "paddle/fluid/framework/data_feed.pb.h" +#include "paddle/fluid/framework/fleet/nccl_wrapper.h" +#include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/inference/io.h" +#include "paddle/fluid/platform/place.h" +#include "paddle/fluid/platform/variant.h" +#include "paddle/fluid/pybind/nccl_wrapper_py.h" + +namespace py = pybind11; +namespace pd = paddle::framework; + +namespace paddle { +namespace pybind { +void BindNCCLWrapper(py::module* m) { + py::class_(*m, "Nccl") + .def(py::init()) + .def("init_nccl", &framework::NCCLWrapper::InitNCCL) + .def("set_nccl_id", &framework::NCCLWrapper::SetNCCLId) + .def("set_rank_info", &framework::NCCLWrapper::SetRankInfo) + .def("sync_var", &framework::NCCLWrapper::SyncVar); +} // end NCCLWrapper +} // end namespace pybind +} // end namespace paddle diff --git a/paddle/fluid/pybind/nccl_wrapper_py.h b/paddle/fluid/pybind/nccl_wrapper_py.h new file mode 100644 index 0000000000000000000000000000000000000000..683eb4d61e00abf4e7192efb1d102ff73cb9e02e --- /dev/null +++ b/paddle/fluid/pybind/nccl_wrapper_py.h @@ -0,0 +1,28 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "pybind11/pybind11.h" +#include "pybind11/stl.h" + +namespace py = pybind11; + +namespace paddle { +namespace pybind { + +void BindNCCLWrapper(py::module* m); + +} // namespace pybind +} // namespace paddle diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index a8a2a94d473b18fdcd78771063ef4565c7fe0e42..6a5f5f60bca1974635730ce746869b95cf4e80ed 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -58,6 +58,7 @@ limitations under the License. */ #include "paddle/fluid/pybind/imperative.h" #include "paddle/fluid/pybind/inference_api.h" #include "paddle/fluid/pybind/ir.h" +#include "paddle/fluid/pybind/nccl_wrapper_py.h" #include "paddle/fluid/pybind/protobuf.h" #include "paddle/fluid/pybind/pybind.h" // NOLINT #include "paddle/fluid/pybind/reader_py.h" @@ -1405,6 +1406,7 @@ All parameter, weight, gradient are variables in Paddle. BindRecordIOWriter(&m); BindAsyncExecutor(&m); BindFleetWrapper(&m); + BindNCCLWrapper(&m); BindGraph(&m); BindNode(&m); BindInferenceApi(&m);