From de009152a7c99115cd1681a38a6a3960119e267f Mon Sep 17 00:00:00 2001 From: Wilber Date: Mon, 10 Feb 2020 14:51:19 +0800 Subject: [PATCH] Compile without nccl deps. [2/2] (#22484) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compile without nccl deps. [1/2] Co-authored-by: 石晓伟 <39303645+Shixiaowei02@users.noreply.github.com> --- paddle/fluid/operators/collective/c_gen_nccl_id_op.cc | 4 ++-- .../fluid/operators/collective/c_reducescatter_op.cu.cc | 4 ++-- .../fluid/operators/collective/c_sync_calc_stream_op.cc | 4 ++-- .../fluid/operators/collective/c_sync_comm_stream_op.cc | 6 +++--- .../operators/distributed/brpc/brpc_sendrecvop_utils.cc | 5 +++-- paddle/fluid/operators/distributed/grpc/grpc_serde.cc | 6 +++--- .../operators/distributed/grpc/grpc_variable_response.cc | 2 +- paddle/fluid/operators/distributed/sendrecvop_utils.cc | 2 +- paddle/fluid/operators/distributed_ops/allreduce_op.h | 4 ++-- paddle/fluid/operators/distributed_ops/broadcast_op.cu.cc | 4 ++-- paddle/fluid/platform/collective_helper.cc | 2 +- paddle/fluid/platform/collective_helper.h | 2 +- paddle/fluid/platform/enforce.h | 6 +++--- paddle/fluid/platform/nccl_helper.h | 2 +- paddle/fluid/pybind/pybind.cc | 8 ++++---- 15 files changed, 31 insertions(+), 30 deletions(-) diff --git a/paddle/fluid/operators/collective/c_gen_nccl_id_op.cc b/paddle/fluid/operators/collective/c_gen_nccl_id_op.cc index 61d97aa2e9..e2b09be5a9 100644 --- a/paddle/fluid/operators/collective/c_gen_nccl_id_op.cc +++ b/paddle/fluid/operators/collective/c_gen_nccl_id_op.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32) +#if defined(PADDLE_WITH_NCCL) #include #endif @@ -27,7 +27,7 @@ limitations under the License. */ #include "paddle/fluid/operators/distributed/distributed.h" #include "paddle/fluid/operators/distributed/request_handler_impl.h" -#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32) +#if defined(PADDLE_WITH_NCCL) #include "paddle/fluid/platform/nccl_helper.h" #endif diff --git a/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc b/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc index 0115946141..0d94707513 100644 --- a/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc +++ b/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc @@ -14,7 +14,7 @@ limitations under the License. */ #include "paddle/fluid/operators/collective/c_reducescatter_op.h" -#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32) +#if defined(PADDLE_WITH_NCCL) #include "paddle/fluid/platform/collective_helper.h" #include "paddle/fluid/platform/nccl_helper.h" #endif @@ -26,7 +26,7 @@ template class CReduceScatterOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { -#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32) +#if defined(PADDLE_WITH_NCCL) auto in = ctx.Input("X"); auto out = ctx.Output("Out"); diff --git a/paddle/fluid/operators/collective/c_sync_calc_stream_op.cc b/paddle/fluid/operators/collective/c_sync_calc_stream_op.cc index fe74fc5977..5a7368982d 100644 --- a/paddle/fluid/operators/collective/c_sync_calc_stream_op.cc +++ b/paddle/fluid/operators/collective/c_sync_calc_stream_op.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32) +#if defined(PADDLE_WITH_NCCL) #include #endif @@ -21,7 +21,7 @@ limitations under the License. */ #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" -#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32) +#if defined(PADDLE_WITH_NCCL) #include "paddle/fluid/platform/collective_helper.h" #endif diff --git a/paddle/fluid/operators/collective/c_sync_comm_stream_op.cc b/paddle/fluid/operators/collective/c_sync_comm_stream_op.cc index 320c850703..676c3003d5 100644 --- a/paddle/fluid/operators/collective/c_sync_comm_stream_op.cc +++ b/paddle/fluid/operators/collective/c_sync_comm_stream_op.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32) +#if defined(PADDLE_WITH_NCCL) #include #endif @@ -20,7 +20,7 @@ limitations under the License. */ #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" -#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32) +#if defined(PADDLE_WITH_NCCL) #include "paddle/fluid/platform/collective_helper.h" #include "paddle/fluid/platform/nccl_helper.h" #endif @@ -41,7 +41,7 @@ class CSyncCommStreamOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ(is_gpu_place(place), true, "Sync stream op can run on gpu place only for now."); -#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32) +#if defined(PADDLE_WITH_NCCL) int ring_id = Attr("ring_id"); auto stream = platform::NCCLCommContext::Instance().Get(ring_id, place)->stream(); diff --git a/paddle/fluid/operators/distributed/brpc/brpc_sendrecvop_utils.cc b/paddle/fluid/operators/distributed/brpc/brpc_sendrecvop_utils.cc index 49e048f07a..6ba279b2f6 100644 --- a/paddle/fluid/operators/distributed/brpc/brpc_sendrecvop_utils.cc +++ b/paddle/fluid/operators/distributed/brpc/brpc_sendrecvop_utils.cc @@ -12,11 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#ifdef PADDLE_WITH_CUDA +#ifdef PADDLE_WITH_NCCL #include #endif #include #include +#include #include // NOLINT #include "paddle/fluid/framework/data_type.h" @@ -139,7 +140,7 @@ void SerializeToIOBuf(const std::string& name, framework::Variable* var, } else if (var->IsType()) { request->set_type(::sendrecv::SELECTED_ROWS); payload.reset(new TensorPayload(GetSelectedRowsPayload(var, ctx, request))); -#ifdef PADDLE_WITH_CUDA +#ifdef PADDLE_WITH_NCCL } else if (var->IsType()) { request->set_type(::sendrecv::NCCL_ID); const ncclUniqueId& uid = var->Get(); diff --git a/paddle/fluid/operators/distributed/grpc/grpc_serde.cc b/paddle/fluid/operators/distributed/grpc/grpc_serde.cc index 91c398d0c8..6c89d8fb31 100644 --- a/paddle/fluid/operators/distributed/grpc/grpc_serde.cc +++ b/paddle/fluid/operators/distributed/grpc/grpc_serde.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#ifdef PADDLE_WITH_CUDA +#ifdef PADDLE_WITH_NCCL #include #endif #include @@ -68,7 +68,7 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var, } else if (var->IsType()) { request.set_type(::sendrecv::SELECTED_ROWS); payload = new TensorPayload(GetSelectedRowsPayload(var, ctx, &request)); -#ifdef PADDLE_WITH_CUDA +#ifdef PADDLE_WITH_NCCL } else if (var->IsType()) { request.set_type(::sendrecv::NCCL_ID); #endif @@ -85,7 +85,7 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var, e.WriteRawBytes(std::string(header.data(), header.size())); // NCCLID is copied directly to the message, return bytebuffer // with only one slice if serializing NCCLID. -#ifdef PADDLE_WITH_CUDA +#ifdef PADDLE_WITH_NCCL if (var->IsType()) { e.WriteVarlengthBeginning(VarMsg::kSerializedFieldNumber, NCCL_UNIQUE_ID_BYTES); diff --git a/paddle/fluid/operators/distributed/grpc/grpc_variable_response.cc b/paddle/fluid/operators/distributed/grpc/grpc_variable_response.cc index 87e83ca53b..e46d2fbe01 100644 --- a/paddle/fluid/operators/distributed/grpc/grpc_variable_response.cc +++ b/paddle/fluid/operators/distributed/grpc/grpc_variable_response.cc @@ -15,7 +15,7 @@ #include #include #include -#ifdef PADDLE_WITH_CUDA +#ifdef PADDLE_WITH_NCCL #include #endif diff --git a/paddle/fluid/operators/distributed/sendrecvop_utils.cc b/paddle/fluid/operators/distributed/sendrecvop_utils.cc index 548277139e..7ab30c114d 100644 --- a/paddle/fluid/operators/distributed/sendrecvop_utils.cc +++ b/paddle/fluid/operators/distributed/sendrecvop_utils.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#ifdef PADDLE_WITH_CUDA +#ifdef PADDLE_WITH_NCCL #include #endif #include diff --git a/paddle/fluid/operators/distributed_ops/allreduce_op.h b/paddle/fluid/operators/distributed_ops/allreduce_op.h index 0275f6a9cf..89d02eb0a0 100644 --- a/paddle/fluid/operators/distributed_ops/allreduce_op.h +++ b/paddle/fluid/operators/distributed_ops/allreduce_op.h @@ -21,7 +21,7 @@ limitations under the License. */ #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" -#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32) +#if defined(PADDLE_WITH_NCCL) #include "paddle/fluid/platform/nccl_helper.h" #endif @@ -35,7 +35,7 @@ class AllReduceOpKernel : public framework::OpKernel { auto place = ctx.GetPlace(); PADDLE_ENFORCE(is_gpu_place(place), "AllReduce op can run on gpu place only for now."); -#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32) +#if defined(PADDLE_WITH_NCCL) auto& dev_ctx = ctx.template device_context(); auto in = ctx.Input("X"); auto out = ctx.Output("Out"); diff --git a/paddle/fluid/operators/distributed_ops/broadcast_op.cu.cc b/paddle/fluid/operators/distributed_ops/broadcast_op.cu.cc index 5b1f917cc5..2d87b48ba8 100644 --- a/paddle/fluid/operators/distributed_ops/broadcast_op.cu.cc +++ b/paddle/fluid/operators/distributed_ops/broadcast_op.cu.cc @@ -20,7 +20,7 @@ limitations under the License. */ #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" -#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32) +#if defined(PADDLE_WITH_NCCL) #include "paddle/fluid/platform/nccl_helper.h" #endif @@ -37,7 +37,7 @@ class NCCLBroadcastOpKernel : public framework::OpKernel { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), "The place of ExecutionContext should be CUDAPlace."); -#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32) +#if defined(PADDLE_WITH_NCCL) int dev_id = boost::get(ctx.GetPlace()).device; int root_dev_id = ctx.Attr("root"); diff --git a/paddle/fluid/platform/collective_helper.cc b/paddle/fluid/platform/collective_helper.cc index 2025e5346f..7e1ad018f3 100644 --- a/paddle/fluid/platform/collective_helper.cc +++ b/paddle/fluid/platform/collective_helper.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32) +#if defined(PADDLE_WITH_NCCL) #include "paddle/fluid/platform/collective_helper.h" #include diff --git a/paddle/fluid/platform/collective_helper.h b/paddle/fluid/platform/collective_helper.h index 747e840037..a2b1e06de1 100644 --- a/paddle/fluid/platform/collective_helper.h +++ b/paddle/fluid/platform/collective_helper.h @@ -14,7 +14,7 @@ #pragma once -#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32) +#if defined(PADDLE_WITH_NCCL) #include #include #include diff --git a/paddle/fluid/platform/enforce.h b/paddle/fluid/platform/enforce.h index c08ec4fe41..e2bd12a26a 100644 --- a/paddle/fluid/platform/enforce.h +++ b/paddle/fluid/platform/enforce.h @@ -48,7 +48,7 @@ limitations under the License. */ #include "paddle/fluid/platform/dynload/cublas.h" #include "paddle/fluid/platform/dynload/cudnn.h" #include "paddle/fluid/platform/dynload/curand.h" -#if !defined(__APPLE__) && !defined(_WIN32) +#if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL) #include "paddle/fluid/platform/dynload/nccl.h" #endif // __APPLE__ #endif // PADDLE_WITH_CUDA @@ -462,7 +462,7 @@ inline void throw_on_error(cublasStatus_t stat, const std::string& msg) { #endif } -#if !defined(__APPLE__) && !defined(_WIN32) +#if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL) inline bool is_error(ncclResult_t nccl_result) { return nccl_result != ncclSuccess; } @@ -502,7 +502,7 @@ DEFINE_CUDA_STATUS_TYPE(curandStatus_t, CURAND_STATUS_SUCCESS); DEFINE_CUDA_STATUS_TYPE(cudnnStatus_t, CUDNN_STATUS_SUCCESS); DEFINE_CUDA_STATUS_TYPE(cublasStatus_t, CUBLAS_STATUS_SUCCESS); -#if !defined(__APPLE__) && !defined(_WIN32) +#if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL) DEFINE_CUDA_STATUS_TYPE(ncclResult_t, ncclSuccess); #endif diff --git a/paddle/fluid/platform/nccl_helper.h b/paddle/fluid/platform/nccl_helper.h index b89d08ba1a..0d04b997cb 100644 --- a/paddle/fluid/platform/nccl_helper.h +++ b/paddle/fluid/platform/nccl_helper.h @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef _WIN32 +#ifdef PADDLE_WITH_NCCL #pragma once #include diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 901843b6fc..33fb6d0484 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -68,7 +68,7 @@ limitations under the License. */ #include "paddle/fluid/pybind/ir.h" #include "paddle/fluid/pybind/pybind_boost_headers.h" -#ifndef _WIN32 +#ifdef PADDLE_WITH_NCCL #include "paddle/fluid/pybind/nccl_wrapper_py.h" #endif #include "paddle/fluid/framework/data_type.h" @@ -78,7 +78,7 @@ limitations under the License. */ #include "paddle/fluid/pybind/tensor_py.h" #include "paddle/fluid/string/to_string.h" #ifdef PADDLE_WITH_CUDA -#ifndef _WIN32 +#ifdef PADDLE_WITH_NCCL #include "paddle/fluid/operators/nccl/nccl_gpu_common.h" #endif #include "paddle/fluid/platform/cuda_profiler.h" @@ -926,7 +926,7 @@ All parameter, weight, gradient are variables in Paddle. .def("get_lod_tensor_array", [](Variable &self) { return self.GetMutable(); }, py::return_value_policy::reference) -#if (defined(PADDLE_WITH_CUDA) && !defined(_WIN32)) +#if (defined(PADDLE_WITH_NCCL)) .def("get_communicator", [](Variable &self) -> platform::Communicator * { return self.GetMutable(); @@ -1174,7 +1174,7 @@ All parameter, weight, gradient are variables in Paddle. #endif });; // clang-format on -#if (defined(PADDLE_WITH_CUDA) && !defined(_WIN32)) +#if defined(PADDLE_WITH_NCCL) py::class_(m, "Communicator").def(py::init<>()); #endif py::class_(m, "CUDAPlace", R"DOC( -- GitLab