diff --git a/paddle/fluid/operators/collective/c_gen_nccl_id_op.cc b/paddle/fluid/operators/collective/c_gen_nccl_id_op.cc
index 61d97aa2e9fbb749614b527acb43cf82ab79b4d3..e2b09be5a9dfff0111ab80d89bdd76b99517738f 100644
--- a/paddle/fluid/operators/collective/c_gen_nccl_id_op.cc
+++ b/paddle/fluid/operators/collective/c_gen_nccl_id_op.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
+#if defined(PADDLE_WITH_NCCL)
 #include <nccl.h>
 #endif
@@ -27,7 +27,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/distributed/distributed.h"
 #include "paddle/fluid/operators/distributed/request_handler_impl.h"
 
-#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
+#if defined(PADDLE_WITH_NCCL)
 #include "paddle/fluid/platform/nccl_helper.h"
 #endif
diff --git a/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc b/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc
index 0115946141276845a44b750f13a17ccf50506d03..0d94707513b946427173a41edd1decd3f2709cae 100644
--- a/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc
+++ b/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc
@@ -14,7 +14,7 @@ limitations under the License. */
 
 #include "paddle/fluid/operators/collective/c_reducescatter_op.h"
 
-#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
+#if defined(PADDLE_WITH_NCCL)
 #include "paddle/fluid/platform/collective_helper.h"
 #include "paddle/fluid/platform/nccl_helper.h"
 #endif
@@ -26,7 +26,7 @@ template <typename T>
 class CReduceScatterOpCUDAKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
+#if defined(PADDLE_WITH_NCCL)
     auto in = ctx.Input("X");
     auto out = ctx.Output("Out");
diff --git a/paddle/fluid/operators/collective/c_sync_calc_stream_op.cc b/paddle/fluid/operators/collective/c_sync_calc_stream_op.cc
index fe74fc597732d7fe1034ad95cc7f8f8e8109f302..5a7368982dd70e6c70a9df05d38c7016242059ed 100644
--- a/paddle/fluid/operators/collective/c_sync_calc_stream_op.cc
+++ b/paddle/fluid/operators/collective/c_sync_calc_stream_op.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
+#if defined(PADDLE_WITH_NCCL)
 #include
 #endif
@@ -21,7 +21,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/op_registry.h"
 
-#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
+#if defined(PADDLE_WITH_NCCL)
 #include "paddle/fluid/platform/collective_helper.h"
 #endif
diff --git a/paddle/fluid/operators/collective/c_sync_comm_stream_op.cc b/paddle/fluid/operators/collective/c_sync_comm_stream_op.cc
index 320c85070385de24461e2121af3d7cfa2c8a6f36..676c3003d58b5ce5a0e23ffcb4aaf67f324b7c95 100644
--- a/paddle/fluid/operators/collective/c_sync_comm_stream_op.cc
+++ b/paddle/fluid/operators/collective/c_sync_comm_stream_op.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
+#if defined(PADDLE_WITH_NCCL)
 #include
 #endif
@@ -20,7 +20,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/op_registry.h"
 
-#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
+#if defined(PADDLE_WITH_NCCL)
 #include "paddle/fluid/platform/collective_helper.h"
 #include "paddle/fluid/platform/nccl_helper.h"
 #endif
@@ -41,7 +41,7 @@ class CSyncCommStreamOp : public framework::OperatorBase {
     PADDLE_ENFORCE_EQ(is_gpu_place(place), true,
                       "Sync stream op can run on gpu place only for now.");
 
-#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
+#if defined(PADDLE_WITH_NCCL)
     int ring_id = Attr<int>("ring_id");
     auto stream =
         platform::NCCLCommContext::Instance().Get(ring_id, place)->stream();
diff --git a/paddle/fluid/operators/distributed/brpc/brpc_sendrecvop_utils.cc b/paddle/fluid/operators/distributed/brpc/brpc_sendrecvop_utils.cc
index 49e048f07a2396824a51db5c6012206bd8848e82..6ba279b2f69b27664d1bbfd1245ef3cdd5c13948 100644
--- a/paddle/fluid/operators/distributed/brpc/brpc_sendrecvop_utils.cc
+++ b/paddle/fluid/operators/distributed/brpc/brpc_sendrecvop_utils.cc
@@ -12,11 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#ifdef PADDLE_WITH_CUDA
+#ifdef PADDLE_WITH_NCCL
 #include <nccl.h>
 #endif
 #include
 #include
+#include
 #include  // NOLINT
 
 #include "paddle/fluid/framework/data_type.h"
@@ -139,7 +140,7 @@ void SerializeToIOBuf(const std::string& name, framework::Variable* var,
   } else if (var->IsType<framework::SelectedRows>()) {
     request->set_type(::sendrecv::SELECTED_ROWS);
     payload.reset(new TensorPayload(GetSelectedRowsPayload(var, ctx, request)));
-#ifdef PADDLE_WITH_CUDA
+#ifdef PADDLE_WITH_NCCL
   } else if (var->IsType<ncclUniqueId>()) {
     request->set_type(::sendrecv::NCCL_ID);
     const ncclUniqueId& uid = var->Get<ncclUniqueId>();
diff --git a/paddle/fluid/operators/distributed/grpc/grpc_serde.cc b/paddle/fluid/operators/distributed/grpc/grpc_serde.cc
index 91c398d0c84db1fc67740cd2368d178610ef0841..6c89d8fb3163115aff484e38fc1e4ff7554c5393 100644
--- a/paddle/fluid/operators/distributed/grpc/grpc_serde.cc
+++ b/paddle/fluid/operators/distributed/grpc/grpc_serde.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#ifdef PADDLE_WITH_CUDA
+#ifdef PADDLE_WITH_NCCL
 #include <nccl.h>
 #endif
 #include
@@ -68,7 +68,7 @@
   } else if (var->IsType<framework::SelectedRows>()) {
     request.set_type(::sendrecv::SELECTED_ROWS);
     payload = new TensorPayload(GetSelectedRowsPayload(var, ctx, &request));
-#ifdef PADDLE_WITH_CUDA
+#ifdef PADDLE_WITH_NCCL
   } else if (var->IsType<ncclUniqueId>()) {
     request.set_type(::sendrecv::NCCL_ID);
 #endif
@@ -85,7 +85,7 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var,
   e.WriteRawBytes(std::string(header.data(), header.size()));
   // NCCLID is copied directly to the message, return bytebuffer
   // with only one slice if serializing NCCLID.
-#ifdef PADDLE_WITH_CUDA
+#ifdef PADDLE_WITH_NCCL
   if (var->IsType<ncclUniqueId>()) {
     e.WriteVarlengthBeginning(VarMsg::kSerializedFieldNumber,
                               NCCL_UNIQUE_ID_BYTES);
diff --git a/paddle/fluid/operators/distributed/grpc/grpc_variable_response.cc b/paddle/fluid/operators/distributed/grpc/grpc_variable_response.cc
index 87e83ca53bf13ac4a015d56572ba073e51722c3e..e46d2fbe01c16a8b0cbf402f6ffff4907adf7356 100644
--- a/paddle/fluid/operators/distributed/grpc/grpc_variable_response.cc
+++ b/paddle/fluid/operators/distributed/grpc/grpc_variable_response.cc
@@ -15,7 +15,7 @@
 #include
 #include
 #include
-#ifdef PADDLE_WITH_CUDA
+#ifdef PADDLE_WITH_NCCL
 #include <nccl.h>
 #endif
diff --git a/paddle/fluid/operators/distributed/sendrecvop_utils.cc b/paddle/fluid/operators/distributed/sendrecvop_utils.cc
index 548277139eb856e2ebd2cac2ef33154e767aa570..7ab30c114d0511ae49c2e02590f93d837bf02789 100644
--- a/paddle/fluid/operators/distributed/sendrecvop_utils.cc
+++ b/paddle/fluid/operators/distributed/sendrecvop_utils.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#ifdef PADDLE_WITH_CUDA
+#ifdef PADDLE_WITH_NCCL
 #include <nccl.h>
 #endif
 #include
diff --git a/paddle/fluid/operators/distributed_ops/allreduce_op.h b/paddle/fluid/operators/distributed_ops/allreduce_op.h
index 0275f6a9cf3aa8bab89b3d8c599b304702f590a8..89d02eb0a066c08b8979ce47c7e6e6b5b149c1c5 100644
--- a/paddle/fluid/operators/distributed_ops/allreduce_op.h
+++ b/paddle/fluid/operators/distributed_ops/allreduce_op.h
@@ -21,7 +21,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/op_registry.h"
 
-#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
+#if defined(PADDLE_WITH_NCCL)
 #include "paddle/fluid/platform/nccl_helper.h"
 #endif
@@ -35,7 +35,7 @@ class AllReduceOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
     auto place = ctx.GetPlace();
     PADDLE_ENFORCE(is_gpu_place(place),
                    "AllReduce op can run on gpu place only for now.");
-#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
+#if defined(PADDLE_WITH_NCCL)
     auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
     auto in = ctx.Input("X");
     auto out = ctx.Output("Out");
diff --git a/paddle/fluid/operators/distributed_ops/broadcast_op.cu.cc b/paddle/fluid/operators/distributed_ops/broadcast_op.cu.cc
index 5b1f917cc52fd49bfdce8f2b18989f3178a14be3..2d87b48ba80f279abf9a4c4c40691c5a285db38a 100644
--- a/paddle/fluid/operators/distributed_ops/broadcast_op.cu.cc
+++ b/paddle/fluid/operators/distributed_ops/broadcast_op.cu.cc
@@ -20,7 +20,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/op_registry.h"
 
-#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
+#if defined(PADDLE_WITH_NCCL)
 #include "paddle/fluid/platform/nccl_helper.h"
 #endif
@@ -37,7 +37,7 @@ class NCCLBroadcastOpKernel : public framework::OpKernel<T> {
     PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
                    "The place of ExecutionContext should be CUDAPlace.");
 
-#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
+#if defined(PADDLE_WITH_NCCL)
     int dev_id = boost::get<platform::CUDAPlace>(ctx.GetPlace()).device;
     int root_dev_id = ctx.Attr<int>("root");
diff --git a/paddle/fluid/platform/collective_helper.cc b/paddle/fluid/platform/collective_helper.cc
index 2025e5346f66565e9dd9fccc5a4f3051fb8467b2..7e1ad018f3c8fe80d5dcf6e9f95a2060841eb606 100644
--- a/paddle/fluid/platform/collective_helper.cc
+++ b/paddle/fluid/platform/collective_helper.cc
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
+#if defined(PADDLE_WITH_NCCL)
 #include "paddle/fluid/platform/collective_helper.h"
 
 #include
diff --git a/paddle/fluid/platform/collective_helper.h b/paddle/fluid/platform/collective_helper.h
index 747e840037ee96eba0abc8e9355c6e2a31a57338..a2b1e06de1b5e87341b8a7438bd450c9b6c564f7 100644
--- a/paddle/fluid/platform/collective_helper.h
+++ b/paddle/fluid/platform/collective_helper.h
@@ -14,7 +14,7 @@
 
 #pragma once
 
-#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
+#if defined(PADDLE_WITH_NCCL)
 #include
 #include
 #include
diff --git a/paddle/fluid/platform/enforce.h b/paddle/fluid/platform/enforce.h
index c08ec4fe413f9ccf1164751a52fd0bc297e941de..e2bd12a26acd2d4da5af3d78ab04647cd462f822 100644
--- a/paddle/fluid/platform/enforce.h
+++ b/paddle/fluid/platform/enforce.h
@@ -48,7 +48,7 @@ limitations under the License. */
 #include "paddle/fluid/platform/dynload/cublas.h"
 #include "paddle/fluid/platform/dynload/cudnn.h"
 #include "paddle/fluid/platform/dynload/curand.h"
-#if !defined(__APPLE__) && !defined(_WIN32)
+#if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL)
 #include "paddle/fluid/platform/dynload/nccl.h"
 #endif  // __APPLE__
 #endif  // PADDLE_WITH_CUDA
@@ -462,7 +462,7 @@ inline void throw_on_error(cublasStatus_t stat, const std::string& msg) {
 #endif
 }
 
-#if !defined(__APPLE__) && !defined(_WIN32)
+#if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL)
 inline bool is_error(ncclResult_t nccl_result) {
   return nccl_result != ncclSuccess;
 }
@@ -502,7 +502,7 @@ DEFINE_CUDA_STATUS_TYPE(curandStatus_t, CURAND_STATUS_SUCCESS);
 DEFINE_CUDA_STATUS_TYPE(cudnnStatus_t, CUDNN_STATUS_SUCCESS);
 DEFINE_CUDA_STATUS_TYPE(cublasStatus_t, CUBLAS_STATUS_SUCCESS);
 
-#if !defined(__APPLE__) && !defined(_WIN32)
+#if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL)
 DEFINE_CUDA_STATUS_TYPE(ncclResult_t, ncclSuccess);
 #endif
diff --git a/paddle/fluid/platform/nccl_helper.h b/paddle/fluid/platform/nccl_helper.h
index b89d08ba1ae81cecc03d0fca2961b32008e2bf64..0d04b997cbb769e3c6011dde7867e4d5eb59e63b 100644
--- a/paddle/fluid/platform/nccl_helper.h
+++ b/paddle/fluid/platform/nccl_helper.h
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#ifndef _WIN32
+#ifdef PADDLE_WITH_NCCL
 #pragma once
 
 #include
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index ff530d4987fc681984932d6f36095d6b13b5727c..2e8d5f976310a7714e8b461e8c471f85d366b4bf 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -68,7 +68,7 @@ limitations under the License. */
 #include "paddle/fluid/pybind/ir.h"
 #include "paddle/fluid/pybind/pybind_boost_headers.h"
 
-#ifndef _WIN32
+#ifdef PADDLE_WITH_NCCL
 #include "paddle/fluid/pybind/nccl_wrapper_py.h"
 #endif
 #include "paddle/fluid/framework/data_type.h"
@@ -78,7 +78,7 @@ limitations under the License. */
 #include "paddle/fluid/pybind/tensor_py.h"
 #include "paddle/fluid/string/to_string.h"
 #ifdef PADDLE_WITH_CUDA
-#ifndef _WIN32
+#ifdef PADDLE_WITH_NCCL
 #include "paddle/fluid/operators/nccl/nccl_gpu_common.h"
 #endif
 #include "paddle/fluid/platform/cuda_profiler.h"
@@ -926,7 +926,7 @@ All parameter, weight, gradient are variables in Paddle.
       .def("get_lod_tensor_array",
            [](Variable &self) { return self.GetMutable<framework::LoDTensorArray>(); },
            py::return_value_policy::reference)
-#if (defined(PADDLE_WITH_CUDA) && !defined(_WIN32))
+#if (defined(PADDLE_WITH_NCCL))
       .def("get_communicator",
           [](Variable &self) -> platform::Communicator * {
             return self.GetMutable<platform::Communicator>();
@@ -1174,7 +1174,7 @@ All parameter, weight, gradient are variables in Paddle.
 #endif
       });
   // clang-format on
-#if (defined(PADDLE_WITH_CUDA) && !defined(_WIN32))
+#if defined(PADDLE_WITH_NCCL)
   py::class_<platform::Communicator>(m, "Communicator").def(py::init<>());
 #endif
   py::class_<platform::CUDAPlace>(m, "CUDAPlace", R"DOC(